From f1bb6a059bc04cbb7833129c39dbcc706f3ed4bb Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 8 Oct 2020 15:43:20 -0700 Subject: [PATCH 01/17] REF/TYP: use OpsMixin for arithmetic methods --- pandas/core/arraylike.py | 70 ++++++++++++++++ pandas/core/indexes/base.py | 27 ++---- pandas/core/indexes/range.py | 52 ++++-------- pandas/core/ops/__init__.py | 26 +----- pandas/core/ops/methods.py | 93 ++++++++++++--------- pandas/core/series.py | 18 ++++ pandas/tests/arithmetic/test_timedelta64.py | 2 +- 7 files changed, 166 insertions(+), 122 deletions(-) diff --git a/pandas/core/arraylike.py b/pandas/core/arraylike.py index 185e9197e01fe..553649212aa5f 100644 --- a/pandas/core/arraylike.py +++ b/pandas/core/arraylike.py @@ -72,3 +72,73 @@ def __xor__(self, other): @unpack_zerodim_and_defer("__rxor__") def __rxor__(self, other): return self._logical_method(other, roperator.rxor) + + # ------------------------------------------------------------- + # Arithmetic Methods + + def _arith_method(self, other, op): + return NotImplemented + + @unpack_zerodim_and_defer("__add__") + def __add__(self, other): + return self._arith_method(other, operator.add) + + @unpack_zerodim_and_defer("__radd__") + def __radd__(self, other): + return self._arith_method(other, roperator.radd) + + @unpack_zerodim_and_defer("__sub__") + def __sub__(self, other): + return self._arith_method(other, operator.sub) + + @unpack_zerodim_and_defer("__rsub__") + def __rsub__(self, other): + return self._arith_method(other, roperator.rsub) + + @unpack_zerodim_and_defer("__mul__") + def __mul__(self, other): + return self._arith_method(other, operator.mul) + + @unpack_zerodim_and_defer("__rmul__") + def __rmul__(self, other): + return self._arith_method(other, roperator.rmul) + + @unpack_zerodim_and_defer("__truediv__") + def __truediv__(self, other): + return self._arith_method(other, operator.truediv) + + @unpack_zerodim_and_defer("__rtruediv__") + def __rtruediv__(self, other): + return 
self._arith_method(other, roperator.rtruediv) + + @unpack_zerodim_and_defer("__floordiv__") + def __floordiv__(self, other): + return self._arith_method(other, operator.floordiv) + + @unpack_zerodim_and_defer("__rfloordiv__") + def __rfloordiv__(self, other): + return self._arith_method(other, roperator.rfloordiv) + + @unpack_zerodim_and_defer("__mod__") + def __mod__(self, other): + return self._arith_method(other, operator.mod) + + @unpack_zerodim_and_defer("__rmod__") + def __rmod__(self, other): + return self._arith_method(other, roperator.rmod) + + @unpack_zerodim_and_defer("__divmod__") + def __divmod__(self, other): + return self._arith_method(other, divmod) + + @unpack_zerodim_and_defer("__rdivmod__") + def __rdivmod__(self, other): + return self._arith_method(other, roperator.rdivmod) + + @unpack_zerodim_and_defer("__pow__") + def __pow__(self, other): + return self._arith_method(other, operator.pow) + + @unpack_zerodim_and_defer("__rpow__") + def __rpow__(self, other): + return self._arith_method(other, roperator.rpow) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 539f5515a2f8b..567115edb02eb 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -5404,29 +5404,17 @@ def _cmp_method(self, other, op): return result return ops.invalid_comparison(self, other, op) - @classmethod - def _add_numeric_methods_binary(cls): + def _arith_method(self, other, op): """ - Add in numeric methods. + Wrapper used to dispatch arithmetic operations. 
""" - cls.__add__ = _make_arithmetic_op(operator.add, cls) - cls.__radd__ = _make_arithmetic_op(ops.radd, cls) - cls.__sub__ = _make_arithmetic_op(operator.sub, cls) - cls.__rsub__ = _make_arithmetic_op(ops.rsub, cls) - cls.__rpow__ = _make_arithmetic_op(ops.rpow, cls) - cls.__pow__ = _make_arithmetic_op(operator.pow, cls) - cls.__truediv__ = _make_arithmetic_op(operator.truediv, cls) - cls.__rtruediv__ = _make_arithmetic_op(ops.rtruediv, cls) + from pandas import Series - cls.__mod__ = _make_arithmetic_op(operator.mod, cls) - cls.__rmod__ = _make_arithmetic_op(ops.rmod, cls) - cls.__floordiv__ = _make_arithmetic_op(operator.floordiv, cls) - cls.__rfloordiv__ = _make_arithmetic_op(ops.rfloordiv, cls) - cls.__divmod__ = _make_arithmetic_op(divmod, cls) - cls.__rdivmod__ = _make_arithmetic_op(ops.rdivmod, cls) - cls.__mul__ = _make_arithmetic_op(operator.mul, cls) - cls.__rmul__ = _make_arithmetic_op(ops.rmul, cls) + result = op(Series(self), other) + if isinstance(result, tuple): + return (Index(result[0]), Index(result[1])) + return Index(result) @classmethod def _add_numeric_methods_unary(cls): @@ -5451,7 +5439,6 @@ def _evaluate_numeric_unary(self): @classmethod def _add_numeric_methods(cls): cls._add_numeric_methods_unary() - cls._add_numeric_methods_binary() def any(self, *args, **kwargs): """ diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 4a6bb11bda400..14098ddadb8e2 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -811,16 +811,13 @@ def any(self, *args, **kwargs) -> bool: # -------------------------------------------------------------------- - def _arith_method(self, other, op, step=False): + def _arith_method(self, other, op): """ Parameters ---------- other : Any op : callable that accepts 2 params perform the binary op - step : callable, optional, default to False - op to apply to the step parm if not None - if False, use the existing step """ if isinstance(other, ABCTimedeltaIndex): @@ -834,6 
+831,21 @@ def _arith_method(self, other, op, step=False): # Must be an np.ndarray; GH#22390 return op(self._int64index, other) + if op in [ + operator.pow, + ops.rpow, + operator.mod, + ops.rmod, + ops.rfloordiv, + divmod, + ops.rdivmod, + ]: + return op(self._int64index, other) + + step = False + if op in [operator.mul, ops.rmul, operator.truediv, ops.rtruediv]: + step = op + other = extract_array(other, extract_numpy=True) attrs = self._get_attributes_dict() @@ -871,35 +883,3 @@ def _arith_method(self, other, op, step=False): # Defer to Int64Index implementation return op(self._int64index, other) # TODO: Do attrs get handled reliably? - - @unpack_zerodim_and_defer("__add__") - def __add__(self, other): - return self._arith_method(other, operator.add) - - @unpack_zerodim_and_defer("__radd__") - def __radd__(self, other): - return self._arith_method(other, ops.radd) - - @unpack_zerodim_and_defer("__sub__") - def __sub__(self, other): - return self._arith_method(other, operator.sub) - - @unpack_zerodim_and_defer("__rsub__") - def __rsub__(self, other): - return self._arith_method(other, ops.rsub) - - @unpack_zerodim_and_defer("__mul__") - def __mul__(self, other): - return self._arith_method(other, operator.mul, step=operator.mul) - - @unpack_zerodim_and_defer("__rmul__") - def __rmul__(self, other): - return self._arith_method(other, ops.rmul, step=ops.rmul) - - @unpack_zerodim_and_defer("__truediv__") - def __truediv__(self, other): - return self._arith_method(other, operator.truediv, step=operator.truediv) - - @unpack_zerodim_and_defer("__rtruediv__") - def __rtruediv__(self, other): - return self._arith_method(other, ops.rtruediv, step=ops.rtruediv) diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py index ae21f13ea3f49..0de842e8575af 100644 --- a/pandas/core/ops/__init__.py +++ b/pandas/core/ops/__init__.py @@ -19,7 +19,6 @@ from pandas.core.dtypes.missing import isna from pandas.core import algorithms -from pandas.core.construction import 
extract_array from pandas.core.ops.array_ops import ( # noqa:F401 arithmetic_op, comp_method_OBJECT_ARRAY, @@ -27,7 +26,7 @@ get_array_op, logical_op, ) -from pandas.core.ops.common import unpack_zerodim_and_defer +from pandas.core.ops.common import unpack_zerodim_and_defer # noqa:F401 from pandas.core.ops.docstrings import ( _arith_doc_FRAME, _flex_comp_doc_FRAME, @@ -300,29 +299,6 @@ def align_method_SERIES(left: "Series", right, align_asobject: bool = False): return left, right -def arith_method_SERIES(cls, op, special): - """ - Wrapper function for Series arithmetic operations, to avoid - code duplication. - """ - assert special # non-special uses flex_method_SERIES - op_name = _get_op_name(op, special) - - @unpack_zerodim_and_defer(op_name) - def wrapper(left, right): - res_name = get_op_result_name(left, right) - left, right = align_method_SERIES(left, right) - - lvalues = extract_array(left, extract_numpy=True) - rvalues = extract_array(right, extract_numpy=True) - result = arithmetic_op(lvalues, rvalues, op) - - return left._construct_result(result, name=res_name) - - wrapper.__name__ = op_name - return wrapper - - def flex_method_SERIES(cls, op, special): assert not special # "special" also means "not flex" name = _get_op_name(op, special) diff --git a/pandas/core/ops/methods.py b/pandas/core/ops/methods.py index 70fd814423c7f..05da378f8964d 100644 --- a/pandas/core/ops/methods.py +++ b/pandas/core/ops/methods.py @@ -45,7 +45,6 @@ def _get_method_wrappers(cls): # are no longer in __init__ from pandas.core.ops import ( arith_method_FRAME, - arith_method_SERIES, comp_method_FRAME, flex_comp_method_FRAME, flex_method_SERIES, @@ -55,7 +54,7 @@ def _get_method_wrappers(cls): # Just Series arith_flex = flex_method_SERIES comp_flex = flex_method_SERIES - arith_special = arith_method_SERIES + arith_special = None comp_special = None bool_special = None elif issubclass(cls, ABCDataFrame): @@ -105,20 +104,19 @@ def f(self, other): f.__name__ = f"__i{name}__" return 
f - new_methods.update( - dict( - __iadd__=_wrap_inplace_method(new_methods["__add__"]), - __isub__=_wrap_inplace_method(new_methods["__sub__"]), - __imul__=_wrap_inplace_method(new_methods["__mul__"]), - __itruediv__=_wrap_inplace_method(new_methods["__truediv__"]), - __ifloordiv__=_wrap_inplace_method(new_methods["__floordiv__"]), - __imod__=_wrap_inplace_method(new_methods["__mod__"]), - __ipow__=_wrap_inplace_method(new_methods["__pow__"]), - ) - ) - if bool_method is None: - # Series gets bool_method via OpsMixin + # Series gets bool_method, arith_method via OpsMixin + new_methods.update( + dict( + __iadd__=_wrap_inplace_method(cls.__add__), + __isub__=_wrap_inplace_method(cls.__sub__), + __imul__=_wrap_inplace_method(cls.__mul__), + __itruediv__=_wrap_inplace_method(cls.__truediv__), + __ifloordiv__=_wrap_inplace_method(cls.__floordiv__), + __imod__=_wrap_inplace_method(cls.__mod__), + __ipow__=_wrap_inplace_method(cls.__pow__), + ) + ) new_methods.update( dict( __iand__=_wrap_inplace_method(cls.__and__), @@ -127,6 +125,17 @@ def f(self, other): ) ) else: + new_methods.update( + dict( + __iadd__=_wrap_inplace_method(new_methods["__add__"]), + __isub__=_wrap_inplace_method(new_methods["__sub__"]), + __imul__=_wrap_inplace_method(new_methods["__mul__"]), + __itruediv__=_wrap_inplace_method(new_methods["__truediv__"]), + __ifloordiv__=_wrap_inplace_method(new_methods["__floordiv__"]), + __imod__=_wrap_inplace_method(new_methods["__mod__"]), + __ipow__=_wrap_inplace_method(new_methods["__pow__"]), + ) + ) new_methods.update( dict( __iand__=_wrap_inplace_method(new_methods["__and__"]), @@ -172,30 +181,34 @@ def _create_methods(cls, arith_method, comp_method, bool_method, special): have_divmod = issubclass(cls, ABCSeries) # divmod is available for Series - new_methods = dict( - add=arith_method(cls, operator.add, special), - radd=arith_method(cls, radd, special), - sub=arith_method(cls, operator.sub, special), - mul=arith_method(cls, operator.mul, special), - 
truediv=arith_method(cls, operator.truediv, special), - floordiv=arith_method(cls, operator.floordiv, special), - mod=arith_method(cls, operator.mod, special), - pow=arith_method(cls, operator.pow, special), - # not entirely sure why this is necessary, but previously was included - # so it's here to maintain compatibility - rmul=arith_method(cls, rmul, special), - rsub=arith_method(cls, rsub, special), - rtruediv=arith_method(cls, rtruediv, special), - rfloordiv=arith_method(cls, rfloordiv, special), - rpow=arith_method(cls, rpow, special), - rmod=arith_method(cls, rmod, special), - ) - new_methods["div"] = new_methods["truediv"] - new_methods["rdiv"] = new_methods["rtruediv"] - if have_divmod: - # divmod doesn't have an op that is supported by numexpr - new_methods["divmod"] = arith_method(cls, divmod, special) - new_methods["rdivmod"] = arith_method(cls, rdivmod, special) + new_methods = {} + if arith_method is not None: + new_methods.update( + dict( + add=arith_method(cls, operator.add, special), + radd=arith_method(cls, radd, special), + sub=arith_method(cls, operator.sub, special), + mul=arith_method(cls, operator.mul, special), + truediv=arith_method(cls, operator.truediv, special), + floordiv=arith_method(cls, operator.floordiv, special), + mod=arith_method(cls, operator.mod, special), + pow=arith_method(cls, operator.pow, special), + # not entirely sure why this is necessary, but previously was included + # so it's here to maintain compatibility + rmul=arith_method(cls, rmul, special), + rsub=arith_method(cls, rsub, special), + rtruediv=arith_method(cls, rtruediv, special), + rfloordiv=arith_method(cls, rfloordiv, special), + rpow=arith_method(cls, rpow, special), + rmod=arith_method(cls, rmod, special), + ) + ) + new_methods["div"] = new_methods["truediv"] + new_methods["rdiv"] = new_methods["rtruediv"] + if have_divmod: + # divmod doesn't have an op that is supported by numexpr + new_methods["divmod"] = arith_method(cls, divmod, special) + 
new_methods["rdivmod"] = arith_method(cls, rdivmod, special) if comp_method is not None: # Series already has this pinned @@ -210,7 +223,7 @@ def _create_methods(cls, arith_method, comp_method, bool_method, special): ) ) - if bool_method: + if bool_method is not None: new_methods.update( dict( and_=bool_method(cls, operator.and_, special), diff --git a/pandas/core/series.py b/pandas/core/series.py index 9bd41ca0e76db..fe73a85499c08 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -4990,6 +4990,24 @@ def _logical_method(self, other, op): res_values = ops.logical_op(lvalues, rvalues, op) return self._construct_result(res_values, name=res_name) + def _arith_method(self, other, op): + res_name = ops.get_op_result_name(self, other) + self, other = ops.align_method_SERIES(self, other) + + lvalues = extract_array(self, extract_numpy=True) + rvalues = extract_array(other, extract_numpy=True) + result = ops.arithmetic_op(lvalues, rvalues, op) + + return self._construct_result(result, name=res_name) + + def __div__(self, other): + # Alias for backward compat + return self.__truediv__(other) + + def __rdiv__(self, other): + # Alias for backward compat + return self.__rtruediv__(other) + Series._add_numeric_operations() diff --git a/pandas/tests/arithmetic/test_timedelta64.py b/pandas/tests/arithmetic/test_timedelta64.py index b3dfb5d015ab4..3e979aed0551f 100644 --- a/pandas/tests/arithmetic/test_timedelta64.py +++ b/pandas/tests/arithmetic/test_timedelta64.py @@ -2159,7 +2159,7 @@ def test_float_series_rdiv_td64arr(self, box_with_array, names): tdi = tm.box_expected(tdi, box) expected = tm.box_expected(expected, xbox) - result = ser.__rdiv__(tdi) + result = ser.__rtruediv__(tdi) if box is pd.DataFrame: # TODO: Should we skip this case sooner or test something else? 
assert result is NotImplemented From 45c8618e76b146be84c59a1a362988cb2fae1e1d Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 3 Oct 2020 16:35:51 -0700 Subject: [PATCH 02/17] REF: separate arith_method_FRAME from flex_arith_method_FRAME --- pandas/core/ops/__init__.py | 38 +++++++++++++++++++-------- pandas/core/ops/methods.py | 3 ++- pandas/tests/frame/test_arithmetic.py | 7 +++++ pandas/tests/series/test_operators.py | 13 --------- 4 files changed, 36 insertions(+), 25 deletions(-) diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py index 0de842e8575af..b656aef64cde9 100644 --- a/pandas/core/ops/__init__.py +++ b/pandas/core/ops/__init__.py @@ -533,18 +533,13 @@ def _maybe_align_series_as_frame(frame: "DataFrame", series: "Series", axis: int return type(frame)(rvalues, index=frame.index, columns=frame.columns) -def arith_method_FRAME(cls: Type["DataFrame"], op, special: bool): - # This is the only function where `special` can be either True or False +def flex_arith_method_FRAME(cls: Type["DataFrame"], op, special: bool): + assert not special op_name = _get_op_name(op, special) default_axis = None if special else "columns" na_op = get_array_op(op) - - if op_name in _op_descriptions: - # i.e. include "add" but not "__add__" - doc = _make_flex_doc(op_name, "dataframe") - else: - doc = _arith_doc_FRAME % op_name + doc = _make_flex_doc(op_name, "dataframe") @Appender(doc) def f(self, other, axis=default_axis, level=None, fill_value=None): @@ -561,8 +556,6 @@ def f(self, other, axis=default_axis, level=None, fill_value=None): axis = self._get_axis_number(axis) if axis is not None else 1 - # TODO: why are we passing flex=True instead of flex=not special? 
- # 15 tests fail if we pass flex=not special instead self, other = align_method_FRAME(self, other, axis, flex=True, level=level) if isinstance(other, ABCDataFrame): @@ -585,6 +578,29 @@ def f(self, other, axis=default_axis, level=None, fill_value=None): return f +def arith_method_FRAME(cls: Type["DataFrame"], op, special: bool): + assert special + op_name = _get_op_name(op, special) + doc = _arith_doc_FRAME % op_name + + @Appender(doc) + def f(self, other): + + if _should_reindex_frame_op(self, other, op, 1, 1, None, None): + return _frame_arith_method_with_reindex(self, other, op) + + axis = 1 # only relevant for Series other case + + self, other = align_method_FRAME(self, other, axis, flex=True, level=None) + + new_data = dispatch_to_series(self, other, op, axis=axis) + return self._construct_result(new_data) + + f.__name__ = op_name + + return f + + def flex_comp_method_FRAME(cls: Type["DataFrame"], op, special: bool): assert not special # "special" also means "not flex" op_name = _get_op_name(op, special) @@ -616,7 +632,7 @@ def comp_method_FRAME(cls: Type["DataFrame"], op, special: bool): def f(self, other): axis = 1 # only relevant for Series other case - self, other = align_method_FRAME(self, other, axis, level=None, flex=False) + self, other = align_method_FRAME(self, other, axis, flex=False, level=None) # See GH#4537 for discussion of scalar op behavior new_data = dispatch_to_series(self, other, op, axis=axis) diff --git a/pandas/core/ops/methods.py b/pandas/core/ops/methods.py index 05da378f8964d..86981f007a678 100644 --- a/pandas/core/ops/methods.py +++ b/pandas/core/ops/methods.py @@ -46,6 +46,7 @@ def _get_method_wrappers(cls): from pandas.core.ops import ( arith_method_FRAME, comp_method_FRAME, + flex_arith_method_FRAME, flex_comp_method_FRAME, flex_method_SERIES, ) @@ -58,7 +59,7 @@ def _get_method_wrappers(cls): comp_special = None bool_special = None elif issubclass(cls, ABCDataFrame): - arith_flex = arith_method_FRAME + arith_flex = 
flex_arith_method_FRAME comp_flex = flex_comp_method_FRAME arith_special = arith_method_FRAME comp_special = comp_method_FRAME diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index d9ef19e174700..94f813fd08128 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -1484,6 +1484,13 @@ def test_no_warning(self, all_arithmetic_operators): df = pd.DataFrame({"A": [0.0, 0.0], "B": [0.0, None]}) b = df["B"] with tm.assert_produces_warning(None): + getattr(df, all_arithmetic_operators)(b) + + def test_dunder_methods_binary(self, all_arithmetic_operators): + # GH#36843 frame.__foo__ should only accept one argument + df = pd.DataFrame({"A": [0.0, 0.0], "B": [0.0, None]}) + b = df["B"] + with pytest.raises(TypeError, match="takes 2 positional arguments"): getattr(df, all_arithmetic_operators)(b, 0) diff --git a/pandas/tests/series/test_operators.py b/pandas/tests/series/test_operators.py index a796023c75b78..df6b8187964e8 100644 --- a/pandas/tests/series/test_operators.py +++ b/pandas/tests/series/test_operators.py @@ -276,25 +276,12 @@ def test_scalar_na_logical_ops_corners_aligns(self): expected = DataFrame(False, index=range(9), columns=["A"] + list(range(9))) - result = d.__and__(s, axis="columns") - tm.assert_frame_equal(result, expected) - - result = d.__and__(s, axis=1) - tm.assert_frame_equal(result, expected) - result = s & d tm.assert_frame_equal(result, expected) result = d & s tm.assert_frame_equal(result, expected) - expected = (s & s).to_frame("A") - result = d.__and__(s, axis="index") - tm.assert_frame_equal(result, expected) - - result = d.__and__(s, axis=0) - tm.assert_frame_equal(result, expected) - @pytest.mark.parametrize("op", [operator.and_, operator.or_, operator.xor]) def test_logical_ops_with_index(self, op): # GH#22092, GH#19792 From f553646d775a6eb3825b968a58d6108706626eea Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 7 Oct 2020 10:39:28 -0700 Subject: [PATCH 
03/17] whatsnew --- doc/source/whatsnew/v1.2.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 57e3c9dd66afb..22cbe521c5025 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -361,6 +361,7 @@ Numeric - Bug in :meth:`DataFrame.__rmatmul__` error handling reporting transposed shapes (:issue:`21581`) - Bug in :class:`Series` flex arithmetic methods where the result when operating with a ``list``, ``tuple`` or ``np.ndarray`` would have an incorrect name (:issue:`36760`) - Bug in :class:`IntegerArray` multiplication with ``timedelta`` and ``np.timedelta64`` objects (:issue:`36870`) +- Bug in :class:`DataFrame` arithmetic ops incorrectly accepting keyword arguments (:issue:`36843`) Conversion ^^^^^^^^^^ From 3a855ba7d976a23dfd023bc407145ab79365eb83 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 8 Oct 2020 04:14:04 -0700 Subject: [PATCH 04/17] REF/TYP: use OpsMixin for logical methods (#36964) --- pandas/core/ops/methods.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/ops/methods.py b/pandas/core/ops/methods.py index 86981f007a678..4fe172d3a2baa 100644 --- a/pandas/core/ops/methods.py +++ b/pandas/core/ops/methods.py @@ -125,6 +125,7 @@ def f(self, other): __ixor__=_wrap_inplace_method(cls.__xor__), ) ) + else: new_methods.update( dict( From 6ed3317b940afbf8c3433bcf5001a6a1ead8063b Mon Sep 17 00:00:00 2001 From: krajatcl <53620269+krajatcl@users.noreply.github.com> Date: Sat, 10 Oct 2020 21:32:07 +0530 Subject: [PATCH 05/17] TST: insert 'match' to bare pytest raises in pandas/tests/tools/test_to_datetime.py (#37027) --- pandas/tests/tools/test_to_datetime.py | 83 +++++++++++++++++--------- 1 file changed, 55 insertions(+), 28 deletions(-) diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index 819474e1f32e7..ef7c4be20e22e 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ 
b/pandas/tests/tools/test_to_datetime.py @@ -349,7 +349,9 @@ def test_to_datetime_parse_tzname_or_tzoffset_different_tz_to_utc(self): def test_to_datetime_parse_timezone_malformed(self, offset): fmt = "%Y-%m-%d %H:%M:%S %z" date = "2010-01-01 12:00:00 " + offset - with pytest.raises(ValueError): + + msg = "does not match format|unconverted data remains" + with pytest.raises(ValueError, match=msg): pd.to_datetime([date], format=fmt) def test_to_datetime_parse_timezone_keeps_name(self): @@ -784,17 +786,19 @@ def test_to_datetime_tz_psycopg2(self, cache): @pytest.mark.parametrize("cache", [True, False]) def test_datetime_bool(self, cache): # GH13176 - with pytest.raises(TypeError): + msg = r"dtype bool cannot be converted to datetime64\[ns\]" + with pytest.raises(TypeError, match=msg): to_datetime(False) assert to_datetime(False, errors="coerce", cache=cache) is NaT assert to_datetime(False, errors="ignore", cache=cache) is False - with pytest.raises(TypeError): + with pytest.raises(TypeError, match=msg): to_datetime(True) assert to_datetime(True, errors="coerce", cache=cache) is NaT assert to_datetime(True, errors="ignore", cache=cache) is True - with pytest.raises(TypeError): + msg = f"{type(cache)} is not convertible to datetime" + with pytest.raises(TypeError, match=msg): to_datetime([False, datetime.today()], cache=cache) - with pytest.raises(TypeError): + with pytest.raises(TypeError, match=msg): to_datetime(["20130101", True], cache=cache) tm.assert_index_equal( to_datetime([0, False, NaT, 0.0], errors="coerce", cache=cache), @@ -805,10 +809,10 @@ def test_datetime_bool(self, cache): def test_datetime_invalid_datatype(self): # GH13176 - - with pytest.raises(TypeError): + msg = "is not convertible to datetime" + with pytest.raises(TypeError, match=msg): pd.to_datetime(bool) - with pytest.raises(TypeError): + with pytest.raises(TypeError, match=msg): pd.to_datetime(pd.to_datetime) @pytest.mark.parametrize("value", ["a", "00:01:99"]) @@ -826,7 +830,12 @@ def 
test_datetime_invalid_scalar(self, value, format, infer): ) assert res is pd.NaT - with pytest.raises(ValueError): + msg = ( + "is a bad directive in format|" + "second must be in 0..59: 00:01:99|" + "Given date string not likely a datetime" + ) + with pytest.raises(ValueError, match=msg): pd.to_datetime( value, errors="raise", format=format, infer_datetime_format=infer ) @@ -847,12 +856,14 @@ def test_datetime_outofbounds_scalar(self, value, format, infer): assert res is pd.NaT if format is not None: - with pytest.raises(ValueError): + msg = "is a bad directive in format|Out of bounds nanosecond timestamp" + with pytest.raises(ValueError, match=msg): pd.to_datetime( value, errors="raise", format=format, infer_datetime_format=infer ) else: - with pytest.raises(OutOfBoundsDatetime): + msg = "Out of bounds nanosecond timestamp" + with pytest.raises(OutOfBoundsDatetime, match=msg): pd.to_datetime( value, errors="raise", format=format, infer_datetime_format=infer ) @@ -872,7 +883,12 @@ def test_datetime_invalid_index(self, values, format, infer): ) tm.assert_index_equal(res, pd.DatetimeIndex([pd.NaT] * len(values))) - with pytest.raises(ValueError): + msg = ( + "is a bad directive in format|" + "Given date string not likely a datetime|" + "second must be in 0..59: 00:01:99" + ) + with pytest.raises(ValueError, match=msg): pd.to_datetime( values, errors="raise", format=format, infer_datetime_format=infer ) @@ -1070,7 +1086,8 @@ def test_timestamp_utc_true(self, ts, expected): @pytest.mark.parametrize("dt_str", ["00010101", "13000101", "30000101", "99990101"]) def test_to_datetime_with_format_out_of_bounds(self, dt_str): # GH 9107 - with pytest.raises(OutOfBoundsDatetime): + msg = "Out of bounds nanosecond timestamp" + with pytest.raises(OutOfBoundsDatetime, match=msg): pd.to_datetime(dt_str, format="%Y%m%d") def test_to_datetime_utc(self): @@ -1096,8 +1113,8 @@ class TestToDatetimeUnit: def test_unit(self, cache): # GH 11758 # test proper behavior with errors - - with 
pytest.raises(ValueError): + msg = "cannot specify both format and unit" + with pytest.raises(ValueError, match=msg): to_datetime([1], unit="D", format="%Y%m%d", cache=cache) values = [11111111, 1, 1.0, iNaT, NaT, np.nan, "NaT", ""] @@ -1123,7 +1140,8 @@ def test_unit(self, cache): ) tm.assert_index_equal(result, expected) - with pytest.raises(tslib.OutOfBoundsDatetime): + msg = "cannot convert input 11111111 with the unit 'D'" + with pytest.raises(tslib.OutOfBoundsDatetime, match=msg): to_datetime(values, unit="D", errors="raise", cache=cache) values = [1420043460000, iNaT, NaT, np.nan, "NaT"] @@ -1136,7 +1154,8 @@ def test_unit(self, cache): expected = DatetimeIndex(["NaT", "NaT", "NaT", "NaT", "NaT"]) tm.assert_index_equal(result, expected) - with pytest.raises(tslib.OutOfBoundsDatetime): + msg = "cannot convert input 1420043460000 with the unit 's'" + with pytest.raises(tslib.OutOfBoundsDatetime, match=msg): to_datetime(values, errors="raise", unit="s", cache=cache) # if we have a string, then we raise a ValueError @@ -1204,7 +1223,8 @@ def test_unit_mixed(self, cache): result = pd.to_datetime(arr, errors="coerce", cache=cache) tm.assert_index_equal(result, expected) - with pytest.raises(ValueError): + msg = "mixed datetimes and integers in passed array" + with pytest.raises(ValueError, match=msg): pd.to_datetime(arr, errors="raise", cache=cache) expected = DatetimeIndex(["NaT", "NaT", "2013-01-01"]) @@ -1212,7 +1232,7 @@ def test_unit_mixed(self, cache): result = pd.to_datetime(arr, errors="coerce", cache=cache) tm.assert_index_equal(result, expected) - with pytest.raises(ValueError): + with pytest.raises(ValueError, match=msg): pd.to_datetime(arr, errors="raise", cache=cache) @pytest.mark.parametrize("cache", [True, False]) @@ -1392,7 +1412,8 @@ def test_dataframe_dtypes(self, cache): # float df = DataFrame({"year": [2000, 2001], "month": [1.5, 1], "day": [1, 1]}) - with pytest.raises(ValueError): + msg = "cannot assemble the datetimes: unconverted data 
remains: 1" + with pytest.raises(ValueError, match=msg): to_datetime(df, cache=cache) def test_dataframe_utc_true(self): @@ -1500,7 +1521,8 @@ def test_to_datetime_barely_out_of_bounds(self): # in an in-bounds datetime arr = np.array(["2262-04-11 23:47:16.854775808"], dtype=object) - with pytest.raises(OutOfBoundsDatetime): + msg = "Out of bounds nanosecond timestamp" + with pytest.raises(OutOfBoundsDatetime, match=msg): to_datetime(arr) @pytest.mark.parametrize("cache", [True, False]) @@ -1638,7 +1660,8 @@ def test_to_datetime_overflow(self): # gh-17637 # we are overflowing Timedelta range here - with pytest.raises(OverflowError): + msg = "Python int too large to convert to C long" + with pytest.raises(OverflowError, match=msg): date_range(start="1/1/1700", freq="B", periods=100000) @pytest.mark.parametrize("cache", [True, False]) @@ -2265,23 +2288,26 @@ def test_julian_round_trip(self): assert result.to_julian_date() == 2456658 # out-of-bounds - with pytest.raises(ValueError): + msg = "1 is Out of Bounds for origin='julian'" + with pytest.raises(ValueError, match=msg): pd.to_datetime(1, origin="julian", unit="D") def test_invalid_unit(self, units, julian_dates): # checking for invalid combination of origin='julian' and unit != D if units != "D": - with pytest.raises(ValueError): + msg = "unit must be 'D' for origin='julian'" + with pytest.raises(ValueError, match=msg): pd.to_datetime(julian_dates, unit=units, origin="julian") def test_invalid_origin(self): # need to have a numeric specified - with pytest.raises(ValueError): + msg = "it must be numeric with a unit specified" + with pytest.raises(ValueError, match=msg): pd.to_datetime("2005-01-01", origin="1960-01-01") - with pytest.raises(ValueError): + with pytest.raises(ValueError, match=msg): pd.to_datetime("2005-01-01", origin="1960-01-01", unit="D") def test_epoch(self, units, epochs, epoch_1960, units_from_epochs): @@ -2304,12 +2330,13 @@ def test_epoch(self, units, epochs, epoch_1960, units_from_epochs): ) 
def test_invalid_origins(self, origin, exc, units, units_from_epochs): - with pytest.raises(exc): + msg = f"origin {origin} (is Out of Bounds|cannot be converted to a Timestamp)" + with pytest.raises(exc, match=msg): pd.to_datetime(units_from_epochs, unit=units, origin=origin) def test_invalid_origins_tzinfo(self): # GH16842 - with pytest.raises(ValueError): + with pytest.raises(ValueError, match="must be tz-naive"): pd.to_datetime(1, unit="D", origin=datetime(2000, 1, 1, tzinfo=pytz.utc)) @pytest.mark.parametrize("format", [None, "%Y-%m-%d %H:%M:%S"]) From c7651cdfd6342856b889bea97783085e174b7221 Mon Sep 17 00:00:00 2001 From: krajatcl <53620269+krajatcl@users.noreply.github.com> Date: Sat, 10 Oct 2020 21:32:56 +0530 Subject: [PATCH 06/17] TST: insert 'match' to bare pytest raises in pandas/tests/test_flags.py (#37026) Co-authored-by: Rajat Bishnoi --- pandas/tests/test_flags.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/test_flags.py b/pandas/tests/test_flags.py index f6e3ae4980afb..9294b3fc3319b 100644 --- a/pandas/tests/test_flags.py +++ b/pandas/tests/test_flags.py @@ -41,8 +41,8 @@ def test_getitem(self): flags["allows_duplicate_labels"] = False assert flags["allows_duplicate_labels"] is False - with pytest.raises(KeyError): + with pytest.raises(KeyError, match="a"): flags["a"] - with pytest.raises(ValueError): + with pytest.raises(ValueError, match="a"): flags["a"] = 10 From 919cbbc03e81cfe5a63aa8ce802bb47f0279ff40 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 10 Oct 2020 09:23:27 -0700 Subject: [PATCH 07/17] TYP: generic, series, frame (#36989) --- pandas/core/frame.py | 4 ++-- pandas/core/generic.py | 18 +++++++++++++----- pandas/core/series.py | 11 ++++++++--- pandas/tests/series/methods/test_count.py | 8 ++++++++ setup.cfg | 3 --- 5 files changed, 31 insertions(+), 13 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index ebe5185ce4488..fd7d0190dbbcb 100644 --- a/pandas/core/frame.py 
+++ b/pandas/core/frame.py @@ -9311,8 +9311,8 @@ def _AXIS_NAMES(self) -> Dict[int, str]: ops.add_special_arithmetic_methods(DataFrame) -def _from_nested_dict(data): - new_data = collections.defaultdict(dict) +def _from_nested_dict(data) -> collections.defaultdict: + new_data: collections.defaultdict = collections.defaultdict(dict) for index, s in data.items(): for col, v in s.items(): new_data[col][index] = v diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 338b45b5503dc..8cc6ca6630099 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -112,6 +112,7 @@ if TYPE_CHECKING: from pandas._libs.tslibs import BaseOffset + from pandas.core.frame import DataFrame from pandas.core.resample import Resampler from pandas.core.series import Series from pandas.core.window.indexers import BaseIndexer @@ -130,7 +131,7 @@ ) -def _single_replace(self, to_replace, method, inplace, limit): +def _single_replace(self: "Series", to_replace, method, inplace, limit): """ Replaces values in a Series using the fill method specified when no replacement value is given in the replace method @@ -541,6 +542,7 @@ def _get_cleaned_column_resolvers(self) -> Dict[str, ABCSeries]: from pandas.core.computation.parsing import clean_column_name if isinstance(self, ABCSeries): + self = cast("Series", self) return {clean_column_name(self.name): self} return { @@ -1995,9 +1997,10 @@ def _repr_data_resource_(self): """ if config.get_option("display.html.table_schema"): data = self.head(config.get_option("display.max_rows")) - payload = json.loads( - data.to_json(orient="table"), object_pairs_hook=collections.OrderedDict - ) + + as_json = data.to_json(orient="table") + as_json = cast(str, as_json) + payload = json.loads(as_json, object_pairs_hook=collections.OrderedDict) return payload # ---------------------------------------------------------------------- @@ -3113,6 +3116,7 @@ def to_latex( if multirow is None: multirow = config.get_option("display.latex.multirow") + self = 
cast("DataFrame", self) formatter = DataFrameFormatter( self, columns=columns, @@ -3830,7 +3834,7 @@ def _check_setitem_copy(self, stacklevel=4, t="setting", force=False): # the copy weakref if self._is_copy is not None and not isinstance(self._is_copy, str): r = self._is_copy() - if not gc.get_referents(r) or r.shape == self.shape: + if not gc.get_referents(r) or (r is not None and r.shape == self.shape): self._is_copy = None return @@ -6684,6 +6688,7 @@ def replace( return self.apply( _single_replace, args=(to_replace, method, inplace, limit) ) + self = cast("Series", self) return _single_replace(self, to_replace, method, inplace, limit) if not is_dict_like(to_replace): @@ -7265,10 +7270,13 @@ def asof(self, where, subset=None): nulls = self.isna() if is_series else self[subset].isna().any(1) if nulls.all(): if is_series: + self = cast("Series", self) return self._constructor(np.nan, index=where, name=self.name) elif is_list: + self = cast("DataFrame", self) return self._constructor(np.nan, index=where, columns=self.columns) else: + self = cast("DataFrame", self) return self._constructor_sliced( np.nan, index=self.columns, name=where[0] ) diff --git a/pandas/core/series.py b/pandas/core/series.py index bec4445ecac83..8029b7e5bd9f7 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1789,12 +1789,17 @@ def count(self, level=None): """ if level is None: return notna(self.array).sum() + elif not isinstance(self.index, MultiIndex): + raise ValueError("Series.count level is only valid with a MultiIndex") + + index = self.index + assert isinstance(index, MultiIndex) # for mypy if isinstance(level, str): - level = self.index._get_level_number(level) + level = index._get_level_number(level) - lev = self.index.levels[level] - level_codes = np.array(self.index.codes[level], subok=False, copy=True) + lev = index.levels[level] + level_codes = np.array(index.codes[level], subok=False, copy=True) mask = level_codes == -1 if mask.any(): diff --git 
a/pandas/tests/series/methods/test_count.py b/pandas/tests/series/methods/test_count.py index 1ca48eeb7c441..19290b6a5c23f 100644 --- a/pandas/tests/series/methods/test_count.py +++ b/pandas/tests/series/methods/test_count.py @@ -1,4 +1,5 @@ import numpy as np +import pytest import pandas as pd from pandas import Categorical, MultiIndex, Series @@ -6,6 +7,13 @@ class TestSeriesCount: + def test_count_level_without_multiindex(self): + ser = pd.Series(range(3)) + + msg = "Series.count level is only valid with a MultiIndex" + with pytest.raises(ValueError, match=msg): + ser.count(level=1) + def test_count(self, datetime_series): assert datetime_series.count() == len(datetime_series) diff --git a/setup.cfg b/setup.cfg index f7d5d39c88968..4de8009f968f7 100644 --- a/setup.cfg +++ b/setup.cfg @@ -205,9 +205,6 @@ check_untyped_defs=False [mypy-pandas.core.reshape.merge] check_untyped_defs=False -[mypy-pandas.core.series] -check_untyped_defs=False - [mypy-pandas.core.window.common] check_untyped_defs=False From 81ac02aca976b5cfadc7bd621abd9d15c5666103 Mon Sep 17 00:00:00 2001 From: Fangchen Li Date: Sat, 10 Oct 2020 11:24:06 -0500 Subject: [PATCH 08/17] CI: pin pymysql #36465 (#36847) * CI: unpin sql to verify the bugs #36465 * CI: pin sqlalchemy * CI: pin pymsql * CI: pin sqlalchemy * CI: pin pymysql * CI: pin pymysql * CI: add note --- ci/deps/travis-37-cov.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/deps/travis-37-cov.yaml b/ci/deps/travis-37-cov.yaml index 7d5104a58ce83..c89b42ef06a2e 100644 --- a/ci/deps/travis-37-cov.yaml +++ b/ci/deps/travis-37-cov.yaml @@ -32,7 +32,7 @@ dependencies: - google-cloud-bigquery>=1.27.2 # GH 36436 - psycopg2 - pyarrow>=0.15.0 - - pymysql=0.7.11 + - pymysql<0.10.0 # temporary pin, GH 36465 - pytables - python-snappy - python-dateutil @@ -40,7 +40,7 @@ dependencies: - s3fs>=0.4.0 - scikit-learn - scipy - - sqlalchemy=1.3.0 + - sqlalchemy - statsmodels - xarray - xlrd From 
8bec3a7833d570b2521a09ab12f731a598422c4c Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 10 Oct 2020 09:27:01 -0700 Subject: [PATCH 09/17] CLN/REF: de-duplicate DatetimeTZBlock.setitem (#37019) --- pandas/core/internals/blocks.py | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index be105f0035447..54ac1a3fd52c2 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1758,6 +1758,14 @@ def setitem(self, indexer, value): `indexer` is a direct slice/positional indexer. `value` must be a compatible shape. """ + if not self._can_hold_element(value): + # This is only relevant for DatetimeTZBlock, which has a + # non-trivial `_can_hold_element`. + # https://github.com/pandas-dev/pandas/issues/24020 + # Need a dedicated setitem until GH#24020 (type promotion in setitem + # for extension arrays) is designed and implemented. + return self.astype(object).setitem(indexer, value) + if isinstance(indexer, tuple): # TODO(EA2D): not needed with 2D EAs # we are always 1-D @@ -2175,7 +2183,13 @@ def astype(self, dtype, copy: bool = False, errors: str = "raise"): def _can_hold_element(self, element: Any) -> bool: tipo = maybe_infer_dtype_type(element) if tipo is not None: - if self.is_datetimetz: + if isinstance(element, list) and len(element) == 0: + # Following DatetimeArray._validate_setitem_value + # convention, we treat this as object-dtype + # (even though tipo is float64) + return True + + elif self.is_datetimetz: # require exact match, since non-nano does not exist return is_dtype_equal(tipo, self.dtype) or is_valid_nat_for_dtype( element, self.dtype @@ -2339,21 +2353,6 @@ def fillna(self, value, limit=None, inplace=False, downcast=None): value, limit=limit, inplace=inplace, downcast=downcast ) - def setitem(self, indexer, value): - # https://github.com/pandas-dev/pandas/issues/24020 - # Need a dedicated setitem until 
#24020 (type promotion in setitem - # for extension arrays) is designed and implemented. - if self._can_hold_element(value) or ( - isinstance(indexer, np.ndarray) and indexer.size == 0 - ): - return super().setitem(indexer, value) - - obj_vals = self.values.astype(object) - newb = make_block( - obj_vals, placement=self.mgr_locs, klass=ObjectBlock, ndim=self.ndim - ) - return newb.setitem(indexer, value) - def quantile(self, qs, interpolation="linear", axis=0): naive = self.values.view("M8[ns]") From f86ff046b86a7773cad194aa98e45fa15498c5fd Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 10 Oct 2020 09:28:37 -0700 Subject: [PATCH 10/17] REF/TYP: define NDFrame numeric methods non-dynamically (#37017) --- pandas/core/frame.py | 2 +- pandas/core/generic.py | 841 ++++++++++++++++++++++++----------------- 2 files changed, 494 insertions(+), 349 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index fd7d0190dbbcb..8a330e3d595cf 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -8625,7 +8625,7 @@ def _reduce( "will include datetime64 and datetime64tz columns in a " "future version.", FutureWarning, - stacklevel=3, + stacklevel=5, ) cols = self.columns[~dtype_is_dt] self = self[cols] diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 8cc6ca6630099..4a197468cdc22 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -51,7 +51,6 @@ TimestampConvertibleTypes, ValueKeyFunc, ) -from pandas.compat import set_function_name from pandas.compat._optional import import_optional_dependency from pandas.compat.numpy import function as nv from pandas.errors import AbstractMethodError, InvalidIndexError @@ -10407,6 +10406,287 @@ def _agg_by_level(self, name, axis=0, level=0, skipna=True, **kwargs): applyf = lambda x: method(x, axis=axis, skipna=skipna, **kwargs) return grouped.aggregate(applyf) + def _logical_func( + self, name: str, func, axis=0, bool_only=None, skipna=True, level=None, **kwargs + ): + 
nv.validate_logical_func(tuple(), kwargs, fname=name) + if level is not None: + if bool_only is not None: + raise NotImplementedError( + "Option bool_only is not implemented with option level." + ) + return self._agg_by_level(name, axis=axis, level=level, skipna=skipna) + + if self.ndim > 1 and axis is None: + # Reduce along one dimension then the other, to simplify DataFrame._reduce + res = self._logical_func( + name, func, axis=0, bool_only=bool_only, skipna=skipna, **kwargs + ) + return res._logical_func(name, func, skipna=skipna, **kwargs) + + return self._reduce( + func, + name=name, + axis=axis, + skipna=skipna, + numeric_only=bool_only, + filter_type="bool", + ) + + def any(self, axis=0, bool_only=None, skipna=True, level=None, **kwargs): + return self._logical_func( + "any", nanops.nanany, axis, bool_only, skipna, level, **kwargs + ) + + def all(self, axis=0, bool_only=None, skipna=True, level=None, **kwargs): + return self._logical_func( + "all", nanops.nanall, axis, bool_only, skipna, level, **kwargs + ) + + def _accum_func(self, name: str, func, axis=None, skipna=True, *args, **kwargs): + skipna = nv.validate_cum_func_with_skipna(skipna, args, kwargs, name) + if axis is None: + axis = self._stat_axis_number + else: + axis = self._get_axis_number(axis) + + if axis == 1: + return self.T._accum_func( + name, func, axis=0, skipna=skipna, *args, **kwargs + ).T + + def block_accum_func(blk_values): + values = blk_values.T if hasattr(blk_values, "T") else blk_values + + result = nanops.na_accum_func(values, func, skipna=skipna) + + result = result.T if hasattr(result, "T") else result + return result + + result = self._mgr.apply(block_accum_func) + + return self._constructor(result).__finalize__(self, method=name) + + def cummax(self, axis=None, skipna=True, *args, **kwargs): + return self._accum_func( + "cummax", np.maximum.accumulate, axis, skipna, *args, **kwargs + ) + + def cummin(self, axis=None, skipna=True, *args, **kwargs): + return self._accum_func( + 
"cummin", np.minimum.accumulate, axis, skipna, *args, **kwargs + ) + + def cumsum(self, axis=None, skipna=True, *args, **kwargs): + return self._accum_func("cumsum", np.cumsum, axis, skipna, *args, **kwargs) + + def cumprod(self, axis=None, skipna=True, *args, **kwargs): + return self._accum_func("cumprod", np.cumprod, axis, skipna, *args, **kwargs) + + def _stat_function_ddof( + self, + name: str, + func, + axis=None, + skipna=None, + level=None, + ddof=1, + numeric_only=None, + **kwargs, + ): + nv.validate_stat_ddof_func(tuple(), kwargs, fname=name) + if skipna is None: + skipna = True + if axis is None: + axis = self._stat_axis_number + if level is not None: + return self._agg_by_level( + name, axis=axis, level=level, skipna=skipna, ddof=ddof + ) + return self._reduce( + func, name, axis=axis, numeric_only=numeric_only, skipna=skipna, ddof=ddof + ) + + def sem( + self, axis=None, skipna=None, level=None, ddof=1, numeric_only=None, **kwargs + ): + return self._stat_function_ddof( + "sem", nanops.nansem, axis, skipna, level, ddof, numeric_only, **kwargs + ) + + def var( + self, axis=None, skipna=None, level=None, ddof=1, numeric_only=None, **kwargs + ): + return self._stat_function_ddof( + "var", nanops.nanvar, axis, skipna, level, ddof, numeric_only, **kwargs + ) + + def std( + self, axis=None, skipna=None, level=None, ddof=1, numeric_only=None, **kwargs + ): + return self._stat_function_ddof( + "std", nanops.nanstd, axis, skipna, level, ddof, numeric_only, **kwargs + ) + + def _stat_function( + self, + name: str, + func, + axis=None, + skipna=None, + level=None, + numeric_only=None, + **kwargs, + ): + if name == "median": + nv.validate_median(tuple(), kwargs) + else: + nv.validate_stat_func(tuple(), kwargs, fname=name) + if skipna is None: + skipna = True + if axis is None: + axis = self._stat_axis_number + if level is not None: + return self._agg_by_level(name, axis=axis, level=level, skipna=skipna) + return self._reduce( + func, name=name, axis=axis, 
skipna=skipna, numeric_only=numeric_only + ) + + def min(self, axis=None, skipna=None, level=None, numeric_only=None, **kwargs): + return self._stat_function( + "min", nanops.nanmin, axis, skipna, level, numeric_only, **kwargs + ) + + def max(self, axis=None, skipna=None, level=None, numeric_only=None, **kwargs): + return self._stat_function( + "max", nanops.nanmax, axis, skipna, level, numeric_only, **kwargs + ) + + def mean(self, axis=None, skipna=None, level=None, numeric_only=None, **kwargs): + return self._stat_function( + "mean", nanops.nanmean, axis, skipna, level, numeric_only, **kwargs + ) + + def median(self, axis=None, skipna=None, level=None, numeric_only=None, **kwargs): + return self._stat_function( + "median", nanops.nanmedian, axis, skipna, level, numeric_only, **kwargs + ) + + def skew(self, axis=None, skipna=None, level=None, numeric_only=None, **kwargs): + return self._stat_function( + "skew", nanops.nanskew, axis, skipna, level, numeric_only, **kwargs + ) + + def kurt(self, axis=None, skipna=None, level=None, numeric_only=None, **kwargs): + return self._stat_function( + "kurt", nanops.nankurt, axis, skipna, level, numeric_only, **kwargs + ) + + kurtosis = kurt + + def _min_count_stat_function( + self, + name: str, + func, + axis=None, + skipna=None, + level=None, + numeric_only=None, + min_count=0, + **kwargs, + ): + if name == "sum": + nv.validate_sum(tuple(), kwargs) + elif name == "prod": + nv.validate_prod(tuple(), kwargs) + else: + nv.validate_stat_func(tuple(), kwargs, fname=name) + if skipna is None: + skipna = True + if axis is None: + axis = self._stat_axis_number + if level is not None: + return self._agg_by_level( + name, axis=axis, level=level, skipna=skipna, min_count=min_count + ) + return self._reduce( + func, + name=name, + axis=axis, + skipna=skipna, + numeric_only=numeric_only, + min_count=min_count, + ) + + def sum( + self, + axis=None, + skipna=None, + level=None, + numeric_only=None, + min_count=0, + **kwargs, + ): + return 
self._min_count_stat_function( + "sum", nanops.nansum, axis, skipna, level, numeric_only, min_count, **kwargs + ) + + def prod( + self, + axis=None, + skipna=None, + level=None, + numeric_only=None, + min_count=0, + **kwargs, + ): + return self._min_count_stat_function( + "prod", + nanops.nanprod, + axis, + skipna, + level, + numeric_only, + min_count, + **kwargs, + ) + + product = prod + + def mad(self, axis=None, skipna=None, level=None): + """ + {desc} + + Parameters + ---------- + axis : {axis_descr} + Axis for the function to be applied on. + skipna : bool, default None + Exclude NA/null values when computing the result. + level : int or level name, default None + If the axis is a MultiIndex (hierarchical), count along a + particular level, collapsing into a {name1}. + + Returns + ------- + {name1} or {name2} (if level specified)\ + {see_also}\ + {examples} + """ + if skipna is None: + skipna = True + if axis is None: + axis = self._stat_axis_number + if level is not None: + return self._agg_by_level("mad", axis=axis, level=level, skipna=skipna) + + data = self._get_numeric_data() + if axis == 0: + demeaned = data - data.mean(axis=0) + else: + demeaned = data.sub(data.mean(axis=1), axis=0) + return np.abs(demeaned).mean(axis=axis, skipna=skipna) + @classmethod def _add_numeric_operations(cls): """ @@ -10414,30 +10694,35 @@ def _add_numeric_operations(cls): """ axis_descr, name1, name2 = _doc_parms(cls) - cls.any = _make_logical_function( - cls, - "any", + @doc( + _bool_doc, + desc=_any_desc, name1=name1, name2=name2, axis_descr=axis_descr, - desc=_any_desc, - func=nanops.nanany, see_also=_any_see_also, examples=_any_examples, empty_value=False, ) - cls.all = _make_logical_function( - cls, - "all", + def any(self, axis=0, bool_only=None, skipna=True, level=None, **kwargs): + return NDFrame.any(self, axis, bool_only, skipna, level, **kwargs) + + cls.any = any + + @doc( + _bool_doc, + desc=_all_desc, name1=name1, name2=name2, axis_descr=axis_descr, - 
desc=_all_desc, - func=nanops.nanall, see_also=_all_see_also, examples=_all_examples, empty_value=True, ) + def all(self, axis=0, bool_only=None, skipna=True, level=None, **kwargs): + return NDFrame.all(self, axis, bool_only, skipna, level, **kwargs) + + cls.all = all @doc( desc="Return the mean absolute deviation of the values " @@ -10448,209 +10733,284 @@ def _add_numeric_operations(cls): see_also="", examples="", ) + @Appender(NDFrame.mad.__doc__) def mad(self, axis=None, skipna=None, level=None): - """ - {desc} - - Parameters - ---------- - axis : {axis_descr} - Axis for the function to be applied on. - skipna : bool, default None - Exclude NA/null values when computing the result. - level : int or level name, default None - If the axis is a MultiIndex (hierarchical), count along a - particular level, collapsing into a {name1}. - - Returns - ------- - {name1} or {name2} (if level specified)\ - {see_also}\ - {examples} - """ - if skipna is None: - skipna = True - if axis is None: - axis = self._stat_axis_number - if level is not None: - return self._agg_by_level("mad", axis=axis, level=level, skipna=skipna) - - data = self._get_numeric_data() - if axis == 0: - demeaned = data - data.mean(axis=0) - else: - demeaned = data.sub(data.mean(axis=1), axis=0) - return np.abs(demeaned).mean(axis=axis, skipna=skipna) + return NDFrame.mad(self, axis, skipna, level) cls.mad = mad - cls.sem = _make_stat_function_ddof( - cls, - "sem", - name1=name1, - name2=name2, - axis_descr=axis_descr, + @doc( + _num_ddof_doc, desc="Return unbiased standard error of the mean over requested " "axis.\n\nNormalized by N-1 by default. 
This can be changed " "using the ddof argument", - func=nanops.nansem, - ) - cls.var = _make_stat_function_ddof( - cls, - "var", name1=name1, name2=name2, axis_descr=axis_descr, + ) + def sem( + self, + axis=None, + skipna=None, + level=None, + ddof=1, + numeric_only=None, + **kwargs, + ): + return NDFrame.sem(self, axis, skipna, level, ddof, numeric_only, **kwargs) + + cls.sem = sem + + @doc( + _num_ddof_doc, desc="Return unbiased variance over requested axis.\n\nNormalized by " "N-1 by default. This can be changed using the ddof argument", - func=nanops.nanvar, - ) - cls.std = _make_stat_function_ddof( - cls, - "std", name1=name1, name2=name2, axis_descr=axis_descr, + ) + def var( + self, + axis=None, + skipna=None, + level=None, + ddof=1, + numeric_only=None, + **kwargs, + ): + return NDFrame.var(self, axis, skipna, level, ddof, numeric_only, **kwargs) + + cls.var = var + + @doc( + _num_ddof_doc, desc="Return sample standard deviation over requested axis." "\n\nNormalized by N-1 by default. 
This can be changed using the " "ddof argument", - func=nanops.nanstd, + name1=name1, + name2=name2, + axis_descr=axis_descr, ) + def std( + self, + axis=None, + skipna=None, + level=None, + ddof=1, + numeric_only=None, + **kwargs, + ): + return NDFrame.std(self, axis, skipna, level, ddof, numeric_only, **kwargs) - cls.cummin = _make_cum_function( - cls, - "cummin", + cls.std = std + + @doc( + _cnum_doc, + desc="minimum", name1=name1, name2=name2, axis_descr=axis_descr, - desc="minimum", - accum_func=np.minimum.accumulate, accum_func_name="min", examples=_cummin_examples, ) - cls.cumsum = _make_cum_function( - cls, - "cumsum", + def cummin(self, axis=None, skipna=True, *args, **kwargs): + return NDFrame.cummin(self, axis, skipna, *args, **kwargs) + + cls.cummin = cummin + + @doc( + _cnum_doc, + desc="maximum", name1=name1, name2=name2, axis_descr=axis_descr, + accum_func_name="max", + examples=_cummax_examples, + ) + def cummax(self, axis=None, skipna=True, *args, **kwargs): + return NDFrame.cummax(self, axis, skipna, *args, **kwargs) + + cls.cummax = cummax + + @doc( + _cnum_doc, desc="sum", - accum_func=np.cumsum, + name1=name1, + name2=name2, + axis_descr=axis_descr, accum_func_name="sum", examples=_cumsum_examples, ) - cls.cumprod = _make_cum_function( - cls, - "cumprod", + def cumsum(self, axis=None, skipna=True, *args, **kwargs): + return NDFrame.cumsum(self, axis, skipna, *args, **kwargs) + + cls.cumsum = cumsum + + @doc( + _cnum_doc, + desc="product", name1=name1, name2=name2, axis_descr=axis_descr, - desc="product", - accum_func=np.cumprod, accum_func_name="prod", examples=_cumprod_examples, ) - cls.cummax = _make_cum_function( - cls, - "cummax", - name1=name1, - name2=name2, - axis_descr=axis_descr, - desc="maximum", - accum_func=np.maximum.accumulate, - accum_func_name="max", - examples=_cummax_examples, - ) + def cumprod(self, axis=None, skipna=True, *args, **kwargs): + return NDFrame.cumprod(self, axis, skipna, *args, **kwargs) + + cls.cumprod = 
cumprod - cls.sum = _make_min_count_stat_function( - cls, - "sum", + @doc( + _num_doc, + desc="Return the sum of the values for the requested axis.\n\n" + "This is equivalent to the method ``numpy.sum``.", name1=name1, name2=name2, axis_descr=axis_descr, - desc="Return the sum of the values for the requested axis.\n\n" - "This is equivalent to the method ``numpy.sum``.", - func=nanops.nansum, + min_count=_min_count_stub, see_also=_stat_func_see_also, examples=_sum_examples, ) - cls.mean = _make_stat_function( - cls, - "mean", + def sum( + self, + axis=None, + skipna=None, + level=None, + numeric_only=None, + min_count=0, + **kwargs, + ): + return NDFrame.sum( + self, axis, skipna, level, numeric_only, min_count, **kwargs + ) + + cls.sum = sum + + @doc( + _num_doc, + desc="Return the product of the values for the requested axis.", name1=name1, name2=name2, axis_descr=axis_descr, - desc="Return the mean of the values for the requested axis.", - func=nanops.nanmean, + min_count=_min_count_stub, + see_also=_stat_func_see_also, + examples=_prod_examples, ) - cls.skew = _make_stat_function( - cls, - "skew", + def prod( + self, + axis=None, + skipna=None, + level=None, + numeric_only=None, + min_count=0, + **kwargs, + ): + return NDFrame.prod( + self, axis, skipna, level, numeric_only, min_count, **kwargs + ) + + cls.prod = prod + cls.product = prod + + @doc( + _num_doc, + desc="Return the mean of the values for the requested axis.", name1=name1, name2=name2, axis_descr=axis_descr, - desc="Return unbiased skew over requested axis.\n\nNormalized by N-1.", - func=nanops.nanskew, + min_count="", + see_also="", + examples="", ) - cls.kurt = _make_stat_function( - cls, - "kurt", + def mean(self, axis=None, skipna=None, level=None, numeric_only=None, **kwargs): + return NDFrame.mean(self, axis, skipna, level, numeric_only, **kwargs) + + cls.mean = mean + + @doc( + _num_doc, + desc="Return unbiased skew over requested axis.\n\nNormalized by N-1.", name1=name1, name2=name2, 
axis_descr=axis_descr, + min_count="", + see_also="", + examples="", + ) + def skew(self, axis=None, skipna=None, level=None, numeric_only=None, **kwargs): + return NDFrame.skew(self, axis, skipna, level, numeric_only, **kwargs) + + cls.skew = skew + + @doc( + _num_doc, desc="Return unbiased kurtosis over requested axis.\n\n" "Kurtosis obtained using Fisher's definition of\n" "kurtosis (kurtosis of normal == 0.0). Normalized " "by N-1.", - func=nanops.nankurt, - ) - cls.kurtosis = cls.kurt - cls.prod = _make_min_count_stat_function( - cls, - "prod", name1=name1, name2=name2, axis_descr=axis_descr, - desc="Return the product of the values for the requested axis.", - func=nanops.nanprod, - examples=_prod_examples, + min_count="", + see_also="", + examples="", ) - cls.product = cls.prod - cls.median = _make_stat_function( - cls, - "median", - name1=name1, - name2=name2, - axis_descr=axis_descr, + def kurt(self, axis=None, skipna=None, level=None, numeric_only=None, **kwargs): + return NDFrame.kurt(self, axis, skipna, level, numeric_only, **kwargs) + + cls.kurt = kurt + cls.kurtosis = kurt + + @doc( + _num_doc, desc="Return the median of the values for the requested axis.", - func=nanops.nanmedian, - ) - cls.max = _make_stat_function( - cls, - "max", name1=name1, name2=name2, axis_descr=axis_descr, + min_count="", + see_also="", + examples="", + ) + def median( + self, axis=None, skipna=None, level=None, numeric_only=None, **kwargs + ): + return NDFrame.median(self, axis, skipna, level, numeric_only, **kwargs) + + cls.median = median + + @doc( + _num_doc, desc="Return the maximum of the values for the requested axis.\n\n" "If you want the *index* of the maximum, use ``idxmax``. 
This is" "the equivalent of the ``numpy.ndarray`` method ``argmax``.", - func=nanops.nanmax, - see_also=_stat_func_see_also, - examples=_max_examples, - ) - cls.min = _make_stat_function( - cls, - "min", name1=name1, name2=name2, axis_descr=axis_descr, + min_count="", + see_also=_stat_func_see_also, + examples=_max_examples, + ) + def max(self, axis=None, skipna=None, level=None, numeric_only=None, **kwargs): + return NDFrame.max(self, axis, skipna, level, numeric_only, **kwargs) + + cls.max = max + + @doc( + _num_doc, desc="Return the minimum of the values for the requested axis.\n\n" "If you want the *index* of the minimum, use ``idxmin``. This is" "the equivalent of the ``numpy.ndarray`` method ``argmin``.", - func=nanops.nanmin, + name1=name1, + name2=name2, + axis_descr=axis_descr, + min_count="", see_also=_stat_func_see_also, examples=_min_examples, ) + def min(self, axis=None, skipna=None, level=None, numeric_only=None, **kwargs): + return NDFrame.min(self, axis, skipna, level, numeric_only, **kwargs) + + cls.min = min @doc(Rolling) def rolling( @@ -11430,218 +11790,3 @@ def _doc_parms(cls): The required number of valid values to perform the operation. If fewer than ``min_count`` non-NA values are present the result will be NA. 
""" - - -def _make_min_count_stat_function( - cls, - name: str, - name1: str, - name2: str, - axis_descr: str, - desc: str, - func: Callable, - see_also: str = "", - examples: str = "", -) -> Callable: - @doc( - _num_doc, - desc=desc, - name1=name1, - name2=name2, - axis_descr=axis_descr, - min_count=_min_count_stub, - see_also=see_also, - examples=examples, - ) - def stat_func( - self, - axis=None, - skipna=None, - level=None, - numeric_only=None, - min_count=0, - **kwargs, - ): - if name == "sum": - nv.validate_sum(tuple(), kwargs) - elif name == "prod": - nv.validate_prod(tuple(), kwargs) - else: - nv.validate_stat_func(tuple(), kwargs, fname=name) - if skipna is None: - skipna = True - if axis is None: - axis = self._stat_axis_number - if level is not None: - return self._agg_by_level( - name, axis=axis, level=level, skipna=skipna, min_count=min_count - ) - return self._reduce( - func, - name=name, - axis=axis, - skipna=skipna, - numeric_only=numeric_only, - min_count=min_count, - ) - - return set_function_name(stat_func, name, cls) - - -def _make_stat_function( - cls, - name: str, - name1: str, - name2: str, - axis_descr: str, - desc: str, - func: Callable, - see_also: str = "", - examples: str = "", -) -> Callable: - @doc( - _num_doc, - desc=desc, - name1=name1, - name2=name2, - axis_descr=axis_descr, - min_count="", - see_also=see_also, - examples=examples, - ) - def stat_func( - self, axis=None, skipna=None, level=None, numeric_only=None, **kwargs - ): - if name == "median": - nv.validate_median(tuple(), kwargs) - else: - nv.validate_stat_func(tuple(), kwargs, fname=name) - if skipna is None: - skipna = True - if axis is None: - axis = self._stat_axis_number - if level is not None: - return self._agg_by_level(name, axis=axis, level=level, skipna=skipna) - return self._reduce( - func, name=name, axis=axis, skipna=skipna, numeric_only=numeric_only - ) - - return set_function_name(stat_func, name, cls) - - -def _make_stat_function_ddof( - cls, name: str, 
name1: str, name2: str, axis_descr: str, desc: str, func: Callable -) -> Callable: - @doc(_num_ddof_doc, desc=desc, name1=name1, name2=name2, axis_descr=axis_descr) - def stat_func( - self, axis=None, skipna=None, level=None, ddof=1, numeric_only=None, **kwargs - ): - nv.validate_stat_ddof_func(tuple(), kwargs, fname=name) - if skipna is None: - skipna = True - if axis is None: - axis = self._stat_axis_number - if level is not None: - return self._agg_by_level( - name, axis=axis, level=level, skipna=skipna, ddof=ddof - ) - return self._reduce( - func, name, axis=axis, numeric_only=numeric_only, skipna=skipna, ddof=ddof - ) - - return set_function_name(stat_func, name, cls) - - -def _make_cum_function( - cls, - name: str, - name1: str, - name2: str, - axis_descr: str, - desc: str, - accum_func: Callable, - accum_func_name: str, - examples: str, -) -> Callable: - @doc( - _cnum_doc, - desc=desc, - name1=name1, - name2=name2, - axis_descr=axis_descr, - accum_func_name=accum_func_name, - examples=examples, - ) - def cum_func(self, axis=None, skipna=True, *args, **kwargs): - skipna = nv.validate_cum_func_with_skipna(skipna, args, kwargs, name) - if axis is None: - axis = self._stat_axis_number - else: - axis = self._get_axis_number(axis) - - if axis == 1: - return cum_func(self.T, axis=0, skipna=skipna, *args, **kwargs).T - - def block_accum_func(blk_values): - values = blk_values.T if hasattr(blk_values, "T") else blk_values - - result = nanops.na_accum_func(values, accum_func, skipna=skipna) - - result = result.T if hasattr(result, "T") else result - return result - - result = self._mgr.apply(block_accum_func) - - return self._constructor(result).__finalize__(self, method=name) - - return set_function_name(cum_func, name, cls) - - -def _make_logical_function( - cls, - name: str, - name1: str, - name2: str, - axis_descr: str, - desc: str, - func: Callable, - see_also: str, - examples: str, - empty_value: bool, -) -> Callable: - @doc( - _bool_doc, - desc=desc, - 
name1=name1, - name2=name2, - axis_descr=axis_descr, - see_also=see_also, - examples=examples, - empty_value=empty_value, - ) - def logical_func(self, axis=0, bool_only=None, skipna=True, level=None, **kwargs): - nv.validate_logical_func(tuple(), kwargs, fname=name) - if level is not None: - if bool_only is not None: - raise NotImplementedError( - "Option bool_only is not implemented with option level." - ) - return self._agg_by_level(name, axis=axis, level=level, skipna=skipna) - - if self.ndim > 1 and axis is None: - # Reduce along one dimension then the other, to simplify DataFrame._reduce - res = logical_func( - self, axis=0, bool_only=bool_only, skipna=skipna, **kwargs - ) - return logical_func(res, skipna=skipna, **kwargs) - - return self._reduce( - func, - name=name, - axis=axis, - skipna=skipna, - numeric_only=bool_only, - filter_type="bool", - ) - - return set_function_name(logical_func, name, cls) From 04d33b8f8721c85bf67cf1728be719d113ee026d Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 10 Oct 2020 09:33:47 -0700 Subject: [PATCH 11/17] CLN: require td64 in TimedeltaBlock (#37018) --- pandas/core/internals/blocks.py | 6 +++++- pandas/core/missing.py | 6 +++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 54ac1a3fd52c2..3a4bdd54ad717 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -2376,7 +2376,11 @@ class TimeDeltaBlock(DatetimeLikeBlockMixin, IntBlock): def _maybe_coerce_values(self, values): if values.dtype != TD64NS_DTYPE: - # e.g. 
non-nano or int64 + # non-nano we will convert to nano + if values.dtype.kind != "m": + # caller is responsible for ensuring timedelta64 dtype + raise TypeError(values.dtype) # pragma: no cover + values = TimedeltaArray._from_sequence(values)._data if isinstance(values, TimedeltaArray): values = values._data diff --git a/pandas/core/missing.py b/pandas/core/missing.py index f2ec04c1fc05d..52536583b9b0d 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -573,9 +573,9 @@ def interpolate_2d( if ndim == 1: result = result[0] - if orig_values.dtype.kind == "M": - # convert float back to datetime64 - result = result.astype(orig_values.dtype) + if orig_values.dtype.kind in ["m", "M"]: + # convert float back to datetime64/timedelta64 + result = result.view(orig_values.dtype) return result From 8912516cea6beca2bb02e2a91540bf28a23104ac Mon Sep 17 00:00:00 2001 From: Micah Smith Date: Sat, 10 Oct 2020 12:46:24 -0400 Subject: [PATCH 12/17] BUG: Raise ValueError instead of bare Exception in sanitize_array (#35769) --- doc/source/whatsnew/v1.2.0.rst | 1 + pandas/core/construction.py | 2 +- pandas/tests/series/test_constructors.py | 4 ++-- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 22cbe521c5025..45e52db11cfaa 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -477,6 +477,7 @@ Other - Bug in :meth:`DataFrame.replace` and :meth:`Series.replace` with numeric values and string ``to_replace`` (:issue:`34789`) - Fixed metadata propagation in the :class:`Series.dt` accessor (:issue:`28283`) - Bug in :meth:`Index.union` behaving differently depending on whether operand is a :class:`Index` or other list-like (:issue:`36384`) +- Passing an array with 2 or more dimensions to the :class:`Series` constructor now raises the more specific ``ValueError``, from a bare ``Exception`` previously (:issue:`35744`) .. 
--------------------------------------------------------------------------- diff --git a/pandas/core/construction.py b/pandas/core/construction.py index 4751f6076f869..7901e150a7ff4 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -510,7 +510,7 @@ def sanitize_array( elif subarr.ndim > 1: if isinstance(data, np.ndarray): - raise Exception("Data must be 1-dimensional") + raise ValueError("Data must be 1-dimensional") else: subarr = com.asarray_tuplesafe(data, dtype=dtype) diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 4ad4917533422..a950ca78fc742 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -113,8 +113,8 @@ def test_constructor(self, datetime_series): with tm.assert_produces_warning(DeprecationWarning, check_stacklevel=False): assert not Series().index._is_all_dates - # exception raised is of type Exception - with pytest.raises(Exception, match="Data must be 1-dimensional"): + # exception raised is of type ValueError GH35744 + with pytest.raises(ValueError, match="Data must be 1-dimensional"): Series(np.random.randn(3, 3), index=np.arange(3)) mixed.name = "Series" From 1581bb1373e74a0bb69ca12202b9db6c258a87a5 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 10 Oct 2020 09:51:16 -0700 Subject: [PATCH 13/17] CLN: collected cleanups, warning suppression in tests (#37021) --- pandas/core/indexes/base.py | 4 +--- pandas/core/internals/blocks.py | 8 +++----- pandas/core/internals/concat.py | 4 ++-- pandas/io/pytables.py | 9 +++++++-- pandas/tests/extension/test_external_block.py | 9 --------- .../tests/frame/apply/test_frame_transform.py | 20 +++++++++++-------- pandas/tests/frame/test_api.py | 4 +++- pandas/tests/indexes/common.py | 2 -- pandas/tests/io/__init__.py | 17 ++++++++++++++++ pandas/tests/io/excel/__init__.py | 14 +++++++++---- pandas/tests/io/excel/test_readers.py | 2 -- pandas/tests/io/test_common.py | 5 ----- 
12 files changed, 55 insertions(+), 43 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 7ae36344a9966..50cd2076ae2f8 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -5404,9 +5404,7 @@ def _cmp_method(self, other, op): with np.errstate(all="ignore"): result = ops.comparison_op(self._values, np.asarray(other), op) - if is_bool_dtype(result): - return result - return ops.invalid_comparison(self, other, op) + return result def _arith_method(self, other, op): """ diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 3a4bdd54ad717..c9869500469f4 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -501,9 +501,7 @@ def _maybe_downcast(self, blocks: List["Block"], downcast=None) -> List["Block"] # no need to downcast our float # unless indicated - if downcast is None and ( - self.is_float or self.is_timedelta or self.is_datetime - ): + if downcast is None and (self.is_float or self.is_datelike): return blocks return extend_blocks([b.downcast(downcast) for b in blocks]) @@ -638,7 +636,7 @@ def astype(self, dtype, copy: bool = False, errors: str = "raise"): if isinstance(values, np.ndarray): values = values.reshape(self.shape) - newb = make_block(values, placement=self.mgr_locs, ndim=self.ndim) + newb = self.make_block(values) if newb.is_numeric and self.is_numeric: if newb.shape != self.shape: @@ -2484,7 +2482,7 @@ def f(mask, val, idx): blocks = self.split_and_operate(None, f, False) else: values = f(None, self.values.ravel(), None) - blocks = [make_block(values, ndim=self.ndim, placement=self.mgr_locs)] + blocks = [self.make_block(values)] return blocks diff --git a/pandas/core/internals/concat.py b/pandas/core/internals/concat.py index 7ad058cfeb83c..8d54f88558066 100644 --- a/pandas/core/internals/concat.py +++ b/pandas/core/internals/concat.py @@ -187,7 +187,7 @@ def __repr__(self) -> str: return 
f"{type(self).__name__}({repr(self.block)}, {self.indexers})" @cache_readonly - def needs_filling(self): + def needs_filling(self) -> bool: for indexer in self.indexers.values(): # FIXME: cache results of indexer == -1 checks. if (indexer == -1).any(): @@ -206,7 +206,7 @@ def dtype(self): return get_dtype(maybe_promote(self.block.dtype, self.block.fill_value)[0]) @cache_readonly - def is_na(self): + def is_na(self) -> bool: if self.block is None: return True diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 3e3330fa4378f..2903ede1d5c0b 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -4728,8 +4728,13 @@ def _set_tz( assert values.tz is None or values.tz == tz if tz is not None: - name = getattr(values, "name", None) - values = values.ravel() + if isinstance(values, DatetimeIndex): + name = values.name + values = values.asi8 + else: + name = None + values = values.ravel() + tz = _ensure_decoded(tz) values = DatetimeIndex(values, name=name) values = values.tz_localize("UTC").tz_convert(tz) diff --git a/pandas/tests/extension/test_external_block.py b/pandas/tests/extension/test_external_block.py index 1843126898f3d..e98545daaf049 100644 --- a/pandas/tests/extension/test_external_block.py +++ b/pandas/tests/extension/test_external_block.py @@ -11,15 +11,6 @@ class CustomBlock(ExtensionBlock): _holder = np.ndarray _can_hold_na = False - def concat_same_type(self, to_concat, placement=None): - """ - Always concatenate disregarding self.ndim as the values are - always 1D in this custom Block - """ - values = np.concatenate([blk.values for blk in to_concat]) - placement = self.mgr_locs if self.ndim == 2 else slice(len(values)) - return self.make_block_same_class(values, placement=placement) - @pytest.fixture def df(): diff --git a/pandas/tests/frame/apply/test_frame_transform.py b/pandas/tests/frame/apply/test_frame_transform.py index 01c6fd4ec08f0..1b259ddbd41dc 100644 --- a/pandas/tests/frame/apply/test_frame_transform.py +++ 
b/pandas/tests/frame/apply/test_frame_transform.py @@ -168,14 +168,18 @@ def test_transform_bad_dtype(op): if op in ("backfill", "shift", "pad", "bfill", "ffill"): pytest.xfail("Transform function works on any datatype") msg = "Transform function failed" - with pytest.raises(ValueError, match=msg): - df.transform(op) - with pytest.raises(ValueError, match=msg): - df.transform([op]) - with pytest.raises(ValueError, match=msg): - df.transform({"A": op}) - with pytest.raises(ValueError, match=msg): - df.transform({"A": [op]}) + + # tshift is deprecated + warn = None if op != "tshift" else FutureWarning + with tm.assert_produces_warning(warn, check_stacklevel=False): + with pytest.raises(ValueError, match=msg): + df.transform(op) + with pytest.raises(ValueError, match=msg): + df.transform([op]) + with pytest.raises(ValueError, match=msg): + df.transform({"A": op}) + with pytest.raises(ValueError, match=msg): + df.transform({"A": [op]}) @pytest.mark.parametrize("op", transformation_kernels) diff --git a/pandas/tests/frame/test_api.py b/pandas/tests/frame/test_api.py index 8b5d0c7ade56c..f5d1808f367e7 100644 --- a/pandas/tests/frame/test_api.py +++ b/pandas/tests/frame/test_api.py @@ -605,7 +605,9 @@ def test_constructor_expanddim_lookup(self): # raise NotImplementedError df = DataFrame() - inspect.getmembers(df) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + # _AXIS_NUMBERS, _AXIS_NAMES lookups + inspect.getmembers(df) with pytest.raises(NotImplementedError, match="Not supported for DataFrames!"): df._constructor_expanddim(np.arange(27).reshape(3, 3, 3)) diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index 73d2e99d3ff5e..bc178c138341f 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -613,8 +613,6 @@ def test_equals(self, index): def test_equals_op(self): # GH9947, GH10637 index_a = self.create_index() - if isinstance(index_a, PeriodIndex): - pytest.skip("Skip check for 
PeriodIndex") n = len(index_a) index_b = index_a[0:-1] diff --git a/pandas/tests/io/__init__.py b/pandas/tests/io/__init__.py index e69de29bb2d1d..c5e867f45b92d 100644 --- a/pandas/tests/io/__init__.py +++ b/pandas/tests/io/__init__.py @@ -0,0 +1,17 @@ +import pytest + +pytestmark = [ + # fastparquet + pytest.mark.filterwarnings( + "ignore:PY_SSIZE_T_CLEAN will be required.*:DeprecationWarning" + ), + # xlrd + pytest.mark.filterwarnings( + "ignore:This method will be removed in future versions:DeprecationWarning" + ), + pytest.mark.filterwarnings( + "ignore:This method will be removed in future versions. " + r"Use 'tree.iter\(\)' or 'list\(tree.iter\(\)\)' instead." + ":PendingDeprecationWarning" + ), +] diff --git a/pandas/tests/io/excel/__init__.py b/pandas/tests/io/excel/__init__.py index 550172329fc57..419761cbe1d6d 100644 --- a/pandas/tests/io/excel/__init__.py +++ b/pandas/tests/io/excel/__init__.py @@ -1,6 +1,12 @@ import pytest -pytestmark = pytest.mark.filterwarnings( - # Looks like tree.getiterator is deprecated in favor of tree.iter - "ignore:This method will be removed in future versions:PendingDeprecationWarning" -) +pytestmark = [ + pytest.mark.filterwarnings( + # Looks like tree.getiterator is deprecated in favor of tree.iter + "ignore:This method will be removed in future versions:" + "PendingDeprecationWarning" + ), + pytest.mark.filterwarnings( + "ignore:This method will be removed in future versions:DeprecationWarning" + ), +] diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py index 4bdcc5b327fa7..800b4c79b9c09 100644 --- a/pandas/tests/io/excel/test_readers.py +++ b/pandas/tests/io/excel/test_readers.py @@ -635,8 +635,6 @@ def test_read_from_s3_url(self, read_ext, s3_resource, s3so): tm.assert_frame_equal(url_table, local_table) @pytest.mark.slow - # ignore warning from old xlrd - @pytest.mark.filterwarnings("ignore:This metho:PendingDeprecationWarning") def test_read_from_file_url(self, read_ext, 
datapath): # FILE diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py index ede8d61490778..2a6f3d1ad9380 100644 --- a/pandas/tests/io/test_common.py +++ b/pandas/tests/io/test_common.py @@ -245,11 +245,6 @@ def test_read_expands_user_home_dir( ), ], ) - @pytest.mark.filterwarnings( - "ignore:This method will be removed in future versions. " - r"Use 'tree.iter\(\)' or 'list\(tree.iter\(\)\)' instead." - ":PendingDeprecationWarning" - ) def test_read_fspath_all(self, reader, module, path, datapath): pytest.importorskip(module) path = datapath(*path) From a57d17482320da8c82278ffa4e41b0eafb4b0c46 Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 10 Oct 2020 13:21:41 -0700 Subject: [PATCH 14/17] REF/TYP: use OpsMixin for DataFrame --- pandas/core/frame.py | 93 ++++++++++++++++++- pandas/core/ops/__init__.py | 135 +++------------------------ pandas/core/ops/methods.py | 180 ++++++++++++------------------------ 3 files changed, 158 insertions(+), 250 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 8a330e3d595cf..eabda93aedbad 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -124,6 +124,7 @@ relabel_result, transform, ) +from pandas.core.arraylike import OpsMixin from pandas.core.arrays import Categorical, ExtensionArray from pandas.core.arrays.datetimelike import DatetimeLikeArrayMixin as DatetimeLikeArray from pandas.core.arrays.sparse import SparseFrameAccessor @@ -339,7 +340,7 @@ # DataFrame class -class DataFrame(NDFrame): +class DataFrame(OpsMixin, NDFrame): """ Two-dimensional, size-mutable, potentially heterogeneous tabular data. 
@@ -423,6 +424,7 @@ def _constructor(self) -> Type[DataFrame]: _constructor_sliced: Type[Series] = Series _deprecations: FrozenSet[str] = NDFrame._deprecations | frozenset([]) _accessors: Set[str] = {"sparse"} + __hash__ = NDFrame.__hash__ @property def _constructor_expanddim(self): @@ -5879,7 +5881,87 @@ def reorder_levels(self, order, axis=0) -> DataFrame: return result # ---------------------------------------------------------------------- - # Arithmetic / combination related + # Arithmetic Methods + + def _cmp_method(self, other, op): + axis = 1 # only relevant for Series other case + + self, other = ops.align_method_FRAME(self, other, axis, flex=False, level=None) + + # See GH#4537 for discussion of scalar op behavior + new_data = self._dispatch_frame_op(other, op, axis=axis) + return self._construct_result(new_data) + + def _arith_method(self, other, op): + if ops._should_reindex_frame_op(self, other, op, 1, 1, None, None): + return ops._frame_arith_method_with_reindex(self, other, op) + + axis = 1 # only relevant for Series other case + + self, other = ops.align_method_FRAME(self, other, axis, flex=True, level=None) + + new_data = self._dispatch_frame_op(other, op, axis=axis) + return self._construct_result(new_data) + + _logical_method = _arith_method + + def _dispatch_frame_op(self, right, func, axis: Optional[int] = None): + """ + Evaluate the frame operation func(left, right) by evaluating + column-by-column, dispatching to the Series implementation. + + Parameters + ---------- + right : scalar, Series, or DataFrame + func : arithmetic or comparison operator + axis : {None, 0, 1} + + Returns + ------- + DataFrame + """ + # Get the appropriate array-op to apply to each column/block's values. + array_op = ops.get_array_op(func) + + right = lib.item_from_zerodim(right) + if not is_list_like(right): + # i.e. 
scalar, faster than checking np.ndim(right) == 0 + bm = self._mgr.apply(array_op, right=right) + return type(self)(bm) + + elif isinstance(right, DataFrame): + assert self.index.equals(right.index) + assert self.columns.equals(right.columns) + # TODO: The previous assertion `assert right._indexed_same(self)` + # fails in cases with empty columns reached via + # _frame_arith_method_with_reindex + + bm = self._mgr.operate_blockwise(right._mgr, array_op) + return type(self)(bm) + + elif isinstance(right, Series) and axis == 1: + # axis=1 means we want to operate row-by-row + assert right.index.equals(self.columns) + + right = right._values + # maybe_align_as_frame ensures we do not have an ndarray here + assert not isinstance(right, np.ndarray) + + arrays = [array_op(l, r) for l, r in zip(self._iter_column_arrays(), right)] + + elif isinstance(right, Series): + assert right.index.equals(self.index) # Handle other cases later + right = right._values + + arrays = [array_op(l, right) for l in self._iter_column_arrays()] + + else: + # Remaining cases have less-obvious dispatch rules + raise NotImplementedError(right) + + return type(self)._from_arrays( + arrays, self.columns, self.index, verify_integrity=False + ) def _combine_frame(self, other: DataFrame, func, fill_value=None): # at this point we have `self._indexed_same(other)` @@ -5898,7 +5980,7 @@ def _arith_op(left, right): left, right = ops.fill_binop(left, right, fill_value) return func(left, right) - new_data = ops.dispatch_to_series(self, other, _arith_op) + new_data = self._dispatch_frame_op(other, _arith_op) return new_data def _construct_result(self, result) -> DataFrame: @@ -5920,6 +6002,9 @@ def _construct_result(self, result) -> DataFrame: out.index = self.index return out + # ---------------------------------------------------------------------- + # Combination-Related + @Appender( """ Returns @@ -7295,7 +7380,7 @@ def diff(self, periods: int = 1, axis: Axis = 0) -> DataFrame: bm_axis = 
self._get_block_manager_axis(axis) if bm_axis == 0 and periods != 0: - return self - self.shift(periods, axis=axis) # type: ignore[operator] + return self - self.shift(periods, axis=axis) new_data = self._mgr.diff(n=periods, axis=bm_axis) return self._constructor(new_data) diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py index b656aef64cde9..3c0b4dfda5446 100644 --- a/pandas/core/ops/__init__.py +++ b/pandas/core/ops/__init__.py @@ -4,12 +4,11 @@ This is not a public API. """ import operator -from typing import TYPE_CHECKING, Optional, Set, Type +from typing import TYPE_CHECKING, Optional, Set import warnings import numpy as np -from pandas._libs import lib from pandas._libs.ops_dispatch import maybe_dispatch_ufunc_to_dunder_op # noqa:F401 from pandas._typing import Level from pandas.util._decorators import Appender @@ -28,7 +27,6 @@ ) from pandas.core.ops.common import unpack_zerodim_and_defer # noqa:F401 from pandas.core.ops.docstrings import ( - _arith_doc_FRAME, _flex_comp_doc_FRAME, _make_flex_doc, _op_descriptions, @@ -211,70 +209,6 @@ def fill_binop(left, right, fill_value): return left, right -# ----------------------------------------------------------------------------- -# Dispatch logic - - -def dispatch_to_series(left, right, func, axis: Optional[int] = None): - """ - Evaluate the frame operation func(left, right) by evaluating - column-by-column, dispatching to the Series implementation. - - Parameters - ---------- - left : DataFrame - right : scalar, Series, or DataFrame - func : arithmetic or comparison operator - axis : {None, 0, 1} - - Returns - ------- - DataFrame - """ - # Get the appropriate array-op to apply to each column/block's values. - array_op = get_array_op(func) - - right = lib.item_from_zerodim(right) - if not is_list_like(right): - # i.e. 
scalar, faster than checking np.ndim(right) == 0 - bm = left._mgr.apply(array_op, right=right) - return type(left)(bm) - - elif isinstance(right, ABCDataFrame): - assert left.index.equals(right.index) - assert left.columns.equals(right.columns) - # TODO: The previous assertion `assert right._indexed_same(left)` - # fails in cases with empty columns reached via - # _frame_arith_method_with_reindex - - bm = left._mgr.operate_blockwise(right._mgr, array_op) - return type(left)(bm) - - elif isinstance(right, ABCSeries) and axis == 1: - # axis=1 means we want to operate row-by-row - assert right.index.equals(left.columns) - - right = right._values - # maybe_align_as_frame ensures we do not have an ndarray here - assert not isinstance(right, np.ndarray) - - arrays = [array_op(l, r) for l, r in zip(left._iter_column_arrays(), right)] - - elif isinstance(right, ABCSeries): - assert right.index.equals(left.index) # Handle other cases later - right = right._values - - arrays = [array_op(l, right) for l in left._iter_column_arrays()] - - else: - # Remaining cases have less-obvious dispatch rules - raise NotImplementedError(right) - - return type(left)._from_arrays( - arrays, left.columns, left.index, verify_integrity=False - ) - - # ----------------------------------------------------------------------------- # Series @@ -299,9 +233,8 @@ def align_method_SERIES(left: "Series", right, align_asobject: bool = False): return left, right -def flex_method_SERIES(cls, op, special): - assert not special # "special" also means "not flex" - name = _get_op_name(op, special) +def flex_method_SERIES(op): + name = _get_op_name(op, False) doc = _make_flex_doc(name, "series") @Appender(doc) @@ -427,7 +360,7 @@ def to_series(right): "Do `left, right = left.align(right, axis=1, copy=False)` " "before e.g. 
`left == right`", FutureWarning, - stacklevel=3, + stacklevel=5, ) left, right = left.align( @@ -533,10 +466,9 @@ def _maybe_align_series_as_frame(frame: "DataFrame", series: "Series", axis: int return type(frame)(rvalues, index=frame.index, columns=frame.columns) -def flex_arith_method_FRAME(cls: Type["DataFrame"], op, special: bool): - assert not special - op_name = _get_op_name(op, special) - default_axis = None if special else "columns" +def flex_arith_method_FRAME(op): + op_name = _get_op_name(op, False) + default_axis = "columns" na_op = get_array_op(op) doc = _make_flex_doc(op_name, "dataframe") @@ -563,37 +495,14 @@ def f(self, other, axis=default_axis, level=None, fill_value=None): new_data = self._combine_frame(other, na_op, fill_value) elif isinstance(other, ABCSeries): - new_data = dispatch_to_series(self, other, op, axis=axis) + new_data = self._dispatch_frame_op(other, op, axis=axis) else: # in this case we always have `np.ndim(other) == 0` if fill_value is not None: self = self.fillna(fill_value) - new_data = dispatch_to_series(self, other, op) - - return self._construct_result(new_data) - - f.__name__ = op_name - - return f - - -def arith_method_FRAME(cls: Type["DataFrame"], op, special: bool): - assert special - op_name = _get_op_name(op, special) - doc = _arith_doc_FRAME % op_name - - @Appender(doc) - def f(self, other): - - if _should_reindex_frame_op(self, other, op, 1, 1, None, None): - return _frame_arith_method_with_reindex(self, other, op) - - axis = 1 # only relevant for Series other case + new_data = self._dispatch_frame_op(other, op) - self, other = align_method_FRAME(self, other, axis, flex=True, level=None) - - new_data = dispatch_to_series(self, other, op, axis=axis) return self._construct_result(new_data) f.__name__ = op_name @@ -601,9 +510,8 @@ def f(self, other): return f -def flex_comp_method_FRAME(cls: Type["DataFrame"], op, special: bool): - assert not special # "special" also means "not flex" - op_name = _get_op_name(op, 
special) +def flex_comp_method_FRAME(op): + op_name = _get_op_name(op, False) default_axis = "columns" # because we are "flex" doc = _flex_comp_doc_FRAME.format( @@ -616,26 +524,7 @@ def f(self, other, axis=default_axis, level=None): self, other = align_method_FRAME(self, other, axis, flex=True, level=level) - new_data = dispatch_to_series(self, other, op, axis=axis) - return self._construct_result(new_data) - - f.__name__ = op_name - - return f - - -def comp_method_FRAME(cls: Type["DataFrame"], op, special: bool): - assert special # "special" also means "not flex" - op_name = _get_op_name(op, special) - - @Appender(f"Wrapper for comparison method {op_name}") - def f(self, other): - axis = 1 # only relevant for Series other case - - self, other = align_method_FRAME(self, other, axis, flex=False, level=None) - - # See GH#4537 for discussion of scalar op behavior - new_data = dispatch_to_series(self, other, op, axis=axis) + new_data = self._dispatch_frame_op(other, op, axis=axis) return self._construct_result(new_data) f.__name__ = op_name diff --git a/pandas/core/ops/methods.py b/pandas/core/ops/methods.py index 4fe172d3a2baa..c05f457f1e4f5 100644 --- a/pandas/core/ops/methods.py +++ b/pandas/core/ops/methods.py @@ -7,16 +7,13 @@ from pandas.core.ops.roperator import ( radd, - rand_, rdivmod, rfloordiv, rmod, rmul, - ror_, rpow, rsub, rtruediv, - rxor, ) @@ -33,19 +30,10 @@ def _get_method_wrappers(cls): ------- arith_flex : function or None comp_flex : function or None - arith_special : function - comp_special : function - bool_special : function - - Notes - ----- - None is only returned for SparseArray """ # TODO: make these non-runtime imports once the relevant functions # are no longer in __init__ from pandas.core.ops import ( - arith_method_FRAME, - comp_method_FRAME, flex_arith_method_FRAME, flex_comp_method_FRAME, flex_method_SERIES, @@ -55,16 +43,10 @@ def _get_method_wrappers(cls): # Just Series arith_flex = flex_method_SERIES comp_flex = flex_method_SERIES 
- arith_special = None - comp_special = None - bool_special = None elif issubclass(cls, ABCDataFrame): arith_flex = flex_arith_method_FRAME comp_flex = flex_comp_method_FRAME - arith_special = arith_method_FRAME - comp_special = comp_method_FRAME - bool_special = arith_method_FRAME - return arith_flex, comp_flex, arith_special, comp_special, bool_special + return arith_flex, comp_flex def add_special_arithmetic_methods(cls): @@ -77,12 +59,7 @@ def add_special_arithmetic_methods(cls): cls : class special methods will be defined and pinned to this class """ - _, _, arith_method, comp_method, bool_method = _get_method_wrappers(cls) - new_methods = _create_methods( - cls, arith_method, comp_method, bool_method, special=True - ) - # inplace operators (I feel like these should get passed an `inplace=True` - # or just be removed + new_methods = {} def _wrap_inplace_method(method): """ @@ -105,46 +82,25 @@ def f(self, other): f.__name__ = f"__i{name}__" return f - if bool_method is None: - # Series gets bool_method, arith_method via OpsMixin - new_methods.update( - dict( - __iadd__=_wrap_inplace_method(cls.__add__), - __isub__=_wrap_inplace_method(cls.__sub__), - __imul__=_wrap_inplace_method(cls.__mul__), - __itruediv__=_wrap_inplace_method(cls.__truediv__), - __ifloordiv__=_wrap_inplace_method(cls.__floordiv__), - __imod__=_wrap_inplace_method(cls.__mod__), - __ipow__=_wrap_inplace_method(cls.__pow__), - ) - ) - new_methods.update( - dict( - __iand__=_wrap_inplace_method(cls.__and__), - __ior__=_wrap_inplace_method(cls.__or__), - __ixor__=_wrap_inplace_method(cls.__xor__), - ) - ) - - else: - new_methods.update( - dict( - __iadd__=_wrap_inplace_method(new_methods["__add__"]), - __isub__=_wrap_inplace_method(new_methods["__sub__"]), - __imul__=_wrap_inplace_method(new_methods["__mul__"]), - __itruediv__=_wrap_inplace_method(new_methods["__truediv__"]), - __ifloordiv__=_wrap_inplace_method(new_methods["__floordiv__"]), - 
__imod__=_wrap_inplace_method(new_methods["__mod__"]), - __ipow__=_wrap_inplace_method(new_methods["__pow__"]), - ) + # wrap methods that we get from OpsMixin + new_methods.update( + dict( + __iadd__=_wrap_inplace_method(cls.__add__), + __isub__=_wrap_inplace_method(cls.__sub__), + __imul__=_wrap_inplace_method(cls.__mul__), + __itruediv__=_wrap_inplace_method(cls.__truediv__), + __ifloordiv__=_wrap_inplace_method(cls.__floordiv__), + __imod__=_wrap_inplace_method(cls.__mod__), + __ipow__=_wrap_inplace_method(cls.__pow__), ) - new_methods.update( - dict( - __iand__=_wrap_inplace_method(new_methods["__and__"]), - __ior__=_wrap_inplace_method(new_methods["__or__"]), - __ixor__=_wrap_inplace_method(new_methods["__xor__"]), - ) + ) + new_methods.update( + dict( + __iand__=_wrap_inplace_method(cls.__and__), + __ior__=_wrap_inplace_method(cls.__or__), + __ixor__=_wrap_inplace_method(cls.__xor__), ) + ) _add_methods(cls, new_methods=new_methods) @@ -159,10 +115,8 @@ def add_flex_arithmetic_methods(cls): cls : class flex methods will be defined and pinned to this class """ - flex_arith_method, flex_comp_method, _, _, _ = _get_method_wrappers(cls) - new_methods = _create_methods( - cls, flex_arith_method, flex_comp_method, bool_method=None, special=False - ) + flex_arith_method, flex_comp_method = _get_method_wrappers(cls) + new_methods = _create_methods(cls, flex_arith_method, flex_comp_method) new_methods.update( dict( multiply=new_methods["mul"], @@ -176,72 +130,52 @@ def add_flex_arithmetic_methods(cls): _add_methods(cls, new_methods=new_methods) -def _create_methods(cls, arith_method, comp_method, bool_method, special): - # creates actual methods based upon arithmetic, comp and bool method +def _create_methods(cls, arith_method, comp_method): + # creates actual flex methods based upon arithmetic, and comp method # constructors. 
have_divmod = issubclass(cls, ABCSeries) # divmod is available for Series new_methods = {} - if arith_method is not None: - new_methods.update( - dict( - add=arith_method(cls, operator.add, special), - radd=arith_method(cls, radd, special), - sub=arith_method(cls, operator.sub, special), - mul=arith_method(cls, operator.mul, special), - truediv=arith_method(cls, operator.truediv, special), - floordiv=arith_method(cls, operator.floordiv, special), - mod=arith_method(cls, operator.mod, special), - pow=arith_method(cls, operator.pow, special), - # not entirely sure why this is necessary, but previously was included - # so it's here to maintain compatibility - rmul=arith_method(cls, rmul, special), - rsub=arith_method(cls, rsub, special), - rtruediv=arith_method(cls, rtruediv, special), - rfloordiv=arith_method(cls, rfloordiv, special), - rpow=arith_method(cls, rpow, special), - rmod=arith_method(cls, rmod, special), - ) - ) - new_methods["div"] = new_methods["truediv"] - new_methods["rdiv"] = new_methods["rtruediv"] - if have_divmod: - # divmod doesn't have an op that is supported by numexpr - new_methods["divmod"] = arith_method(cls, divmod, special) - new_methods["rdivmod"] = arith_method(cls, rdivmod, special) - - if comp_method is not None: - # Series already has this pinned - new_methods.update( - dict( - eq=comp_method(cls, operator.eq, special), - ne=comp_method(cls, operator.ne, special), - lt=comp_method(cls, operator.lt, special), - gt=comp_method(cls, operator.gt, special), - le=comp_method(cls, operator.le, special), - ge=comp_method(cls, operator.ge, special), - ) + + new_methods.update( + dict( + add=arith_method(operator.add), + radd=arith_method(radd), + sub=arith_method(operator.sub), + mul=arith_method(operator.mul), + truediv=arith_method(operator.truediv), + floordiv=arith_method(operator.floordiv), + mod=arith_method(operator.mod), + pow=arith_method(operator.pow), + rmul=arith_method(rmul), + rsub=arith_method(rsub), + 
rtruediv=arith_method(rtruediv), + rfloordiv=arith_method(rfloordiv), + rpow=arith_method(rpow), + rmod=arith_method(rmod), ) + ) + new_methods["div"] = new_methods["truediv"] + new_methods["rdiv"] = new_methods["rtruediv"] + if have_divmod: + # divmod doesn't have an op that is supported by numexpr + new_methods["divmod"] = arith_method(divmod) + new_methods["rdivmod"] = arith_method(rdivmod) - if bool_method is not None: - new_methods.update( - dict( - and_=bool_method(cls, operator.and_, special), - or_=bool_method(cls, operator.or_, special), - xor=bool_method(cls, operator.xor, special), - rand_=bool_method(cls, rand_, special), - ror_=bool_method(cls, ror_, special), - rxor=bool_method(cls, rxor, special), - ) + new_methods.update( + dict( + eq=comp_method(operator.eq), + ne=comp_method(operator.ne), + lt=comp_method(operator.lt), + gt=comp_method(operator.gt), + le=comp_method(operator.le), + ge=comp_method(operator.ge), ) + ) - if special: - dunderize = lambda x: f"__{x.strip('_')}__" - else: - dunderize = lambda x: x - new_methods = {dunderize(k): v for k, v in new_methods.items()} + new_methods = {k.strip("_"): v for k, v in new_methods.items()} return new_methods From afd25cc08b93fad77f9ef44b222acb4f024b1732 Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 10 Oct 2020 15:52:27 -0700 Subject: [PATCH 15/17] CLN: remove get_op_name --- pandas/core/ops/__init__.py | 29 +++-------------------------- 1 file changed, 3 insertions(+), 26 deletions(-) diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py index 3c0b4dfda5446..8394be8621ac5 100644 --- a/pandas/core/ops/__init__.py +++ b/pandas/core/ops/__init__.py @@ -141,29 +141,6 @@ def _maybe_match_name(a, b): return None -# ----------------------------------------------------------------------------- - - -def _get_op_name(op, special: bool) -> str: - """ - Find the name to attach to this method according to conventions - for special and non-special methods. 
- - Parameters - ---------- - op : binary operator - special : bool - - Returns - ------- - op_name : str - """ - opname = op.__name__.strip("_") - if special: - opname = f"__{opname}__" - return opname - - # ----------------------------------------------------------------------------- # Masking NA values and fallbacks for operations numpy does not support @@ -234,7 +211,7 @@ def align_method_SERIES(left: "Series", right, align_asobject: bool = False): def flex_method_SERIES(op): - name = _get_op_name(op, False) + name = op.__name__.strip("_") doc = _make_flex_doc(name, "series") @Appender(doc) @@ -467,7 +444,7 @@ def _maybe_align_series_as_frame(frame: "DataFrame", series: "Series", axis: int def flex_arith_method_FRAME(op): - op_name = _get_op_name(op, False) + op_name = op.__name__.strip("_") default_axis = "columns" na_op = get_array_op(op) @@ -511,7 +488,7 @@ def f(self, other, axis=default_axis, level=None, fill_value=None): def flex_comp_method_FRAME(op): - op_name = _get_op_name(op, False) + op_name = op.__name__.strip("_") default_axis = "columns" # because we are "flex" doc = _flex_comp_doc_FRAME.format( From fdc68db081d26fa03b26f08cac8965d7c70086a4 Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 10 Oct 2020 16:58:04 -0700 Subject: [PATCH 16/17] mypy fixup --- pandas/core/dtypes/cast.py | 4 +--- pandas/core/frame.py | 3 +-- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 3ad9f195c3cae..1cea817abbaa3 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -496,9 +496,7 @@ def maybe_casted_values(index, codes=None): values, _ = maybe_upcast_putmask(values, mask, np.nan) if issubclass(values_type, DatetimeLikeArrayMixin): - values = values_type( - values, dtype=values_dtype - ) # type: ignore[call-arg] + values = values_type(values, dtype=values_dtype) return values diff --git a/pandas/core/frame.py b/pandas/core/frame.py index ab1ddd40c6372..455bd8fcda823 100644 
--- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -337,7 +337,7 @@ # DataFrame class -class DataFrame(OpsMixin, NDFrame): +class DataFrame(NDFrame, OpsMixin): """ Two-dimensional, size-mutable, potentially heterogeneous tabular data. @@ -421,7 +421,6 @@ def _constructor(self) -> Type[DataFrame]: _constructor_sliced: Type[Series] = Series _deprecations: FrozenSet[str] = NDFrame._deprecations | frozenset([]) _accessors: Set[str] = {"sparse"} - __hash__ = NDFrame.__hash__ @property def _constructor_expanddim(self): From 296e45bd951a029b0afe7e4769c794ba49f8b26b Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 10 Oct 2020 17:33:57 -0700 Subject: [PATCH 17/17] de-privatize --- pandas/core/frame.py | 4 ++-- pandas/core/ops/__init__.py | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 455bd8fcda823..43c87cc919980 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5851,8 +5851,8 @@ def _cmp_method(self, other, op): return self._construct_result(new_data) def _arith_method(self, other, op): - if ops._should_reindex_frame_op(self, other, op, 1, 1, None, None): - return ops._frame_arith_method_with_reindex(self, other, op) + if ops.should_reindex_frame_op(self, other, op, 1, 1, None, None): + return ops.frame_arith_method_with_reindex(self, other, op) axis = 1 # only relevant for Series other case diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py index 8394be8621ac5..87da8f8fa146c 100644 --- a/pandas/core/ops/__init__.py +++ b/pandas/core/ops/__init__.py @@ -348,7 +348,7 @@ def to_series(right): return left, right -def _should_reindex_frame_op( +def should_reindex_frame_op( left: "DataFrame", right, op, axis, default_axis, fill_value, level ) -> bool: """ @@ -374,7 +374,7 @@ def _should_reindex_frame_op( return False -def _frame_arith_method_with_reindex( +def frame_arith_method_with_reindex( left: "DataFrame", right: "DataFrame", op ) -> "DataFrame": """ @@ 
-453,10 +453,10 @@ def flex_arith_method_FRAME(op): @Appender(doc) def f(self, other, axis=default_axis, level=None, fill_value=None): - if _should_reindex_frame_op( + if should_reindex_frame_op( self, other, op, axis, default_axis, fill_value, level ): - return _frame_arith_method_with_reindex(self, other, op) + return frame_arith_method_with_reindex(self, other, op) if isinstance(other, ABCSeries) and fill_value is not None: # TODO: We could allow this in cases where we end up going