From 52fa07aa46633dd283da812fd441f58edee2c568 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 17 Feb 2021 18:01:47 -0800 Subject: [PATCH 1/2] TST: port Dim2CompatTests --- pandas/core/indexes/extension.py | 4 +- pandas/core/ops/mask_ops.py | 2 +- pandas/tests/extension/base/__init__.py | 1 + pandas/tests/extension/base/dim2.py | 217 ++++++++++++++++++++++++ pandas/tests/extension/test_datetime.py | 4 + pandas/tests/extension/test_numpy.py | 4 + pandas/tests/extension/test_period.py | 4 + 7 files changed, 233 insertions(+), 3 deletions(-) create mode 100644 pandas/tests/extension/base/dim2.py diff --git a/pandas/core/indexes/extension.py b/pandas/core/indexes/extension.py index 4150ec745bd2e..301fe51d0f37e 100644 --- a/pandas/core/indexes/extension.py +++ b/pandas/core/indexes/extension.py @@ -240,8 +240,8 @@ def __getitem__(self, key): return type(self)(result, name=self.name) # Unpack to ndarray for MPL compat - # error: "ExtensionArray" has no attribute "_data" - result = result._data # type: ignore[attr-defined] + # error: "ExtensionArray" has no attribute "_ndarray" + result = result._ndarray # type: ignore[attr-defined] # Includes cases where we get a 2D ndarray back for MPL compat deprecate_ndim_indexing(result) diff --git a/pandas/core/ops/mask_ops.py b/pandas/core/ops/mask_ops.py index a9edb2d138246..501bc0159e641 100644 --- a/pandas/core/ops/mask_ops.py +++ b/pandas/core/ops/mask_ops.py @@ -179,6 +179,6 @@ def kleene_and( return result, mask -def raise_for_nan(value, method): +def raise_for_nan(value, method: str): if lib.is_float(value) and np.isnan(value): raise ValueError(f"Cannot perform logical '{method}' with floating NaN") diff --git a/pandas/tests/extension/base/__init__.py b/pandas/tests/extension/base/__init__.py index 9cf3bdab40d0b..910b43a2cd148 100644 --- a/pandas/tests/extension/base/__init__.py +++ b/pandas/tests/extension/base/__init__.py @@ -43,6 +43,7 @@ class TestMyDtype(BaseDtypeTests): """ from pandas.tests.extension.base.casting 
import BaseCastingTests # noqa from pandas.tests.extension.base.constructors import BaseConstructorsTests # noqa +from pandas.tests.extension.base.dim2 import Dim2CompatTests # noqa from pandas.tests.extension.base.dtype import BaseDtypeTests # noqa from pandas.tests.extension.base.getitem import BaseGetitemTests # noqa from pandas.tests.extension.base.groupby import BaseGroupbyTests # noqa diff --git a/pandas/tests/extension/base/dim2.py b/pandas/tests/extension/base/dim2.py new file mode 100644 index 0000000000000..c6455ce15533a --- /dev/null +++ b/pandas/tests/extension/base/dim2.py @@ -0,0 +1,217 @@ +""" +Tests for 2D compatibility. +""" +import numpy as np +import pytest + +from pandas.compat import np_version_under1p17 + +import pandas as pd +from pandas.core.arrays import ( + FloatingArray, + IntegerArray, +) +from pandas.tests.extension.base.base import BaseExtensionTests + + +def maybe_xfail_masked_reductions(arr, request): + if ( + isinstance(arr, (FloatingArray, IntegerArray)) + and np_version_under1p17 + and arr.ndim == 2 + ): + mark = pytest.mark.xfail(reason="masked_reductions does not implement") + request.node.add_marker(mark) + + +class Dim2CompatTests(BaseExtensionTests): + def test_take_2d(self, data): + arr2d = data.reshape(-1, 1) + + result = arr2d.take([0, 0, -1], axis=0) + + expected = data.take([0, 0, -1]).reshape(-1, 1) + self.assert_extension_array_equal(result, expected) + + def test_repr_2d(self, data): + # this could fail in a corner case where an element contained the name + res = repr(data.reshape(1, -1)) + assert res.count(f"<{type(data).__name__}") == 1 + + res = repr(data.reshape(-1, 1)) + assert res.count(f"<{type(data).__name__}") == 1 + + def test_reshape(self, data): + arr2d = data.reshape(-1, 1) + assert arr2d.shape == (data.size, 1) + assert len(arr2d) == len(data) + + arr2d = data.reshape((-1, 1)) + assert arr2d.shape == (data.size, 1) + assert len(arr2d) == len(data) + + with pytest.raises(ValueError): + 
data.reshape((data.size, 2)) + with pytest.raises(ValueError): + data.reshape(data.size, 2) + + def test_getitem_2d(self, data): + arr2d = data.reshape(1, -1) + + result = arr2d[0] + self.assert_extension_array_equal(result, data) + + with pytest.raises(IndexError): + arr2d[1] + + with pytest.raises(IndexError): + arr2d[-2] + + result = arr2d[:] + self.assert_extension_array_equal(result, arr2d) + + result = arr2d[:, :] + self.assert_extension_array_equal(result, arr2d) + + result = arr2d[:, 0] + expected = data[[0]] + self.assert_extension_array_equal(result, expected) + + # dimension-expanding getitem on 1D + result = data[:, np.newaxis] + self.assert_extension_array_equal(result, arr2d.T) + + def test_iter_2d(self, data): + arr2d = data.reshape(1, -1) + + objs = list(iter(arr2d)) + assert len(objs) == arr2d.shape[0] + + for obj in objs: + assert isinstance(obj, type(data)) + assert obj.dtype == data.dtype + assert obj.ndim == 1 + assert len(obj) == arr2d.shape[1] + + def test_concat_2d(self, data): + left = data.reshape(-1, 1) + right = left.copy() + + # axis=0 + result = left._concat_same_type([left, right], axis=0) + expected = data._concat_same_type([data, data]).reshape(-1, 1) + self.assert_extension_array_equal(result, expected) + + # axis=1 + result = left._concat_same_type([left, right], axis=1) + expected = data.repeat(2).reshape(-1, 2) + self.assert_extension_array_equal(result, expected) + + # axis > 1 -> invalid + with pytest.raises(ValueError): + left._concat_same_type([left, right], axis=2) + + @pytest.mark.parametrize("method", ["mean", "median", "var", "std", "sum", "prod"]) + def test_reductions_2d_axis_none(self, data, method, request): + if not hasattr(data, method): + pytest.skip("test is not applicable for this type/dtype") + + arr2d = data.reshape(1, -1) + maybe_xfail_masked_reductions(arr2d, request) + + err_expected = None + err_result = None + try: + expected = getattr(data, method)() + except Exception as err: + # if the 1D reduction is 
invalid, the 2D reduction should be as well + err_expected = err + try: + result = getattr(arr2d, method)(axis=None) + except Exception as err2: + err_result = err2 + + else: + result = getattr(arr2d, method)(axis=None) + + if err_result is not None or err_expected is not None: + assert type(err_result) == type(err_expected) + return + + assert result == expected # TODO: or matching NA + + @pytest.mark.parametrize("method", ["mean", "median", "var", "std", "sum", "prod"]) + def test_reductions_2d_axis0(self, data, method, request): + if not hasattr(data, method): + pytest.skip("test is not applicable for this type/dtype") + + arr2d = data.reshape(1, -1) + maybe_xfail_masked_reductions(arr2d, request) + + kwargs = {} + if method == "std": + # pass ddof=0 so we get all-zero std instead of all-NA std + kwargs["ddof"] = 0 + + try: + result = getattr(arr2d, method)(axis=0, **kwargs) + except Exception as err: + try: + getattr(data, method)() + except Exception as err2: + assert type(err) == type(err2) + return + else: + raise AssertionError("Both reductions should raise or neither") + + if method in ["mean", "median", "sum", "prod"]: + # std and var are not dtype-preserving + expected = data + if method in ["sum", "prod"] and data.dtype.kind in ["i", "u"]: + # FIXME: kludge + if data.dtype.kind == "i": + dtype = pd.Int64Dtype + else: + dtype = pd.UInt64Dtype + + expected = data.astype(dtype) + if type(expected) != type(data): + mark = pytest.mark.xfail( + reason="IntegerArray.astype is broken GH#38983" + ) + request.node.add_marker(mark) + assert type(expected) == type(data), type(expected) + assert dtype == expected.dtype + + self.assert_extension_array_equal(result, expected) + elif method == "std": + self.assert_extension_array_equal(result, data - data) + # punt on method == "var" + + @pytest.mark.parametrize("method", ["mean", "median", "var", "std", "sum", "prod"]) + def test_reductions_2d_axis1(self, data, method, request): + if not hasattr(data, method): + 
pytest.skip("test is not applicable for this type/dtype") + + arr2d = data.reshape(1, -1) + maybe_xfail_masked_reductions(arr2d, request) + + try: + result = getattr(arr2d, method)(axis=1) + except Exception as err: + try: + getattr(data, method)() + except Exception as err2: + assert type(err) == type(err2) + return + else: + raise AssertionError("Both reductions should raise or neither") + + # not necessarily type/dtype-preserving, so weaker assertions + assert result.shape == (1,) + expected_scalar = getattr(data, method)() + if pd.isna(result[0]): + # TODO: require matching NA + assert pd.isna(expected_scalar), expected_scalar + else: + assert result[0] == expected_scalar diff --git a/pandas/tests/extension/test_datetime.py b/pandas/tests/extension/test_datetime.py index 6c5963402b3d7..33589027c0d0f 100644 --- a/pandas/tests/extension/test_datetime.py +++ b/pandas/tests/extension/test_datetime.py @@ -235,3 +235,7 @@ class TestGroupby(BaseDatetimeTests, base.BaseGroupbyTests): class TestPrinting(BaseDatetimeTests, base.BasePrintingTests): pass + + +class Test2DCompat(BaseDatetimeTests, base.Dim2CompatTests): + pass diff --git a/pandas/tests/extension/test_numpy.py b/pandas/tests/extension/test_numpy.py index 17f29e02a2883..ef6a6e6098a19 100644 --- a/pandas/tests/extension/test_numpy.py +++ b/pandas/tests/extension/test_numpy.py @@ -415,3 +415,7 @@ def test_setitem_loc_iloc_slice(self, data): @skip_nested class TestParsing(BaseNumPyTests, base.BaseParsingTests): pass + + +class Test2DCompat(BaseNumPyTests, base.Dim2CompatTests): + pass diff --git a/pandas/tests/extension/test_period.py b/pandas/tests/extension/test_period.py index bbb991259ac29..4c845055b56c4 100644 --- a/pandas/tests/extension/test_period.py +++ b/pandas/tests/extension/test_period.py @@ -184,3 +184,7 @@ class TestParsing(BasePeriodTests, base.BaseParsingTests): @pytest.mark.parametrize("engine", ["c", "python"]) def test_EA_types(self, engine, data): super().test_EA_types(engine, data) + +
+class Test2DCompat(BasePeriodTests, base.Dim2CompatTests): + pass From 84613c7826a5cebfa8da62cc150277e05b6822f5 Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 7 Mar 2021 19:55:20 -0800 Subject: [PATCH 2/2] ENH: NDArrayBackedExtensionArray.fillna(method) with 2d --- pandas/core/arrays/_mixins.py | 8 +++++-- pandas/core/arrays/period.py | 8 +++++++ pandas/core/missing.py | 12 ++++------- pandas/tests/arrays/test_datetimes.py | 31 +++++++++++++++++++++++++++ pandas/tests/extension/base/dim2.py | 11 ++++++++++ 5 files changed, 60 insertions(+), 10 deletions(-) diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py index 4615cb4ec7abd..d54d1855ac2f8 100644 --- a/pandas/core/arrays/_mixins.py +++ b/pandas/core/arrays/_mixins.py @@ -278,8 +278,12 @@ def fillna( if mask.any(): if method is not None: - func = missing.get_fill_func(method) - new_values, _ = func(self._ndarray.copy(), limit=limit, mask=mask) + # TODO: check value is None + # (for now) when self.ndim == 2, we assume axis=0 + func = missing.get_fill_func(method, ndim=self.ndim) + new_values, _ = func(self._ndarray.T.copy(), limit=limit, mask=mask.T) + new_values = new_values.T + # TODO: PandasArray didn't used to copy, need tests for this new_values = self._from_backing_data(new_values) else: diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 96a159c0804c9..7e9e13400e11f 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -639,6 +639,14 @@ def searchsorted(self, value, side="left", sorter=None) -> np.ndarray: m8arr = self._ndarray.view("M8[ns]") return m8arr.searchsorted(value, side=side, sorter=sorter) + def fillna(self, value=None, method=None, limit=None) -> PeriodArray: + if method is not None: + # view as dt64 so we get treated as timelike in core.missing + dta = self.view("M8[ns]") + result = dta.fillna(value=value, method=method, limit=limit) + return result.view(self.dtype) + return super().fillna(value=value, method=method, 
limit=limit) + # ------------------------------------------------------------------ # Arithmetic Methods diff --git a/pandas/core/missing.py b/pandas/core/missing.py index 1b5a7237b5287..dc42a175409c2 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -646,8 +646,6 @@ def interpolate_2d( values, ) - orig_values = values - transf = (lambda x: x) if axis == 0 else (lambda x: x.T) # reshape a 1 dim if needed @@ -669,10 +667,6 @@ def interpolate_2d( if ndim == 1: result = result[0] - if orig_values.dtype.kind in ["m", "M"]: - # convert float back to datetime64/timedelta64 - result = result.view(orig_values.dtype) - return result @@ -755,9 +749,11 @@ def _backfill_2d(values, limit=None, mask=None): _fill_methods = {"pad": _pad_1d, "backfill": _backfill_1d} -def get_fill_func(method): +def get_fill_func(method, ndim: int = 1): method = clean_fill_method(method) - return _fill_methods[method] + if ndim == 1: + return _fill_methods[method] + return {"pad": _pad_2d, "backfill": _backfill_2d}[method] def clean_reindex_fill_method(method): diff --git a/pandas/tests/arrays/test_datetimes.py b/pandas/tests/arrays/test_datetimes.py index d159d76030250..8e6c330475e68 100644 --- a/pandas/tests/arrays/test_datetimes.py +++ b/pandas/tests/arrays/test_datetimes.py @@ -195,6 +195,37 @@ def test_fillna_preserves_tz(self, method): assert arr[2] is pd.NaT assert dti[2] == pd.Timestamp("2000-01-03", tz="US/Central") + def test_fillna_2d(self): + dti = pd.date_range("2016-01-01", periods=6, tz="US/Pacific") + dta = dti._data.reshape(3, 2).copy() + dta[0, 1] = pd.NaT + dta[1, 0] = pd.NaT + + res1 = dta.fillna(method="pad") + expected1 = dta.copy() + expected1[1, 0] = dta[0, 0] + tm.assert_extension_array_equal(res1, expected1) + + res2 = dta.fillna(method="backfill") + expected2 = dta.copy() + expected2 = dta.copy() + expected2[1, 0] = dta[2, 0] + expected2[0, 1] = dta[1, 1] + tm.assert_extension_array_equal(res2, expected2) + + # with different ordering for underlying 
ndarray; behavior should + # be unchanged + dta2 = dta._from_backing_data(dta._ndarray.copy(order="F")) + assert dta2._ndarray.flags["F_CONTIGUOUS"] + assert not dta2._ndarray.flags["C_CONTIGUOUS"] + tm.assert_extension_array_equal(dta, dta2) + + res3 = dta2.fillna(method="pad") + tm.assert_extension_array_equal(res3, expected1) + + res4 = dta2.fillna(method="backfill") + tm.assert_extension_array_equal(res4, expected2) + def test_array_interface_tz(self): tz = "US/Central" data = DatetimeArray(pd.date_range("2017", periods=2, tz=tz)) diff --git a/pandas/tests/extension/base/dim2.py b/pandas/tests/extension/base/dim2.py index fbe2537e8a7bf..073880d79d872 100644 --- a/pandas/tests/extension/base/dim2.py +++ b/pandas/tests/extension/base/dim2.py @@ -131,6 +131,17 @@ def test_concat_2d(self, data): with pytest.raises(ValueError): left._concat_same_type([left, right], axis=2) + @pytest.mark.parametrize("method", ["backfill", "pad"]) + def test_fillna_2d_method(self, data_missing, method): + arr = data_missing.repeat(2).reshape(2, 2) + assert arr[0].isna().all() + assert not arr[1].isna().any() + + result = arr.fillna(method=method) + + expected = data_missing.fillna(method=method).repeat(2).reshape(2, 2) + self.assert_extension_array_equal(result, expected) + @pytest.mark.parametrize("method", ["mean", "median", "var", "std", "sum", "prod"]) def test_reductions_2d_axis_none(self, data, method, request): if not hasattr(data, method):