From f65492e78df01f3c0bce87f4e4a06cb950994441 Mon Sep 17 00:00:00 2001 From: Martin Winkel Date: Sat, 21 Mar 2020 14:45:25 +0100 Subject: [PATCH 1/4] [#31989] pandas/conftest.py indices fixture refactoring From 1c9a2bbdb8f695254d18724853730de1e686c44b Mon Sep 17 00:00:00 2001 From: Martin Winkel Date: Sat, 21 Mar 2020 15:22:20 +0100 Subject: [PATCH 2/4] replace tm.all_index_generator with indices fixture --- pandas/_testing.py | 26 -------------------- pandas/tests/generic/test_frame.py | 12 +++++++--- pandas/tests/generic/test_generic.py | 11 ++++----- pandas/tests/generic/test_series.py | 33 ++++++++++++++++---------- pandas/tests/indexing/test_indexing.py | 29 ++++++++++++---------- pandas/tests/series/test_apply.py | 10 +++++--- 6 files changed, 58 insertions(+), 63 deletions(-) diff --git a/pandas/_testing.py b/pandas/_testing.py index f96e3872eb8bd..a0995d88f85db 100644 --- a/pandas/_testing.py +++ b/pandas/_testing.py @@ -1683,32 +1683,6 @@ def _make_timeseries(start="2000-01-01", end="2000-12-31", freq="1D", seed=None) return df -def all_index_generator(k=10): - """ - Generator which can be iterated over to get instances of all the various - index classes. - - Parameters - ---------- - k: length of each of the index instances - """ - all_make_index_funcs = [ - makeIntIndex, - makeFloatIndex, - makeStringIndex, - makeUnicodeIndex, - makeDateIndex, - makePeriodIndex, - makeTimedeltaIndex, - makeBoolIndex, - makeRangeIndex, - makeIntervalIndex, - makeCategoricalIndex, - ] - for make_index_func in all_make_index_funcs: - yield make_index_func(k=k) - - def index_subclass_makers_generator(): make_index_funcs = [ makeDateIndex, diff --git a/pandas/tests/generic/test_frame.py b/pandas/tests/generic/test_frame.py index 631f484cfc22a..178782c299483 100644 --- a/pandas/tests/generic/test_frame.py +++ b/pandas/tests/generic/test_frame.py @@ -7,6 +7,8 @@ import pandas.util._test_decorators as td +from pandas.core.dtypes.generic import ABCMultiIndex + import pandas as pd from pandas import DataFrame, MultiIndex, Series, date_range import pandas._testing as tm @@ -245,8 +247,12 @@ class TestToXArray: and LooseVersion(xarray.__version__) < LooseVersion("0.10.0"), reason="xarray >= 0.10.0 required", ) - @pytest.mark.parametrize("index", tm.all_index_generator(3)) - def test_to_xarray_index_types(self, index): + def test_to_xarray_index_types(self, indices): + if isinstance(indices, ABCMultiIndex): + pytest.skip("MultiIndex is tested separately") + if len(indices) == 0: + pytest.skip("Test doesn't make sense for empty index") + from xarray import Dataset df = DataFrame( @@ -262,7 +268,7 @@ def test_to_xarray_index_types(self, index): } ) - df.index = index + df.index = indices[:3] df.index.name = "foo" df.columns.name = "bar" result = df.to_xarray() diff --git a/pandas/tests/generic/test_generic.py b/pandas/tests/generic/test_generic.py index f6005a0f839a3..1a4a0b1678aa4 100644 --- a/pandas/tests/generic/test_generic.py +++ b/pandas/tests/generic/test_generic.py @@ -249,14 +249,13 @@ def test_metadata_propagation(self): self.check_metadata(v1 & v2) self.check_metadata(v1 | v2) - @pytest.mark.parametrize("index", tm.all_index_generator(10)) - def test_head_tail(self, index): + def test_head_tail(self, indices): # GH5370 - o = self._construct(shape=10) + o = self._construct(shape=len(indices)) axis = o._get_axis_name(0) - setattr(o, axis, index) + setattr(o, axis, indices) o.head() @@ -272,8 +271,8 @@ def test_head_tail(self, index): self._compare(o.tail(len(o) + 1), o) # neg index - self._compare(o.head(-3), o.head(7)) - self._compare(o.tail(-3), o.tail(7)) + self._compare(o.head(-3), o.head(len(indices) - 3)) + self._compare(o.tail(-3), o.tail(len(indices) - 3)) def test_sample(self): # Fixes issue: 2419 diff --git a/pandas/tests/generic/test_series.py b/pandas/tests/generic/test_series.py index 388bb8e3f636d..12f9500d38ba1 100644 --- a/pandas/tests/generic/test_series.py +++ b/pandas/tests/generic/test_series.py @@ -10,6 +10,7 @@ from pandas import MultiIndex, Series, date_range import pandas._testing as tm +from ...core.dtypes.generic import ABCMultiIndex from .test_generic import Generic try: @@ -223,15 +224,17 @@ class TestToXArray: and LooseVersion(xarray.__version__) < LooseVersion("0.10.0"), reason="xarray >= 0.10.0 required", ) - @pytest.mark.parametrize("index", tm.all_index_generator(6)) - def test_to_xarray_index_types(self, index): + def test_to_xarray_index_types(self, indices): + if isinstance(indices, ABCMultiIndex): + pytest.skip("MultiIndex is tested separately") + from xarray import DataArray - s = Series(range(6), index=index) + s = Series(range(len(indices)), index=indices, dtype="object") s.index.name = "foo" result = s.to_xarray() repr(result) - assert len(result) == 6 + assert len(result) == len(indices) assert len(result.coords) == 1 tm.assert_almost_equal(list(result.coords.keys()), ["foo"]) assert isinstance(result, DataArray) @@ -240,17 +243,9 @@ def test_to_xarray_index_types(self, index): tm.assert_series_equal(result.to_series(), s, check_index_type=False) @td.skip_if_no("xarray", min_version="0.7.0") - def test_to_xarray(self): + def test_to_xarray_multiindex(self): from xarray import DataArray - s = Series([], dtype=object) - s.index.name = "foo" - result = s.to_xarray() - assert len(result) == 0 - assert len(result.coords) == 1 - tm.assert_almost_equal(list(result.coords.keys()), ["foo"]) - assert isinstance(result, DataArray) - s = Series(range(6)) s.index.name = "foo" s.index = pd.MultiIndex.from_product( @@ -261,3 +256,15 @@ def test_to_xarray(self): tm.assert_almost_equal(list(result.coords.keys()), ["one", "two"]) assert isinstance(result, DataArray) tm.assert_series_equal(result.to_series(), s) + + @td.skip_if_no("xarray", min_version="0.7.0") + def test_to_xarray(self): + from xarray import DataArray + + s = Series([], dtype=object) + s.index.name = "foo" + result = s.to_xarray() + assert len(result) == 0 + assert len(result.coords) == 1 + tm.assert_almost_equal(list(result.coords.keys()), ["foo"]) + assert isinstance(result, DataArray) diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index 8af0fe548e48a..67ba35d3823f4 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -52,9 +52,6 @@ def test_setitem_ndarray_1d(self): with pytest.raises(ValueError): df[2:5] = np.arange(1, 4) * 1j - @pytest.mark.parametrize( - "index", tm.all_index_generator(5), ids=lambda x: type(x).__name__ - ) @pytest.mark.parametrize( "obj", [ @@ -71,9 +68,9 @@ def test_setitem_ndarray_1d(self): (lambda x: x.iloc, "iloc"), ], ) - def test_getitem_ndarray_3d(self, index, obj, idxr, idxr_id): + def test_getitem_ndarray_3d(self, indices, obj, idxr, idxr_id): # GH 25567 - obj = obj(index) + obj = obj(indices) idxr = idxr(obj) nd3 = np.random.randint(5, size=(2, 2, 2)) @@ -83,16 +80,16 @@ def test_getitem_ndarray_3d(self, index, obj, idxr, idxr_id): "Cannot index with multidimensional key", r"Wrong number of dimensions. values.ndim != ndim \[3 != 1\]", "Index data must be 1-dimensional", + "positional indexers are out-of-bounds", + "Indexing a MultiIndex with a multidimensional key is not implemented", ] ) - with pytest.raises(ValueError, match=msg): + potential_errors = (IndexError, ValueError, NotImplementedError) + with pytest.raises(potential_errors, match=msg): with tm.assert_produces_warning(DeprecationWarning, check_stacklevel=False): idxr[nd3] - @pytest.mark.parametrize( - "index", tm.all_index_generator(5), ids=lambda x: type(x).__name__ - ) @pytest.mark.parametrize( "obj", [ @@ -109,9 +106,9 @@ def test_getitem_ndarray_3d(self, index, obj, idxr, idxr_id): (lambda x: x.iloc, "iloc"), ], ) - def test_setitem_ndarray_3d(self, index, obj, idxr, idxr_id): + def test_setitem_ndarray_3d(self, indices, obj, idxr, idxr_id): # GH 25567 - obj = obj(index) + obj = obj(indices) idxr = idxr(obj) nd3 = np.random.randint(5, size=(2, 2, 2)) @@ -119,7 +116,7 @@ def test_setitem_ndarray_3d(self, index, obj, idxr, idxr_id): err = ValueError msg = f"Cannot set values with ndim > {obj.ndim}" elif ( - isinstance(index, pd.IntervalIndex) + isinstance(indices, pd.IntervalIndex) and idxr_id == "setitem" and obj.ndim == 1 ): @@ -131,6 +128,14 @@ def test_setitem_ndarray_3d(self, index, obj, idxr, idxr_id): err = ValueError msg = r"Buffer has wrong number of dimensions \(expected 1, got 3\)|" + if ( + (len(indices) == 0) + and (idxr_id == "iloc") + and isinstance(obj, pd.DataFrame) + ): + # TODO: Seems to be bugged + pytest.xfail("This doesn't raise") + with pytest.raises(err, match=msg): idxr[nd3] = 0 diff --git a/pandas/tests/series/test_apply.py b/pandas/tests/series/test_apply.py index a4c55a80a9f0f..dec630c5c4a01 100644 --- a/pandas/tests/series/test_apply.py +++ b/pandas/tests/series/test_apply.py @@ -4,6 +4,8 @@ import numpy as np import pytest +from pandas.core.dtypes.generic import ABCMultiIndex + import pandas as pd from pandas import DataFrame, Index, Series, isna import pandas._testing as tm @@ -514,9 +516,11 @@ def test_map(self, datetime_series): exp = Series([np.nan, "B", "C", "D"]) tm.assert_series_equal(a.map(c), exp) - @pytest.mark.parametrize("index", tm.all_index_generator(10)) - def test_map_empty(self, index): - s = Series(index) + def test_map_empty(self, indices): + if isinstance(indices, ABCMultiIndex): + pytest.skip("Initializing a Series from a MultiIndex is not supported") + + s = Series(indices) result = s.map({}) expected = pd.Series(np.nan, index=s.index) From 737d59058ab84653314b33bbbd24884870989776 Mon Sep 17 00:00:00 2001 From: Martin Winkel Date: Sat, 21 Mar 2020 21:30:37 +0100 Subject: [PATCH 3/4] created separate test for xfailing test --- pandas/tests/indexing/test_indexing.py | 27 ++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index 67ba35d3823f4..944832344074c 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -107,6 +107,14 @@ def test_getitem_ndarray_3d(self, indices, obj, idxr, idxr_id): ], ) def test_setitem_ndarray_3d(self, indices, obj, idxr, idxr_id): + if ( + (len(indices) == 0) + and (idxr_id == "iloc") + and isinstance(obj, pd.DataFrame) + ): + # gh-32896 + pytest.skip("This is currently failing. There's an xfailed test below.") + # GH 25567 obj = obj(indices) idxr = idxr(obj) @@ -128,17 +136,20 @@ def test_setitem_ndarray_3d(self, indices, obj, idxr, idxr_id): err = ValueError msg = r"Buffer has wrong number of dimensions \(expected 1, got 3\)|" - if ( - (len(indices) == 0) - and (idxr_id == "iloc") - and isinstance(obj, pd.DataFrame) - ): - # TODO: Seems to be bugged - pytest.xfail("This doesn't raise") - with pytest.raises(err, match=msg): idxr[nd3] = 0 + @pytest.mark.xfail(reason="gh-32896") + def test_setitem_ndarray_3d_does_not_fail_for_iloc_empty_dataframe(self): + # when fixing this, please remove the pytest.skip in test_setitem_ndarray_3d + i = Index([]) + obj = DataFrame(np.random.randn(len(i), len(i)), index=i, columns=i) + nd3 = np.random.randint(5, size=(2, 2, 2)) + + msg = f"Cannot set values with ndim > {obj.ndim}" + with pytest.raises(ValueError, match=msg): + obj.iloc[nd3] = 0 + def test_inf_upcast(self): # GH 16957 # We should be able to use np.inf as a key From 5e09cc3a0e0118f6ce3618bad6f3285c75e3cfce Mon Sep 17 00:00:00 2001 From: Martin Winkel Date: Sat, 21 Mar 2020 22:22:18 +0100 Subject: [PATCH 4/4] fix test --- pandas/tests/indexing/test_indexing.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index 944832344074c..a8a21b0610c14 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -107,6 +107,11 @@ def test_getitem_ndarray_3d(self, indices, obj, idxr, idxr_id): ], ) def test_setitem_ndarray_3d(self, indices, obj, idxr, idxr_id): + # GH 25567 + obj = obj(indices) + idxr = idxr(obj) + nd3 = np.random.randint(5, size=(2, 2, 2)) + if ( (len(indices) == 0) and (idxr_id == "iloc") @@ -115,11 +120,6 @@ def test_setitem_ndarray_3d(self, indices, obj, idxr, idxr_id): # gh-32896 pytest.skip("This is currently failing. There's an xfailed test below.") - # GH 25567 - obj = obj(indices) - idxr = idxr(obj) - nd3 = np.random.randint(5, size=(2, 2, 2)) - if idxr_id == "iloc": err = ValueError msg = f"Cannot set values with ndim > {obj.ndim}"