diff --git a/pandas/tests/frame/methods/test_equals.py b/pandas/tests/frame/methods/test_equals.py index c024390297fec..de2509ed91be2 100644 --- a/pandas/tests/frame/methods/test_equals.py +++ b/pandas/tests/frame/methods/test_equals.py @@ -1,4 +1,6 @@ -from pandas import DataFrame +import numpy as np + +from pandas import DataFrame, date_range import pandas._testing as tm @@ -21,3 +23,56 @@ def test_equals_different_blocks(self): tm.assert_frame_equal(df0, df1) assert df0.equals(df1) assert df1.equals(df0) + + def test_equals(self): + # Add object dtype column with nans + index = np.random.random(10) + df1 = DataFrame(np.random.random(10), index=index, columns=["floats"]) + df1["text"] = "the sky is so blue. we could use more chocolate.".split() + df1["start"] = date_range("2000-1-1", periods=10, freq="T") + df1["end"] = date_range("2000-1-1", periods=10, freq="D") + df1["diff"] = df1["end"] - df1["start"] + df1["bool"] = np.arange(10) % 3 == 0 + df1.loc[::2] = np.nan + df2 = df1.copy() + assert df1["text"].equals(df2["text"]) + assert df1["start"].equals(df2["start"]) + assert df1["end"].equals(df2["end"]) + assert df1["diff"].equals(df2["diff"]) + assert df1["bool"].equals(df2["bool"]) + assert df1.equals(df2) + assert not df1.equals(object) + + # different dtype + different = df1.copy() + different["floats"] = different["floats"].astype("float32") + assert not df1.equals(different) + + # different index + different_index = -index + different = df2.set_index(different_index) + assert not df1.equals(different) + + # different columns + different = df2.copy() + different.columns = df2.columns[::-1] + assert not df1.equals(different) + + # DatetimeIndex + index = date_range("2000-1-1", periods=10, freq="T") + df1 = df1.set_index(index) + df2 = df1.copy() + assert df1.equals(df2) + + # MultiIndex + df3 = df1.set_index(["text"], append=True) + df2 = df1.set_index(["text"], append=True) + assert df3.equals(df2) + + df2 = df1.set_index(["floats"], append=True) + assert not df3.equals(df2) + + # NaN in index + df3 = df1.set_index(["floats"], append=True) + df2 = df1.set_index(["floats"], append=True) + assert df3.equals(df2) diff --git a/pandas/tests/frame/methods/test_head_tail.py b/pandas/tests/frame/methods/test_head_tail.py index 93763bc12ce0d..fa28f7d3e16a2 100644 --- a/pandas/tests/frame/methods/test_head_tail.py +++ b/pandas/tests/frame/methods/test_head_tail.py @@ -4,6 +4,30 @@ import pandas._testing as tm +def test_head_tail_generic(index, frame_or_series): + # GH#5370 + + ndim = 2 if frame_or_series is DataFrame else 1 + shape = (len(index),) * ndim + vals = np.random.randn(*shape) + obj = frame_or_series(vals, index=index) + + tm.assert_equal(obj.head(), obj.iloc[:5]) + tm.assert_equal(obj.tail(), obj.iloc[-5:]) + + # 0-len + tm.assert_equal(obj.head(0), obj.iloc[0:0]) + tm.assert_equal(obj.tail(0), obj.iloc[0:0]) + + # bounded + tm.assert_equal(obj.head(len(obj) + 1), obj) + tm.assert_equal(obj.tail(len(obj) + 1), obj) + + # neg index + tm.assert_equal(obj.head(-3), obj.head(len(index) - 3)) + tm.assert_equal(obj.tail(-3), obj.tail(len(index) - 3)) + + def test_head_tail(float_frame): tm.assert_frame_equal(float_frame.head(), float_frame[:5]) tm.assert_frame_equal(float_frame.tail(), float_frame[-5:]) diff --git a/pandas/tests/generic/methods/test_first_valid_index.py b/pandas/tests/generic/methods/test_first_valid_index.py index bca3452c3c458..8d021f0e3954e 100644 --- a/pandas/tests/generic/methods/test_first_valid_index.py +++ b/pandas/tests/generic/methods/test_first_valid_index.py @@ -9,10 +9,9 @@ class TestFirstValidIndex: - @pytest.mark.parametrize("klass", [Series, DataFrame]) - def test_first_valid_index_single_nan(self, klass): + def test_first_valid_index_single_nan(self, frame_or_series): # GH#9752 Series/DataFrame should both return None, not raise - obj = klass([np.nan]) + obj = frame_or_series([np.nan]) assert obj.first_valid_index() is None assert obj.iloc[:0].first_valid_index() is None diff --git a/pandas/tests/generic/methods/test_pipe.py b/pandas/tests/generic/methods/test_pipe.py index 59e5edc4b8bb5..b378600634bf0 100644 --- a/pandas/tests/generic/methods/test_pipe.py +++ b/pandas/tests/generic/methods/test_pipe.py @@ -5,11 +5,10 @@ class TestPipe: - @pytest.mark.parametrize("klass", [Series, DataFrame]) - def test_pipe(self, klass): + def test_pipe(self, frame_or_series): obj = DataFrame({"A": [1, 2, 3]}) expected = DataFrame({"A": [1, 4, 9]}) - if klass is Series: + if frame_or_series is Series: obj = obj["A"] expected = expected["A"] @@ -17,20 +16,18 @@ def test_pipe(self, klass): result = obj.pipe(f, 2) tm.assert_equal(result, expected) - @pytest.mark.parametrize("klass", [Series, DataFrame]) - def test_pipe_tuple(self, klass): + def test_pipe_tuple(self, frame_or_series): obj = DataFrame({"A": [1, 2, 3]}) - if klass is Series: + if frame_or_series is Series: obj = obj["A"] f = lambda x, y: y result = obj.pipe((f, "y"), 0) tm.assert_equal(result, obj) - @pytest.mark.parametrize("klass", [Series, DataFrame]) - def test_pipe_tuple_error(self, klass): + def test_pipe_tuple_error(self, frame_or_series): obj = DataFrame({"A": [1, 2, 3]}) - if klass is Series: + if frame_or_series is Series: obj = obj["A"] f = lambda x, y: y diff --git a/pandas/tests/generic/methods/test_reorder_levels.py b/pandas/tests/generic/methods/test_reorder_levels.py index 8bb6417e56659..6bfbf089a6108 100644 --- a/pandas/tests/generic/methods/test_reorder_levels.py +++ b/pandas/tests/generic/methods/test_reorder_levels.py @@ -1,20 +1,19 @@ import numpy as np import pytest -from pandas import DataFrame, MultiIndex, Series +from pandas import DataFrame, MultiIndex import pandas._testing as tm class TestReorderLevels: - @pytest.mark.parametrize("klass", [Series, DataFrame]) - def test_reorder_levels(self, klass): + def test_reorder_levels(self, frame_or_series): index = MultiIndex( levels=[["bar"], ["one", "two", "three"], [0, 1]], codes=[[0, 0, 0, 0, 0, 0], [0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1]], names=["L0", "L1", "L2"], ) df = DataFrame({"A": np.arange(6), "B": np.arange(6)}, index=index) - obj = df if klass is DataFrame else df["A"] + obj = df if frame_or_series is DataFrame else df["A"] # no change, position result = obj.reorder_levels([0, 1, 2]) @@ -32,7 +31,7 @@ def test_reorder_levels(self, klass): names=["L1", "L2", "L0"], ) expected = DataFrame({"A": np.arange(6), "B": np.arange(6)}, index=e_idx) - expected = expected if klass is DataFrame else expected["A"] + expected = expected if frame_or_series is DataFrame else expected["A"] tm.assert_equal(result, expected) result = obj.reorder_levels([0, 0, 0]) @@ -42,7 +41,7 @@ def test_reorder_levels(self, klass): names=["L0", "L0", "L0"], ) expected = DataFrame({"A": np.arange(6), "B": np.arange(6)}, index=e_idx) - expected = expected if klass is DataFrame else expected["A"] + expected = expected if frame_or_series is DataFrame else expected["A"] tm.assert_equal(result, expected) result = obj.reorder_levels(["L0", "L0", "L0"]) diff --git a/pandas/tests/generic/methods/test_sample.py b/pandas/tests/generic/methods/test_sample.py index 7303dad9170ed..b26a3785f918d 100644 --- a/pandas/tests/generic/methods/test_sample.py +++ b/pandas/tests/generic/methods/test_sample.py @@ -155,22 +155,20 @@ def test_sample_none_weights(self, obj): ), ], ) - @pytest.mark.parametrize("klass", [Series, DataFrame]) - def test_sample_random_state(self, func_str, arg, klass): + def test_sample_random_state(self, func_str, arg, frame_or_series): # GH#32503 obj = DataFrame({"col1": range(10, 20), "col2": range(20, 30)}) - if klass is Series: + if frame_or_series is Series: obj = obj["col1"] result = obj.sample(n=3, random_state=eval(func_str)(arg)) expected = obj.sample(n=3, random_state=com.random_state(eval(func_str)(arg))) tm.assert_equal(result, expected) - @pytest.mark.parametrize("klass", [Series, DataFrame]) - def test_sample_upsampling_without_replacement(self, klass): + def test_sample_upsampling_without_replacement(self, frame_or_series): # GH#27451 obj = DataFrame({"A": list("abc")}) - if klass is Series: + if frame_or_series is Series: obj = obj["A"] msg = ( diff --git a/pandas/tests/generic/test_generic.py b/pandas/tests/generic/test_generic.py index 45601abc95fe6..930c48cbdc214 100644 --- a/pandas/tests/generic/test_generic.py +++ b/pandas/tests/generic/test_generic.py @@ -5,8 +5,7 @@ from pandas.core.dtypes.common import is_scalar -import pandas as pd -from pandas import DataFrame, Series, date_range +from pandas import DataFrame, Series import pandas._testing as tm # ---------------------------------------------------------------------- @@ -248,31 +247,6 @@ def test_metadata_propagation(self): self.check_metadata(v1 & v2) self.check_metadata(v1 | v2) - def test_head_tail(self, index): - # GH5370 - - o = self._construct(shape=len(index)) - - axis = o._get_axis_name(0) - setattr(o, axis, index) - - o.head() - - self._compare(o.head(), o.iloc[:5]) - self._compare(o.tail(), o.iloc[-5:]) - - # 0-len - self._compare(o.head(0), o.iloc[0:0]) - self._compare(o.tail(0), o.iloc[0:0]) - - # bounded - self._compare(o.head(len(o) + 1), o) - self._compare(o.tail(len(o) + 1), o) - - # neg index - self._compare(o.head(-3), o.head(len(index) - 3)) - self._compare(o.tail(-3), o.tail(len(index) - 3)) - def test_size_compat(self): # GH8846 # size property should be defined @@ -460,77 +434,23 @@ def test_take_invalid_kwargs(self): obj.take(indices, mode="clip") @pytest.mark.parametrize("is_copy", [True, False]) - def test_depr_take_kwarg_is_copy(self, is_copy): + def test_depr_take_kwarg_is_copy(self, is_copy, frame_or_series): # GH 27357 - df = DataFrame({"A": [1, 2, 3]}) + obj = DataFrame({"A": [1, 2, 3]}) + if frame_or_series is Series: + obj = obj["A"] + msg = ( "is_copy is deprecated and will be removed in a future version. " "'take' always returns a copy, so there is no need to specify this." ) with tm.assert_produces_warning(FutureWarning) as w: - df.take([0, 1], is_copy=is_copy) + obj.take([0, 1], is_copy=is_copy) assert w[0].message.args[0] == msg - s = Series([1, 2, 3]) - with tm.assert_produces_warning(FutureWarning): - s.take([0, 1], is_copy=is_copy) - - def test_equals(self): - # Add object dtype column with nans - index = np.random.random(10) - df1 = DataFrame(np.random.random(10), index=index, columns=["floats"]) - df1["text"] = "the sky is so blue. we could use more chocolate.".split() - df1["start"] = date_range("2000-1-1", periods=10, freq="T") - df1["end"] = date_range("2000-1-1", periods=10, freq="D") - df1["diff"] = df1["end"] - df1["start"] - df1["bool"] = np.arange(10) % 3 == 0 - df1.loc[::2] = np.nan - df2 = df1.copy() - assert df1["text"].equals(df2["text"]) - assert df1["start"].equals(df2["start"]) - assert df1["end"].equals(df2["end"]) - assert df1["diff"].equals(df2["diff"]) - assert df1["bool"].equals(df2["bool"]) - assert df1.equals(df2) - assert not df1.equals(object) - - # different dtype - different = df1.copy() - different["floats"] = different["floats"].astype("float32") - assert not df1.equals(different) - - # different index - different_index = -index - different = df2.set_index(different_index) - assert not df1.equals(different) - - # different columns - different = df2.copy() - different.columns = df2.columns[::-1] - assert not df1.equals(different) - - # DatetimeIndex - index = pd.date_range("2000-1-1", periods=10, freq="T") - df1 = df1.set_index(index) - df2 = df1.copy() - assert df1.equals(df2) - - # MultiIndex - df3 = df1.set_index(["text"], append=True) - df2 = df1.set_index(["text"], append=True) - assert df3.equals(df2) - - df2 = df1.set_index(["floats"], append=True) - assert not df3.equals(df2) - - # NaN in index - df3 = df1.set_index(["floats"], append=True) - df2 = df1.set_index(["floats"], append=True) - assert df3.equals(df2) - - @pytest.mark.parametrize("box", [pd.Series, pd.DataFrame]) - def test_axis_classmethods(self, box): + def test_axis_classmethods(self, frame_or_series): + box = frame_or_series obj = box(dtype=object) values = box._AXIS_TO_AXIS_NUMBER.keys() for v in values: @@ -538,24 +458,23 @@ def test_axis_classmethods(self, box): assert obj._get_axis_name(v) == box._get_axis_name(v) assert obj._get_block_manager_axis(v) == box._get_block_manager_axis(v) - @pytest.mark.parametrize("box", [pd.Series, pd.DataFrame]) - def test_axis_names_deprecated(self, box): + def test_axis_names_deprecated(self, frame_or_series): # GH33637 + box = frame_or_series obj = box(dtype=object) with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): obj._AXIS_NAMES - @pytest.mark.parametrize("box", [pd.Series, pd.DataFrame]) - def test_axis_numbers_deprecated(self, box): + def test_axis_numbers_deprecated(self, frame_or_series): # GH33637 + box = frame_or_series obj = box(dtype=object) with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): obj._AXIS_NUMBERS - @pytest.mark.parametrize("as_frame", [True, False]) - def test_flags_identity(self, as_frame): + def test_flags_identity(self, frame_or_series): s = Series([1, 2]) - if as_frame: + if frame_or_series is DataFrame: s = s.to_frame() assert s.flags is s.flags