diff --git a/pandas/tests/frame/test_reshape.py b/pandas/tests/frame/test_reshape.py index eb654be3f12e6..5ce811712b989 100644 --- a/pandas/tests/frame/test_reshape.py +++ b/pandas/tests/frame/test_reshape.py @@ -6,12 +6,11 @@ import pandas as pd from pandas import DataFrame, Index, MultiIndex, Period, Series, Timedelta, date_range -from pandas.tests.frame.common import TestData import pandas.util.testing as tm from pandas.util.testing import assert_frame_equal, assert_series_equal -class TestDataFrameReshape(TestData): +class TestDataFrameReshape: def test_pivot(self): data = { "index": ["A", "B", "C", "C", "B", "A"], @@ -101,8 +100,8 @@ def test_pivot_index_none(self): expected.columns.name = "columns" tm.assert_frame_equal(result, expected) - def test_stack_unstack(self): - df = self.frame.copy() + def test_stack_unstack(self, float_frame): + df = float_frame.copy() df[:] = np.arange(np.prod(df.shape)).reshape(df.shape) stacked = df.stack() @@ -515,13 +514,13 @@ def test_unstack_level_binding(self): assert_frame_equal(result, expected) - def test_unstack_to_series(self): + def test_unstack_to_series(self, float_frame): # check reversibility - data = self.frame.unstack() + data = float_frame.unstack() assert isinstance(data, Series) undo = data.unstack().T - assert_frame_equal(undo, self.frame) + assert_frame_equal(undo, float_frame) # check NA handling data = DataFrame({"x": [1, 2, np.NaN], "y": [3.0, 4, np.NaN]}) diff --git a/pandas/tests/frame/test_sorting.py b/pandas/tests/frame/test_sorting.py index 24833f8c02df0..2b4b20d318adf 100644 --- a/pandas/tests/frame/test_sorting.py +++ b/pandas/tests/frame/test_sorting.py @@ -15,12 +15,11 @@ date_range, ) from pandas.api.types import CategoricalDtype -from pandas.tests.frame.common import TestData import pandas.util.testing as tm from pandas.util.testing import assert_frame_equal, assert_series_equal -class TestDataFrameSorting(TestData): +class TestDataFrameSorting: def test_sort_values(self): frame = DataFrame( [[1, 1, 2], [3, 1, 0], [4, 5, 6]], index=[1, 2, 3], columns=list("ABC") @@ -295,8 +294,8 @@ def test_sort_datetimes(self): df2 = df.sort_values(by=["C", "B"]) assert_frame_equal(df1, df2) - def test_frame_column_inplace_sort_exception(self): - s = self.frame["A"] + def test_frame_column_inplace_sort_exception(self, float_frame): + s = float_frame["A"] with pytest.raises(ValueError, match="This Series is a view"): s.sort_values(inplace=True) @@ -379,7 +378,7 @@ def test_sort_nat(self): tm.assert_frame_equal(sorted_df, expected) -class TestDataFrameSortIndexKinds(TestData): +class TestDataFrameSortIndexKinds: def test_sort_index_multicolumn(self): A = np.arange(5).repeat(20) B = np.tile(np.arange(5), 20) diff --git a/pandas/tests/frame/test_subclass.py b/pandas/tests/frame/test_subclass.py index 649a78b785d21..e1e546256f7cd 100644 --- a/pandas/tests/frame/test_subclass.py +++ b/pandas/tests/frame/test_subclass.py @@ -3,11 +3,10 @@ import pandas as pd from pandas import DataFrame, Index, MultiIndex, Series -from pandas.tests.frame.common import TestData import pandas.util.testing as tm -class TestDataFrameSubclassing(TestData): +class TestDataFrameSubclassing: def test_frame_subclassing_and_slicing(self): # Subclass frame and ensure it returns the right class on slicing it # In reference to PR 9632 diff --git a/pandas/tests/frame/test_timeseries.py b/pandas/tests/frame/test_timeseries.py index b8708e6ca1871..3355d6e746db2 100644 --- a/pandas/tests/frame/test_timeseries.py +++ b/pandas/tests/frame/test_timeseries.py @@ -17,7 +17,6 @@ period_range, to_datetime, ) -from pandas.tests.frame.common import TestData import pandas.util.testing as tm from pandas.util.testing import ( assert_frame_equal, @@ -33,12 +32,12 @@ def close_open_fixture(request): return request.param -class TestDataFrameTimeSeriesMethods(TestData): - def test_diff(self): - the_diff = self.tsframe.diff(1) +class TestDataFrameTimeSeriesMethods: + def test_diff(self, datetime_frame): + the_diff = datetime_frame.diff(1) assert_series_equal( - the_diff["A"], self.tsframe["A"] - self.tsframe["A"].shift(1) + the_diff["A"], datetime_frame["A"] - datetime_frame["A"].shift(1) ) # int dtype @@ -50,7 +49,7 @@ def test_diff(self): assert rs.s[1] == 1 # mixed numeric - tf = self.tsframe.astype("float32") + tf = datetime_frame.astype("float32") the_diff = tf.diff(1) assert_series_equal(the_diff["A"], tf["A"] - tf["A"].shift(1)) @@ -126,14 +125,14 @@ def test_diff_mixed_dtype(self): result = df.diff() assert result[0].dtype == np.float64 - def test_diff_neg_n(self): - rs = self.tsframe.diff(-1) - xp = self.tsframe - self.tsframe.shift(-1) + def test_diff_neg_n(self, datetime_frame): + rs = datetime_frame.diff(-1) + xp = datetime_frame - datetime_frame.shift(-1) assert_frame_equal(rs, xp) - def test_diff_float_n(self): - rs = self.tsframe.diff(1.0) - xp = self.tsframe.diff(1) + def test_diff_float_n(self, datetime_frame): + rs = datetime_frame.diff(1.0) + xp = datetime_frame.diff(1) assert_frame_equal(rs, xp) def test_diff_axis(self): @@ -142,20 +141,20 @@ def test_diff_axis(self): assert_frame_equal(df.diff(axis=1), DataFrame([[np.nan, 1.0], [np.nan, 1.0]])) assert_frame_equal(df.diff(axis=0), DataFrame([[np.nan, np.nan], [2.0, 2.0]])) - def test_pct_change(self): - rs = self.tsframe.pct_change(fill_method=None) - assert_frame_equal(rs, self.tsframe / self.tsframe.shift(1) - 1) + def test_pct_change(self, datetime_frame): + rs = datetime_frame.pct_change(fill_method=None) + assert_frame_equal(rs, datetime_frame / datetime_frame.shift(1) - 1) - rs = self.tsframe.pct_change(2) - filled = self.tsframe.fillna(method="pad") + rs = datetime_frame.pct_change(2) + filled = datetime_frame.fillna(method="pad") assert_frame_equal(rs, filled / filled.shift(2) - 1) - rs = self.tsframe.pct_change(fill_method="bfill", limit=1) - filled = self.tsframe.fillna(method="bfill", limit=1) + rs = datetime_frame.pct_change(fill_method="bfill", limit=1) + filled = datetime_frame.fillna(method="bfill", limit=1) assert_frame_equal(rs, filled / filled.shift(1) - 1) - rs = self.tsframe.pct_change(freq="5D") - filled = self.tsframe.fillna(method="pad") + rs = datetime_frame.pct_change(freq="5D") + filled = datetime_frame.fillna(method="pad") assert_frame_equal( rs, (filled / filled.shift(freq="5D") - 1).reindex_like(filled) ) @@ -181,17 +180,19 @@ def test_pct_change_shift_over_nas(self): ("14B", 14, None, None), ], ) - def test_pct_change_periods_freq(self, freq, periods, fill_method, limit): + def test_pct_change_periods_freq( + self, datetime_frame, freq, periods, fill_method, limit + ): # GH 7292 - rs_freq = self.tsframe.pct_change( + rs_freq = datetime_frame.pct_change( freq=freq, fill_method=fill_method, limit=limit ) - rs_periods = self.tsframe.pct_change( + rs_periods = datetime_frame.pct_change( periods, fill_method=fill_method, limit=limit ) assert_frame_equal(rs_freq, rs_periods) - empty_ts = DataFrame(index=self.tsframe.index, columns=self.tsframe.columns) + empty_ts = DataFrame(index=datetime_frame.index, columns=datetime_frame.columns) rs_freq = empty_ts.pct_change(freq=freq, fill_method=fill_method, limit=limit) rs_periods = empty_ts.pct_change(periods, fill_method=fill_method, limit=limit) assert_frame_equal(rs_freq, rs_periods) @@ -249,39 +250,39 @@ def test_frame_append_datetime64_col_other_units(self): assert (tmp["dates"].values == ex_vals).all() - def test_shift(self): + def test_shift(self, datetime_frame, int_frame): # naive shift - shiftedFrame = self.tsframe.shift(5) - tm.assert_index_equal(shiftedFrame.index, self.tsframe.index) + shiftedFrame = datetime_frame.shift(5) + tm.assert_index_equal(shiftedFrame.index, datetime_frame.index) - shiftedSeries = self.tsframe["A"].shift(5) + shiftedSeries = datetime_frame["A"].shift(5) assert_series_equal(shiftedFrame["A"], shiftedSeries) - shiftedFrame = self.tsframe.shift(-5) - tm.assert_index_equal(shiftedFrame.index, self.tsframe.index) + shiftedFrame = datetime_frame.shift(-5) + tm.assert_index_equal(shiftedFrame.index, datetime_frame.index) - shiftedSeries = self.tsframe["A"].shift(-5) + shiftedSeries = datetime_frame["A"].shift(-5) assert_series_equal(shiftedFrame["A"], shiftedSeries) # shift by 0 - unshifted = self.tsframe.shift(0) - assert_frame_equal(unshifted, self.tsframe) + unshifted = datetime_frame.shift(0) + assert_frame_equal(unshifted, datetime_frame) # shift by DateOffset - shiftedFrame = self.tsframe.shift(5, freq=offsets.BDay()) - assert len(shiftedFrame) == len(self.tsframe) + shiftedFrame = datetime_frame.shift(5, freq=offsets.BDay()) + assert len(shiftedFrame) == len(datetime_frame) - shiftedFrame2 = self.tsframe.shift(5, freq="B") + shiftedFrame2 = datetime_frame.shift(5, freq="B") assert_frame_equal(shiftedFrame, shiftedFrame2) - d = self.tsframe.index[0] + d = datetime_frame.index[0] shifted_d = d + offsets.BDay(5) assert_series_equal( - self.tsframe.xs(d), shiftedFrame.xs(shifted_d), check_names=False + datetime_frame.xs(d), shiftedFrame.xs(shifted_d), check_names=False ) # shift int frame - int_shifted = self.intframe.shift(1) # noqa + int_shifted = int_frame.shift(1) # noqa # Shifting with PeriodIndex ps = tm.makePeriodFrame() @@ -387,7 +388,7 @@ def test_shift_duplicate_columns(self): assert_frame_equal(shifted[0], shifted[1]) assert_frame_equal(shifted[0], shifted[2]) - def test_tshift(self): + def test_tshift(self, datetime_frame): # PeriodIndex ps = tm.makePeriodFrame() shifted = ps.tshift(1) @@ -405,36 +406,36 @@ def test_tshift(self): ps.tshift(freq="M") # DatetimeIndex - shifted = self.tsframe.tshift(1) + shifted = datetime_frame.tshift(1) unshifted = shifted.tshift(-1) - assert_frame_equal(self.tsframe, unshifted) + assert_frame_equal(datetime_frame, unshifted) - shifted2 = self.tsframe.tshift(freq=self.tsframe.index.freq) + shifted2 = datetime_frame.tshift(freq=datetime_frame.index.freq) assert_frame_equal(shifted, shifted2) inferred_ts = DataFrame( - self.tsframe.values, - Index(np.asarray(self.tsframe.index)), - columns=self.tsframe.columns, + datetime_frame.values, + Index(np.asarray(datetime_frame.index)), + columns=datetime_frame.columns, ) shifted = inferred_ts.tshift(1) unshifted = shifted.tshift(-1) - assert_frame_equal(shifted, self.tsframe.tshift(1)) + assert_frame_equal(shifted, datetime_frame.tshift(1)) assert_frame_equal(unshifted, inferred_ts) - no_freq = self.tsframe.iloc[[0, 5, 7], :] + no_freq = datetime_frame.iloc[[0, 5, 7], :] msg = "Freq was not given and was not set in the index" with pytest.raises(ValueError, match=msg): no_freq.tshift() - def test_truncate(self): - ts = self.tsframe[::3] + def test_truncate(self, datetime_frame): + ts = datetime_frame[::3] - start, end = self.tsframe.index[3], self.tsframe.index[6] + start, end = datetime_frame.index[3], datetime_frame.index[6] - start_missing = self.tsframe.index[2] - end_missing = self.tsframe.index[7] + start_missing = datetime_frame.index[2] + end_missing = datetime_frame.index[7] # neither specified truncated = ts.truncate() @@ -473,11 +474,11 @@ def test_truncate(self): before=ts.index[-1] - ts.index.freq, after=ts.index[0] + ts.index.freq ) - def test_truncate_copy(self): - index = self.tsframe.index - truncated = self.tsframe.truncate(index[5], index[10]) + def test_truncate_copy(self, datetime_frame): + index = datetime_frame.index + truncated = datetime_frame.truncate(index[5], index[10]) truncated.values[:] = 5.0 - assert not (self.tsframe.values[5:11] == 5).any() + assert not (datetime_frame.values[5:11] == 5).any() def test_truncate_nonsortedindex(self): # GH 17935 @@ -510,9 +511,9 @@ def test_truncate_nonsortedindex(self): with pytest.raises(ValueError, match=msg): df.truncate(before=2, after=20, axis=1) - def test_asfreq(self): - offset_monthly = self.tsframe.asfreq(offsets.BMonthEnd()) - rule_monthly = self.tsframe.asfreq("BM") + def test_asfreq(self, datetime_frame): + offset_monthly = datetime_frame.asfreq(offsets.BMonthEnd()) + rule_monthly = datetime_frame.asfreq("BM") tm.assert_almost_equal(offset_monthly["A"], rule_monthly["A"]) @@ -523,7 +524,7 @@ def test_asfreq(self): filled_dep = rule_monthly.asfreq("B", method="pad") # noqa # test does not blow up on length-0 DataFrame - zero_length = self.tsframe.reindex([]) + zero_length = datetime_frame.reindex([]) result = zero_length.asfreq("BM") assert result is not zero_length @@ -569,13 +570,15 @@ def test_asfreq_fillvalue(self): ({"A": [1, np.nan, 3]}, [1, 2, 2], 1, 2), ], ) - def test_first_last_valid(self, data, idx, expected_first, expected_last): - N = len(self.frame.index) + def test_first_last_valid( + self, float_frame, data, idx, expected_first, expected_last + ): + N = len(float_frame.index) mat = np.random.randn(N) mat[:5] = np.nan mat[-5:] = np.nan - frame = DataFrame({"foo": mat}, index=self.frame.index) + frame = DataFrame({"foo": mat}, index=float_frame.index) index = frame.first_valid_index() assert index == frame.index[5] diff --git a/pandas/tests/frame/test_to_csv.py b/pandas/tests/frame/test_to_csv.py index 8fb028a0f0326..67c748227a43d 100644 --- a/pandas/tests/frame/test_to_csv.py +++ b/pandas/tests/frame/test_to_csv.py @@ -19,7 +19,6 @@ to_datetime, ) import pandas.core.common as com -from pandas.tests.frame.common import TestData import pandas.util.testing as tm from pandas.util.testing import ( assert_almost_equal, @@ -44,37 +43,37 @@ ] -class TestDataFrameToCSV(TestData): +class TestDataFrameToCSV: def read_csv(self, path, **kwargs): params = dict(index_col=0, parse_dates=True) params.update(**kwargs) return pd.read_csv(path, **params) - def test_to_csv_from_csv1(self): + def test_to_csv_from_csv1(self, float_frame, datetime_frame): with ensure_clean("__tmp_to_csv_from_csv1__") as path: - self.frame["A"][:5] = np.nan + float_frame["A"][:5] = np.nan - self.frame.to_csv(path) - self.frame.to_csv(path, columns=["A", "B"]) - self.frame.to_csv(path, header=False) - self.frame.to_csv(path, index=False) + float_frame.to_csv(path) + float_frame.to_csv(path, columns=["A", "B"]) + float_frame.to_csv(path, header=False) + float_frame.to_csv(path, index=False) # test roundtrip - self.tsframe.to_csv(path) + datetime_frame.to_csv(path) recons = self.read_csv(path) - assert_frame_equal(self.tsframe, recons) + assert_frame_equal(datetime_frame, recons) - self.tsframe.to_csv(path, index_label="index") + datetime_frame.to_csv(path, index_label="index") recons = self.read_csv(path, index_col=None) - assert len(recons.columns) == len(self.tsframe.columns) + 1 + assert len(recons.columns) == len(datetime_frame.columns) + 1 # no index - self.tsframe.to_csv(path, index=False) + datetime_frame.to_csv(path, index=False) recons = self.read_csv(path, index_col=None) - assert_almost_equal(self.tsframe.values, recons.values) + assert_almost_equal(datetime_frame.values, recons.values) # corner case dm = DataFrame( @@ -88,7 +87,7 @@ def test_to_csv_from_csv1(self): recons = self.read_csv(path) assert_frame_equal(dm, recons) - def test_to_csv_from_csv2(self): + def test_to_csv_from_csv2(self, float_frame): with ensure_clean("__tmp_to_csv_from_csv2__") as path: @@ -109,16 +108,16 @@ def test_to_csv_from_csv2(self): # column aliases col_aliases = Index(["AA", "X", "Y", "Z"]) - self.frame2.to_csv(path, header=col_aliases) + float_frame.to_csv(path, header=col_aliases) rs = self.read_csv(path) - xp = self.frame2.copy() + xp = float_frame.copy() xp.columns = col_aliases assert_frame_equal(xp, rs) msg = "Writing 4 cols but got 2 aliases" with pytest.raises(ValueError, match=msg): - self.frame2.to_csv(path, header=["AA", "X"]) + float_frame.to_csv(path, header=["AA", "X"]) def test_to_csv_from_csv3(self): @@ -153,22 +152,22 @@ def test_to_csv_from_csv4(self): assert_frame_equal(df, result, check_index_type=True) - def test_to_csv_from_csv5(self): + def test_to_csv_from_csv5(self, timezone_frame): # tz, 8260 with ensure_clean("__tmp_to_csv_from_csv5__") as path: - self.tzframe.to_csv(path) + timezone_frame.to_csv(path) result = pd.read_csv(path, index_col=0, parse_dates=["A"]) converter = ( lambda c: to_datetime(result[c]) .dt.tz_convert("UTC") - .dt.tz_convert(self.tzframe[c].dt.tz) + .dt.tz_convert(timezone_frame[c].dt.tz) ) result["B"] = converter("B") result["C"] = converter("C") - assert_frame_equal(result, self.tzframe) + assert_frame_equal(result, timezone_frame) def test_to_csv_cols_reordering(self): # GH3454 @@ -493,37 +492,37 @@ def _to_uni(x): cnlvl=2, ) - def test_to_csv_from_csv_w_some_infs(self): + def test_to_csv_from_csv_w_some_infs(self, float_frame): # test roundtrip with inf, -inf, nan, as full columns and mix - self.frame["G"] = np.nan + float_frame["G"] = np.nan f = lambda x: [np.inf, np.nan][np.random.rand() < 0.5] - self.frame["H"] = self.frame.index.map(f) + float_frame["H"] = float_frame.index.map(f) with ensure_clean() as path: - self.frame.to_csv(path) + float_frame.to_csv(path) recons = self.read_csv(path) # TODO to_csv drops column name - assert_frame_equal(self.frame, recons, check_names=False) + assert_frame_equal(float_frame, recons, check_names=False) assert_frame_equal( - np.isinf(self.frame), np.isinf(recons), check_names=False + np.isinf(float_frame), np.isinf(recons), check_names=False ) - def test_to_csv_from_csv_w_all_infs(self): + def test_to_csv_from_csv_w_all_infs(self, float_frame): # test roundtrip with inf, -inf, nan, as full columns and mix - self.frame["E"] = np.inf - self.frame["F"] = -np.inf + float_frame["E"] = np.inf + float_frame["F"] = -np.inf with ensure_clean() as path: - self.frame.to_csv(path) + float_frame.to_csv(path) recons = self.read_csv(path) # TODO to_csv drops column name - assert_frame_equal(self.frame, recons, check_names=False) + assert_frame_equal(float_frame, recons, check_names=False) assert_frame_equal( - np.isinf(self.frame), np.isinf(recons), check_names=False + np.isinf(float_frame), np.isinf(recons), check_names=False ) def test_to_csv_no_index(self): @@ -563,9 +562,9 @@ def test_to_csv_headers(self): recons.reset_index(inplace=True) assert_frame_equal(to_df, recons) - def test_to_csv_multiindex(self): + def test_to_csv_multiindex(self, float_frame, datetime_frame): - frame = self.frame + frame = float_frame old_index = frame.index arrays = np.arange(len(old_index) * 2).reshape(2, -1) new_index = MultiIndex.from_arrays(arrays, names=["first", "second"]) @@ -586,10 +585,10 @@ def test_to_csv_multiindex(self): assert frame.index.names == df.index.names # needed if setUp becomes a class method - self.frame.index = old_index + float_frame.index = old_index # try multiindex with dates - tsframe = self.tsframe + tsframe = datetime_frame old_index = tsframe.index new_index = [old_index, np.arange(len(old_index))] tsframe.index = MultiIndex.from_arrays(new_index) @@ -608,10 +607,10 @@ def test_to_csv_multiindex(self): # no index tsframe.to_csv(path, index=False) recons = self.read_csv(path, index_col=None) - assert_almost_equal(recons.values, self.tsframe.values) + assert_almost_equal(recons.values, datetime_frame.values) # needed if setUp becomes class method - self.tsframe.index = old_index + datetime_frame.index = old_index with ensure_clean("__tmp_to_csv_multiindex__") as path: # GH3571, GH1651, GH3141 @@ -889,13 +888,13 @@ def test_to_csv_unicode_index_col(self): df2 = read_csv(buf, index_col=0, encoding="UTF-8") assert_frame_equal(df, df2) - def test_to_csv_stringio(self): + def test_to_csv_stringio(self, float_frame): buf = StringIO() - self.frame.to_csv(buf) + float_frame.to_csv(buf) buf.seek(0) recons = read_csv(buf, index_col=0) # TODO to_csv drops column name - assert_frame_equal(recons, self.frame, check_names=False) + assert_frame_equal(recons, float_frame, check_names=False) def test_to_csv_float_format(self): @@ -1013,14 +1012,14 @@ def test_to_csv_from_csv_categorical(self): assert res.getvalue() == exp.getvalue() - def test_to_csv_path_is_none(self): + def test_to_csv_path_is_none(self, float_frame): # GH 8215 # Make sure we return string for consistency with # Series.to_csv() - csv_str = self.frame.to_csv(path_or_buf=None) + csv_str = float_frame.to_csv(path_or_buf=None) assert isinstance(csv_str, str) recons = pd.read_csv(StringIO(csv_str), index_col=0) - assert_frame_equal(self.frame, recons) + assert_frame_equal(float_frame, recons) @pytest.mark.parametrize( "df,encoding", @@ -1077,9 +1076,9 @@ def test_to_csv_compression(self, df, encoding, compression): with tm.decompress_file(filename, compression) as fh: assert_frame_equal(df, read_csv(fh, index_col=0, encoding=encoding)) - def test_to_csv_date_format(self): + def test_to_csv_date_format(self, datetime_frame): with ensure_clean("__tmp_to_csv_date_format__") as path: - dt_index = self.tsframe.index + dt_index = datetime_frame.index datetime_frame = DataFrame( {"A": dt_index, "B": dt_index.shift(1)}, index=dt_index )