diff --git a/pandas/tests/frame/test_sort_values_level_as_str.py b/pandas/tests/frame/test_sort_values_level_as_str.py index 2bcc115bcd09c..b0287d9180859 100644 --- a/pandas/tests/frame/test_sort_values_level_as_str.py +++ b/pandas/tests/frame/test_sort_values_level_as_str.py @@ -4,8 +4,7 @@ from pandas.errors import PerformanceWarning from pandas import DataFrame -from pandas.util import testing as tm -from pandas.util.testing import assert_frame_equal +import pandas.util.testing as tm @pytest.fixture @@ -62,7 +61,7 @@ def test_sort_index_level_and_column_label(df_none, df_idx, sort_names, ascendin # Compute result sorting on mix on columns and index levels result = df_idx.sort_values(by=sort_names, ascending=ascending, axis=0) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_sort_column_level_and_index_label(df_none, df_idx, sort_names, ascending): @@ -88,6 +87,6 @@ def test_sort_column_level_and_index_label(df_none, df_idx, sort_names, ascendin # Accessing multi-level columns that are not lexsorted raises a # performance warning with tm.assert_produces_warning(PerformanceWarning, check_stacklevel=False): - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) else: - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/test_sorting.py b/pandas/tests/frame/test_sorting.py index 2b4b20d318adf..9ea78b974fcbb 100644 --- a/pandas/tests/frame/test_sorting.py +++ b/pandas/tests/frame/test_sorting.py @@ -16,7 +16,6 @@ ) from pandas.api.types import CategoricalDtype import pandas.util.testing as tm -from pandas.util.testing import assert_frame_equal, assert_series_equal class TestDataFrameSorting: @@ -29,30 +28,30 @@ def test_sort_values(self): sorted_df = frame.sort_values(by="A") indexer = frame["A"].argsort().values expected = frame.loc[frame.index[indexer]] - assert_frame_equal(sorted_df, expected) + tm.assert_frame_equal(sorted_df, expected) sorted_df = frame.sort_values(by="A", ascending=False) indexer = indexer[::-1] expected = frame.loc[frame.index[indexer]] - assert_frame_equal(sorted_df, expected) + tm.assert_frame_equal(sorted_df, expected) sorted_df = frame.sort_values(by="A", ascending=False) - assert_frame_equal(sorted_df, expected) + tm.assert_frame_equal(sorted_df, expected) # GH4839 sorted_df = frame.sort_values(by=["A"], ascending=[False]) - assert_frame_equal(sorted_df, expected) + tm.assert_frame_equal(sorted_df, expected) # multiple bys sorted_df = frame.sort_values(by=["B", "C"]) expected = frame.loc[[2, 1, 3]] - assert_frame_equal(sorted_df, expected) + tm.assert_frame_equal(sorted_df, expected) sorted_df = frame.sort_values(by=["B", "C"], ascending=False) - assert_frame_equal(sorted_df, expected[::-1]) + tm.assert_frame_equal(sorted_df, expected[::-1]) sorted_df = frame.sort_values(by=["B", "A"], ascending=[True, False]) - assert_frame_equal(sorted_df, expected) + tm.assert_frame_equal(sorted_df, expected) msg = "No axis named 2 for object type " with pytest.raises(ValueError, match=msg): @@ -61,22 +60,22 @@ def test_sort_values(self): # by row (axis=1): GH 10806 sorted_df = frame.sort_values(by=3, axis=1) expected = frame - assert_frame_equal(sorted_df, expected) + tm.assert_frame_equal(sorted_df, expected) sorted_df = frame.sort_values(by=3, axis=1, ascending=False) expected = frame.reindex(columns=["C", "B", "A"]) - assert_frame_equal(sorted_df, expected) + tm.assert_frame_equal(sorted_df, expected) sorted_df = frame.sort_values(by=[1, 2], 
axis="columns") expected = frame.reindex(columns=["B", "A", "C"]) - assert_frame_equal(sorted_df, expected) + tm.assert_frame_equal(sorted_df, expected) sorted_df = frame.sort_values(by=[1, 3], axis=1, ascending=[True, False]) - assert_frame_equal(sorted_df, expected) + tm.assert_frame_equal(sorted_df, expected) sorted_df = frame.sort_values(by=[1, 3], axis=1, ascending=False) expected = frame.reindex(columns=["C", "B", "A"]) - assert_frame_equal(sorted_df, expected) + tm.assert_frame_equal(sorted_df, expected) msg = r"Length of ascending \(5\) != length of by \(2\)" with pytest.raises(ValueError, match=msg): @@ -90,22 +89,22 @@ def test_sort_values_inplace(self): sorted_df = frame.copy() sorted_df.sort_values(by="A", inplace=True) expected = frame.sort_values(by="A") - assert_frame_equal(sorted_df, expected) + tm.assert_frame_equal(sorted_df, expected) sorted_df = frame.copy() sorted_df.sort_values(by=1, axis=1, inplace=True) expected = frame.sort_values(by=1, axis=1) - assert_frame_equal(sorted_df, expected) + tm.assert_frame_equal(sorted_df, expected) sorted_df = frame.copy() sorted_df.sort_values(by="A", ascending=False, inplace=True) expected = frame.sort_values(by="A", ascending=False) - assert_frame_equal(sorted_df, expected) + tm.assert_frame_equal(sorted_df, expected) sorted_df = frame.copy() sorted_df.sort_values(by=["A", "B"], ascending=False, inplace=True) expected = frame.sort_values(by=["A", "B"], ascending=False) - assert_frame_equal(sorted_df, expected) + tm.assert_frame_equal(sorted_df, expected) def test_sort_nan(self): # GH3917 @@ -118,18 +117,18 @@ def test_sort_nan(self): index=[2, 0, 3, 1, 6, 4, 5], ) sorted_df = df.sort_values(["A"], na_position="first") - assert_frame_equal(sorted_df, expected) + tm.assert_frame_equal(sorted_df, expected) expected = DataFrame( {"A": [nan, 8, 6, 4, 2, 1, 1], "B": [5, 4, 5, 5, nan, 9, 2]}, index=[2, 5, 4, 6, 1, 0, 3], ) sorted_df = df.sort_values(["A"], na_position="first", ascending=False) - assert_frame_equal(sorted_df, expected) + tm.assert_frame_equal(sorted_df, expected) expected = df.reindex(columns=["B", "A"]) sorted_df = df.sort_values(by=1, axis=1, na_position="first") - assert_frame_equal(sorted_df, expected) + tm.assert_frame_equal(sorted_df, expected) # na_position='last', order expected = DataFrame( @@ -137,7 +136,7 @@ def test_sort_nan(self): index=[3, 0, 1, 6, 4, 5, 2], ) sorted_df = df.sort_values(["A", "B"]) - assert_frame_equal(sorted_df, expected) + tm.assert_frame_equal(sorted_df, expected) # na_position='first', order expected = DataFrame( @@ -145,7 +144,7 @@ def test_sort_nan(self): index=[2, 3, 0, 1, 6, 4, 5], ) sorted_df = df.sort_values(["A", "B"], na_position="first") - assert_frame_equal(sorted_df, expected) + tm.assert_frame_equal(sorted_df, expected) # na_position='first', not order expected = DataFrame( @@ -153,7 +152,7 @@ def test_sort_nan(self): index=[2, 0, 3, 1, 6, 4, 5], ) sorted_df = df.sort_values(["A", "B"], ascending=[1, 0], na_position="first") - assert_frame_equal(sorted_df, expected) + tm.assert_frame_equal(sorted_df, expected) # na_position='last', not order expected = DataFrame( @@ -161,7 +160,7 @@ def test_sort_nan(self): index=[5, 4, 6, 1, 3, 0, 2], ) sorted_df = df.sort_values(["A", "B"], ascending=[0, 1], na_position="last") - assert_frame_equal(sorted_df, expected) + tm.assert_frame_equal(sorted_df, expected) # Test DataFrame with nan label df = DataFrame( @@ -175,7 +174,7 @@ def test_sort_nan(self): {"A": [1, 2, nan, 1, 6, 8, 4], "B": [9, nan, 5, 2, 5, 4, 5]}, index=[1, 2, 3, 4, 5, 6, 
nan], ) - assert_frame_equal(sorted_df, expected) + tm.assert_frame_equal(sorted_df, expected) # NaN label, ascending=True, na_position='first' sorted_df = df.sort_index(na_position="first") @@ -183,7 +182,7 @@ def test_sort_nan(self): {"A": [4, 1, 2, nan, 1, 6, 8], "B": [5, 9, nan, 5, 2, 5, 4]}, index=[nan, 1, 2, 3, 4, 5, 6], ) - assert_frame_equal(sorted_df, expected) + tm.assert_frame_equal(sorted_df, expected) # NaN label, ascending=False, na_position='last' sorted_df = df.sort_index(kind="quicksort", ascending=False) @@ -191,7 +190,7 @@ def test_sort_nan(self): {"A": [8, 6, 1, nan, 2, 1, 4], "B": [4, 5, 2, 5, nan, 9, 5]}, index=[6, 5, 4, 3, 2, 1, nan], ) - assert_frame_equal(sorted_df, expected) + tm.assert_frame_equal(sorted_df, expected) # NaN label, ascending=False, na_position='first' sorted_df = df.sort_index( @@ -201,7 +200,7 @@ def test_sort_nan(self): {"A": [4, 8, 6, 1, nan, 2, 1], "B": [5, 4, 5, 2, 5, nan, 9]}, index=[nan, 6, 5, 4, 3, 2, 1], ) - assert_frame_equal(sorted_df, expected) + tm.assert_frame_equal(sorted_df, expected) def test_stable_descending_sort(self): # GH #6399 @@ -210,7 +209,7 @@ def test_stable_descending_sort(self): columns=["sort_col", "order"], ) sorted_df = df.sort_values(by="sort_col", kind="mergesort", ascending=False) - assert_frame_equal(df, sorted_df) + tm.assert_frame_equal(df, sorted_df) def test_stable_descending_multicolumn_sort(self): nan = np.nan @@ -223,7 +222,7 @@ def test_stable_descending_multicolumn_sort(self): sorted_df = df.sort_values( ["A", "B"], ascending=[0, 1], na_position="first", kind="mergesort" ) - assert_frame_equal(sorted_df, expected) + tm.assert_frame_equal(sorted_df, expected) expected = DataFrame( {"A": [nan, 8, 6, 4, 2, 1, 1], "B": [5, 4, 5, 5, nan, 9, 2]}, @@ -232,7 +231,7 @@ def test_stable_descending_multicolumn_sort(self): sorted_df = df.sort_values( ["A", "B"], ascending=[0, 0], na_position="first", kind="mergesort" ) - assert_frame_equal(sorted_df, expected) + tm.assert_frame_equal(sorted_df, expected) def test_sort_multi_index(self): # GH 25775, testing that sorting by index works with a multi-index. 
@@ -253,7 +252,7 @@ def test_stable_categorial(self): df = DataFrame({"x": pd.Categorical(np.repeat([1, 2, 3, 4], 5), ordered=True)}) expected = df.copy() sorted_df = df.sort_values("x", kind="mergesort") - assert_frame_equal(sorted_df, expected) + tm.assert_frame_equal(sorted_df, expected) def test_sort_datetimes(self): @@ -283,16 +282,16 @@ def test_sort_datetimes(self): df1 = df.sort_values(by="A") df2 = df.sort_values(by=["A"]) - assert_frame_equal(df1, df2) + tm.assert_frame_equal(df1, df2) df1 = df.sort_values(by="B") df2 = df.sort_values(by=["B"]) - assert_frame_equal(df1, df2) + tm.assert_frame_equal(df1, df2) df1 = df.sort_values(by="B") df2 = df.sort_values(by=["C", "B"]) - assert_frame_equal(df1, df2) + tm.assert_frame_equal(df1, df2) def test_frame_column_inplace_sort_exception(self, float_frame): s = float_frame["A"] @@ -325,14 +324,14 @@ def test_sort_nat_values_in_int_column(self): # NaT is not a "na" for int64 columns, so na_position must not # influence the result: df_sorted = df.sort_values(["int", "float"], na_position="last") - assert_frame_equal(df_sorted, df_reversed) + tm.assert_frame_equal(df_sorted, df_reversed) df_sorted = df.sort_values(["int", "float"], na_position="first") - assert_frame_equal(df_sorted, df_reversed) + tm.assert_frame_equal(df_sorted, df_reversed) # reverse sorting order df_sorted = df.sort_values(["int", "float"], ascending=False) - assert_frame_equal(df_sorted, df) + tm.assert_frame_equal(df_sorted, df) # and now check if NaT is still considered as "na" for datetime64 # columns: @@ -348,14 +347,14 @@ def test_sort_nat_values_in_int_column(self): ) df_sorted = df.sort_values(["datetime", "float"], na_position="first") - assert_frame_equal(df_sorted, df_reversed) + tm.assert_frame_equal(df_sorted, df_reversed) df_sorted = df.sort_values(["datetime", "float"], na_position="last") - assert_frame_equal(df_sorted, df) + tm.assert_frame_equal(df_sorted, df) # Ascending should not affect the results. 
df_sorted = df.sort_values(["datetime", "float"], ascending=False) - assert_frame_equal(df_sorted, df) + tm.assert_frame_equal(df_sorted, df) def test_sort_nat(self): @@ -392,7 +391,7 @@ def test_sort_index_multicolumn(self): result = frame.sort_values(by=["A", "B"]) indexer = np.lexsort((frame["B"], frame["A"])) expected = frame.take(indexer) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # use .sort_values #9816 with tm.assert_produces_warning(FutureWarning): @@ -402,7 +401,7 @@ def test_sort_index_multicolumn(self): (frame["B"].rank(ascending=False), frame["A"].rank(ascending=False)) ) expected = frame.take(indexer) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # use .sort_values #9816 with tm.assert_produces_warning(FutureWarning): @@ -410,7 +409,7 @@ def test_sort_index_multicolumn(self): result = frame.sort_values(by=["B", "A"]) indexer = np.lexsort((frame["A"], frame["B"])) expected = frame.take(indexer) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_sort_index_inplace(self): frame = DataFrame( @@ -423,25 +422,25 @@ def test_sort_index_inplace(self): df = unordered.copy() df.sort_index(inplace=True) expected = frame - assert_frame_equal(df, expected) + tm.assert_frame_equal(df, expected) assert a_id != id(df["A"]) df = unordered.copy() df.sort_index(ascending=False, inplace=True) expected = frame[::-1] - assert_frame_equal(df, expected) + tm.assert_frame_equal(df, expected) # axis=1 unordered = frame.loc[:, ["D", "B", "C", "A"]] df = unordered.copy() df.sort_index(axis=1, inplace=True) expected = frame - assert_frame_equal(df, expected) + tm.assert_frame_equal(df, expected) df = unordered.copy() df.sort_index(axis=1, ascending=False, inplace=True) expected = frame.iloc[:, ::-1] - assert_frame_equal(df, expected) + tm.assert_frame_equal(df, expected) def test_sort_index_different_sortorder(self): A = np.arange(20).repeat(5) @@ -460,18 +459,18 @@ def test_sort_index_different_sortorder(self): ex_indexer = np.lexsort((df.B.max() - df.B, df.A)) expected = df.take(ex_indexer) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # test with multiindex, too idf = df.set_index(["A", "B"]) result = idf.sort_index(ascending=[1, 0]) expected = idf.take(ex_indexer) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # also, Series! 
result = idf["C"].sort_index(ascending=[1, 0]) - assert_series_equal(result, expected["C"]) + tm.assert_series_equal(result, expected["C"]) def test_sort_index_duplicates(self): @@ -524,7 +523,7 @@ def test_sort_index_duplicates(self): with tm.assert_produces_warning(FutureWarning): df.sort_index(by=("a", 1)) result = df.sort_values(by=("a", 1)) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_sort_index_level(self): mi = MultiIndex.from_tuples([[1, 1, 3], [1, 1, 1]], names=list("ABC")) @@ -532,25 +531,25 @@ def test_sort_index_level(self): result = df.sort_index(level="A", sort_remaining=False) expected = df - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = df.sort_index(level=["A", "B"], sort_remaining=False) expected = df - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # Error thrown by sort_index when # first index is sorted last (#26053) result = df.sort_index(level=["C", "B", "A"]) expected = df.iloc[[1, 0]] - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = df.sort_index(level=["B", "C", "A"]) expected = df.iloc[[1, 0]] - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = df.sort_index(level=["C", "A"]) expected = df.iloc[[1, 0]] - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_sort_index_categorical_index(self): @@ -563,11 +562,11 @@ def test_sort_index_categorical_index(self): result = df.sort_index() expected = df.iloc[[4, 0, 1, 5, 2, 3]] - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = df.sort_index(ascending=False) expected = df.iloc[[2, 3, 0, 1, 5, 4]] - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_sort_index(self): # GH13496 @@ -582,20 +581,20 @@ def test_sort_index(self): unordered = frame.loc[[3, 2, 4, 1]] result = unordered.sort_index(axis=0) expected = frame - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = unordered.sort_index(ascending=False) expected = frame[::-1] - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # axis=1 : sort columns by column names unordered = frame.iloc[:, [2, 1, 3, 0]] result = unordered.sort_index(axis=1) - assert_frame_equal(result, frame) + tm.assert_frame_equal(result, frame) result = unordered.sort_index(axis=1, ascending=False) expected = frame.iloc[:, ::-1] - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) @pytest.mark.parametrize("level", ["A", 0]) # GH 21052 def test_sort_index_multiindex(self, level): @@ -612,7 +611,7 @@ def test_sort_index_multiindex(self, level): ) expected = pd.DataFrame([[5, 6], [3, 4], [1, 2]], index=expected_mi) result = df.sort_index(level=level) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # sort_remaining=False expected_mi = MultiIndex.from_tuples( @@ -620,7 +619,7 @@ def test_sort_index_multiindex(self, level): ) expected = pd.DataFrame([[5, 6], [1, 2], [3, 4]], index=expected_mi) result = df.sort_index(level=level, sort_remaining=False) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_sort_index_intervalindex(self): # this is a de-facto sort via unstack @@ -672,7 +671,7 @@ def test_sort_index_na_position_with_categories(self): index=na_indices + category_indices, ) - assert_frame_equal(result, expected) + 
tm.assert_frame_equal(result, expected) # sort ascending with na last result = df.sort_values( @@ -687,7 +686,7 @@ def test_sort_index_na_position_with_categories(self): index=category_indices + na_indices, ) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # sort descending with na first result = df.sort_values( @@ -704,7 +703,7 @@ def test_sort_index_na_position_with_categories(self): index=reversed_na_indices + reversed_category_indices, ) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # sort descending with na last result = df.sort_values( @@ -721,7 +720,7 @@ def test_sort_index_na_position_with_categories(self): index=reversed_category_indices + reversed_na_indices, ) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_sort_index_na_position_with_categories_raises(self): df = pd.DataFrame( diff --git a/pandas/tests/frame/test_timeseries.py b/pandas/tests/frame/test_timeseries.py index 3355d6e746db2..cad1fd60ca2a9 100644 --- a/pandas/tests/frame/test_timeseries.py +++ b/pandas/tests/frame/test_timeseries.py @@ -18,11 +18,6 @@ to_datetime, ) import pandas.util.testing as tm -from pandas.util.testing import ( - assert_frame_equal, - assert_index_equal, - assert_series_equal, -) import pandas.tseries.offsets as offsets @@ -36,7 +31,7 @@ class TestDataFrameTimeSeriesMethods: def test_diff(self, datetime_frame): the_diff = datetime_frame.diff(1) - assert_series_equal( + tm.assert_series_equal( the_diff["A"], datetime_frame["A"] - datetime_frame["A"].shift(1) ) @@ -51,7 +46,7 @@ def test_diff(self, datetime_frame): # mixed numeric tf = datetime_frame.astype("float32") the_diff = tf.diff(1) - assert_series_equal(the_diff["A"], tf["A"] - tf["A"].shift(1)) + tm.assert_series_equal(the_diff["A"], tf["A"] - tf["A"].shift(1)) # issue 10907 df = pd.DataFrame({"y": pd.Series([2]), "z": pd.Series([3])}) @@ -60,7 +55,7 @@ def test_diff(self, datetime_frame): expected = pd.DataFrame( {"x": np.nan, "y": pd.Series(1), "z": pd.Series(1)} ).astype("float64") - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) @pytest.mark.parametrize("tz", [None, "UTC"]) def test_diff_datetime_axis0(self, tz): @@ -79,7 +74,7 @@ def test_diff_datetime_axis0(self, tz): 1: pd.TimedeltaIndex(["NaT", "1 days"]), } ) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) @pytest.mark.parametrize("tz", [None, "UTC"]) def test_diff_datetime_axis1(self, tz): @@ -98,7 +93,7 @@ def test_diff_datetime_axis1(self, tz): 1: pd.TimedeltaIndex(["0 days", "0 days"]), } ) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) else: with pytest.raises(NotImplementedError): result = df.diff(axis=1) @@ -116,7 +111,7 @@ def test_diff_timedelta(self): exp = DataFrame( [[pd.NaT, np.nan], [pd.Timedelta("00:01:00"), 1]], columns=["time", "value"] ) - assert_frame_equal(res, exp) + tm.assert_frame_equal(res, exp) def test_diff_mixed_dtype(self): df = DataFrame(np.random.randn(5, 3)) @@ -128,34 +123,38 @@ def test_diff_mixed_dtype(self): def test_diff_neg_n(self, datetime_frame): rs = datetime_frame.diff(-1) xp = datetime_frame - datetime_frame.shift(-1) - assert_frame_equal(rs, xp) + tm.assert_frame_equal(rs, xp) def test_diff_float_n(self, datetime_frame): rs = datetime_frame.diff(1.0) xp = datetime_frame.diff(1) - assert_frame_equal(rs, xp) + tm.assert_frame_equal(rs, xp) def test_diff_axis(self): # GH 9727 df = DataFrame([[1.0, 2.0], [3.0, 4.0]]) - 
assert_frame_equal(df.diff(axis=1), DataFrame([[np.nan, 1.0], [np.nan, 1.0]])) - assert_frame_equal(df.diff(axis=0), DataFrame([[np.nan, np.nan], [2.0, 2.0]])) + tm.assert_frame_equal( + df.diff(axis=1), DataFrame([[np.nan, 1.0], [np.nan, 1.0]]) + ) + tm.assert_frame_equal( + df.diff(axis=0), DataFrame([[np.nan, np.nan], [2.0, 2.0]]) + ) def test_pct_change(self, datetime_frame): rs = datetime_frame.pct_change(fill_method=None) - assert_frame_equal(rs, datetime_frame / datetime_frame.shift(1) - 1) + tm.assert_frame_equal(rs, datetime_frame / datetime_frame.shift(1) - 1) rs = datetime_frame.pct_change(2) filled = datetime_frame.fillna(method="pad") - assert_frame_equal(rs, filled / filled.shift(2) - 1) + tm.assert_frame_equal(rs, filled / filled.shift(2) - 1) rs = datetime_frame.pct_change(fill_method="bfill", limit=1) filled = datetime_frame.fillna(method="bfill", limit=1) - assert_frame_equal(rs, filled / filled.shift(1) - 1) + tm.assert_frame_equal(rs, filled / filled.shift(1) - 1) rs = datetime_frame.pct_change(freq="5D") filled = datetime_frame.fillna(method="pad") - assert_frame_equal( + tm.assert_frame_equal( rs, (filled / filled.shift(freq="5D") - 1).reindex_like(filled) ) @@ -167,7 +166,7 @@ def test_pct_change_shift_over_nas(self): chg = df.pct_change() expected = Series([np.nan, 0.5, 0.0, 2.5 / 1.5 - 1, 0.2]) edf = DataFrame({"a": expected, "b": expected}) - assert_frame_equal(chg, edf) + tm.assert_frame_equal(chg, edf) @pytest.mark.parametrize( "freq, periods, fill_method, limit", @@ -190,12 +189,12 @@ def test_pct_change_periods_freq( rs_periods = datetime_frame.pct_change( periods, fill_method=fill_method, limit=limit ) - assert_frame_equal(rs_freq, rs_periods) + tm.assert_frame_equal(rs_freq, rs_periods) empty_ts = DataFrame(index=datetime_frame.index, columns=datetime_frame.columns) rs_freq = empty_ts.pct_change(freq=freq, fill_method=fill_method, limit=limit) rs_periods = empty_ts.pct_change(periods, fill_method=fill_method, limit=limit) - assert_frame_equal(rs_freq, rs_periods) + tm.assert_frame_equal(rs_freq, rs_periods) def test_frame_ctor_datetime64_column(self): rng = date_range("1/1/2000 00:00:00", "1/1/2000 1:59:50", freq="10s") @@ -256,28 +255,28 @@ def test_shift(self, datetime_frame, int_frame): tm.assert_index_equal(shiftedFrame.index, datetime_frame.index) shiftedSeries = datetime_frame["A"].shift(5) - assert_series_equal(shiftedFrame["A"], shiftedSeries) + tm.assert_series_equal(shiftedFrame["A"], shiftedSeries) shiftedFrame = datetime_frame.shift(-5) tm.assert_index_equal(shiftedFrame.index, datetime_frame.index) shiftedSeries = datetime_frame["A"].shift(-5) - assert_series_equal(shiftedFrame["A"], shiftedSeries) + tm.assert_series_equal(shiftedFrame["A"], shiftedSeries) # shift by 0 unshifted = datetime_frame.shift(0) - assert_frame_equal(unshifted, datetime_frame) + tm.assert_frame_equal(unshifted, datetime_frame) # shift by DateOffset shiftedFrame = datetime_frame.shift(5, freq=offsets.BDay()) assert len(shiftedFrame) == len(datetime_frame) shiftedFrame2 = datetime_frame.shift(5, freq="B") - assert_frame_equal(shiftedFrame, shiftedFrame2) + tm.assert_frame_equal(shiftedFrame, shiftedFrame2) d = datetime_frame.index[0] shifted_d = d + offsets.BDay(5) - assert_series_equal( + tm.assert_series_equal( datetime_frame.xs(d), shiftedFrame.xs(shifted_d), check_names=False ) @@ -296,8 +295,8 @@ def test_shift(self, datetime_frame, int_frame): shifted2 = ps.shift(1, "B") shifted3 = ps.shift(1, offsets.BDay()) - assert_frame_equal(shifted2, shifted3) - 
assert_frame_equal(ps, shifted2.shift(-1, "B")) + tm.assert_frame_equal(shifted2, shifted3) + tm.assert_frame_equal(ps, shifted2.shift(-1, "B")) msg = "does not match PeriodIndex freq" with pytest.raises(ValueError, match=msg): @@ -312,7 +311,7 @@ def test_shift(self, datetime_frame, int_frame): axis=1, ) result = df.shift(1, axis=1) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # shift named axis df = DataFrame(np.random.rand(10, 5)) @@ -322,7 +321,7 @@ def test_shift(self, datetime_frame, int_frame): axis=1, ) result = df.shift(1, axis="columns") - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_shift_bool(self): df = DataFrame({"high": [True, False], "low": [False, False]}) @@ -331,7 +330,7 @@ def test_shift_bool(self): np.array([[np.nan, np.nan], [True, False]], dtype=object), columns=["high", "low"], ) - assert_frame_equal(rs, xp) + tm.assert_frame_equal(rs, xp) def test_shift_categorical(self): # GH 9416 @@ -340,7 +339,7 @@ def test_shift_categorical(self): df = DataFrame({"one": s1, "two": s2}) rs = df.shift(1) xp = DataFrame({"one": s1.shift(1), "two": s2.shift(1)}) - assert_frame_equal(rs, xp) + tm.assert_frame_equal(rs, xp) def test_shift_fill_value(self): # GH #24128 @@ -351,20 +350,20 @@ def test_shift_fill_value(self): [0, 1, 2, 3, 4], index=date_range("1/1/2000", periods=5, freq="H") ) result = df.shift(1, fill_value=0) - assert_frame_equal(result, exp) + tm.assert_frame_equal(result, exp) exp = DataFrame( [0, 0, 1, 2, 3], index=date_range("1/1/2000", periods=5, freq="H") ) result = df.shift(2, fill_value=0) - assert_frame_equal(result, exp) + tm.assert_frame_equal(result, exp) def test_shift_empty(self): # Regression test for #8019 df = DataFrame({"foo": []}) rs = df.shift(-1) - assert_frame_equal(df, rs) + tm.assert_frame_equal(df, rs) def test_shift_duplicate_columns(self): # GH 9092; verify that position-based shifting works @@ -382,11 +381,11 @@ def test_shift_duplicate_columns(self): # sanity check the base case nulls = shifted[0].isna().sum() - assert_series_equal(nulls, Series(range(1, 6), dtype="int64")) + tm.assert_series_equal(nulls, Series(range(1, 6), dtype="int64")) # check all answers are the same - assert_frame_equal(shifted[0], shifted[1]) - assert_frame_equal(shifted[0], shifted[2]) + tm.assert_frame_equal(shifted[0], shifted[1]) + tm.assert_frame_equal(shifted[0], shifted[2]) def test_tshift(self, datetime_frame): # PeriodIndex @@ -394,13 +393,13 @@ def test_tshift(self, datetime_frame): shifted = ps.tshift(1) unshifted = shifted.tshift(-1) - assert_frame_equal(unshifted, ps) + tm.assert_frame_equal(unshifted, ps) shifted2 = ps.tshift(freq="B") - assert_frame_equal(shifted, shifted2) + tm.assert_frame_equal(shifted, shifted2) shifted3 = ps.tshift(freq=offsets.BDay()) - assert_frame_equal(shifted, shifted3) + tm.assert_frame_equal(shifted, shifted3) with pytest.raises(ValueError, match="does not match"): ps.tshift(freq="M") @@ -409,10 +408,10 @@ def test_tshift(self, datetime_frame): shifted = datetime_frame.tshift(1) unshifted = shifted.tshift(-1) - assert_frame_equal(datetime_frame, unshifted) + tm.assert_frame_equal(datetime_frame, unshifted) shifted2 = datetime_frame.tshift(freq=datetime_frame.index.freq) - assert_frame_equal(shifted, shifted2) + tm.assert_frame_equal(shifted, shifted2) inferred_ts = DataFrame( datetime_frame.values, @@ -421,8 +420,8 @@ def test_tshift(self, datetime_frame): ) shifted = inferred_ts.tshift(1) unshifted = shifted.tshift(-1) - 
assert_frame_equal(shifted, datetime_frame.tshift(1)) - assert_frame_equal(unshifted, inferred_ts) + tm.assert_frame_equal(shifted, datetime_frame.tshift(1)) + tm.assert_frame_equal(unshifted, inferred_ts) no_freq = datetime_frame.iloc[[0, 5, 7], :] msg = "Freq was not given and was not set in the index" @@ -439,34 +438,34 @@ def test_truncate(self, datetime_frame): # neither specified truncated = ts.truncate() - assert_frame_equal(truncated, ts) + tm.assert_frame_equal(truncated, ts) # both specified expected = ts[1:3] truncated = ts.truncate(start, end) - assert_frame_equal(truncated, expected) + tm.assert_frame_equal(truncated, expected) truncated = ts.truncate(start_missing, end_missing) - assert_frame_equal(truncated, expected) + tm.assert_frame_equal(truncated, expected) # start specified expected = ts[1:] truncated = ts.truncate(before=start) - assert_frame_equal(truncated, expected) + tm.assert_frame_equal(truncated, expected) truncated = ts.truncate(before=start_missing) - assert_frame_equal(truncated, expected) + tm.assert_frame_equal(truncated, expected) # end specified expected = ts[:3] truncated = ts.truncate(after=end) - assert_frame_equal(truncated, expected) + tm.assert_frame_equal(truncated, expected) truncated = ts.truncate(after=end_missing) - assert_frame_equal(truncated, expected) + tm.assert_frame_equal(truncated, expected) msg = "Truncate: 2000-01-06 00:00:00 must be after 2000-02-04 00:00:00" with pytest.raises(ValueError, match=msg): @@ -553,11 +552,11 @@ def test_asfreq_fillvalue(self): actual_df = df.asfreq(freq="1S", fill_value=9.0) expected_df = df.asfreq(freq="1S").fillna(9.0) expected_df.loc["2016-01-01 00:00:08", "one"] = None - assert_frame_equal(expected_df, actual_df) + tm.assert_frame_equal(expected_df, actual_df) expected_series = ts.asfreq(freq="1S").fillna(9.0) actual_series = ts.asfreq(freq="1S", fill_value=9.0) - assert_series_equal(expected_series, actual_series) + tm.assert_series_equal(expected_series, actual_series) @pytest.mark.parametrize( "data,idx,expected_first,expected_last", @@ -621,14 +620,14 @@ def test_first_subset(self): result = ts.first("3M") expected = ts[:"3/31/2000"] - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = ts.first("21D") expected = ts[:21] - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = ts[:0].first("3M") - assert_frame_equal(result, ts[:0]) + tm.assert_frame_equal(result, ts[:0]) def test_first_raises(self): # GH20725 @@ -647,14 +646,14 @@ def test_last_subset(self): result = ts.last("21D") expected = ts["2000-01-10":] - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = ts.last("21D") expected = ts[-21:] - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = ts[:0].last("3M") - assert_frame_equal(result, ts[:0]) + tm.assert_frame_equal(result, ts[:0]) def test_last_raises(self): # GH20725 @@ -672,19 +671,19 @@ def test_at_time(self): result = ts.at_time("9:30") expected = ts.at_time(time(9, 30)) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = ts.loc[time(9, 30)] expected = ts.loc[(rng.hour == 9) & (rng.minute == 30)] - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # midnight, everything rng = date_range("1/1/2000", "1/31/2000") ts = DataFrame(np.random.randn(len(rng), 3), index=rng) result = ts.at_time(time(0, 0)) - assert_frame_equal(result, ts) + tm.assert_frame_equal(result, ts) # time 
doesn't exist rng = date_range("1/1/2012", freq="23Min", periods=384) @@ -736,7 +735,7 @@ def test_at_time_axis(self, axis): expected = ts.loc[:, indices] result = ts.at_time("9:30", axis=axis) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_between_time(self, close_open_fixture): rng = date_range("1/1/2000", "1/5/2000", freq="5min") @@ -767,7 +766,7 @@ def test_between_time(self, close_open_fixture): result = ts.between_time("00:00", "01:00") expected = ts.between_time(stime, etime) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # across midnight rng = date_range("1/1/2000", "1/5/2000", freq="5min") @@ -890,7 +889,7 @@ def test_frame_to_period(self): pts = df.to_period() exp = df.copy() exp.index = pr - assert_frame_equal(pts, exp) + tm.assert_frame_equal(pts, exp) pts = df.to_period("M") tm.assert_index_equal(pts.index, exp.index.asfreq("M")) @@ -899,7 +898,7 @@ def test_frame_to_period(self): pts = df.to_period(axis=1) exp = df.copy() exp.columns = pr - assert_frame_equal(pts, exp) + tm.assert_frame_equal(pts, exp) pts = df.to_period("M", axis=1) tm.assert_index_equal(pts.columns, exp.columns.asfreq("M")) @@ -926,7 +925,7 @@ def test_tz_convert_and_localize(self, fn): df1 = DataFrame(np.ones(5), index=l0) df1 = getattr(df1, fn)("US/Pacific") - assert_index_equal(df1.index, l0_expected) + tm.assert_index_equal(df1.index, l0_expected) # MultiIndex # GH7846 @@ -934,14 +933,14 @@ def test_tz_convert_and_localize(self, fn): df3 = getattr(df2, fn)("US/Pacific", level=0) assert not df3.index.levels[0].equals(l0) - assert_index_equal(df3.index.levels[0], l0_expected) - assert_index_equal(df3.index.levels[1], l1) + tm.assert_index_equal(df3.index.levels[0], l0_expected) + tm.assert_index_equal(df3.index.levels[1], l1) assert not df3.index.levels[1].equals(l1_expected) df3 = getattr(df2, fn)("US/Pacific", level=1) - assert_index_equal(df3.index.levels[0], l0) + tm.assert_index_equal(df3.index.levels[0], l0) assert not df3.index.levels[0].equals(l0_expected) - assert_index_equal(df3.index.levels[1], l1_expected) + tm.assert_index_equal(df3.index.levels[1], l1_expected) assert not df3.index.levels[1].equals(l1) df4 = DataFrame(np.ones(5), MultiIndex.from_arrays([int_idx, l0])) @@ -949,9 +948,9 @@ def test_tz_convert_and_localize(self, fn): # TODO: untested df5 = getattr(df4, fn)("US/Pacific", level=1) # noqa - assert_index_equal(df3.index.levels[0], l0) + tm.assert_index_equal(df3.index.levels[0], l0) assert not df3.index.levels[0].equals(l0_expected) - assert_index_equal(df3.index.levels[1], l1_expected) + tm.assert_index_equal(df3.index.levels[1], l1_expected) assert not df3.index.levels[1].equals(l1) # Bad Inputs diff --git a/pandas/tests/frame/test_to_csv.py b/pandas/tests/frame/test_to_csv.py index 67c748227a43d..ad058faff96e7 100644 --- a/pandas/tests/frame/test_to_csv.py +++ b/pandas/tests/frame/test_to_csv.py @@ -20,13 +20,6 @@ ) import pandas.core.common as com import pandas.util.testing as tm -from pandas.util.testing import ( - assert_almost_equal, - assert_frame_equal, - assert_series_equal, - ensure_clean, - makeCustomDataframe as mkdf, -) from pandas.io.common import _get_handle @@ -52,7 +45,7 @@ def read_csv(self, path, **kwargs): def test_to_csv_from_csv1(self, float_frame, datetime_frame): - with ensure_clean("__tmp_to_csv_from_csv1__") as path: + with tm.ensure_clean("__tmp_to_csv_from_csv1__") as path: float_frame["A"][:5] = np.nan float_frame.to_csv(path) @@ -63,7 +56,7 @@ def test_to_csv_from_csv1(self, 
float_frame, datetime_frame): # test roundtrip datetime_frame.to_csv(path) recons = self.read_csv(path) - assert_frame_equal(datetime_frame, recons) + tm.assert_frame_equal(datetime_frame, recons) datetime_frame.to_csv(path, index_label="index") recons = self.read_csv(path, index_col=None) @@ -73,7 +66,7 @@ def test_to_csv_from_csv1(self, float_frame, datetime_frame): # no index datetime_frame.to_csv(path, index=False) recons = self.read_csv(path, index_col=None) - assert_almost_equal(datetime_frame.values, recons.values) + tm.assert_almost_equal(datetime_frame.values, recons.values) # corner case dm = DataFrame( @@ -85,11 +78,11 @@ def test_to_csv_from_csv1(self, float_frame, datetime_frame): dm.to_csv(path) recons = self.read_csv(path) - assert_frame_equal(dm, recons) + tm.assert_frame_equal(dm, recons) def test_to_csv_from_csv2(self, float_frame): - with ensure_clean("__tmp_to_csv_from_csv2__") as path: + with tm.ensure_clean("__tmp_to_csv_from_csv2__") as path: # duplicate index df = DataFrame( @@ -97,14 +90,14 @@ def test_to_csv_from_csv2(self, float_frame): ) df.to_csv(path) result = self.read_csv(path) - assert_frame_equal(result, df) + tm.assert_frame_equal(result, df) midx = MultiIndex.from_tuples([("A", 1, 2), ("A", 1, 2), ("B", 1, 2)]) df = DataFrame(np.random.randn(3, 3), index=midx, columns=["x", "y", "z"]) df.to_csv(path) result = self.read_csv(path, index_col=[0, 1, 2], parse_dates=False) - assert_frame_equal(result, df, check_names=False) + tm.assert_frame_equal(result, df, check_names=False) # column aliases col_aliases = Index(["AA", "X", "Y", "Z"]) @@ -113,7 +106,7 @@ def test_to_csv_from_csv2(self, float_frame): rs = self.read_csv(path) xp = float_frame.copy() xp.columns = col_aliases - assert_frame_equal(xp, rs) + tm.assert_frame_equal(xp, rs) msg = "Writing 4 cols but got 2 aliases" with pytest.raises(ValueError, match=msg): @@ -121,7 +114,7 @@ def test_to_csv_from_csv2(self, float_frame): def test_to_csv_from_csv3(self): - with ensure_clean("__tmp_to_csv_from_csv3__") as path: + with tm.ensure_clean("__tmp_to_csv_from_csv3__") as path: df1 = DataFrame(np.random.randn(3, 1)) df2 = DataFrame(np.random.randn(3, 1)) @@ -131,11 +124,11 @@ def test_to_csv_from_csv3(self): rs = pd.read_csv(path, index_col=0) rs.columns = [int(label) for label in rs.columns] xp.columns = [int(label) for label in xp.columns] - assert_frame_equal(xp, rs) + tm.assert_frame_equal(xp, rs) def test_to_csv_from_csv4(self): - with ensure_clean("__tmp_to_csv_from_csv4__") as path: + with tm.ensure_clean("__tmp_to_csv_from_csv4__") as path: # GH 10833 (TimedeltaIndex formatting) dt = pd.Timedelta(seconds=1) df = pd.DataFrame( @@ -150,12 +143,12 @@ def test_to_csv_from_csv4(self): result.index = result.index.rename("dt_index") result["dt_data"] = pd.to_timedelta(result["dt_data"]) - assert_frame_equal(df, result, check_index_type=True) + tm.assert_frame_equal(df, result, check_index_type=True) def test_to_csv_from_csv5(self, timezone_frame): # tz, 8260 - with ensure_clean("__tmp_to_csv_from_csv5__") as path: + with tm.ensure_clean("__tmp_to_csv_from_csv5__") as path: timezone_frame.to_csv(path) result = pd.read_csv(path, index_col=0, parse_dates=["A"]) @@ -167,7 +160,7 @@ def test_to_csv_from_csv5(self, timezone_frame): ) result["B"] = converter("B") result["C"] = converter("C") - assert_frame_equal(result, timezone_frame) + tm.assert_frame_equal(result, timezone_frame) def test_to_csv_cols_reordering(self): # GH3454 @@ -176,21 +169,21 @@ def test_to_csv_cols_reordering(self): chunksize = 5 N = 
int(chunksize * 2.5) - df = mkdf(N, 3) + df = tm.makeCustomDataframe(N, 3) cs = df.columns cols = [cs[2], cs[0]] - with ensure_clean() as path: + with tm.ensure_clean() as path: df.to_csv(path, columns=cols, chunksize=chunksize) rs_c = pd.read_csv(path, index_col=0) - assert_frame_equal(df[cols], rs_c, check_names=False) + tm.assert_frame_equal(df[cols], rs_c, check_names=False) def test_to_csv_new_dupe_cols(self): import pandas as pd def _check_df(df, cols=None): - with ensure_clean() as path: + with tm.ensure_clean() as path: df.to_csv(path, columns=cols, chunksize=chunksize) rs_c = pd.read_csv(path, index_col=0) @@ -208,20 +201,20 @@ def _check_df(df, cols=None): obj_df = df[c] obj_rs = rs_c[c] if isinstance(obj_df, Series): - assert_series_equal(obj_df, obj_rs) + tm.assert_series_equal(obj_df, obj_rs) else: - assert_frame_equal(obj_df, obj_rs, check_names=False) + tm.assert_frame_equal(obj_df, obj_rs, check_names=False) # wrote in the same order else: rs_c.columns = df.columns - assert_frame_equal(df, rs_c, check_names=False) + tm.assert_frame_equal(df, rs_c, check_names=False) chunksize = 5 N = int(chunksize * 2.5) # dupe cols - df = mkdf(N, 3) + df = tm.makeCustomDataframe(N, 3) df.columns = ["a", "a", "b"] _check_df(df, None) @@ -252,12 +245,14 @@ def make_dtnat_arr(n, nnat=None): s2 = make_dtnat_arr(chunksize + 5, 0) # s3=make_dtnjat_arr(chunksize+5,0) - with ensure_clean("1.csv") as pth: + with tm.ensure_clean("1.csv") as pth: df = DataFrame(dict(a=s1, b=s2)) df.to_csv(pth, chunksize=chunksize) recons = self.read_csv(pth)._convert(datetime=True, coerce=True) - assert_frame_equal(df, recons, check_names=False, check_less_precise=True) + tm.assert_frame_equal( + df, recons, check_names=False, check_less_precise=True + ) @pytest.mark.slow def test_to_csv_moar(self): @@ -271,13 +266,13 @@ def _do_test( kwargs["index_col"] = list(range(rnlvl)) kwargs["header"] = list(range(cnlvl)) - with ensure_clean("__tmp_to_csv_moar__") as path: + with tm.ensure_clean("__tmp_to_csv_moar__") as path: df.to_csv(path, encoding="utf8", chunksize=chunksize) recons = self.read_csv(path, **kwargs) else: kwargs["header"] = 0 - with ensure_clean("__tmp_to_csv_moar__") as path: + with tm.ensure_clean("__tmp_to_csv_moar__") as path: df.to_csv(path, encoding="utf8", chunksize=chunksize) recons = self.read_csv(path, **kwargs) @@ -359,7 +354,9 @@ def _to_uni(x): recons.columns = np.array(recons.columns, dtype=c_dtype) df.columns = np.array(df.columns, dtype=c_dtype) - assert_frame_equal(df, recons, check_names=False, check_less_precise=True) + tm.assert_frame_equal( + df, recons, check_names=False, check_less_precise=True + ) N = 100 chunksize = 1000 @@ -382,7 +379,13 @@ def _to_uni(x): base, base + 1, ]: - _do_test(mkdf(nrows, ncols, r_idx_type="dt", c_idx_type="s"), "dt", "s") + _do_test( + tm.makeCustomDataframe( + nrows, ncols, r_idx_type="dt", c_idx_type="s" + ), + "dt", + "s", + ) for ncols in [4]: base = int((chunksize // ncols or 1) or 1) @@ -402,7 +405,13 @@ def _to_uni(x): base, base + 1, ]: - _do_test(mkdf(nrows, ncols, r_idx_type="dt", c_idx_type="s"), "dt", "s") + _do_test( + tm.makeCustomDataframe( + nrows, ncols, r_idx_type="dt", c_idx_type="s" + ), + "dt", + "s", + ) pass for r_idx_type, c_idx_type in [("i", "i"), ("s", "s"), ("u", "dt"), ("p", "p")]: @@ -425,7 +434,7 @@ def _to_uni(x): base + 1, ]: _do_test( - mkdf( + tm.makeCustomDataframe( nrows, ncols, r_idx_type=r_idx_type, c_idx_type=c_idx_type ), r_idx_type, @@ -450,10 +459,10 @@ def _to_uni(x): base, base + 1, ]: - _do_test(mkdf(nrows, 
ncols)) + _do_test(tm.makeCustomDataframe(nrows, ncols)) for nrows in [10, N - 2, N - 1, N, N + 1, N + 2]: - df = mkdf(nrows, 3) + df = tm.makeCustomDataframe(nrows, 3) cols = list(df.columns) cols[:2] = ["dupe", "dupe"] cols[-2:] = ["dupe", "dupe"] @@ -465,7 +474,9 @@ def _to_uni(x): _do_test(df, dupe_col=True) _do_test(DataFrame(index=np.arange(10))) - _do_test(mkdf(chunksize // 2 + 1, 2, r_idx_nlevels=2), rnlvl=2) + _do_test( + tm.makeCustomDataframe(chunksize // 2 + 1, 2, r_idx_nlevels=2), rnlvl=2 + ) for ncols in [2, 3, 4]: base = int(chunksize // ncols) for nrows in [ @@ -484,10 +495,12 @@ def _to_uni(x): base, base + 1, ]: - _do_test(mkdf(nrows, ncols, r_idx_nlevels=2), rnlvl=2) - _do_test(mkdf(nrows, ncols, c_idx_nlevels=2), cnlvl=2) + _do_test(tm.makeCustomDataframe(nrows, ncols, r_idx_nlevels=2), rnlvl=2) + _do_test(tm.makeCustomDataframe(nrows, ncols, c_idx_nlevels=2), cnlvl=2) _do_test( - mkdf(nrows, ncols, r_idx_nlevels=2, c_idx_nlevels=2), + tm.makeCustomDataframe( + nrows, ncols, r_idx_nlevels=2, c_idx_nlevels=2 + ), rnlvl=2, cnlvl=2, ) @@ -499,13 +512,13 @@ def test_to_csv_from_csv_w_some_infs(self, float_frame): f = lambda x: [np.inf, np.nan][np.random.rand() < 0.5] float_frame["H"] = float_frame.index.map(f) - with ensure_clean() as path: + with tm.ensure_clean() as path: float_frame.to_csv(path) recons = self.read_csv(path) # TODO to_csv drops column name - assert_frame_equal(float_frame, recons, check_names=False) - assert_frame_equal( + tm.assert_frame_equal(float_frame, recons, check_names=False) + tm.assert_frame_equal( np.isinf(float_frame), np.isinf(recons), check_names=False ) @@ -515,27 +528,27 @@ def test_to_csv_from_csv_w_all_infs(self, float_frame): float_frame["E"] = np.inf float_frame["F"] = -np.inf - with ensure_clean() as path: + with tm.ensure_clean() as path: float_frame.to_csv(path) recons = self.read_csv(path) # TODO to_csv drops column name - assert_frame_equal(float_frame, recons, check_names=False) - assert_frame_equal( + tm.assert_frame_equal(float_frame, recons, check_names=False) + tm.assert_frame_equal( np.isinf(float_frame), np.isinf(recons), check_names=False ) def test_to_csv_no_index(self): # GH 3624, after appending columns, to_csv fails - with ensure_clean("__tmp_to_csv_no_index__") as path: + with tm.ensure_clean("__tmp_to_csv_no_index__") as path: df = DataFrame({"c1": [1, 2, 3], "c2": [4, 5, 6]}) df.to_csv(path, index=False) result = read_csv(path) - assert_frame_equal(df, result) + tm.assert_frame_equal(df, result) df["c3"] = Series([7, 8, 9], dtype="int64") df.to_csv(path, index=False) result = read_csv(path) - assert_frame_equal(df, result) + tm.assert_frame_equal(df, result) def test_to_csv_with_mix_columns(self): # gh-11637: incorrect output when a mix of integer and string column @@ -550,17 +563,17 @@ def test_to_csv_headers(self): # causes to_csv to have different header semantics. 
from_df = DataFrame([[1, 2], [3, 4]], columns=["A", "B"]) to_df = DataFrame([[1, 2], [3, 4]], columns=["X", "Y"]) - with ensure_clean("__tmp_to_csv_headers__") as path: + with tm.ensure_clean("__tmp_to_csv_headers__") as path: from_df.to_csv(path, header=["X", "Y"]) recons = self.read_csv(path) - assert_frame_equal(to_df, recons) + tm.assert_frame_equal(to_df, recons) from_df.to_csv(path, index=False, header=["X", "Y"]) recons = self.read_csv(path) recons.reset_index(inplace=True) - assert_frame_equal(to_df, recons) + tm.assert_frame_equal(to_df, recons) def test_to_csv_multiindex(self, float_frame, datetime_frame): @@ -570,7 +583,7 @@ def test_to_csv_multiindex(self, float_frame, datetime_frame): new_index = MultiIndex.from_arrays(arrays, names=["first", "second"]) frame.index = new_index - with ensure_clean("__tmp_to_csv_multiindex__") as path: + with tm.ensure_clean("__tmp_to_csv_multiindex__") as path: frame.to_csv(path, header=False) frame.to_csv(path, columns=["A", "B"]) @@ -581,7 +594,7 @@ def test_to_csv_multiindex(self, float_frame, datetime_frame): df = self.read_csv(path, index_col=[0, 1], parse_dates=False) # TODO to_csv drops column name - assert_frame_equal(frame, df, check_names=False) + tm.assert_frame_equal(frame, df, check_names=False) assert frame.index.names == df.index.names # needed if setUp becomes a class method @@ -597,7 +610,7 @@ def test_to_csv_multiindex(self, float_frame, datetime_frame): recons = self.read_csv(path, index_col=[0, 1]) # TODO to_csv drops column name - assert_frame_equal(tsframe, recons, check_names=False) + tm.assert_frame_equal(tsframe, recons, check_names=False) # do not load index tsframe.to_csv(path) @@ -607,12 +620,12 @@ def test_to_csv_multiindex(self, float_frame, datetime_frame): # no index tsframe.to_csv(path, index=False) recons = self.read_csv(path, index_col=None) - assert_almost_equal(recons.values, datetime_frame.values) + tm.assert_almost_equal(recons.values, datetime_frame.values) # needed if setUp becomes class method datetime_frame.index = old_index - with ensure_clean("__tmp_to_csv_multiindex__") as path: + with tm.ensure_clean("__tmp_to_csv_multiindex__") as path: # GH3571, GH1651, GH3141 def _make_frame(names=None): @@ -627,28 +640,28 @@ def _make_frame(names=None): ) # column & index are multi-index - df = mkdf(5, 3, r_idx_nlevels=2, c_idx_nlevels=4) + df = tm.makeCustomDataframe(5, 3, r_idx_nlevels=2, c_idx_nlevels=4) df.to_csv(path) result = read_csv(path, header=[0, 1, 2, 3], index_col=[0, 1]) - assert_frame_equal(df, result) + tm.assert_frame_equal(df, result) # column is mi - df = mkdf(5, 3, r_idx_nlevels=1, c_idx_nlevels=4) + df = tm.makeCustomDataframe(5, 3, r_idx_nlevels=1, c_idx_nlevels=4) df.to_csv(path) result = read_csv(path, header=[0, 1, 2, 3], index_col=0) - assert_frame_equal(df, result) + tm.assert_frame_equal(df, result) # dup column names? 
- df = mkdf(5, 3, r_idx_nlevels=3, c_idx_nlevels=4) + df = tm.makeCustomDataframe(5, 3, r_idx_nlevels=3, c_idx_nlevels=4) df.to_csv(path) result = read_csv(path, header=[0, 1, 2, 3], index_col=[0, 1, 2]) - assert_frame_equal(df, result) + tm.assert_frame_equal(df, result) # writing with no index df = _make_frame() df.to_csv(path, index=False) result = read_csv(path, header=[0, 1]) - assert_frame_equal(df, result) + tm.assert_frame_equal(df, result) # we lose the names here df = _make_frame(True) @@ -656,18 +669,18 @@ def _make_frame(names=None): result = read_csv(path, header=[0, 1]) assert com.all_none(*result.columns.names) result.columns.names = df.columns.names - assert_frame_equal(df, result) + tm.assert_frame_equal(df, result) # whatsnew example df = _make_frame() df.to_csv(path) result = read_csv(path, header=[0, 1], index_col=[0]) - assert_frame_equal(df, result) + tm.assert_frame_equal(df, result) df = _make_frame(True) df.to_csv(path) result = read_csv(path, header=[0, 1], index_col=[0]) - assert_frame_equal(df, result) + tm.assert_frame_equal(df, result) # invalid options df = _make_frame(True) @@ -683,7 +696,7 @@ def _make_frame(names=None): with pytest.raises(TypeError, match=msg): df.to_csv(path, columns=["foo", "bar"]) - with ensure_clean("__tmp_to_csv_multiindex__") as path: + with tm.ensure_clean("__tmp_to_csv_multiindex__") as path: # empty tsframe[:0].to_csv(path) recons = self.read_csv(path) @@ -698,7 +711,7 @@ def test_to_csv_interval_index(self): # GH 28210 df = DataFrame({"A": list("abc"), "B": range(3)}, index=pd.interval_range(0, 3)) - with ensure_clean("__tmp_to_csv_interval_index__.csv") as path: + with tm.ensure_clean("__tmp_to_csv_interval_index__.csv") as path: df.to_csv(path) result = self.read_csv(path, index_col=0) @@ -706,13 +719,13 @@ def test_to_csv_interval_index(self): expected = df.copy() expected.index = expected.index.astype(str) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_to_csv_float32_nanrep(self): df = DataFrame(np.random.randn(1, 4).astype(np.float32)) df[1] = np.nan - with ensure_clean("__tmp_to_csv_float32_nanrep__.csv") as path: + with tm.ensure_clean("__tmp_to_csv_float32_nanrep__.csv") as path: df.to_csv(path, na_rep=999) with open(path) as f: @@ -724,10 +737,10 @@ def test_to_csv_withcommas(self): # Commas inside fields should be correctly escaped when saving as CSV. 
df = DataFrame({"A": [1, 2, 3], "B": ["5,6", "7,8", "9,0"]}) - with ensure_clean("__tmp_to_csv_withcommas__.csv") as path: + with tm.ensure_clean("__tmp_to_csv_withcommas__.csv") as path: df.to_csv(path) df2 = self.read_csv(path) - assert_frame_equal(df2, df) + tm.assert_frame_equal(df2, df) def test_to_csv_mixed(self): def create_cols(name): @@ -766,12 +779,12 @@ def create_cols(name): for c in create_cols(n): dtypes[c] = dtype - with ensure_clean() as filename: + with tm.ensure_clean() as filename: df.to_csv(filename) rs = read_csv( filename, index_col=0, dtype=dtypes, parse_dates=create_cols("date") ) - assert_frame_equal(rs, df) + tm.assert_frame_equal(rs, df) def test_to_csv_dups_cols(self): @@ -781,11 +794,11 @@ def test_to_csv_dups_cols(self): dtype="float64", ) - with ensure_clean() as filename: + with tm.ensure_clean() as filename: df.to_csv(filename) # single dtype, fine result = read_csv(filename, index_col=0) result.columns = df.columns - assert_frame_equal(result, df) + tm.assert_frame_equal(result, df) df_float = DataFrame(np.random.randn(1000, 3), dtype="float64") df_int = DataFrame(np.random.randn(1000, 3), dtype="int64") @@ -801,7 +814,7 @@ def test_to_csv_dups_cols(self): cols.extend([0, 1, 2]) df.columns = cols - with ensure_clean() as filename: + with tm.ensure_clean() as filename: df.to_csv(filename) result = read_csv(filename, index_col=0) @@ -810,22 +823,21 @@ def test_to_csv_dups_cols(self): result[i] = to_datetime(result[i]) result.columns = df.columns - assert_frame_equal(result, df) + tm.assert_frame_equal(result, df) # GH3457 - from pandas.util.testing import makeCustomDataframe as mkdf N = 10 - df = mkdf(N, 3) + df = tm.makeCustomDataframe(N, 3) df.columns = ["a", "a", "b"] - with ensure_clean() as filename: + with tm.ensure_clean() as filename: df.to_csv(filename) # read_csv will rename the dups columns result = read_csv(filename, index_col=0) result = result.rename(columns={"a.1": "a"}) - assert_frame_equal(result, df) + tm.assert_frame_equal(result, df) def test_to_csv_chunking(self): @@ -835,44 +847,44 @@ def test_to_csv_chunking(self): aa["D"] = aa.A + 3.0 for chunksize in [10000, 50000, 100000]: - with ensure_clean() as filename: + with tm.ensure_clean() as filename: aa.to_csv(filename, chunksize=chunksize) rs = read_csv(filename, index_col=0) - assert_frame_equal(rs, aa) + tm.assert_frame_equal(rs, aa) @pytest.mark.slow def test_to_csv_wide_frame_formatting(self): # Issue #8621 df = DataFrame(np.random.randn(1, 100010), columns=None, index=None) - with ensure_clean() as filename: + with tm.ensure_clean() as filename: df.to_csv(filename, header=False, index=False) rs = read_csv(filename, header=None) - assert_frame_equal(rs, df) + tm.assert_frame_equal(rs, df) def test_to_csv_bug(self): f1 = StringIO("a,1.0\nb,2.0") df = self.read_csv(f1, header=None) newdf = DataFrame({"t": df[df.columns[0]]}) - with ensure_clean() as path: + with tm.ensure_clean() as path: newdf.to_csv(path) recons = read_csv(path, index_col=0) # don't check_names as t != 1 - assert_frame_equal(recons, newdf, check_names=False) + tm.assert_frame_equal(recons, newdf, check_names=False) def test_to_csv_unicode(self): df = DataFrame({"c/\u03c3": [1, 2, 3]}) - with ensure_clean() as path: + with tm.ensure_clean() as path: df.to_csv(path, encoding="UTF-8") df2 = read_csv(path, index_col=0, encoding="UTF-8") - assert_frame_equal(df, df2) + tm.assert_frame_equal(df, df2) df.to_csv(path, encoding="UTF-8", index=False) df2 = read_csv(path, index_col=None, encoding="UTF-8") - 
assert_frame_equal(df, df2) + tm.assert_frame_equal(df, df2) def test_to_csv_unicode_index_col(self): buf = StringIO("") @@ -886,7 +898,7 @@ def test_to_csv_unicode_index_col(self): buf.seek(0) df2 = read_csv(buf, index_col=0, encoding="UTF-8") - assert_frame_equal(df, df2) + tm.assert_frame_equal(df, df2) def test_to_csv_stringio(self, float_frame): buf = StringIO() @@ -894,7 +906,7 @@ def test_to_csv_stringio(self, float_frame): buf.seek(0) recons = read_csv(buf, index_col=0) # TODO to_csv drops column name - assert_frame_equal(recons, float_frame, check_names=False) + tm.assert_frame_equal(recons, float_frame, check_names=False) def test_to_csv_float_format(self): @@ -904,7 +916,7 @@ def test_to_csv_float_format(self): columns=["X", "Y", "Z"], ) - with ensure_clean() as filename: + with tm.ensure_clean() as filename: df.to_csv(filename, float_format="%.2f") @@ -914,7 +926,7 @@ def test_to_csv_float_format(self): index=["A", "B"], columns=["X", "Y", "Z"], ) - assert_frame_equal(rs, xp) + tm.assert_frame_equal(rs, xp) def test_to_csv_unicodewriter_quoting(self): df = DataFrame({"A": [1, 2, 3], "B": ["foo", "bar", "baz"]}) @@ -953,7 +965,7 @@ def test_to_csv_line_terminators(self): # see gh-20353 df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}, index=["one", "two", "three"]) - with ensure_clean() as path: + with tm.ensure_clean() as path: # case 1: CRLF as line terminator df.to_csv(path, line_terminator="\r\n") expected = b",A,B\r\none,1,4\r\ntwo,2,5\r\nthree,3,6\r\n" @@ -961,7 +973,7 @@ def test_to_csv_line_terminators(self): with open(path, mode="rb") as f: assert f.read() == expected - with ensure_clean() as path: + with tm.ensure_clean() as path: # case 2: LF as line terminator df.to_csv(path, line_terminator="\n") expected = b",A,B\none,1,4\ntwo,2,5\nthree,3,6\n" @@ -969,7 +981,7 @@ def test_to_csv_line_terminators(self): with open(path, mode="rb") as f: assert f.read() == expected - with ensure_clean() as path: + with tm.ensure_clean() as path: # case 3: The default line terminator(=os.linesep)(gh-21406) df.to_csv(path) os_linesep = os.linesep.encode("utf-8") @@ -1019,7 +1031,7 @@ def test_to_csv_path_is_none(self, float_frame): csv_str = float_frame.to_csv(path_or_buf=None) assert isinstance(csv_str, str) recons = pd.read_csv(StringIO(csv_str), index_col=0) - assert_frame_equal(float_frame, recons) + tm.assert_frame_equal(float_frame, recons) @pytest.mark.parametrize( "df,encoding", @@ -1043,14 +1055,14 @@ def test_to_csv_path_is_none(self, float_frame): ) def test_to_csv_compression(self, df, encoding, compression): - with ensure_clean() as filename: + with tm.ensure_clean() as filename: df.to_csv(filename, compression=compression, encoding=encoding) # test the round trip - to_csv -> read_csv result = read_csv( filename, compression=compression, index_col=0, encoding=encoding ) - assert_frame_equal(df, result) + tm.assert_frame_equal(df, result) # test the round trip using file handle - to_csv -> read_csv f, _handles = _get_handle( @@ -1065,7 +1077,7 @@ def test_to_csv_compression(self, df, encoding, compression): index_col=0, squeeze=True, ) - assert_frame_equal(df, result) + tm.assert_frame_equal(df, result) # explicitly make sure file is compressed with tm.decompress_file(filename, compression) as fh: @@ -1074,10 +1086,10 @@ def test_to_csv_compression(self, df, encoding, compression): assert col in text with tm.decompress_file(filename, compression) as fh: - assert_frame_equal(df, read_csv(fh, index_col=0, encoding=encoding)) + tm.assert_frame_equal(df, read_csv(fh, 
index_col=0, encoding=encoding)) def test_to_csv_date_format(self, datetime_frame): - with ensure_clean("__tmp_to_csv_date_format__") as path: + with tm.ensure_clean("__tmp_to_csv_date_format__") as path: dt_index = datetime_frame.index datetime_frame = DataFrame( {"A": dt_index, "B": dt_index.shift(1)}, index=dt_index @@ -1094,7 +1106,7 @@ def test_to_csv_date_format(self, datetime_frame): lambda x: int(x.strftime("%Y%m%d")) ) - assert_frame_equal(test, datetime_frame_int) + tm.assert_frame_equal(test, datetime_frame_int) datetime_frame.to_csv(path, date_format="%Y-%m-%d") @@ -1107,7 +1119,7 @@ def test_to_csv_date_format(self, datetime_frame): lambda x: x.strftime("%Y-%m-%d") ) - assert_frame_equal(test, datetime_frame_str) + tm.assert_frame_equal(test, datetime_frame_str) # Check that columns get converted datetime_frame_columns = datetime_frame.T @@ -1123,7 +1135,7 @@ def test_to_csv_date_format(self, datetime_frame): lambda x: x.strftime("%Y%m%d") ) - assert_frame_equal(test, datetime_frame_columns) + tm.assert_frame_equal(test, datetime_frame_columns) # test NaTs nat_index = to_datetime( @@ -1134,11 +1146,11 @@ def test_to_csv_date_format(self, datetime_frame): test = read_csv(path, parse_dates=[0, 1], index_col=0) - assert_frame_equal(test, nat_frame) + tm.assert_frame_equal(test, nat_frame) def test_to_csv_with_dst_transitions(self): - with ensure_clean("csv_date_format_with_dst") as path: + with tm.ensure_clean("csv_date_format_with_dst") as path: # make sure we are not failing on transitions times = pd.date_range( "2013-10-26 23:00", @@ -1158,12 +1170,12 @@ def test_to_csv_with_dst_transitions(self): result.index = to_datetime(result.index, utc=True).tz_convert( "Europe/London" ) - assert_frame_equal(result, df) + tm.assert_frame_equal(result, df) # GH11619 idx = pd.date_range("2015-01-01", "2015-12-31", freq="H", tz="Europe/Paris") df = DataFrame({"values": 1, "idx": idx}, index=idx) - with ensure_clean("csv_date_format_with_dst") as path: + with tm.ensure_clean("csv_date_format_with_dst") as path: df.to_csv(path, index=True) result = read_csv(path, index_col=0) result.index = to_datetime(result.index, utc=True).tz_convert( @@ -1172,15 +1184,15 @@ def test_to_csv_with_dst_transitions(self): result["idx"] = to_datetime(result["idx"], utc=True).astype( "datetime64[ns, Europe/Paris]" ) - assert_frame_equal(result, df) + tm.assert_frame_equal(result, df) # assert working df.astype(str) - with ensure_clean("csv_date_format_with_dst") as path: + with tm.ensure_clean("csv_date_format_with_dst") as path: df.to_pickle(path) result = pd.read_pickle(path) - assert_frame_equal(result, df) + tm.assert_frame_equal(result, df) def test_to_csv_quoting(self): df = DataFrame( @@ -1331,14 +1343,14 @@ def test_to_csv_single_level_multi_index(self): df = df.reindex(columns=[(1,), (3,)]) expected = ",1,3\n0,1,3\n" result = df.to_csv(line_terminator="\n") - assert_almost_equal(result, expected) + tm.assert_almost_equal(result, expected) def test_gz_lineend(self): # GH 25311 df = pd.DataFrame({"a": [1, 2]}) expected_rows = ["a", "1", "2"] expected = tm.convert_rows_list_to_csv_str(expected_rows) - with ensure_clean("__test_gz_lineend.csv.gz") as path: + with tm.ensure_clean("__test_gz_lineend.csv.gz") as path: df.to_csv(path, index=False) with tm.decompress_file(path, compression="gzip") as f: result = f.read().decode("utf-8")
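
The pattern applied throughout the diff is the same in every file: drop the per-function imports from pandas.util.testing and route every helper call through the module alias tm. A minimal sketch of that usage is below; it is an illustration only, not part of the patch. The test name and sample data are hypothetical, and it assumes the pandas 0.25-era pandas.util.testing module that the diff itself imports.

    # Illustrative only: the namespaced-helper style this diff standardizes on.
    import numpy as np

    from pandas import DataFrame
    import pandas.util.testing as tm  # single module alias instead of importing assert_frame_equal directly


    def test_sort_values_roundtrip_example():
        # hypothetical data; sorting by "A" should reorder rows to index [1, 2, 0]
        df = DataFrame({"A": [3, 1, 2], "B": np.arange(3)})

        result = df.sort_values(by="A")
        expected = df.loc[[1, 2, 0]]

        # helpers are always reached through the tm namespace
        tm.assert_frame_equal(result, expected)

Keeping a single tm alias (rather than importing assert_frame_equal, assert_series_equal, ensure_clean, makeCustomDataframe individually) makes the call sites self-describing and leaves only one import line to update when the testing helpers move, which is the motivation behind the mechanical replacements above.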