From 3c92f7255fb06e6b1b7284eb14cac35a73a5f344 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Sun, 6 Feb 2022 20:33:59 -0800 Subject: [PATCH 1/4] TST: Parameterize tests --- pandas/tests/arithmetic/test_timedelta64.py | 1 + pandas/tests/arrays/sparse/test_indexing.py | 12 ++++----- pandas/tests/frame/indexing/test_indexing.py | 17 +++++------- pandas/tests/indexes/period/test_tools.py | 28 ++++++++++++-------- 4 files changed, 30 insertions(+), 28 deletions(-) diff --git a/pandas/tests/arithmetic/test_timedelta64.py b/pandas/tests/arithmetic/test_timedelta64.py index 3bc38c3e38213..1878b8c08b9a2 100644 --- a/pandas/tests/arithmetic/test_timedelta64.py +++ b/pandas/tests/arithmetic/test_timedelta64.py @@ -538,6 +538,7 @@ def test_timedelta(self, freq): expected = index + timedelta(-1) tm.assert_index_equal(result, expected) + def test_timedelta_tick_arithmetic(self): # GH#4134, buggy with timedeltas rng = pd.date_range("2013", "2014") s = Series(rng) diff --git a/pandas/tests/arrays/sparse/test_indexing.py b/pandas/tests/arrays/sparse/test_indexing.py index 2794fe33e53e5..580c82552998a 100644 --- a/pandas/tests/arrays/sparse/test_indexing.py +++ b/pandas/tests/arrays/sparse/test_indexing.py @@ -13,13 +13,11 @@ class TestGetitem: - def test_getitem(self): - def _checkit(i): - tm.assert_almost_equal(arr[i], arr.to_dense()[i]) - - for i in range(len(arr)): - _checkit(i) - _checkit(-i) + @pytest.mark.parametrize("mul", [1, -1]) + @pytest.mark.parametrize("idx", range(len(arr))) + def test_getitem(self, mul, idx): + i = mul * idx + tm.assert_almost_equal(arr[i], arr.to_dense()[i]) def test_getitem_arraylike_mask(self): arr = SparseArray([0, 1, 2]) diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index 34f00477672d6..16602046da43e 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -195,17 +195,14 @@ def test_getitem_boolean_casting(self, datetime_frame): ) tm.assert_series_equal(result, expected) - def test_getitem_boolean_list(self): + @pytest.mark.parametrize( + "lst", [[True, False, True], [True, True, True], [False, False, False]] + ) + def test_getitem_boolean_list(self, lst): df = DataFrame(np.arange(12).reshape(3, 4)) - - def _checkit(lst): - result = df[lst] - expected = df.loc[df.index[lst]] - tm.assert_frame_equal(result, expected) - - _checkit([True, False, True]) - _checkit([True, True, True]) - _checkit([False, False, False]) + result = df[lst] + expected = df.loc[df.index[lst]] + tm.assert_frame_equal(result, expected) def test_getitem_boolean_iadd(self): arr = np.random.randn(5, 5) diff --git a/pandas/tests/indexes/period/test_tools.py b/pandas/tests/indexes/period/test_tools.py index 82a3721b0cbb9..41b76d6aced23 100644 --- a/pandas/tests/indexes/period/test_tools.py +++ b/pandas/tests/indexes/period/test_tools.py @@ -14,22 +14,28 @@ class TestPeriodRepresentation: Wish to match NumPy units """ - def _check_freq(self, freq, base_date): + @pytest.mark.parametrize( + "freq, base_date", + [ + ("W-THU", "1970-01-01"), + ("D", "1970-01-01"), + ("B", "1970-01-01"), + ("H", "1970-01-01"), + ("T", "1970-01-01"), + ("S", "1970-01-01"), + ("L", "1970-01-01"), + ("U", "1970-01-01"), + ("N", "1970-01-01"), + ("M", "1970-01"), + ("A", 1970), + ], + ) + def test_freq(self, freq, base_date): rng = period_range(start=base_date, periods=10, freq=freq) exp = np.arange(10, dtype=np.int64) tm.assert_numpy_array_equal(rng.asi8, exp) - def test_annual(self): - self._check_freq("A", 1970) - - def test_monthly(self): - self._check_freq("M", "1970-01") - - @pytest.mark.parametrize("freq", ["W-THU", "D", "B", "H", "T", "S", "L", "U", "N"]) - def test_freq(self, freq): - self._check_freq(freq, "1970-01-01") - class TestPeriodIndexConversion: def test_tolist(self): From e9f22df311e4febb111d8842d899be59f4c808e9 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Mon, 7 Feb 2022 10:17:29 -0800 Subject: [PATCH 2/4] Param more tests --- pandas/tests/frame/test_arithmetic.py | 33 +------- pandas/tests/frame/test_stack_unstack.py | 91 +++++++++++++---------- pandas/tests/io/parser/test_textreader.py | 54 +++++++------- pandas/tests/io/test_pickle.py | 39 ++++++---- 4 files changed, 106 insertions(+), 111 deletions(-) diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index bde44ddf12a8e..cf39694366bbf 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -847,36 +847,11 @@ def test_arith_mixed(self): expected = DataFrame({"A": ["aa", "bb", "cc"], "B": [2, 4, 6]}) tm.assert_frame_equal(result, expected) - def test_arith_getitem_commute(self): + @pytest.mark.parametrize("col", ["A", "B"]) + def test_arith_getitem_commute(self, all_arithmetic_functions, col): df = DataFrame({"A": [1.1, 3.3], "B": [2.5, -3.9]}) - - def _test_op(df, op): - result = op(df, 1) - - if not df.columns.is_unique: - raise ValueError("Only unique columns supported by this test") - - for col in result.columns: - tm.assert_series_equal(result[col], op(df[col], 1)) - - _test_op(df, operator.add) - _test_op(df, operator.sub) - _test_op(df, operator.mul) - _test_op(df, operator.truediv) - _test_op(df, operator.floordiv) - _test_op(df, operator.pow) - - _test_op(df, lambda x, y: y + x) - _test_op(df, lambda x, y: y - x) - _test_op(df, lambda x, y: y * x) - _test_op(df, lambda x, y: y / x) - _test_op(df, lambda x, y: y**x) - - _test_op(df, lambda x, y: x + y) - _test_op(df, lambda x, y: x - y) - _test_op(df, lambda x, y: x * y) - _test_op(df, lambda x, y: x / y) - _test_op(df, lambda x, y: x**y) + result = all_arithmetic_functions(df, 1) + tm.assert_series_equal(result[col], all_arithmetic_functions(df[col], 1)) @pytest.mark.parametrize( "values", [[1, 2], (1, 2), np.array([1, 2]), range(1, 3), deque([1, 2])] diff --git a/pandas/tests/frame/test_stack_unstack.py b/pandas/tests/frame/test_stack_unstack.py index a8ed07d6deda0..7efcc0ce8556b 100644 --- a/pandas/tests/frame/test_stack_unstack.py +++ b/pandas/tests/frame/test_stack_unstack.py @@ -992,40 +992,9 @@ def test_stack_datetime_column_multiIndex(self): expected = DataFrame([1, 2, 3, 4], index=eidx, columns=ecols) tm.assert_frame_equal(result, expected) - def test_stack_partial_multiIndex(self): - # GH 8844 - def _test_stack_with_multiindex(multiindex): - df = DataFrame( - np.arange(3 * len(multiindex)).reshape(3, len(multiindex)), - columns=multiindex, - ) - for level in (-1, 0, 1, [0, 1], [1, 0]): - result = df.stack(level=level, dropna=False) - - if isinstance(level, int): - # Stacking a single level should not make any all-NaN rows, - # so df.stack(level=level, dropna=False) should be the same - # as df.stack(level=level, dropna=True). - expected = df.stack(level=level, dropna=True) - if isinstance(expected, Series): - tm.assert_series_equal(result, expected) - else: - tm.assert_frame_equal(result, expected) - - df.columns = MultiIndex.from_tuples( - df.columns.to_numpy(), names=df.columns.names - ) - expected = df.stack(level=level, dropna=False) - if isinstance(expected, Series): - tm.assert_series_equal(result, expected) - else: - tm.assert_frame_equal(result, expected) - - full_multiindex = MultiIndex.from_tuples( - [("B", "x"), ("B", "z"), ("A", "y"), ("C", "x"), ("C", "u")], - names=["Upper", "Lower"], - ) - for multiindex_columns in ( + @pytest.mark.parametrize( + "multiindex_columns", + [ [0, 1, 2, 3, 4], [0, 1, 2, 3], [0, 1, 2, 4], @@ -1038,12 +1007,56 @@ def _test_stack_with_multiindex(multiindex): [0], [2], [4], - ): - _test_stack_with_multiindex(full_multiindex[multiindex_columns]) - if len(multiindex_columns) > 1: - multiindex_columns.reverse() - _test_stack_with_multiindex(full_multiindex[multiindex_columns]) + [4, 3, 2, 1, 0], + [3, 2, 1, 0], + [4, 2, 1, 0], + [2, 1, 0], + [3, 2, 1], + [4, 3, 2], + [1, 0], + [2, 0], + [3, 0], + ], + ) + @pytest.mark.parametrize("level", (-1, 0, 1, [0, 1], [1, 0])) + def test_stack_partial_multiIndex(self, multiindex_columns, level): + # GH 8844 + full_multiindex = MultiIndex.from_tuples( + [("B", "x"), ("B", "z"), ("A", "y"), ("C", "x"), ("C", "u")], + names=["Upper", "Lower"], + ) + multiindex = full_multiindex[multiindex_columns] + df = DataFrame( + np.arange(3 * len(multiindex)).reshape(3, len(multiindex)), + columns=multiindex, + ) + result = df.stack(level=level, dropna=False) + + if isinstance(level, int): + # Stacking a single level should not make any all-NaN rows, + # so df.stack(level=level, dropna=False) should be the same + # as df.stack(level=level, dropna=True). + expected = df.stack(level=level, dropna=True) + if isinstance(expected, Series): + tm.assert_series_equal(result, expected) + else: + tm.assert_frame_equal(result, expected) + + df.columns = MultiIndex.from_tuples( + df.columns.to_numpy(), names=df.columns.names + ) + expected = df.stack(level=level, dropna=False) + if isinstance(expected, Series): + tm.assert_series_equal(result, expected) + else: + tm.assert_frame_equal(result, expected) + def test_stack_full_multiIndex(self): + # GH 8844 + full_multiindex = MultiIndex.from_tuples( + [("B", "x"), ("B", "z"), ("A", "y"), ("C", "x"), ("C", "u")], + names=["Upper", "Lower"], + ) df = DataFrame(np.arange(6).reshape(2, 3), columns=full_multiindex[[0, 1, 3]]) result = df.stack(dropna=False) expected = DataFrame( diff --git a/pandas/tests/io/parser/test_textreader.py b/pandas/tests/io/parser/test_textreader.py index a58ed02d30ef9..f150ed3903443 100644 --- a/pandas/tests/io/parser/test_textreader.py +++ b/pandas/tests/io/parser/test_textreader.py @@ -261,34 +261,32 @@ def _make_reader(**kwds): assert (result[1] == exp[1]).all() assert (result[2] == exp[2]).all() - def test_cr_delimited(self): - def _test(text, **kwargs): - nice_text = text.replace("\r", "\r\n") - result = TextReader(StringIO(text), **kwargs).read() - expected = TextReader(StringIO(nice_text), **kwargs).read() - assert_array_dicts_equal(result, expected) - - data = "a,b,c\r1,2,3\r4,5,6\r7,8,9\r10,11,12" - _test(data, delimiter=",") - - data = "a b c\r1 2 3\r4 5 6\r7 8 9\r10 11 12" - _test(data, delim_whitespace=True) - - data = "a,b,c\r1,2,3\r4,5,6\r,88,9\r10,11,12" - _test(data, delimiter=",") - - sample = ( - "A,B,C,D,E,F,G,H,I,J,K,L,M,N,O\r" - "AAAAA,BBBBB,0,0,0,0,0,0,0,0,0,0,0,0,0\r" - ",BBBBB,0,0,0,0,0,0,0,0,0,0,0,0,0" - ) - _test(sample, delimiter=",") - - data = "A B C\r 2 3\r4 5 6" - _test(data, delim_whitespace=True) - - data = "A B C\r2 3\r4 5 6" - _test(data, delim_whitespace=True) + @pytest.mark.parametrize( + "text, kwargs", + [ + ("a,b,c\r1,2,3\r4,5,6\r7,8,9\r10,11,12", {"delimiter": ","}), + ( + "a b c\r1 2 3\r4 5 6\r7 8 9\r10 11 12", + {"delim_whitespace": True}, + ), + ("a,b,c\r1,2,3\r4,5,6\r,88,9\r10,11,12", {"delimiter": ","}), + ( + ( + "A,B,C,D,E,F,G,H,I,J,K,L,M,N,O\r" + "AAAAA,BBBBB,0,0,0,0,0,0,0,0,0,0,0,0,0\r" + ",BBBBB,0,0,0,0,0,0,0,0,0,0,0,0,0" + ), + {"delimiter": ","}, + ), + ("A B C\r 2 3\r4 5 6", {"delim_whitespace": True}), + ("A B C\r2 3\r4 5 6", {"delim_whitespace": True}), + ], + ) + def test_cr_delimited(self, text, kwargs): + nice_text = text.replace("\r", "\r\n") + result = TextReader(StringIO(text), **kwargs).read() + expected = TextReader(StringIO(nice_text), **kwargs).read() + assert_array_dicts_equal(result, expected) def test_empty_field_eof(self): data = "a,b,c\n1,2,3\n4,," diff --git a/pandas/tests/io/test_pickle.py b/pandas/tests/io/test_pickle.py index 4700e307f2407..6debd8a0d8dfa 100644 --- a/pandas/tests/io/test_pickle.py +++ b/pandas/tests/io/test_pickle.py @@ -552,21 +552,30 @@ def test_pickle_binary_object_compression(compression): tm.assert_frame_equal(df, read_df) -def test_pickle_dataframe_with_multilevel_index( - multiindex_year_month_day_dataframe_random_data, - multiindex_dataframe_random_data, -): - ymd = multiindex_year_month_day_dataframe_random_data - frame = multiindex_dataframe_random_data - - def _test_roundtrip(frame): - unpickled = tm.round_trip_pickle(frame) - tm.assert_frame_equal(frame, unpickled) - - _test_roundtrip(frame) - _test_roundtrip(frame.T) - _test_roundtrip(ymd) - _test_roundtrip(ymd.T) +@pytest.mark.parametrize( + "fixture_name", + [ + "multiindex_year_month_day_dataframe_random_data", + "multiindex_dataframe_random_data", + ], +) +def test_pickle_dataframe_with_multilevel_index(fixture_name, request): + frame = request.getfixturevalue(fixture_name) + unpickled = tm.round_trip_pickle(frame) + tm.assert_frame_equal(frame, unpickled) + + +@pytest.mark.parametrize( + "fixture_name", + [ + "multiindex_year_month_day_dataframe_random_data", + "multiindex_dataframe_random_data", + ], +) +def test_pickle_dataframe_with_multilevel_index_transpose(fixture_name, request): + frame = request.getfixturevalue(fixture_name).T + unpickled = tm.round_trip_pickle(frame) + tm.assert_frame_equal(frame, unpickled) def test_pickle_timeseries_periodindex(): From bf08831e5bd2e1543559e33717a195d920a77737 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Mon, 7 Feb 2022 13:23:41 -0800 Subject: [PATCH 3/4] Address review --- pandas/tests/arrays/sparse/test_indexing.py | 6 ++-- pandas/tests/frame/test_arithmetic.py | 5 +-- pandas/tests/io/test_pickle.py | 39 ++++++++------------- 3 files changed, 22 insertions(+), 28 deletions(-) diff --git a/pandas/tests/arrays/sparse/test_indexing.py b/pandas/tests/arrays/sparse/test_indexing.py index 580c82552998a..98b719fae9d8b 100644 --- a/pandas/tests/arrays/sparse/test_indexing.py +++ b/pandas/tests/arrays/sparse/test_indexing.py @@ -16,8 +16,10 @@ class TestGetitem: @pytest.mark.parametrize("mul", [1, -1]) @pytest.mark.parametrize("idx", range(len(arr))) def test_getitem(self, mul, idx): - i = mul * idx - tm.assert_almost_equal(arr[i], arr.to_dense()[i]) + dense = arr.to_dense() + for i in range(len(arr)): + tm.assert_almost_equal(arr[i], dense[i]) + tm.assert_almost_equal(arr[-i], dense[-i]) def test_getitem_arraylike_mask(self): arr = SparseArray([0, 1, 2]) diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index cf39694366bbf..3e9860d157e5f 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -850,8 +850,9 @@ def test_arith_mixed(self): @pytest.mark.parametrize("col", ["A", "B"]) def test_arith_getitem_commute(self, all_arithmetic_functions, col): df = DataFrame({"A": [1.1, 3.3], "B": [2.5, -3.9]}) - result = all_arithmetic_functions(df, 1) - tm.assert_series_equal(result[col], all_arithmetic_functions(df[col], 1)) + result = all_arithmetic_functions(df, 1)[col] + expected = all_arithmetic_functions(df[col], 1) + tm.assert_series_equal(result, expected) @pytest.mark.parametrize( "values", [[1, 2], (1, 2), np.array([1, 2]), range(1, 3), deque([1, 2])] diff --git a/pandas/tests/io/test_pickle.py b/pandas/tests/io/test_pickle.py index 6debd8a0d8dfa..4700e307f2407 100644 --- a/pandas/tests/io/test_pickle.py +++ b/pandas/tests/io/test_pickle.py @@ -552,30 +552,21 @@ def test_pickle_binary_object_compression(compression): tm.assert_frame_equal(df, read_df) -@pytest.mark.parametrize( - "fixture_name", - [ - "multiindex_year_month_day_dataframe_random_data", - "multiindex_dataframe_random_data", - ], -) -def test_pickle_dataframe_with_multilevel_index(fixture_name, request): - frame = request.getfixturevalue(fixture_name) - unpickled = tm.round_trip_pickle(frame) - tm.assert_frame_equal(frame, unpickled) - - -@pytest.mark.parametrize( - "fixture_name", - [ - "multiindex_year_month_day_dataframe_random_data", - "multiindex_dataframe_random_data", - ], -) -def test_pickle_dataframe_with_multilevel_index_transpose(fixture_name, request): - frame = request.getfixturevalue(fixture_name).T - unpickled = tm.round_trip_pickle(frame) - tm.assert_frame_equal(frame, unpickled) +def test_pickle_dataframe_with_multilevel_index( + multiindex_year_month_day_dataframe_random_data, + multiindex_dataframe_random_data, +): + ymd = multiindex_year_month_day_dataframe_random_data + frame = multiindex_dataframe_random_data + + def _test_roundtrip(frame): + unpickled = tm.round_trip_pickle(frame) + tm.assert_frame_equal(frame, unpickled) + + _test_roundtrip(frame) + _test_roundtrip(frame.T) + _test_roundtrip(ymd) + _test_roundtrip(ymd.T) def test_pickle_timeseries_periodindex(): From 616708c36f9fade9c9d372a1d7e37cd8bd94c90e Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Mon, 7 Feb 2022 13:26:16 -0800 Subject: [PATCH 4/4] Remove unnecessary param --- pandas/tests/arrays/sparse/test_indexing.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pandas/tests/arrays/sparse/test_indexing.py b/pandas/tests/arrays/sparse/test_indexing.py index 98b719fae9d8b..7ea36ed041f44 100644 --- a/pandas/tests/arrays/sparse/test_indexing.py +++ b/pandas/tests/arrays/sparse/test_indexing.py @@ -13,9 +13,7 @@ class TestGetitem: - @pytest.mark.parametrize("mul", [1, -1]) - @pytest.mark.parametrize("idx", range(len(arr))) - def test_getitem(self, mul, idx): + def test_getitem(self): dense = arr.to_dense() for i in range(len(arr)): tm.assert_almost_equal(arr[i], dense[i])