From 96f44b491090bdbf64480023b250001efd94b4ed Mon Sep 17 00:00:00 2001
From: Matthew Zeitlin
Date: Sat, 15 May 2021 20:46:41 -0400
Subject: [PATCH] TST/CLN: parameterize/dedup replace test

---
 pandas/tests/frame/methods/test_replace.py | 172 +++++++--------------
 1 file changed, 52 insertions(+), 120 deletions(-)

diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py
index fc25d3847867f..e6e992d37fd5d 100644
--- a/pandas/tests/frame/methods/test_replace.py
+++ b/pandas/tests/frame/methods/test_replace.py
@@ -57,129 +57,61 @@ def test_replace_inplace(self, datetime_frame, float_string_frame):
         assert return_value is None
         tm.assert_frame_equal(tsframe, datetime_frame.fillna(0))
 
-    def test_regex_replace_list_obj(self):
-        obj = {"a": list("ab.."), "b": list("efgh"), "c": list("helo")}
-        dfobj = DataFrame(obj)
-
-        # lists of regexes and values
-        # list of [re1, re2, ..., reN] -> [v1, v2, ..., vN]
-        to_replace_res = [r"\s*\.\s*", r"e|f|g"]
-        values = [np.nan, "crap"]
-        res = dfobj.replace(to_replace_res, values, regex=True)
-        expec = DataFrame(
-            {
-                "a": ["a", "b", np.nan, np.nan],
-                "b": ["crap"] * 3 + ["h"],
-                "c": ["h", "crap", "l", "o"],
-            }
-        )
-        tm.assert_frame_equal(res, expec)
-
-        # list of [re1, re2, ..., reN] -> [re1, re2, .., reN]
-        to_replace_res = [r"\s*(\.)\s*", r"(e|f|g)"]
-        values = [r"\1\1", r"\1_crap"]
-        res = dfobj.replace(to_replace_res, values, regex=True)
-        expec = DataFrame(
-            {
-                "a": ["a", "b", "..", ".."],
-                "b": ["e_crap", "f_crap", "g_crap", "h"],
-                "c": ["h", "e_crap", "l", "o"],
-            }
-        )
-        tm.assert_frame_equal(res, expec)
-
-        # list of [re1, re2, ..., reN] -> [(re1 or v1), (re2 or v2), ..., (reN
-        # or vN)]
-        to_replace_res = [r"\s*(\.)\s*", r"e"]
-        values = [r"\1\1", r"crap"]
-        res = dfobj.replace(to_replace_res, values, regex=True)
-        expec = DataFrame(
-            {
-                "a": ["a", "b", "..", ".."],
-                "b": ["crap", "f", "g", "h"],
-                "c": ["h", "crap", "l", "o"],
-            }
-        )
-        tm.assert_frame_equal(res, expec)
-
-        to_replace_res = [r"\s*(\.)\s*", r"e"]
-        values = [r"\1\1", r"crap"]
-        res = dfobj.replace(value=values, regex=to_replace_res)
-        expec = DataFrame(
-            {
-                "a": ["a", "b", "..", ".."],
-                "b": ["crap", "f", "g", "h"],
-                "c": ["h", "crap", "l", "o"],
-            }
-        )
-        tm.assert_frame_equal(res, expec)
-
-    def test_regex_replace_list_obj_inplace(self):
-        # same as above with inplace=True
-        # lists of regexes and values
-        obj = {"a": list("ab.."), "b": list("efgh"), "c": list("helo")}
-        dfobj = DataFrame(obj)
-
-        # lists of regexes and values
-        # list of [re1, re2, ..., reN] -> [v1, v2, ..., vN]
-        to_replace_res = [r"\s*\.\s*", r"e|f|g"]
-        values = [np.nan, "crap"]
-        res = dfobj.copy()
-        return_value = res.replace(to_replace_res, values, inplace=True, regex=True)
-        assert return_value is None
-        expec = DataFrame(
-            {
-                "a": ["a", "b", np.nan, np.nan],
-                "b": ["crap"] * 3 + ["h"],
-                "c": ["h", "crap", "l", "o"],
-            }
-        )
-        tm.assert_frame_equal(res, expec)
+    @pytest.mark.parametrize(
+        "to_replace,values,expected",
+        [
+            # lists of regexes and values
+            # list of [re1, re2, ..., reN] -> [v1, v2, ..., vN]
+            (
+                [r"\s*\.\s*", r"e|f|g"],
+                [np.nan, "crap"],
+                {
+                    "a": ["a", "b", np.nan, np.nan],
+                    "b": ["crap"] * 3 + ["h"],
+                    "c": ["h", "crap", "l", "o"],
+                },
+            ),
+            # list of [re1, re2, ..., reN] -> [re1, re2, .., reN]
+            (
+                [r"\s*(\.)\s*", r"(e|f|g)"],
+                [r"\1\1", r"\1_crap"],
+                {
+                    "a": ["a", "b", "..", ".."],
+                    "b": ["e_crap", "f_crap", "g_crap", "h"],
+                    "c": ["h", "e_crap", "l", "o"],
+                },
+            ),
+            # list of [re1, re2, ..., reN] -> [(re1 or v1), (re2 or v2), ..., (reN
+            # or vN)]
+            (
+                [r"\s*(\.)\s*", r"e"],
+                [r"\1\1", r"crap"],
+                {
+                    "a": ["a", "b", "..", ".."],
+                    "b": ["crap", "f", "g", "h"],
+                    "c": ["h", "crap", "l", "o"],
+                },
+            ),
+        ],
+    )
+    @pytest.mark.parametrize("inplace", [True, False])
+    @pytest.mark.parametrize("use_value_regex_args", [True, False])
+    def test_regex_replace_list_obj(
+        self, to_replace, values, expected, inplace, use_value_regex_args
+    ):
+        df = DataFrame({"a": list("ab.."), "b": list("efgh"), "c": list("helo")})
 
-        # list of [re1, re2, ..., reN] -> [re1, re2, .., reN]
-        to_replace_res = [r"\s*(\.)\s*", r"(e|f|g)"]
-        values = [r"\1\1", r"\1_crap"]
-        res = dfobj.copy()
-        return_value = res.replace(to_replace_res, values, inplace=True, regex=True)
-        assert return_value is None
-        expec = DataFrame(
-            {
-                "a": ["a", "b", "..", ".."],
-                "b": ["e_crap", "f_crap", "g_crap", "h"],
-                "c": ["h", "e_crap", "l", "o"],
-            }
-        )
-        tm.assert_frame_equal(res, expec)
+        if use_value_regex_args:
+            result = df.replace(value=values, regex=to_replace, inplace=inplace)
+        else:
+            result = df.replace(to_replace, values, regex=True, inplace=inplace)
 
-        # list of [re1, re2, ..., reN] -> [(re1 or v1), (re2 or v2), ..., (reN
-        # or vN)]
-        to_replace_res = [r"\s*(\.)\s*", r"e"]
-        values = [r"\1\1", r"crap"]
-        res = dfobj.copy()
-        return_value = res.replace(to_replace_res, values, inplace=True, regex=True)
-        assert return_value is None
-        expec = DataFrame(
-            {
-                "a": ["a", "b", "..", ".."],
-                "b": ["crap", "f", "g", "h"],
-                "c": ["h", "crap", "l", "o"],
-            }
-        )
-        tm.assert_frame_equal(res, expec)
+        if inplace:
+            assert result is None
+            result = df
 
-        to_replace_res = [r"\s*(\.)\s*", r"e"]
-        values = [r"\1\1", r"crap"]
-        res = dfobj.copy()
-        return_value = res.replace(value=values, regex=to_replace_res, inplace=True)
-        assert return_value is None
-        expec = DataFrame(
-            {
-                "a": ["a", "b", "..", ".."],
-                "b": ["crap", "f", "g", "h"],
-                "c": ["h", "crap", "l", "o"],
-            }
-        )
-        tm.assert_frame_equal(res, expec)
+        expected = DataFrame(expected)
+        tm.assert_frame_equal(result, expected)
 
     def test_regex_replace_list_mixed(self, mix_ab):
         # mixed frame to make sure this doesn't break things