From d36b8c63a738093b17739c287aeb0e625f1f2c9d Mon Sep 17 00:00:00 2001 From: Matthew Zeitlin Date: Sat, 15 May 2021 21:43:31 -0400 Subject: [PATCH 1/2] TST/CLN: parameterize/dedup replace test2 --- pandas/tests/frame/methods/test_replace.py | 238 +++------------------ 1 file changed, 33 insertions(+), 205 deletions(-) diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py index fc25d3847867f..fac3cb5feefa9 100644 --- a/pandas/tests/frame/methods/test_replace.py +++ b/pandas/tests/frame/methods/test_replace.py @@ -1500,213 +1500,41 @@ def test_replace_bytes(self, frame_or_series): class TestDataFrameReplaceRegex: - def test_regex_replace_scalar(self, mix_ab): - obj = {"a": list("ab.."), "b": list("efgh")} - dfobj = DataFrame(obj) - dfmix = DataFrame(mix_ab) - - # simplest cases - # regex -> value - # obj frame - res = dfobj.replace(r"\s*\.\s*", np.nan, regex=True) - tm.assert_frame_equal(dfobj, res.fillna(".")) - - # mixed - res = dfmix.replace(r"\s*\.\s*", np.nan, regex=True) - tm.assert_frame_equal(dfmix, res.fillna(".")) - - # regex -> regex - # obj frame - res = dfobj.replace(r"\s*(\.)\s*", r"\1\1\1", regex=True) - objc = obj.copy() - objc["a"] = ["a", "b", "...", "..."] - expec = DataFrame(objc) - tm.assert_frame_equal(res, expec) - - # with mixed - res = dfmix.replace(r"\s*(\.)\s*", r"\1\1\1", regex=True) - mixc = mix_ab.copy() - mixc["b"] = ["a", "b", "...", "..."] - expec = DataFrame(mixc) - tm.assert_frame_equal(res, expec) - - # everything with compiled regexs as well - res = dfobj.replace(re.compile(r"\s*\.\s*"), np.nan, regex=True) - tm.assert_frame_equal(dfobj, res.fillna(".")) - - # mixed - res = dfmix.replace(re.compile(r"\s*\.\s*"), np.nan, regex=True) - tm.assert_frame_equal(dfmix, res.fillna(".")) - - # regex -> regex - # obj frame - res = dfobj.replace(re.compile(r"\s*(\.)\s*"), r"\1\1\1") - objc = obj.copy() - objc["a"] = ["a", "b", "...", "..."] - expec = DataFrame(objc) - tm.assert_frame_equal(res, expec) - - # with mixed - res = dfmix.replace(re.compile(r"\s*(\.)\s*"), r"\1\1\1") - mixc = mix_ab.copy() - mixc["b"] = ["a", "b", "...", "..."] - expec = DataFrame(mixc) - tm.assert_frame_equal(res, expec) - - res = dfmix.replace(regex=re.compile(r"\s*(\.)\s*"), value=r"\1\1\1") - mixc = mix_ab.copy() - mixc["b"] = ["a", "b", "...", "..."] - expec = DataFrame(mixc) - tm.assert_frame_equal(res, expec) - - res = dfmix.replace(regex=r"\s*(\.)\s*", value=r"\1\1\1") - mixc = mix_ab.copy() - mixc["b"] = ["a", "b", "...", "..."] - expec = DataFrame(mixc) - tm.assert_frame_equal(res, expec) - - def test_regex_replace_scalar_inplace(self, mix_ab): - obj = {"a": list("ab.."), "b": list("efgh")} - dfobj = DataFrame(obj) - dfmix = DataFrame(mix_ab) - - # simplest cases - # regex -> value - # obj frame - res = dfobj.copy() - return_value = res.replace(r"\s*\.\s*", np.nan, regex=True, inplace=True) - assert return_value is None - tm.assert_frame_equal(dfobj, res.fillna(".")) - - # mixed - res = dfmix.copy() - return_value = res.replace(r"\s*\.\s*", np.nan, regex=True, inplace=True) - assert return_value is None - tm.assert_frame_equal(dfmix, res.fillna(".")) - - # regex -> regex - # obj frame - res = dfobj.copy() - return_value = res.replace(r"\s*(\.)\s*", r"\1\1\1", regex=True, inplace=True) - assert return_value is None - objc = obj.copy() - objc["a"] = ["a", "b", "...", "..."] - expec = DataFrame(objc) - tm.assert_frame_equal(res, expec) - - # with mixed - res = dfmix.copy() - return_value = res.replace(r"\s*(\.)\s*", r"\1\1\1", regex=True, inplace=True) - assert return_value is None - mixc = mix_ab.copy() - mixc["b"] = ["a", "b", "...", "..."] - expec = DataFrame(mixc) - tm.assert_frame_equal(res, expec) - - # everything with compiled regexs as well - res = dfobj.copy() - return_value = res.replace( - re.compile(r"\s*\.\s*"), np.nan, regex=True, inplace=True - ) - assert return_value is None - tm.assert_frame_equal(dfobj, res.fillna(".")) - - # mixed - res = dfmix.copy() - return_value = res.replace( - re.compile(r"\s*\.\s*"), np.nan, regex=True, inplace=True - ) - assert return_value is None - tm.assert_frame_equal(dfmix, res.fillna(".")) - - # regex -> regex - # obj frame - res = dfobj.copy() - return_value = res.replace( - re.compile(r"\s*(\.)\s*"), r"\1\1\1", regex=True, inplace=True - ) - assert return_value is None - objc = obj.copy() - objc["a"] = ["a", "b", "...", "..."] - expec = DataFrame(objc) - tm.assert_frame_equal(res, expec) - - # with mixed - res = dfmix.copy() - return_value = res.replace( - re.compile(r"\s*(\.)\s*"), r"\1\1\1", regex=True, inplace=True - ) - assert return_value is None - mixc = mix_ab.copy() - mixc["b"] = ["a", "b", "...", "..."] - expec = DataFrame(mixc) - tm.assert_frame_equal(res, expec) - - res = dfobj.copy() - return_value = res.replace(regex=r"\s*\.\s*", value=np.nan, inplace=True) - assert return_value is None - tm.assert_frame_equal(dfobj, res.fillna(".")) - - # mixed - res = dfmix.copy() - return_value = res.replace(regex=r"\s*\.\s*", value=np.nan, inplace=True) - assert return_value is None - tm.assert_frame_equal(dfmix, res.fillna(".")) - - # regex -> regex - # obj frame - res = dfobj.copy() - return_value = res.replace(regex=r"\s*(\.)\s*", value=r"\1\1\1", inplace=True) - assert return_value is None - objc = obj.copy() - objc["a"] = ["a", "b", "...", "..."] - expec = DataFrame(objc) - tm.assert_frame_equal(res, expec) + @pytest.mark.parametrize( + "data", + [ + {"a": list("ab.."), "b": list("efgh")}, + {"a": list("ab.."), "b": list(range(4))}, + ], + ) + @pytest.mark.parametrize( + "regex,value", [(r"\s*\.\s*", np.nan), (r"\s*(\.)\s*", r"\1\1\1")] + ) + @pytest.mark.parametrize("compile_regex", [True, False]) + @pytest.mark.parametrize("use_regex_value_arg", [True, False]) + @pytest.mark.parametrize("inplace", [True, False]) + def test_regex_replace_scalar( + self, data, regex, value, compile_regex, use_regex_value_arg, inplace + ): + df = DataFrame(data) + expected = df.copy() - # with mixed - res = dfmix.copy() - return_value = res.replace(regex=r"\s*(\.)\s*", value=r"\1\1\1", inplace=True) - assert return_value is None - mixc = mix_ab.copy() - mixc["b"] = ["a", "b", "...", "..."] - expec = DataFrame(mixc) - tm.assert_frame_equal(res, expec) + if compile_regex: + regex = re.compile(regex) - # everything with compiled regexs as well - res = dfobj.copy() - return_value = res.replace( - regex=re.compile(r"\s*\.\s*"), value=np.nan, inplace=True - ) - assert return_value is None - tm.assert_frame_equal(dfobj, res.fillna(".")) + if use_regex_value_arg: + result = df.replace(regex=regex, value=value, inplace=inplace) + else: + result = df.replace(regex, value, regex=True, inplace=inplace) - # mixed - res = dfmix.copy() - return_value = res.replace( - regex=re.compile(r"\s*\.\s*"), value=np.nan, inplace=True - ) - assert return_value is None - tm.assert_frame_equal(dfmix, res.fillna(".")) + if inplace: + assert result is None + result = df - # regex -> regex - # obj frame - res = dfobj.copy() - return_value = res.replace( - regex=re.compile(r"\s*(\.)\s*"), value=r"\1\1\1", inplace=True - ) - assert return_value is None - objc = obj.copy() - objc["a"] = ["a", "b", "...", "..."] - expec = DataFrame(objc) - tm.assert_frame_equal(res, expec) + if value is np.nan: + expected_replace_val = np.nan + else: + expected_replace_val = "..." - # with mixed - res = dfmix.copy() - return_value = res.replace( - regex=re.compile(r"\s*(\.)\s*"), value=r"\1\1\1", inplace=True - ) - assert return_value is None - mixc = mix_ab.copy() - mixc["b"] = ["a", "b", "...", "..."] - expec = DataFrame(mixc) - tm.assert_frame_equal(res, expec) + expected.loc[expected["a"] == ".", "a"] = expected_replace_val + tm.assert_frame_equal(result, expected) From cb440cc92dde98eaa0bbf0348796b3f218592f37 Mon Sep 17 00:00:00 2001 From: Matthew Zeitlin Date: Mon, 17 May 2021 15:52:35 -0400 Subject: [PATCH 2/2] Better arg names, not conditional result --- pandas/tests/frame/methods/test_replace.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py index 57cb67c18daee..c5bc15effa99a 100644 --- a/pandas/tests/frame/methods/test_replace.py +++ b/pandas/tests/frame/methods/test_replace.py @@ -1440,24 +1440,27 @@ class TestDataFrameReplaceRegex: ], ) @pytest.mark.parametrize( - "regex,value", [(r"\s*\.\s*", np.nan), (r"\s*(\.)\s*", r"\1\1\1")] + "to_replace,value", [(r"\s*\.\s*", np.nan), (r"\s*(\.)\s*", r"\1\1\1")] ) @pytest.mark.parametrize("compile_regex", [True, False]) - @pytest.mark.parametrize("use_regex_value_arg", [True, False]) + @pytest.mark.parametrize("regex_kwarg", [True, False]) @pytest.mark.parametrize("inplace", [True, False]) def test_regex_replace_scalar( - self, data, regex, value, compile_regex, use_regex_value_arg, inplace + self, data, to_replace, value, compile_regex, regex_kwarg, inplace ): df = DataFrame(data) expected = df.copy() if compile_regex: - regex = re.compile(regex) + to_replace = re.compile(to_replace) - if use_regex_value_arg: - result = df.replace(regex=regex, value=value, inplace=inplace) + if regex_kwarg: + regex = to_replace + to_replace = None else: - result = df.replace(regex, value, regex=True, inplace=inplace) + regex = True + + result = df.replace(to_replace, value, inplace=inplace, regex=regex) if inplace: assert result is None