Skip to content

TST/CLN: parameterize/dedup replace test2 #41501

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
May 19, 2021
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
238 changes: 33 additions & 205 deletions pandas/tests/frame/methods/test_replace.py
Original file line number Diff line number Diff line change
Expand Up @@ -1500,213 +1500,41 @@ def test_replace_bytes(self, frame_or_series):


class TestDataFrameReplaceRegex:
def test_regex_replace_scalar(self, mix_ab):
obj = {"a": list("ab.."), "b": list("efgh")}
dfobj = DataFrame(obj)
dfmix = DataFrame(mix_ab)

# simplest cases
# regex -> value
# obj frame
res = dfobj.replace(r"\s*\.\s*", np.nan, regex=True)
tm.assert_frame_equal(dfobj, res.fillna("."))

# mixed
res = dfmix.replace(r"\s*\.\s*", np.nan, regex=True)
tm.assert_frame_equal(dfmix, res.fillna("."))

# regex -> regex
# obj frame
res = dfobj.replace(r"\s*(\.)\s*", r"\1\1\1", regex=True)
objc = obj.copy()
objc["a"] = ["a", "b", "...", "..."]
expec = DataFrame(objc)
tm.assert_frame_equal(res, expec)

# with mixed
res = dfmix.replace(r"\s*(\.)\s*", r"\1\1\1", regex=True)
mixc = mix_ab.copy()
mixc["b"] = ["a", "b", "...", "..."]
expec = DataFrame(mixc)
tm.assert_frame_equal(res, expec)

# everything with compiled regexs as well
res = dfobj.replace(re.compile(r"\s*\.\s*"), np.nan, regex=True)
tm.assert_frame_equal(dfobj, res.fillna("."))

# mixed
res = dfmix.replace(re.compile(r"\s*\.\s*"), np.nan, regex=True)
tm.assert_frame_equal(dfmix, res.fillna("."))

# regex -> regex
# obj frame
res = dfobj.replace(re.compile(r"\s*(\.)\s*"), r"\1\1\1")
objc = obj.copy()
objc["a"] = ["a", "b", "...", "..."]
expec = DataFrame(objc)
tm.assert_frame_equal(res, expec)

# with mixed
res = dfmix.replace(re.compile(r"\s*(\.)\s*"), r"\1\1\1")
mixc = mix_ab.copy()
mixc["b"] = ["a", "b", "...", "..."]
expec = DataFrame(mixc)
tm.assert_frame_equal(res, expec)

res = dfmix.replace(regex=re.compile(r"\s*(\.)\s*"), value=r"\1\1\1")
mixc = mix_ab.copy()
mixc["b"] = ["a", "b", "...", "..."]
expec = DataFrame(mixc)
tm.assert_frame_equal(res, expec)

res = dfmix.replace(regex=r"\s*(\.)\s*", value=r"\1\1\1")
mixc = mix_ab.copy()
mixc["b"] = ["a", "b", "...", "..."]
expec = DataFrame(mixc)
tm.assert_frame_equal(res, expec)

def test_regex_replace_scalar_inplace(self, mix_ab):
obj = {"a": list("ab.."), "b": list("efgh")}
dfobj = DataFrame(obj)
dfmix = DataFrame(mix_ab)

# simplest cases
# regex -> value
# obj frame
res = dfobj.copy()
return_value = res.replace(r"\s*\.\s*", np.nan, regex=True, inplace=True)
assert return_value is None
tm.assert_frame_equal(dfobj, res.fillna("."))

# mixed
res = dfmix.copy()
return_value = res.replace(r"\s*\.\s*", np.nan, regex=True, inplace=True)
assert return_value is None
tm.assert_frame_equal(dfmix, res.fillna("."))

# regex -> regex
# obj frame
res = dfobj.copy()
return_value = res.replace(r"\s*(\.)\s*", r"\1\1\1", regex=True, inplace=True)
assert return_value is None
objc = obj.copy()
objc["a"] = ["a", "b", "...", "..."]
expec = DataFrame(objc)
tm.assert_frame_equal(res, expec)

# with mixed
res = dfmix.copy()
return_value = res.replace(r"\s*(\.)\s*", r"\1\1\1", regex=True, inplace=True)
assert return_value is None
mixc = mix_ab.copy()
mixc["b"] = ["a", "b", "...", "..."]
expec = DataFrame(mixc)
tm.assert_frame_equal(res, expec)

# everything with compiled regexs as well
res = dfobj.copy()
return_value = res.replace(
re.compile(r"\s*\.\s*"), np.nan, regex=True, inplace=True
)
assert return_value is None
tm.assert_frame_equal(dfobj, res.fillna("."))

# mixed
res = dfmix.copy()
return_value = res.replace(
re.compile(r"\s*\.\s*"), np.nan, regex=True, inplace=True
)
assert return_value is None
tm.assert_frame_equal(dfmix, res.fillna("."))

# regex -> regex
# obj frame
res = dfobj.copy()
return_value = res.replace(
re.compile(r"\s*(\.)\s*"), r"\1\1\1", regex=True, inplace=True
)
assert return_value is None
objc = obj.copy()
objc["a"] = ["a", "b", "...", "..."]
expec = DataFrame(objc)
tm.assert_frame_equal(res, expec)

# with mixed
res = dfmix.copy()
return_value = res.replace(
re.compile(r"\s*(\.)\s*"), r"\1\1\1", regex=True, inplace=True
)
assert return_value is None
mixc = mix_ab.copy()
mixc["b"] = ["a", "b", "...", "..."]
expec = DataFrame(mixc)
tm.assert_frame_equal(res, expec)

res = dfobj.copy()
return_value = res.replace(regex=r"\s*\.\s*", value=np.nan, inplace=True)
assert return_value is None
tm.assert_frame_equal(dfobj, res.fillna("."))

# mixed
res = dfmix.copy()
return_value = res.replace(regex=r"\s*\.\s*", value=np.nan, inplace=True)
assert return_value is None
tm.assert_frame_equal(dfmix, res.fillna("."))

# regex -> regex
# obj frame
res = dfobj.copy()
return_value = res.replace(regex=r"\s*(\.)\s*", value=r"\1\1\1", inplace=True)
assert return_value is None
objc = obj.copy()
objc["a"] = ["a", "b", "...", "..."]
expec = DataFrame(objc)
tm.assert_frame_equal(res, expec)
@pytest.mark.parametrize(
"data",
[
{"a": list("ab.."), "b": list("efgh")},
{"a": list("ab.."), "b": list(range(4))},
],
)
@pytest.mark.parametrize(
"regex,value", [(r"\s*\.\s*", np.nan), (r"\s*(\.)\s*", r"\1\1\1")]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Rename `regex` -> `to_replace` to match the `DataFrame.replace` parameter name.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

renamed

)
@pytest.mark.parametrize("compile_regex", [True, False])
@pytest.mark.parametrize("use_regex_value_arg", [True, False])
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

use_regex_value_arg -> regex (or regex_kwarg)

There's also a redundant permutation here; maybe add a skip for that case instead of calling with regex=True explicitly.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Renamed the arg. Could you please elaborate on the redundant permutation here? Are you referring to the case where compile_regex=True and regex_kwarg=False (so regex=True does not really need to be specified)?

@pytest.mark.parametrize("inplace", [True, False])
def test_regex_replace_scalar(
self, data, regex, value, compile_regex, use_regex_value_arg, inplace
):
df = DataFrame(data)
expected = df.copy()

# with mixed
res = dfmix.copy()
return_value = res.replace(regex=r"\s*(\.)\s*", value=r"\1\1\1", inplace=True)
assert return_value is None
mixc = mix_ab.copy()
mixc["b"] = ["a", "b", "...", "..."]
expec = DataFrame(mixc)
tm.assert_frame_equal(res, expec)
if compile_regex:
regex = re.compile(regex)

# everything with compiled regexs as well
res = dfobj.copy()
return_value = res.replace(
regex=re.compile(r"\s*\.\s*"), value=np.nan, inplace=True
)
assert return_value is None
tm.assert_frame_equal(dfobj, res.fillna("."))
if use_regex_value_arg:
result = df.replace(regex=regex, value=value, inplace=inplace)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you put the arguments in the same order as the parameters?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

else:
result = df.replace(regex, value, regex=True, inplace=inplace)

# mixed
res = dfmix.copy()
return_value = res.replace(
regex=re.compile(r"\s*\.\s*"), value=np.nan, inplace=True
)
assert return_value is None
tm.assert_frame_equal(dfmix, res.fillna("."))
if inplace:
assert result is None
result = df

# regex -> regex
# obj frame
res = dfobj.copy()
return_value = res.replace(
regex=re.compile(r"\s*(\.)\s*"), value=r"\1\1\1", inplace=True
)
assert return_value is None
objc = obj.copy()
objc["a"] = ["a", "b", "...", "..."]
expec = DataFrame(objc)
tm.assert_frame_equal(res, expec)
if value is np.nan:
expected_replace_val = np.nan
else:
expected_replace_val = "..."

# with mixed
res = dfmix.copy()
return_value = res.replace(
regex=re.compile(r"\s*(\.)\s*"), value=r"\1\1\1", inplace=True
)
assert return_value is None
mixc = mix_ab.copy()
mixc["b"] = ["a", "b", "...", "..."]
expec = DataFrame(mixc)
tm.assert_frame_equal(res, expec)
expected.loc[expected["a"] == ".", "a"] = expected_replace_val
tm.assert_frame_equal(result, expected)