Skip to content

TST/CLN: parameterize/dedup replace test2 #41501

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
May 19, 2021
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
239 changes: 35 additions & 204 deletions pandas/tests/frame/methods/test_replace.py
Original file line number Diff line number Diff line change
Expand Up @@ -1432,213 +1432,44 @@ def test_replace_bytes(self, frame_or_series):


class TestDataFrameReplaceRegex:
def test_regex_replace_scalar(self, mix_ab):
obj = {"a": list("ab.."), "b": list("efgh")}
dfobj = DataFrame(obj)
dfmix = DataFrame(mix_ab)

# simplest cases
# regex -> value
# obj frame
res = dfobj.replace(r"\s*\.\s*", np.nan, regex=True)
tm.assert_frame_equal(dfobj, res.fillna("."))

# mixed
res = dfmix.replace(r"\s*\.\s*", np.nan, regex=True)
tm.assert_frame_equal(dfmix, res.fillna("."))

# regex -> regex
# obj frame
res = dfobj.replace(r"\s*(\.)\s*", r"\1\1\1", regex=True)
objc = obj.copy()
objc["a"] = ["a", "b", "...", "..."]
expec = DataFrame(objc)
tm.assert_frame_equal(res, expec)

# with mixed
res = dfmix.replace(r"\s*(\.)\s*", r"\1\1\1", regex=True)
mixc = mix_ab.copy()
mixc["b"] = ["a", "b", "...", "..."]
expec = DataFrame(mixc)
tm.assert_frame_equal(res, expec)

# everything with compiled regexs as well
res = dfobj.replace(re.compile(r"\s*\.\s*"), np.nan, regex=True)
tm.assert_frame_equal(dfobj, res.fillna("."))

# mixed
res = dfmix.replace(re.compile(r"\s*\.\s*"), np.nan, regex=True)
tm.assert_frame_equal(dfmix, res.fillna("."))

# regex -> regex
# obj frame
res = dfobj.replace(re.compile(r"\s*(\.)\s*"), r"\1\1\1")
objc = obj.copy()
objc["a"] = ["a", "b", "...", "..."]
expec = DataFrame(objc)
tm.assert_frame_equal(res, expec)

# with mixed
res = dfmix.replace(re.compile(r"\s*(\.)\s*"), r"\1\1\1")
mixc = mix_ab.copy()
mixc["b"] = ["a", "b", "...", "..."]
expec = DataFrame(mixc)
tm.assert_frame_equal(res, expec)

res = dfmix.replace(regex=re.compile(r"\s*(\.)\s*"), value=r"\1\1\1")
mixc = mix_ab.copy()
mixc["b"] = ["a", "b", "...", "..."]
expec = DataFrame(mixc)
tm.assert_frame_equal(res, expec)

res = dfmix.replace(regex=r"\s*(\.)\s*", value=r"\1\1\1")
mixc = mix_ab.copy()
mixc["b"] = ["a", "b", "...", "..."]
expec = DataFrame(mixc)
tm.assert_frame_equal(res, expec)

def test_regex_replace_scalar_inplace(self, mix_ab):
obj = {"a": list("ab.."), "b": list("efgh")}
dfobj = DataFrame(obj)
dfmix = DataFrame(mix_ab)

# simplest cases
# regex -> value
# obj frame
res = dfobj.copy()
return_value = res.replace(r"\s*\.\s*", np.nan, regex=True, inplace=True)
assert return_value is None
tm.assert_frame_equal(dfobj, res.fillna("."))

# mixed
res = dfmix.copy()
return_value = res.replace(r"\s*\.\s*", np.nan, regex=True, inplace=True)
assert return_value is None
tm.assert_frame_equal(dfmix, res.fillna("."))

# regex -> regex
# obj frame
res = dfobj.copy()
return_value = res.replace(r"\s*(\.)\s*", r"\1\1\1", regex=True, inplace=True)
assert return_value is None
objc = obj.copy()
objc["a"] = ["a", "b", "...", "..."]
expec = DataFrame(objc)
tm.assert_frame_equal(res, expec)

# with mixed
res = dfmix.copy()
return_value = res.replace(r"\s*(\.)\s*", r"\1\1\1", regex=True, inplace=True)
assert return_value is None
mixc = mix_ab.copy()
mixc["b"] = ["a", "b", "...", "..."]
expec = DataFrame(mixc)
tm.assert_frame_equal(res, expec)

# everything with compiled regexs as well
res = dfobj.copy()
return_value = res.replace(
re.compile(r"\s*\.\s*"), np.nan, regex=True, inplace=True
)
assert return_value is None
tm.assert_frame_equal(dfobj, res.fillna("."))

# mixed
res = dfmix.copy()
return_value = res.replace(
re.compile(r"\s*\.\s*"), np.nan, regex=True, inplace=True
)
assert return_value is None
tm.assert_frame_equal(dfmix, res.fillna("."))

# regex -> regex
# obj frame
res = dfobj.copy()
return_value = res.replace(
re.compile(r"\s*(\.)\s*"), r"\1\1\1", regex=True, inplace=True
)
assert return_value is None
objc = obj.copy()
objc["a"] = ["a", "b", "...", "..."]
expec = DataFrame(objc)
tm.assert_frame_equal(res, expec)

# with mixed
res = dfmix.copy()
return_value = res.replace(
re.compile(r"\s*(\.)\s*"), r"\1\1\1", regex=True, inplace=True
)
assert return_value is None
mixc = mix_ab.copy()
mixc["b"] = ["a", "b", "...", "..."]
expec = DataFrame(mixc)
tm.assert_frame_equal(res, expec)

res = dfobj.copy()
return_value = res.replace(regex=r"\s*\.\s*", value=np.nan, inplace=True)
assert return_value is None
tm.assert_frame_equal(dfobj, res.fillna("."))

# mixed
res = dfmix.copy()
return_value = res.replace(regex=r"\s*\.\s*", value=np.nan, inplace=True)
assert return_value is None
tm.assert_frame_equal(dfmix, res.fillna("."))
@pytest.mark.parametrize(
"data",
[
{"a": list("ab.."), "b": list("efgh")},
{"a": list("ab.."), "b": list(range(4))},
],
)
@pytest.mark.parametrize(
"to_replace,value", [(r"\s*\.\s*", np.nan), (r"\s*(\.)\s*", r"\1\1\1")]
)
@pytest.mark.parametrize("compile_regex", [True, False])
@pytest.mark.parametrize("regex_kwarg", [True, False])
@pytest.mark.parametrize("inplace", [True, False])
def test_regex_replace_scalar(
self, data, to_replace, value, compile_regex, regex_kwarg, inplace
):
df = DataFrame(data)
expected = df.copy()

# regex -> regex
# obj frame
res = dfobj.copy()
return_value = res.replace(regex=r"\s*(\.)\s*", value=r"\1\1\1", inplace=True)
assert return_value is None
objc = obj.copy()
objc["a"] = ["a", "b", "...", "..."]
expec = DataFrame(objc)
tm.assert_frame_equal(res, expec)
if compile_regex:
to_replace = re.compile(to_replace)

# with mixed
res = dfmix.copy()
return_value = res.replace(regex=r"\s*(\.)\s*", value=r"\1\1\1", inplace=True)
assert return_value is None
mixc = mix_ab.copy()
mixc["b"] = ["a", "b", "...", "..."]
expec = DataFrame(mixc)
tm.assert_frame_equal(res, expec)
if regex_kwarg:
regex = to_replace
to_replace = None
else:
regex = True

# everything with compiled regexs as well
res = dfobj.copy()
return_value = res.replace(
regex=re.compile(r"\s*\.\s*"), value=np.nan, inplace=True
)
assert return_value is None
tm.assert_frame_equal(dfobj, res.fillna("."))
result = df.replace(to_replace, value, inplace=inplace, regex=regex)

# mixed
res = dfmix.copy()
return_value = res.replace(
regex=re.compile(r"\s*\.\s*"), value=np.nan, inplace=True
)
assert return_value is None
tm.assert_frame_equal(dfmix, res.fillna("."))
if inplace:
assert result is None
result = df

# regex -> regex
# obj frame
res = dfobj.copy()
return_value = res.replace(
regex=re.compile(r"\s*(\.)\s*"), value=r"\1\1\1", inplace=True
)
assert return_value is None
objc = obj.copy()
objc["a"] = ["a", "b", "...", "..."]
expec = DataFrame(objc)
tm.assert_frame_equal(res, expec)
if value is np.nan:
expected_replace_val = np.nan
else:
expected_replace_val = "..."

# with mixed
res = dfmix.copy()
return_value = res.replace(
regex=re.compile(r"\s*(\.)\s*"), value=r"\1\1\1", inplace=True
)
assert return_value is None
mixc = mix_ab.copy()
mixc["b"] = ["a", "b", "...", "..."]
expec = DataFrame(mixc)
tm.assert_frame_equal(res, expec)
expected.loc[expected["a"] == ".", "a"] = expected_replace_val
tm.assert_frame_equal(result, expected)