-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
TST/CLN: parameterize/dedup replace test2 #41501
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1500,213 +1500,41 @@ def test_replace_bytes(self, frame_or_series): | |
|
||
|
||
class TestDataFrameReplaceRegex: | ||
def test_regex_replace_scalar(self, mix_ab): | ||
obj = {"a": list("ab.."), "b": list("efgh")} | ||
dfobj = DataFrame(obj) | ||
dfmix = DataFrame(mix_ab) | ||
|
||
# simplest cases | ||
# regex -> value | ||
# obj frame | ||
res = dfobj.replace(r"\s*\.\s*", np.nan, regex=True) | ||
tm.assert_frame_equal(dfobj, res.fillna(".")) | ||
|
||
# mixed | ||
res = dfmix.replace(r"\s*\.\s*", np.nan, regex=True) | ||
tm.assert_frame_equal(dfmix, res.fillna(".")) | ||
|
||
# regex -> regex | ||
# obj frame | ||
res = dfobj.replace(r"\s*(\.)\s*", r"\1\1\1", regex=True) | ||
objc = obj.copy() | ||
objc["a"] = ["a", "b", "...", "..."] | ||
expec = DataFrame(objc) | ||
tm.assert_frame_equal(res, expec) | ||
|
||
# with mixed | ||
res = dfmix.replace(r"\s*(\.)\s*", r"\1\1\1", regex=True) | ||
mixc = mix_ab.copy() | ||
mixc["b"] = ["a", "b", "...", "..."] | ||
expec = DataFrame(mixc) | ||
tm.assert_frame_equal(res, expec) | ||
|
||
# everything with compiled regexs as well | ||
res = dfobj.replace(re.compile(r"\s*\.\s*"), np.nan, regex=True) | ||
tm.assert_frame_equal(dfobj, res.fillna(".")) | ||
|
||
# mixed | ||
res = dfmix.replace(re.compile(r"\s*\.\s*"), np.nan, regex=True) | ||
tm.assert_frame_equal(dfmix, res.fillna(".")) | ||
|
||
# regex -> regex | ||
# obj frame | ||
res = dfobj.replace(re.compile(r"\s*(\.)\s*"), r"\1\1\1") | ||
objc = obj.copy() | ||
objc["a"] = ["a", "b", "...", "..."] | ||
expec = DataFrame(objc) | ||
tm.assert_frame_equal(res, expec) | ||
|
||
# with mixed | ||
res = dfmix.replace(re.compile(r"\s*(\.)\s*"), r"\1\1\1") | ||
mixc = mix_ab.copy() | ||
mixc["b"] = ["a", "b", "...", "..."] | ||
expec = DataFrame(mixc) | ||
tm.assert_frame_equal(res, expec) | ||
|
||
res = dfmix.replace(regex=re.compile(r"\s*(\.)\s*"), value=r"\1\1\1") | ||
mixc = mix_ab.copy() | ||
mixc["b"] = ["a", "b", "...", "..."] | ||
expec = DataFrame(mixc) | ||
tm.assert_frame_equal(res, expec) | ||
|
||
res = dfmix.replace(regex=r"\s*(\.)\s*", value=r"\1\1\1") | ||
mixc = mix_ab.copy() | ||
mixc["b"] = ["a", "b", "...", "..."] | ||
expec = DataFrame(mixc) | ||
tm.assert_frame_equal(res, expec) | ||
|
||
def test_regex_replace_scalar_inplace(self, mix_ab): | ||
obj = {"a": list("ab.."), "b": list("efgh")} | ||
dfobj = DataFrame(obj) | ||
dfmix = DataFrame(mix_ab) | ||
|
||
# simplest cases | ||
# regex -> value | ||
# obj frame | ||
res = dfobj.copy() | ||
return_value = res.replace(r"\s*\.\s*", np.nan, regex=True, inplace=True) | ||
assert return_value is None | ||
tm.assert_frame_equal(dfobj, res.fillna(".")) | ||
|
||
# mixed | ||
res = dfmix.copy() | ||
return_value = res.replace(r"\s*\.\s*", np.nan, regex=True, inplace=True) | ||
assert return_value is None | ||
tm.assert_frame_equal(dfmix, res.fillna(".")) | ||
|
||
# regex -> regex | ||
# obj frame | ||
res = dfobj.copy() | ||
return_value = res.replace(r"\s*(\.)\s*", r"\1\1\1", regex=True, inplace=True) | ||
assert return_value is None | ||
objc = obj.copy() | ||
objc["a"] = ["a", "b", "...", "..."] | ||
expec = DataFrame(objc) | ||
tm.assert_frame_equal(res, expec) | ||
|
||
# with mixed | ||
res = dfmix.copy() | ||
return_value = res.replace(r"\s*(\.)\s*", r"\1\1\1", regex=True, inplace=True) | ||
assert return_value is None | ||
mixc = mix_ab.copy() | ||
mixc["b"] = ["a", "b", "...", "..."] | ||
expec = DataFrame(mixc) | ||
tm.assert_frame_equal(res, expec) | ||
|
||
# everything with compiled regexs as well | ||
res = dfobj.copy() | ||
return_value = res.replace( | ||
re.compile(r"\s*\.\s*"), np.nan, regex=True, inplace=True | ||
) | ||
assert return_value is None | ||
tm.assert_frame_equal(dfobj, res.fillna(".")) | ||
|
||
# mixed | ||
res = dfmix.copy() | ||
return_value = res.replace( | ||
re.compile(r"\s*\.\s*"), np.nan, regex=True, inplace=True | ||
) | ||
assert return_value is None | ||
tm.assert_frame_equal(dfmix, res.fillna(".")) | ||
|
||
# regex -> regex | ||
# obj frame | ||
res = dfobj.copy() | ||
return_value = res.replace( | ||
re.compile(r"\s*(\.)\s*"), r"\1\1\1", regex=True, inplace=True | ||
) | ||
assert return_value is None | ||
objc = obj.copy() | ||
objc["a"] = ["a", "b", "...", "..."] | ||
expec = DataFrame(objc) | ||
tm.assert_frame_equal(res, expec) | ||
|
||
# with mixed | ||
res = dfmix.copy() | ||
return_value = res.replace( | ||
re.compile(r"\s*(\.)\s*"), r"\1\1\1", regex=True, inplace=True | ||
) | ||
assert return_value is None | ||
mixc = mix_ab.copy() | ||
mixc["b"] = ["a", "b", "...", "..."] | ||
expec = DataFrame(mixc) | ||
tm.assert_frame_equal(res, expec) | ||
|
||
res = dfobj.copy() | ||
return_value = res.replace(regex=r"\s*\.\s*", value=np.nan, inplace=True) | ||
assert return_value is None | ||
tm.assert_frame_equal(dfobj, res.fillna(".")) | ||
|
||
# mixed | ||
res = dfmix.copy() | ||
return_value = res.replace(regex=r"\s*\.\s*", value=np.nan, inplace=True) | ||
assert return_value is None | ||
tm.assert_frame_equal(dfmix, res.fillna(".")) | ||
|
||
# regex -> regex | ||
# obj frame | ||
res = dfobj.copy() | ||
return_value = res.replace(regex=r"\s*(\.)\s*", value=r"\1\1\1", inplace=True) | ||
assert return_value is None | ||
objc = obj.copy() | ||
objc["a"] = ["a", "b", "...", "..."] | ||
expec = DataFrame(objc) | ||
tm.assert_frame_equal(res, expec) | ||
@pytest.mark.parametrize( | ||
"data", | ||
[ | ||
{"a": list("ab.."), "b": list("efgh")}, | ||
{"a": list("ab.."), "b": list(range(4))}, | ||
], | ||
) | ||
@pytest.mark.parametrize( | ||
"regex,value", [(r"\s*\.\s*", np.nan), (r"\s*(\.)\s*", r"\1\1\1")] | ||
) | ||
@pytest.mark.parametrize("compile_regex", [True, False]) | ||
@pytest.mark.parametrize("use_regex_value_arg", [True, False]) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. use_regex_value_arg -> regex (or regex_kwarg). There's also a redundant permutation here; can maybe add a skip for that case instead of calling with There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Renamed arg. Could you please elaborate on the redundant permutation here? Are you referring to when |
||
@pytest.mark.parametrize("inplace", [True, False]) | ||
def test_regex_replace_scalar( | ||
self, data, regex, value, compile_regex, use_regex_value_arg, inplace | ||
): | ||
df = DataFrame(data) | ||
expected = df.copy() | ||
|
||
# with mixed | ||
res = dfmix.copy() | ||
return_value = res.replace(regex=r"\s*(\.)\s*", value=r"\1\1\1", inplace=True) | ||
assert return_value is None | ||
mixc = mix_ab.copy() | ||
mixc["b"] = ["a", "b", "...", "..."] | ||
expec = DataFrame(mixc) | ||
tm.assert_frame_equal(res, expec) | ||
if compile_regex: | ||
regex = re.compile(regex) | ||
|
||
# everything with compiled regexs as well | ||
res = dfobj.copy() | ||
return_value = res.replace( | ||
regex=re.compile(r"\s*\.\s*"), value=np.nan, inplace=True | ||
) | ||
assert return_value is None | ||
tm.assert_frame_equal(dfobj, res.fillna(".")) | ||
if use_regex_value_arg: | ||
result = df.replace(regex=regex, value=value, inplace=inplace) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can you put the arguments in the same order as the parameters? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Done. |
||
else: | ||
result = df.replace(regex, value, regex=True, inplace=inplace) | ||
|
||
# mixed | ||
res = dfmix.copy() | ||
return_value = res.replace( | ||
regex=re.compile(r"\s*\.\s*"), value=np.nan, inplace=True | ||
) | ||
assert return_value is None | ||
tm.assert_frame_equal(dfmix, res.fillna(".")) | ||
if inplace: | ||
assert result is None | ||
result = df | ||
|
||
# regex -> regex | ||
# obj frame | ||
res = dfobj.copy() | ||
return_value = res.replace( | ||
regex=re.compile(r"\s*(\.)\s*"), value=r"\1\1\1", inplace=True | ||
) | ||
assert return_value is None | ||
objc = obj.copy() | ||
objc["a"] = ["a", "b", "...", "..."] | ||
expec = DataFrame(objc) | ||
tm.assert_frame_equal(res, expec) | ||
if value is np.nan: | ||
expected_replace_val = np.nan | ||
else: | ||
expected_replace_val = "..." | ||
|
||
# with mixed | ||
res = dfmix.copy() | ||
return_value = res.replace( | ||
regex=re.compile(r"\s*(\.)\s*"), value=r"\1\1\1", inplace=True | ||
) | ||
assert return_value is None | ||
mixc = mix_ab.copy() | ||
mixc["b"] = ["a", "b", "...", "..."] | ||
expec = DataFrame(mixc) | ||
tm.assert_frame_equal(res, expec) | ||
expected.loc[expected["a"] == ".", "a"] = expected_replace_val | ||
tm.assert_frame_equal(result, expected) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Rename `regex` -> `to_replace` to match the `DataFrame.replace` parameter name.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
renamed