Skip to content

Commit e350f10

Browse files
[backport 2.3.x] TST (string dtype): resolve xfails for frame fillna and replace tests + fix bug in replace for string (#60295) (#60331)
* TST (string dtype): resolve xfails for frame fillna and replace tests + fix bug in replace for string (#60295)
  (cherry picked from commit fae3e80)
* fix tests for default mode
* fixes
* cleanup
* update indexing tests
1 parent 99ae39e commit e350f10

File tree

5 files changed

+75
-63
lines changed

5 files changed

+75
-63
lines changed

pandas/core/array_algos/replace.py

+2
Original file line number | Diff line number | Diff line change
@@ -149,4 +149,6 @@ def re_replacer(s):
149149
if mask is None:
150150
values[:] = f(values)
151151
else:
152+
if values.ndim != mask.ndim:
153+
mask = np.broadcast_to(mask, values.shape)
152154
values[mask] = f(values[mask])

pandas/core/internals/blocks.py

+20-5
Original file line number | Diff line number | Diff line change
@@ -929,7 +929,7 @@ def replace(
929929
blocks = blk.convert(
930930
copy=False,
931931
using_cow=using_cow,
932-
convert_string=convert_string or self.dtype != _dtype_obj,
932+
convert_string=convert_string or self.dtype == "string",
933933
)
934934
if len(blocks) > 1 or blocks[0].dtype != blk.dtype:
935935
warnings.warn(
@@ -987,7 +987,7 @@ def _replace_regex(
987987
inplace: bool = False,
988988
mask=None,
989989
using_cow: bool = False,
990-
convert_string: bool = True,
990+
convert_string=None,
991991
already_warned=None,
992992
) -> list[Block]:
993993
"""
@@ -1048,10 +1048,18 @@ def _replace_regex(
10481048
already_warned.warned_already = True
10491049

10501050
nbs = block.convert(
1051-
copy=False, using_cow=using_cow, convert_string=convert_string
1051+
copy=False,
1052+
using_cow=using_cow,
1053+
convert_string=convert_string or self.dtype == "string",
10521054
)
10531055
opt = get_option("future.no_silent_downcasting")
1054-
if (len(nbs) > 1 or nbs[0].dtype != block.dtype) and not opt:
1056+
if (
1057+
len(nbs) > 1
1058+
or (
1059+
nbs[0].dtype != block.dtype
1060+
and not (self.dtype == "string" and nbs[0].dtype == "string")
1061+
)
1062+
) and not opt:
10551063
warnings.warn(
10561064
# GH#54710
10571065
"Downcasting behavior in `replace` is deprecated and "
@@ -1088,7 +1096,7 @@ def replace_list(
10881096
values._replace(to_replace=src_list, value=dest_list, inplace=True)
10891097
return [blk]
10901098

1091-
convert_string = self.dtype != _dtype_obj
1099+
convert_string = self.dtype == "string"
10921100

10931101
# Exclude anything that we know we won't contain
10941102
pairs = [
@@ -2167,6 +2175,13 @@ def where(
21672175
if isinstance(self.dtype, (IntervalDtype, StringDtype)):
21682176
# TestSetitemFloatIntervalWithIntIntervalValues
21692177
blk = self.coerce_to_target_dtype(orig_other)
2178+
if (
2179+
self.ndim == 2
2180+
and isinstance(orig_cond, np.ndarray)
2181+
and orig_cond.ndim == 1
2182+
and not is_1d_only_ea_dtype(blk.dtype)
2183+
):
2184+
orig_cond = orig_cond[:, None]
21702185
nbs = blk.where(orig_other, orig_cond, using_cow=using_cow)
21712186
return self._maybe_downcast(
21722187
nbs, downcast=_downcast, using_cow=using_cow, caller="where"

pandas/tests/frame/methods/test_fillna.py

+6-17
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,6 @@
11
import numpy as np
22
import pytest
33

4-
from pandas._config import using_string_dtype
5-
64
import pandas.util._test_decorators as td
75

86
from pandas import (
@@ -91,8 +89,6 @@ def test_fillna_datetime(self, datetime_frame):
9189
with pytest.raises(ValueError, match=msg):
9290
datetime_frame.fillna(5, method="ffill")
9391

94-
# TODO(infer_string) test as actual error instead of xfail
95-
@pytest.mark.xfail(using_string_dtype(), reason="can't fill 0 in string")
9692
def test_fillna_mixed_type(self, float_string_frame):
9793
mf = float_string_frame
9894
mf.loc[mf.index[5:20], "foo"] = np.nan
@@ -126,7 +122,7 @@ def test_fillna_empty(self, using_copy_on_write):
126122
df.x.fillna(method=m, inplace=True)
127123
df.x.fillna(method=m)
128124

129-
def test_fillna_different_dtype(self, using_infer_string):
125+
def test_fillna_different_dtype(self):
130126
# with different dtype (GH#3386)
131127
df = DataFrame(
132128
[["a", "a", np.nan, "a"], ["b", "b", np.nan, "b"], ["c", "c", np.nan, "c"]]
@@ -136,6 +132,7 @@ def test_fillna_different_dtype(self, using_infer_string):
136132
expected = DataFrame(
137133
[["a", "a", "foo", "a"], ["b", "b", "foo", "b"], ["c", "c", "foo", "c"]]
138134
)
135+
# column is originally float (all-NaN) -> filling with string gives object dtype
139136
expected[2] = expected[2].astype("object")
140137
tm.assert_frame_equal(result, expected)
141138

@@ -654,18 +651,10 @@ def test_fillna_col_reordering(self):
654651
filled = df.fillna(method="ffill")
655652
assert df.columns.tolist() == filled.columns.tolist()
656653

657-
# TODO(infer_string) test as actual error instead of xfail
658-
@pytest.mark.xfail(using_string_dtype(), reason="can't fill 0 in string")
659-
def test_fill_corner(self, float_frame, float_string_frame):
660-
mf = float_string_frame
661-
mf.loc[mf.index[5:20], "foo"] = np.nan
662-
mf.loc[mf.index[-10:], "A"] = np.nan
663-
664-
filled = float_string_frame.fillna(value=0)
665-
assert (filled.loc[filled.index[5:20], "foo"] == 0).all()
666-
del float_string_frame["foo"]
667-
668-
float_frame.reindex(columns=[]).fillna(value=0)
654+
def test_fill_empty(self, float_frame):
655+
df = float_frame.reindex(columns=[])
656+
result = df.fillna(value=0)
657+
tm.assert_frame_equal(result, df)
669658

670659
def test_fillna_downcast_dict(self):
671660
# GH#40809

pandas/tests/frame/methods/test_replace.py

+43-38
Original file line number | Diff line number | Diff line change
@@ -6,8 +6,6 @@
66
import numpy as np
77
import pytest
88

9-
from pandas._config import using_string_dtype
10-
119
import pandas as pd
1210
from pandas import (
1311
DataFrame,
@@ -30,7 +28,6 @@ def mix_abc() -> dict[str, list[float | str]]:
3028

3129

3230
class TestDataFrameReplace:
33-
@pytest.mark.xfail(using_string_dtype(), reason="can't set float into string")
3431
def test_replace_inplace(self, datetime_frame, float_string_frame):
3532
datetime_frame.loc[datetime_frame.index[:5], "A"] = np.nan
3633
datetime_frame.loc[datetime_frame.index[-5:], "A"] = np.nan
@@ -46,7 +43,9 @@ def test_replace_inplace(self, datetime_frame, float_string_frame):
4643
mf.iloc[-10:, mf.columns.get_loc("A")] = np.nan
4744

4845
result = float_string_frame.replace(np.nan, 0)
49-
expected = float_string_frame.fillna(value=0)
46+
expected = float_string_frame.copy()
47+
expected["foo"] = expected["foo"].astype(object)
48+
expected = expected.fillna(value=0)
5049
tm.assert_frame_equal(result, expected)
5150

5251
tsframe = datetime_frame.copy()
@@ -290,34 +289,39 @@ def test_regex_replace_dict_nested_non_first_character(self, any_string_dtype):
290289
tm.assert_frame_equal(result, expected)
291290

292291
def test_regex_replace_dict_nested_gh4115(self):
293-
df = DataFrame({"Type": ["Q", "T", "Q", "Q", "T"], "tmp": 2})
292+
df = DataFrame(
293+
{"Type": Series(["Q", "T", "Q", "Q", "T"], dtype=object), "tmp": 2}
294+
)
294295
expected = DataFrame({"Type": [0, 1, 0, 0, 1], "tmp": 2})
295296
msg = "Downcasting behavior in `replace`"
296297
with tm.assert_produces_warning(FutureWarning, match=msg):
297298
result = df.replace({"Type": {"Q": 0, "T": 1}})
299+
298300
tm.assert_frame_equal(result, expected)
299301

300-
@pytest.mark.xfail(using_string_dtype(), reason="can't set float into string")
301-
def test_regex_replace_list_to_scalar(self, mix_abc):
302+
def test_regex_replace_list_to_scalar(self, mix_abc, using_infer_string):
302303
df = DataFrame(mix_abc)
303304
expec = DataFrame(
304305
{
305306
"a": mix_abc["a"],
306-
"b": np.array([np.nan] * 4),
307+
"b": [np.nan] * 4,
307308
"c": [np.nan, np.nan, np.nan, "d"],
308309
}
309310
)
311+
if using_infer_string:
312+
expec["b"] = expec["b"].astype("str")
310313
msg = "Downcasting behavior in `replace`"
311-
with tm.assert_produces_warning(FutureWarning, match=msg):
314+
warn = None if using_infer_string else FutureWarning
315+
with tm.assert_produces_warning(warn, match=msg):
312316
res = df.replace([r"\s*\.\s*", "a|b"], np.nan, regex=True)
313317
res2 = df.copy()
314318
res3 = df.copy()
315-
with tm.assert_produces_warning(FutureWarning, match=msg):
319+
with tm.assert_produces_warning(warn, match=msg):
316320
return_value = res2.replace(
317321
[r"\s*\.\s*", "a|b"], np.nan, regex=True, inplace=True
318322
)
319323
assert return_value is None
320-
with tm.assert_produces_warning(FutureWarning, match=msg):
324+
with tm.assert_produces_warning(warn, match=msg):
321325
return_value = res3.replace(
322326
regex=[r"\s*\.\s*", "a|b"], value=np.nan, inplace=True
323327
)
@@ -326,7 +330,6 @@ def test_regex_replace_list_to_scalar(self, mix_abc):
326330
tm.assert_frame_equal(res2, expec)
327331
tm.assert_frame_equal(res3, expec)
328332

329-
@pytest.mark.xfail(using_string_dtype(), reason="can't set float into string")
330333
def test_regex_replace_str_to_numeric(self, mix_abc):
331334
# what happens when you try to replace a numeric value with a regex?
332335
df = DataFrame(mix_abc)
@@ -342,7 +345,6 @@ def test_regex_replace_str_to_numeric(self, mix_abc):
342345
tm.assert_frame_equal(res2, expec)
343346
tm.assert_frame_equal(res3, expec)
344347

345-
@pytest.mark.xfail(using_string_dtype(), reason="can't set float into string")
346348
def test_regex_replace_regex_list_to_numeric(self, mix_abc):
347349
df = DataFrame(mix_abc)
348350
res = df.replace([r"\s*\.\s*", "b"], 0, regex=True)
@@ -539,21 +541,28 @@ def test_replace_convert(self):
539541
res = rep.dtypes
540542
tm.assert_series_equal(expec, res)
541543

542-
@pytest.mark.xfail(using_string_dtype(), reason="can't set float into string")
543544
def test_replace_mixed(self, float_string_frame):
544545
mf = float_string_frame
545546
mf.iloc[5:20, mf.columns.get_loc("foo")] = np.nan
546547
mf.iloc[-10:, mf.columns.get_loc("A")] = np.nan
547548

548549
result = float_string_frame.replace(np.nan, -18)
549-
expected = float_string_frame.fillna(value=-18)
550+
expected = float_string_frame.copy()
551+
expected["foo"] = expected["foo"].astype(object)
552+
expected = expected.fillna(value=-18)
550553
tm.assert_frame_equal(result, expected)
551-
tm.assert_frame_equal(result.replace(-18, np.nan), float_string_frame)
554+
expected2 = float_string_frame.copy()
555+
expected2["foo"] = expected2["foo"].astype(object)
556+
tm.assert_frame_equal(result.replace(-18, np.nan), expected2)
552557

553558
result = float_string_frame.replace(np.nan, -1e8)
554-
expected = float_string_frame.fillna(value=-1e8)
559+
expected = float_string_frame.copy()
560+
expected["foo"] = expected["foo"].astype(object)
561+
expected = expected.fillna(value=-1e8)
555562
tm.assert_frame_equal(result, expected)
556-
tm.assert_frame_equal(result.replace(-1e8, np.nan), float_string_frame)
563+
expected2 = float_string_frame.copy()
564+
expected2["foo"] = expected2["foo"].astype(object)
565+
tm.assert_frame_equal(result.replace(-1e8, np.nan), expected2)
557566

558567
def test_replace_mixed_int_block_upcasting(self):
559568
# int block upcasting
@@ -614,15 +623,11 @@ def test_replace_mixed2(self, using_infer_string):
614623

615624
expected = DataFrame(
616625
{
617-
"A": Series(["foo", "bar"]),
626+
"A": Series(["foo", "bar"], dtype="object"),
618627
"B": Series([0, "foo"], dtype="object"),
619628
}
620629
)
621-
if using_infer_string:
622-
with tm.assert_produces_warning(FutureWarning, match="Downcasting"):
623-
result = df.replace([1, 2], ["foo", "bar"])
624-
else:
625-
result = df.replace([1, 2], ["foo", "bar"])
630+
result = df.replace([1, 2], ["foo", "bar"])
626631
tm.assert_frame_equal(result, expected)
627632

628633
def test_replace_mixed3(self):
@@ -931,15 +936,16 @@ def test_replace_limit(self):
931936
# TODO
932937
pass
933938

934-
def test_replace_dict_no_regex(self):
939+
def test_replace_dict_no_regex(self, any_string_dtype):
935940
answer = Series(
936941
{
937942
0: "Strongly Agree",
938943
1: "Agree",
939944
2: "Neutral",
940945
3: "Disagree",
941946
4: "Strongly Disagree",
942-
}
947+
},
948+
dtype=any_string_dtype,
943949
)
944950
weights = {
945951
"Agree": 4,
@@ -954,15 +960,16 @@ def test_replace_dict_no_regex(self):
954960
result = answer.replace(weights)
955961
tm.assert_series_equal(result, expected)
956962

957-
def test_replace_series_no_regex(self):
963+
def test_replace_series_no_regex(self, any_string_dtype):
958964
answer = Series(
959965
{
960966
0: "Strongly Agree",
961967
1: "Agree",
962968
2: "Neutral",
963969
3: "Disagree",
964970
4: "Strongly Disagree",
965-
}
971+
},
972+
dtype=any_string_dtype,
966973
)
967974
weights = Series(
968975
{
@@ -1060,16 +1067,15 @@ def test_nested_dict_overlapping_keys_replace_str(self):
10601067
expected = df.replace({"a": dict(zip(astr, bstr))})
10611068
tm.assert_frame_equal(result, expected)
10621069

1063-
@pytest.mark.xfail(using_string_dtype(), reason="can't set float into string")
1064-
def test_replace_swapping_bug(self, using_infer_string):
1070+
def test_replace_swapping_bug(self):
10651071
df = DataFrame({"a": [True, False, True]})
10661072
res = df.replace({"a": {True: "Y", False: "N"}})
1067-
expect = DataFrame({"a": ["Y", "N", "Y"]})
1073+
expect = DataFrame({"a": ["Y", "N", "Y"]}, dtype=object)
10681074
tm.assert_frame_equal(res, expect)
10691075

10701076
df = DataFrame({"a": [0, 1, 0]})
10711077
res = df.replace({"a": {0: "Y", 1: "N"}})
1072-
expect = DataFrame({"a": ["Y", "N", "Y"]})
1078+
expect = DataFrame({"a": ["Y", "N", "Y"]}, dtype=object)
10731079
tm.assert_frame_equal(res, expect)
10741080

10751081
def test_replace_period(self):
@@ -1345,7 +1351,7 @@ def test_replace_commutative(self, df, to_replace, exp):
13451351
)
13461352
def test_replace_replacer_dtype(self, replacer):
13471353
# GH26632
1348-
df = DataFrame(["a"])
1354+
df = DataFrame(["a"], dtype=object)
13491355
msg = "Downcasting behavior in `replace` "
13501356
with tm.assert_produces_warning(FutureWarning, match=msg):
13511357
result = df.replace({"a": replacer, "b": replacer})
@@ -1462,6 +1468,7 @@ def test_replace_value_category_type(self):
14621468
input_df = input_df.replace("obj1", "obj9")
14631469
result = input_df.replace("cat2", "catX")
14641470

1471+
result = result.astype({"col1": "int64", "col3": "float64", "col5": "str"})
14651472
tm.assert_frame_equal(result, expected)
14661473

14671474
def test_replace_dict_category_type(self):
@@ -1503,13 +1510,11 @@ def test_replace_with_compiled_regex(self):
15031510
expected = DataFrame(["z", "b", "c"])
15041511
tm.assert_frame_equal(result, expected)
15051512

1506-
def test_replace_intervals(self, using_infer_string):
1513+
def test_replace_intervals(self):
15071514
# https://github.com/pandas-dev/pandas/issues/35931
15081515
df = DataFrame({"a": [pd.Interval(0, 1), pd.Interval(0, 1)]})
1509-
warning = FutureWarning if using_infer_string else None
1510-
with tm.assert_produces_warning(warning, match="Downcasting"):
1511-
result = df.replace({"a": {pd.Interval(0, 1): "x"}})
1512-
expected = DataFrame({"a": ["x", "x"]})
1516+
result = df.replace({"a": {pd.Interval(0, 1): "x"}})
1517+
expected = DataFrame({"a": ["x", "x"]}, dtype=object)
15131518
tm.assert_frame_equal(result, expected)
15141519

15151520
def test_replace_unicode(self):

pandas/tests/indexing/test_coercion.py

+4-3
Original file line number | Diff line number | Diff line change
@@ -856,7 +856,7 @@ def test_replace_series(self, how, to_key, from_key, replacer, using_infer_strin
856856
else:
857857
exp = pd.Series(self.rep[to_key], index=index, name="yyy")
858858

859-
if using_infer_string and exp.dtype == "string" and obj.dtype == object:
859+
if using_infer_string and exp.dtype == "string":
860860
# with infer_string, we disable the deprecated downcasting behavior
861861
exp = exp.astype(object)
862862

@@ -889,8 +889,9 @@ def test_replace_series_datetime_tz(
889889
assert obj.dtype == from_key
890890

891891
exp = pd.Series(self.rep[to_key], index=index, name="yyy")
892-
if using_infer_string and to_key == "object":
893-
assert exp.dtype == "string"
892+
if using_infer_string and exp.dtype == "string":
893+
# with infer_string, we disable the deprecated downcasting behavior
894+
exp = exp.astype(object)
894895
else:
895896
assert exp.dtype == to_key
896897

0 commit comments

Comments
 (0)