TST (string dtype): resolve xfails for frame fillna and replace tests + fix bug in replace for string (#60295)

jorisvandenbossche · web-flow · commit fae3e8034faf · 2024-11-15T10:50:49.000-08:00
* TST (string dtype): resolve xfails for frame fillna and replace tests + fix bug in replace for string

* fix fillna upcast issue

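* fix reshaping of the condition in Block.where - only reshape a 1D condition into a column for 2D blocks (and not when the coerced block has a 1D-only extension dtype)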
diff --git a/pandas/core/array_algos/replace.py b/pandas/core/array_algos/replace.py
@@ -151,4 +151,6 @@ def re_replacer(s):
     if mask is None:
         values[:] = f(values)
     else:
+        if values.ndim != mask.ndim:
+            mask = np.broadcast_to(mask, values.shape)
         values[mask] = f(values[mask])
diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
@@ -1688,6 +1688,13 @@ def where(self, other, cond) -> list[Block]:
                 if isinstance(self.dtype, (IntervalDtype, StringDtype)):
                     # TestSetitemFloatIntervalWithIntIntervalValues
                     blk = self.coerce_to_target_dtype(orig_other, raise_on_upcast=False)
+                    if (
+                        self.ndim == 2
+                        and isinstance(orig_cond, np.ndarray)
+                        and orig_cond.ndim == 1
+                        and not is_1d_only_ea_dtype(blk.dtype)
+                    ):
+                        orig_cond = orig_cond[:, None]
                     return blk.where(orig_other, orig_cond)
 
                 elif isinstance(self, NDArrayBackedExtensionBlock):
diff --git a/pandas/tests/frame/methods/test_fillna.py b/pandas/tests/frame/methods/test_fillna.py
@@ -1,8 +1,6 @@
 import numpy as np
 import pytest
 
-from pandas._config import using_string_dtype
-
 from pandas import (
     Categorical,
     DataFrame,
@@ -65,15 +63,20 @@ def test_fillna_datetime(self, datetime_frame):
         with pytest.raises(TypeError, match=msg):
             datetime_frame.fillna()
 
-    # TODO(infer_string) test as actual error instead of xfail
-    @pytest.mark.xfail(using_string_dtype(), reason="can't fill 0 in string")
-    def test_fillna_mixed_type(self, float_string_frame):
+    def test_fillna_mixed_type(self, float_string_frame, using_infer_string):
         mf = float_string_frame
         mf.loc[mf.index[5:20], "foo"] = np.nan
         mf.loc[mf.index[-10:], "A"] = np.nan
-        # TODO: make stronger assertion here, GH 25640
-        mf.fillna(value=0)
-        mf.ffill()
+
+        result = mf.ffill()
+        assert (
+            result.loc[result.index[-10:], "A"] == result.loc[result.index[-11], "A"]
+        ).all()
+        assert (result.loc[result.index[5:20], "foo"] == "bar").all()
+
+        result = mf.fillna(value=0)
+        assert (result.loc[result.index[-10:], "A"] == 0).all()
+        assert (result.loc[result.index[5:20], "foo"] == 0).all()
 
     def test_fillna_mixed_float(self, mixed_float_frame):
         # mixed numeric (but no float16)
@@ -84,28 +87,21 @@ def test_fillna_mixed_float(self, mixed_float_frame):
         result = mf.ffill()
         _check_mixed_float(result, dtype={"C": None})
 
-    @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
-    def test_fillna_different_dtype(self, using_infer_string):
+    def test_fillna_different_dtype(self):
         # with different dtype (GH#3386)
         df = DataFrame(
             [["a", "a", np.nan, "a"], ["b", "b", np.nan, "b"], ["c", "c", np.nan, "c"]]
         )
 
-        if using_infer_string:
-            with tm.assert_produces_warning(FutureWarning, match="Downcasting"):
-                result = df.fillna({2: "foo"})
-        else:
-            result = df.fillna({2: "foo"})
+        result = df.fillna({2: "foo"})
         expected = DataFrame(
             [["a", "a", "foo", "a"], ["b", "b", "foo", "b"], ["c", "c", "foo", "c"]]
         )
+        # column is originally float (all-NaN) -> filling with string gives object dtype
+        expected[2] = expected[2].astype("object")
         tm.assert_frame_equal(result, expected)
 
-        if using_infer_string:
-            with tm.assert_produces_warning(FutureWarning, match="Downcasting"):
-                return_value = df.fillna({2: "foo"}, inplace=True)
-        else:
-            return_value = df.fillna({2: "foo"}, inplace=True)
+        return_value = df.fillna({2: "foo"}, inplace=True)
         tm.assert_frame_equal(df, expected)
         assert return_value is None
 
@@ -276,8 +272,7 @@ def test_fillna_dictlike_value_duplicate_colnames(self, columns):
         expected["A"] = 0.0
         tm.assert_frame_equal(result, expected)
 
-    @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
-    def test_fillna_dtype_conversion(self, using_infer_string):
+    def test_fillna_dtype_conversion(self):
         # make sure that fillna on an empty frame works
         df = DataFrame(index=["A", "B", "C"], columns=[1, 2, 3, 4, 5])
         result = df.dtypes
@@ -292,7 +287,7 @@ def test_fillna_dtype_conversion(self, using_infer_string):
         # empty block
         df = DataFrame(index=range(3), columns=["A", "B"], dtype="float64")
         result = df.fillna("nan")
-        expected = DataFrame("nan", index=range(3), columns=["A", "B"])
+        expected = DataFrame("nan", dtype="object", index=range(3), columns=["A", "B"])
         tm.assert_frame_equal(result, expected)
 
     @pytest.mark.parametrize("val", ["", 1, np.nan, 1.0])
@@ -540,18 +535,10 @@ def test_fillna_col_reordering(self):
         filled = df.ffill()
         assert df.columns.tolist() == filled.columns.tolist()
 
-    # TODO(infer_string) test as actual error instead of xfail
-    @pytest.mark.xfail(using_string_dtype(), reason="can't fill 0 in string")
-    def test_fill_corner(self, float_frame, float_string_frame):
-        mf = float_string_frame
-        mf.loc[mf.index[5:20], "foo"] = np.nan
-        mf.loc[mf.index[-10:], "A"] = np.nan
-
-        filled = float_string_frame.fillna(value=0)
-        assert (filled.loc[filled.index[5:20], "foo"] == 0).all()
-        del float_string_frame["foo"]
-
-        float_frame.reindex(columns=[]).fillna(value=0)
+    def test_fill_empty(self, float_frame):
+        df = float_frame.reindex(columns=[])
+        result = df.fillna(value=0)
+        tm.assert_frame_equal(result, df)
 
     def test_fillna_with_columns_and_limit(self):
         # GH40989
diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py