From 719b1946e76bbb796efc563be11faff8845563ec Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Tue, 4 Jan 2022 12:01:07 -0800
Subject: [PATCH] POC/API/DEPR: errors kwd for fillna

---
 pandas/core/frame.py                       | 12 ++++
 pandas/core/generic.py                     | 35 +++++++--
 pandas/core/internals/array_manager.py     |  9 ++-
 pandas/core/internals/blocks.py            | 82 ++++++++++++++++++----
 pandas/core/internals/managers.py          |  9 ++-
 pandas/core/series.py                      | 12 ++++
 pandas/tests/frame/methods/test_fillna.py  |  2 +-
 pandas/tests/series/methods/test_fillna.py | 12 ++--
 8 files changed, 142 insertions(+), 31 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index a39c1b0bf43f2..0cff3bf595101 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -5097,6 +5097,7 @@ def fillna(
         inplace: Literal[False] = ...,
         limit=...,
         downcast=...,
+        errors=...,
     ) -> DataFrame:
         ...
 
@@ -5109,6 +5110,7 @@ def fillna(
         inplace: Literal[True],
         limit=...,
         downcast=...,
+        errors=...,
     ) -> None:
         ...
 
@@ -5119,6 +5121,7 @@ def fillna(
         inplace: Literal[True],
         limit=...,
         downcast=...,
+        errors=...,
     ) -> None:
         ...
 
@@ -5130,6 +5133,7 @@ def fillna(
         inplace: Literal[True],
         limit=...,
         downcast=...,
+        errors=...,
     ) -> None:
         ...
 
@@ -5141,6 +5145,7 @@ def fillna(
         inplace: Literal[True],
         limit=...,
         downcast=...,
+        errors=...,
     ) -> None:
         ...
 
@@ -5152,6 +5157,7 @@ def fillna(
         inplace: Literal[True],
         limit=...,
         downcast=...,
+        errors=...,
     ) -> None:
         ...
 
@@ -5164,6 +5170,7 @@ def fillna(
         inplace: Literal[True],
         limit=...,
         downcast=...,
+        errors=...,
     ) -> None:
         ...
 
@@ -5176,6 +5183,7 @@ def fillna(
         inplace: Literal[True],
         limit=...,
         downcast=...,
+        errors=...,
     ) -> None:
         ...
 
@@ -5188,6 +5196,7 @@ def fillna(
         inplace: Literal[True],
         limit=...,
         downcast=...,
+        errors=...,
     ) -> None:
         ...
 
@@ -5200,6 +5209,7 @@ def fillna(
         inplace: bool = ...,
         limit=...,
         downcast=...,
+        errors=...,
     ) -> DataFrame | None:
         ...
 
@@ -5213,6 +5223,7 @@ def fillna(
         inplace: bool = False,
         limit=None,
         downcast=None,
+        errors=lib.no_default,
     ) -> DataFrame | None:
         return super().fillna(
             value=value,
@@ -5221,6 +5232,7 @@ def fillna(
             inplace=inplace,
             limit=limit,
             downcast=downcast,
+            errors=errors,
         )
 
     def pop(self, item: Hashable) -> Series:
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 1e25b0f4eb176..021a041f2eecb 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -6278,6 +6278,7 @@ def fillna(
         inplace: bool_t = False,
         limit=None,
         downcast=None,
+        errors=lib.no_default,
     ) -> NDFrameT | None:
         """
         Fill NA/NaN values using the specified method.
@@ -6311,6 +6312,14 @@ def fillna(
             A dict of item->dtype of what to downcast if possible,
             or the string 'infer' which will try to downcast to an appropriate
             equal type (e.g. float64 to int64 if possible).
+        errors : {{'raise', 'coerce'}}
+            If the given value cannot be filled into an array with this dtype,
+            do we raise or coerce to a common dtype?
+            Default depends on dtype for backward compatibility. For most dtypes,
+            the default is to coerce. In a future version, the default will be
+            to coerce for all dtypes.
+
+            .. versionadded:: 1.4.0
 
         Returns
         -------
@@ -6390,6 +6399,8 @@ def fillna(
         """
         inplace = validate_bool_kwarg(inplace, "inplace")
         value, method = validate_fillna_kwargs(value, method)
+        if errors not in ["raise", "coerce", lib.no_default]:
+            raise ValueError("'errors' must be either 'raise' or 'coerce'")
 
         self._consolidate_inplace()
 
@@ -6403,7 +6414,7 @@ def fillna(
             if not self._mgr.is_single_block and axis == 1:
                 if inplace:
                     raise NotImplementedError()
-                result = self.T.fillna(method=method, limit=limit).T
+                result = self.T.fillna(method=method, limit=limit, errors=errors).T
 
                 return result
 
@@ -6413,7 +6424,7 @@ def fillna(
                 limit=limit,
                 inplace=inplace,
                 coerce=True,
-                downcast=downcast,
+                downcast=downcast,  # TODO: errors
             )
         else:
             if self.ndim == 1:
@@ -6438,7 +6449,11 @@ def fillna(
                     )
 
                 new_data = self._mgr.fillna(
-                    value=value, limit=limit, inplace=inplace, downcast=downcast
+                    value=value,
+                    limit=limit,
+                    inplace=inplace,
+                    downcast=downcast,
+                    errors=errors,
                 )
 
             elif isinstance(value, (dict, ABCSeries)):
@@ -6455,23 +6470,29 @@ def fillna(
                     if k not in result:
                         continue
                     downcast_k = downcast if not is_dict else downcast.get(k)
-                    result[k] = result[k].fillna(v, limit=limit, downcast=downcast_k)
+                    result[k] = result[k].fillna(
+                        v, limit=limit, downcast=downcast_k, errors=errors
+                    )
                 return result if not inplace else None
 
             elif not is_list_like(value):
                 if not self._mgr.is_single_block and axis == 1:
 
-                    result = self.T.fillna(value=value, limit=limit).T
+                    result = self.T.fillna(value=value, limit=limit, errors=errors).T
 
                     new_data = result
                 else:
 
                     new_data = self._mgr.fillna(
-                        value=value, limit=limit, inplace=inplace, downcast=downcast
+                        value=value,
+                        limit=limit,
+                        inplace=inplace,
+                        downcast=downcast,
+                        errors=errors,
                     )
             elif isinstance(value, ABCDataFrame) and self.ndim == 2:
 
-                new_data = self.where(self.notna(), value)._mgr
+                new_data = self.where(self.notna(), value)._mgr  # TODO: errors
             else:
                 raise ValueError(f"invalid fill value with a {type(value)}")
 
diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py
index ec3a9e8b493e3..ab38097a215d8 100644
--- a/pandas/core/internals/array_manager.py
+++ b/pandas/core/internals/array_manager.py
@@ -382,9 +382,14 @@ def shift(self: T, periods: int, axis: int, fill_value) -> T:
             "shift", periods=periods, axis=axis, fill_value=fill_value
         )
 
-    def fillna(self: T, value, limit, inplace: bool, downcast) -> T:
+    def fillna(self: T, value, limit, inplace: bool, downcast, errors) -> T:
         return self.apply_with_block(
-            "fillna", value=value, limit=limit, inplace=inplace, downcast=downcast
+            "fillna",
+            value=value,
+            limit=limit,
+            inplace=inplace,
+            downcast=downcast,
+            errors=errors,
         )
 
     def astype(self: T, dtype, copy: bool = False, errors: str = "raise") -> T:
diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
index 9f242226739ec..9b71c04b855ba 100644
--- a/pandas/core/internals/blocks.py
+++ b/pandas/core/internals/blocks.py
@@ -434,7 +434,12 @@ def _split_op_result(self, result: ArrayLike) -> list[Block]:
         return [nb]
 
     def fillna(
-        self, value, limit=None, inplace: bool = False, downcast=None
+        self,
+        value,
+        limit=None,
+        inplace: bool = False,
+        downcast=None,
+        errors=lib.no_default,
     ) -> list[Block]:
         """
         fillna on the block with the value. If we fail, then convert to
@@ -455,7 +460,7 @@ def fillna(
             else:
                 return [self.copy()]
 
-        if self._can_hold_element(value):
+        if self._can_hold_element(value) or errors == "raise":
             nb = self if inplace else self.copy()
             putmask_inplace(nb.values, mask, value)
             return nb._maybe_downcast([nb], downcast)
@@ -1673,9 +1678,35 @@ def getitem_block_index(self, slicer: slice) -> ExtensionBlock:
         return type(self)(new_values, self._mgr_locs, ndim=self.ndim)
 
     def fillna(
-        self, value, limit=None, inplace: bool = False, downcast=None
+        self,
+        value,
+        limit=None,
+        inplace: bool = False,
+        downcast=None,
+        errors=lib.no_default,
     ) -> list[Block]:
-        values = self.values.fillna(value=value, limit=limit)
+        try:
+            values = self.values.fillna(value=value, limit=limit)
+        except (ValueError, TypeError):
+            if errors is lib.no_default:
+                warnings.warn(
+                    f"The default behavior of fillna with {self.dtype} is "
+                    "deprecated. In a future version, when the specified value "
+                    "cannot be held in an array, the value and array will be "
+                    "coerced to a common dtype. This is consistent with the "
+                    "behavior with numpy dtypes. To retain the old behavior, "
+                    "pass `errors='raise'` to obj.fillna. To get the future "
+                    "behavior, pass `errors='coerce'`.",
+                    FutureWarning,
+                    stacklevel=find_stack_level(),
+                )
+                errors = "raise"
+            if errors == "coerce":
+                blk = self.coerce_to_target_dtype(value)
+                return blk.fillna(value, limit=limit, inplace=True, downcast=downcast)
+            else:
+                raise
+
         return [self.make_block_same_class(values=values)]
 
     def diff(self, n: int, axis: int = 1) -> list[Block]:
@@ -1814,18 +1845,43 @@ def shift(self, periods: int, axis: int = 0, fill_value: Any = None) -> list[Blo
         return [self.make_block_same_class(new_values)]
 
     def fillna(
-        self, value, limit=None, inplace: bool = False, downcast=None
+        self,
+        value,
+        limit=None,
+        inplace: bool = False,
+        downcast=None,
+        errors=lib.no_default,
     ) -> list[Block]:
 
-        if not self._can_hold_element(value) and self.dtype.kind != "m":
-            # We support filling a DatetimeTZ with a `value` whose timezone
-            #  is different by coercing to object.
-            # TODO: don't special-case td64
-            return self.coerce_to_target_dtype(value).fillna(
-                value, limit, inplace, downcast
-            )
+        try:
+            new_values = self.values.fillna(value=value, limit=limit)
+        except (ValueError, TypeError) as err:
+            if isinstance(err, ValueError) and "Timezones don't match" not in str(err):
+                # TODO(2.0): remove catching ValueError at all since
+                #  DTA raising here is deprecated
+                raise
+
+            if errors is lib.no_default and self.dtype.kind == "m":
+                # Deprecating special-casing of td64
+                warnings.warn(
+                    f"The default behavior of fillna with {self.dtype} is "
+                    "deprecated. In a future version, when the specified value "
+                    "cannot be held in an array, the value and array will be "
+                    "coerced to a common dtype. This is consistent with the "
+                    "behavior with numpy dtypes. To retain the old behavior, "
+                    "pass `errors='raise'` to obj.fillna. To get the future "
+                    "behavior, pass `errors='coerce'`.",
+                    FutureWarning,
+                    stacklevel=find_stack_level(),
+                )
+                raise
+            elif errors == "coerce" or errors is lib.no_default:
+                # for non-td64, the default is already to coerce.
+                blk = self.coerce_to_target_dtype(value)
+                return blk.fillna(value, limit, inplace, downcast)
+            else:
+                raise
 
-        new_values = self.values.fillna(value=value, limit=limit)
         return [self.make_block_same_class(values=new_values)]
 
 
diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
index 3e0b62da64f42..9b07450df34c7 100644
--- a/pandas/core/internals/managers.py
+++ b/pandas/core/internals/managers.py
@@ -410,9 +410,14 @@ def shift(self: T, periods: int, axis: int, fill_value) -> T:
 
         return self.apply("shift", periods=periods, axis=axis, fill_value=fill_value)
 
-    def fillna(self: T, value, limit, inplace: bool, downcast) -> T:
+    def fillna(self: T, value, limit, inplace: bool, downcast, errors) -> T:
         return self.apply(
-            "fillna", value=value, limit=limit, inplace=inplace, downcast=downcast
+            "fillna",
+            value=value,
+            limit=limit,
+            inplace=inplace,
+            downcast=downcast,
+            errors=errors,
         )
 
     def astype(self: T, dtype, copy: bool = False, errors: str = "raise") -> T:
diff --git a/pandas/core/series.py b/pandas/core/series.py
index 81b901b13a42b..8e45d35bd2cc0 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -4765,6 +4765,7 @@ def fillna(
         inplace: Literal[False] = ...,
         limit=...,
         downcast=...,
+        errors=...,
     ) -> Series:
         ...
 
@@ -4777,6 +4778,7 @@ def fillna(
         inplace: Literal[True],
         limit=...,
         downcast=...,
+        errors=...,
     ) -> None:
         ...
 
@@ -4787,6 +4789,7 @@ def fillna(
         inplace: Literal[True],
         limit=...,
         downcast=...,
+        errors=...,
     ) -> None:
         ...
 
@@ -4798,6 +4801,7 @@ def fillna(
         inplace: Literal[True],
         limit=...,
         downcast=...,
+        errors=...,
     ) -> None:
         ...
 
@@ -4809,6 +4813,7 @@ def fillna(
         inplace: Literal[True],
         limit=...,
         downcast=...,
+        errors=...,
     ) -> None:
         ...
 
@@ -4820,6 +4825,7 @@ def fillna(
         inplace: Literal[True],
         limit=...,
         downcast=...,
+        errors=...,
     ) -> None:
         ...
 
@@ -4832,6 +4838,7 @@ def fillna(
         inplace: Literal[True],
         limit=...,
         downcast=...,
+        errors=...,
     ) -> None:
         ...
 
@@ -4844,6 +4851,7 @@ def fillna(
         inplace: Literal[True],
         limit=...,
         downcast=...,
+        errors=...,
     ) -> None:
         ...
 
@@ -4856,6 +4864,7 @@ def fillna(
         inplace: Literal[True],
         limit=...,
         downcast=...,
+        errors=...,
     ) -> None:
         ...
 
@@ -4868,6 +4877,7 @@ def fillna(
         inplace: bool = ...,
         limit=...,
         downcast=...,
+        errors=...,
     ) -> Series | None:
         ...
 
@@ -4882,6 +4892,7 @@ def fillna(
         inplace=False,
         limit=None,
         downcast=None,
+        errors=lib.no_default,
     ) -> Series | None:
         return super().fillna(
             value=value,
@@ -4890,6 +4901,7 @@ def fillna(
             inplace=inplace,
             limit=limit,
             downcast=downcast,
+            errors=errors,
         )
 
     def pop(self, item: Hashable) -> Any:
diff --git a/pandas/tests/frame/methods/test_fillna.py b/pandas/tests/frame/methods/test_fillna.py
index b5bdf6a70199c..a463efa0f3fa7 100644
--- a/pandas/tests/frame/methods/test_fillna.py
+++ b/pandas/tests/frame/methods/test_fillna.py
@@ -172,7 +172,7 @@ def test_na_actions_categorical(self):
 
         msg = "Cannot setitem on a Categorical with a new category"
         with pytest.raises(TypeError, match=msg):
-            df.fillna(value={"cats": 4, "vals": "c"})
+            df.fillna(value={"cats": 4, "vals": "c"}, errors="raise")
 
         res = df.fillna(method="pad")
         tm.assert_frame_equal(res, df_exp_fill)
diff --git a/pandas/tests/series/methods/test_fillna.py b/pandas/tests/series/methods/test_fillna.py
index 19f61a0a2d6fc..c5a67d9e82b73 100644
--- a/pandas/tests/series/methods/test_fillna.py
+++ b/pandas/tests/series/methods/test_fillna.py
@@ -254,7 +254,7 @@ def test_timedelta_fillna(self, frame_or_series):
         # interpreted as seconds, no longer supported
         msg = "value should be a 'Timedelta', 'NaT', or array of those. Got 'int'"
         with pytest.raises(TypeError, match=msg):
-            obj.fillna(1)
+            obj.fillna(1, errors="raise")
 
         result = obj.fillna(Timedelta(seconds=1))
         expected = Series(
@@ -723,29 +723,29 @@ def test_fillna_categorical_raises(self):
 
         msg = "Cannot setitem on a Categorical with a new category"
         with pytest.raises(TypeError, match=msg):
-            ser.fillna("d")
+            ser.fillna("d", errors="raise")
 
         msg2 = "Length of 'value' does not match."
         with pytest.raises(ValueError, match=msg2):
             cat.fillna(Series("d"))
 
         with pytest.raises(TypeError, match=msg):
-            ser.fillna({1: "d", 3: "a"})
+            ser.fillna({1: "d", 3: "a"}, errors="raise")
 
         msg = '"value" parameter must be a scalar or dict, but you passed a "list"'
         with pytest.raises(TypeError, match=msg):
-            ser.fillna(["a", "b"])
+            ser.fillna(["a", "b"], errors="raise")
 
         msg = '"value" parameter must be a scalar or dict, but you passed a "tuple"'
         with pytest.raises(TypeError, match=msg):
-            ser.fillna(("a", "b"))
+            ser.fillna(("a", "b"), errors="raise")
 
         msg = (
             '"value" parameter must be a scalar, dict '
             'or Series, but you passed a "DataFrame"'
         )
         with pytest.raises(TypeError, match=msg):
-            ser.fillna(DataFrame({1: ["a"], 3: ["b"]}))
+            ser.fillna(DataFrame({1: ["a"], 3: ["b"]}), errors="raise")
 
     @pytest.mark.parametrize("dtype", [float, "float32", "float64"])
     @pytest.mark.parametrize("fill_type", tm.ALL_REAL_NUMPY_DTYPES)