From 1ae8dccbff69f6b99c8127bb0b3ec10e1296c215 Mon Sep 17 00:00:00 2001
From: richard <rhshadrach@gmail.com>
Date: Sat, 30 Mar 2024 10:14:16 -0400
Subject: [PATCH 1/4] ENH: Enable fillna(value=None)

---
 doc/source/whatsnew/v3.0.0.rst                |   2 +-
 pandas/core/arrays/_mixins.py                 |   5 +-
 pandas/core/arrays/arrow/array.py             |   2 +-
 pandas/core/arrays/interval.py                |   2 +-
 pandas/core/arrays/masked.py                  |   2 +-
 pandas/core/arrays/sparse/array.py            |   4 +-
 pandas/core/generic.py                        | 169 +++++++++---------
 pandas/core/indexes/base.py                   |   2 +-
 pandas/core/indexes/multi.py                  |   2 +-
 pandas/tests/extension/base/missing.py        |   6 +
 .../tests/extension/decimal/test_decimal.py   |   8 +
 pandas/tests/extension/json/test_json.py      |   7 +
 pandas/tests/frame/methods/test_fillna.py     |  18 +-
 13 files changed, 128 insertions(+), 101 deletions(-)

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index 74a19472ec835..c0c0c88ef9648 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -35,7 +35,7 @@ Other enhancements
 - Support passing a :class:`Series` input to :func:`json_normalize` that retains the :class:`Series` :class:`Index` (:issue:`51452`)
 - Users can globally disable any ``PerformanceWarning`` by setting the option ``mode.performance_warnings`` to ``False`` (:issue:`56920`)
 - :meth:`Styler.format_index_names` can now be used to format the index and column names (:issue:`48936` and :issue:`47489`)
--
+- :meth:`DataFrame.fillna` and :meth:`Series.fillna` can now accept ``value=None``; for non-object dtype the corresponding NA value will be used (:issue:`57723`)
 
 .. ---------------------------------------------------------------------------
 .. _whatsnew_300.notable_bug_fixes:
diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py
index 7f4e6f6666382..370fdb0deb23b 100644
--- a/pandas/core/arrays/_mixins.py
+++ b/pandas/core/arrays/_mixins.py
@@ -335,7 +335,7 @@ def _pad_or_backfill(
         return new_values
 
     @doc(ExtensionArray.fillna)
-    def fillna(self, value=None, limit: int | None = None, copy: bool = True) -> Self:
+    def fillna(self, value, limit: int | None = None, copy: bool = True) -> Self:
         mask = self.isna()
         # error: Argument 2 to "check_value_size" has incompatible type
         # "ExtensionArray"; expected "ndarray"
@@ -354,8 +354,7 @@ def fillna(self, value=None, limit: int | None = None, copy: bool = True) -> Sel
             new_values[mask] = value
         else:
             # We validate the fill_value even if there is nothing to fill
-            if value is not None:
-                self._validate_setitem_value(value)
+            self._validate_setitem_value(value)
 
             if not copy:
                 new_values = self[:]
diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py
index 84b62563605ac..8f148e2e91551 100644
--- a/pandas/core/arrays/arrow/array.py
+++ b/pandas/core/arrays/arrow/array.py
@@ -1077,7 +1077,7 @@ def _pad_or_backfill(
     @doc(ExtensionArray.fillna)
     def fillna(
         self,
-        value: object | ArrayLike | None = None,
+        value: object | ArrayLike,
         limit: int | None = None,
         copy: bool = True,
     ) -> Self:
diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py
index af666a591b1bc..86f58b48ea3be 100644
--- a/pandas/core/arrays/interval.py
+++ b/pandas/core/arrays/interval.py
@@ -892,7 +892,7 @@ def max(self, *, axis: AxisInt | None = None, skipna: bool = True) -> IntervalOr
         indexer = obj.argsort()[-1]
         return obj[indexer]
 
-    def fillna(self, value=None, limit: int | None = None, copy: bool = True) -> Self:
+    def fillna(self, value, limit: int | None = None, copy: bool = True) -> Self:
         """
         Fill NA/NaN values using the specified method.
 
diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py
index d20d7f98b8aa8..190888d281ea9 100644
--- a/pandas/core/arrays/masked.py
+++ b/pandas/core/arrays/masked.py
@@ -236,7 +236,7 @@ def _pad_or_backfill(
         return new_values
 
     @doc(ExtensionArray.fillna)
-    def fillna(self, value=None, limit: int | None = None, copy: bool = True) -> Self:
+    def fillna(self, value, limit: int | None = None, copy: bool = True) -> Self:
         mask = self._mask
 
         value = missing.check_value_size(value, mask, len(self))
diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py
index bdcb3219a9875..398fd795aee8a 100644
--- a/pandas/core/arrays/sparse/array.py
+++ b/pandas/core/arrays/sparse/array.py
@@ -716,7 +716,7 @@ def isna(self) -> Self:  # type: ignore[override]
 
     def fillna(
         self,
-        value=None,
+        value,
         limit: int | None = None,
         copy: bool = True,
     ) -> Self:
@@ -746,8 +746,6 @@ def fillna(
         When ``self.fill_value`` is not NA, the result dtype will be
         ``self.dtype``. Again, this preserves the amount of memory used.
         """
-        if value is None:
-            raise ValueError("Must specify 'value'.")
         new_values = np.where(isna(self.sp_values), value, self.sp_values)
 
         if self._null_fill_value:
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 858d2ba82a969..eed9adf7b59cd 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -6786,7 +6786,7 @@ def fillna(
     )
     def fillna(
         self,
-        value: Hashable | Mapping | Series | DataFrame | None = None,
+        value: Hashable | Mapping | Series | DataFrame,
         *,
         axis: Axis | None = None,
         inplace: bool = False,
@@ -6814,6 +6814,10 @@ def fillna(
             This is the maximum number of entries along the entire axis
             where NaNs will be filled. Must be greater than 0 if not None.
 
+        Notes
+        -----
+        For non-object dtype, ``value=None`` will use the NA value of the dtype.
+
         Returns
         -------
         {klass} or None
@@ -6909,101 +6913,92 @@ def fillna(
             axis = 0
         axis = self._get_axis_number(axis)
 
-        if value is None:
-            raise ValueError("Must specify a fill 'value'.")
-        else:
-            if self.ndim == 1:
-                if isinstance(value, (dict, ABCSeries)):
-                    if not len(value):
-                        # test_fillna_nonscalar
-                        if inplace:
-                            return None
-                        return self.copy(deep=False)
-                    from pandas import Series
+        if self.ndim == 1:
+            if isinstance(value, (dict, ABCSeries)):
+                if not len(value):
+                    # test_fillna_nonscalar
+                    if inplace:
+                        return None
+                    return self.copy(deep=False)
+                from pandas import Series
 
-                    value = Series(value)
-                    value = value.reindex(self.index)
-                    value = value._values
-                elif not is_list_like(value):
-                    pass
-                else:
-                    raise TypeError(
-                        '"value" parameter must be a scalar, dict '
-                        "or Series, but you passed a "
-                        f'"{type(value).__name__}"'
-                    )
+                value = Series(value)
+                value = value.reindex(self.index)
+                value = value._values
+            elif not is_list_like(value):
+                pass
+            else:
+                raise TypeError(
+                    '"value" parameter must be a scalar, dict '
+                    "or Series, but you passed a "
+                    f'"{type(value).__name__}"'
+                )
 
-                new_data = self._mgr.fillna(value=value, limit=limit, inplace=inplace)
+            new_data = self._mgr.fillna(value=value, limit=limit, inplace=inplace)
 
-            elif isinstance(value, (dict, ABCSeries)):
-                if axis == 1:
-                    raise NotImplementedError(
-                        "Currently only can fill "
-                        "with dict/Series column "
-                        "by column"
-                    )
-                result = self if inplace else self.copy(deep=False)
-                for k, v in value.items():
-                    if k not in result:
-                        continue
+        elif isinstance(value, (dict, ABCSeries)):
+            if axis == 1:
+                raise NotImplementedError(
+                    "Currently only can fill " "with dict/Series column " "by column"
+                )
+            result = self if inplace else self.copy(deep=False)
+            for k, v in value.items():
+                if k not in result:
+                    continue
 
-                    res_k = result[k].fillna(v, limit=limit)
+                res_k = result[k].fillna(v, limit=limit)
 
-                    if not inplace:
-                        result[k] = res_k
+                if not inplace:
+                    result[k] = res_k
+                else:
+                    # We can write into our existing column(s) iff dtype
+                    #  was preserved.
+                    if isinstance(res_k, ABCSeries):
+                        # i.e. 'k' only shows up once in self.columns
+                        if res_k.dtype == result[k].dtype:
+                            result.loc[:, k] = res_k
+                        else:
+                            # Different dtype -> no way to do inplace.
+                            result[k] = res_k
                     else:
-                        # We can write into our existing column(s) iff dtype
-                        #  was preserved.
-                        if isinstance(res_k, ABCSeries):
-                            # i.e. 'k' only shows up once in self.columns
-                            if res_k.dtype == result[k].dtype:
-                                result.loc[:, k] = res_k
+                        # see test_fillna_dict_inplace_nonunique_columns
+                        locs = result.columns.get_loc(k)
+                        if isinstance(locs, slice):
+                            locs = np.arange(self.shape[1])[locs]
+                        elif isinstance(locs, np.ndarray) and locs.dtype.kind == "b":
+                            locs = locs.nonzero()[0]
+                        elif not (
+                            isinstance(locs, np.ndarray) and locs.dtype.kind == "i"
+                        ):
+                            # Should never be reached, but let's cover our bases
+                            raise NotImplementedError(
+                                "Unexpected get_loc result, please report a bug at "
+                                "https://github.com/pandas-dev/pandas"
+                            )
+
+                        for i, loc in enumerate(locs):
+                            res_loc = res_k.iloc[:, i]
+                            target = self.iloc[:, loc]
+
+                            if res_loc.dtype == target.dtype:
+                                result.iloc[:, loc] = res_loc
                             else:
-                                # Different dtype -> no way to do inplace.
-                                result[k] = res_k
-                        else:
-                            # see test_fillna_dict_inplace_nonunique_columns
-                            locs = result.columns.get_loc(k)
-                            if isinstance(locs, slice):
-                                locs = np.arange(self.shape[1])[locs]
-                            elif (
-                                isinstance(locs, np.ndarray) and locs.dtype.kind == "b"
-                            ):
-                                locs = locs.nonzero()[0]
-                            elif not (
-                                isinstance(locs, np.ndarray) and locs.dtype.kind == "i"
-                            ):
-                                # Should never be reached, but let's cover our bases
-                                raise NotImplementedError(
-                                    "Unexpected get_loc result, please report a bug at "
-                                    "https://github.com/pandas-dev/pandas"
-                                )
-
-                            for i, loc in enumerate(locs):
-                                res_loc = res_k.iloc[:, i]
-                                target = self.iloc[:, loc]
-
-                                if res_loc.dtype == target.dtype:
-                                    result.iloc[:, loc] = res_loc
-                                else:
-                                    result.isetitem(loc, res_loc)
-                if inplace:
-                    return self._update_inplace(result)
-                else:
-                    return result
+                                result.isetitem(loc, res_loc)
+            if inplace:
+                return self._update_inplace(result)
+            else:
+                return result
 
-            elif not is_list_like(value):
-                if axis == 1:
-                    result = self.T.fillna(value=value, limit=limit).T
-                    new_data = result._mgr
-                else:
-                    new_data = self._mgr.fillna(
-                        value=value, limit=limit, inplace=inplace
-                    )
-            elif isinstance(value, ABCDataFrame) and self.ndim == 2:
-                new_data = self.where(self.notna(), value)._mgr
+        elif not is_list_like(value):
+            if axis == 1:
+                result = self.T.fillna(value=value, limit=limit).T
+                new_data = result._mgr
             else:
-                raise ValueError(f"invalid fill value with a {type(value)}")
+                new_data = self._mgr.fillna(value=value, limit=limit, inplace=inplace)
+        elif isinstance(value, ABCDataFrame) and self.ndim == 2:
+            new_data = self.where(self.notna(), value)._mgr
+        else:
+            raise ValueError(f"invalid fill value with a {type(value)}")
 
         result = self._constructor_from_mgr(new_data, axes=new_data.axes)
         if inplace:
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index 30cf6f0b866ee..5d26bcaa63ecb 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -2543,7 +2543,7 @@ def notna(self) -> npt.NDArray[np.bool_]:
 
     notnull = notna
 
-    def fillna(self, value=None):
+    def fillna(self, value):
         """
         Fill NA/NaN values with the specified value.
 
diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py
index 2e554bc848ffe..3be17d28d931c 100644
--- a/pandas/core/indexes/multi.py
+++ b/pandas/core/indexes/multi.py
@@ -1681,7 +1681,7 @@ def duplicated(self, keep: DropKeep = "first") -> npt.NDArray[np.bool_]:
     # (previously declared in base class "IndexOpsMixin")
     _duplicated = duplicated  # type: ignore[misc]
 
-    def fillna(self, value=None, downcast=None):
+    def fillna(self, value, downcast=None):
         """
         fillna is not implemented for MultiIndex
         """
diff --git a/pandas/tests/extension/base/missing.py b/pandas/tests/extension/base/missing.py
index 328c6cd6164fb..4b9234a9904a2 100644
--- a/pandas/tests/extension/base/missing.py
+++ b/pandas/tests/extension/base/missing.py
@@ -68,6 +68,12 @@ def test_fillna_scalar(self, data_missing):
         expected = data_missing.fillna(valid)
         tm.assert_extension_array_equal(result, expected)
 
+    def test_fillna_with_none(self, data_missing):
+        # GH#57723
+        result = data_missing.fillna(None)
+        expected = data_missing
+        tm.assert_extension_array_equal(result, expected)
+
     def test_fillna_limit_pad(self, data_missing):
         arr = data_missing.take([1, 0, 0, 0, 1])
         result = pd.Series(arr).ffill(limit=2)
diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py
index a2721908e858f..504bafc145108 100644
--- a/pandas/tests/extension/decimal/test_decimal.py
+++ b/pandas/tests/extension/decimal/test_decimal.py
@@ -144,6 +144,14 @@ def test_fillna_series(self, data_missing):
         ):
             super().test_fillna_series(data_missing)
 
+    def test_fillna_with_none(self, data_missing):
+        # GH#57723
+        # EAs without special handling for None raise here, unlike pandas' own
+        # EAs, which interpret None as the NA value for the dtype.
+        msg = "conversion from NoneType to Decimal is not supported"
+        with pytest.raises(TypeError, match=msg):
+            super().test_fillna_with_none(data_missing)
+
     @pytest.mark.parametrize("dropna", [True, False])
     def test_value_counts(self, all_data, dropna):
         all_data = all_data[:10]
diff --git a/pandas/tests/extension/json/test_json.py b/pandas/tests/extension/json/test_json.py
index 6ecbf2063f203..22ac9627f6cda 100644
--- a/pandas/tests/extension/json/test_json.py
+++ b/pandas/tests/extension/json/test_json.py
@@ -149,6 +149,13 @@ def test_fillna_frame(self):
         """We treat dictionaries as a mapping in fillna, not a scalar."""
         super().test_fillna_frame()
 
+    def test_fillna_with_none(self, data_missing):
+        # GH#57723
+        # EAs without special handling for None fail here (AssertionError for the
+        # JSON EA), unlike pandas' own EAs, which treat None as the dtype's NA value.
+        with pytest.raises(AssertionError):
+            super().test_fillna_with_none(data_missing)
+
     @pytest.mark.parametrize(
         "limit_area, input_ilocs, expected_ilocs",
         [
diff --git a/pandas/tests/frame/methods/test_fillna.py b/pandas/tests/frame/methods/test_fillna.py
index 81f66cfd48b0a..77df2ee67ce75 100644
--- a/pandas/tests/frame/methods/test_fillna.py
+++ b/pandas/tests/frame/methods/test_fillna.py
@@ -64,8 +64,8 @@ def test_fillna_datetime(self, datetime_frame):
             padded.loc[padded.index[-5:], "A"] == padded.loc[padded.index[-5], "A"]
         ).all()
 
-        msg = "Must specify a fill 'value'"
-        with pytest.raises(ValueError, match=msg):
+        msg = r"NDFrame.fillna\(\) missing 1 required positional argument: 'value'"
+        with pytest.raises(TypeError, match=msg):
             datetime_frame.fillna()
 
     @pytest.mark.xfail(using_pyarrow_string_dtype(), reason="can't fill 0 in string")
@@ -779,3 +779,17 @@ def test_ffill_bfill_limit_area(data, expected_data, method, kwargs):
     expected = DataFrame(expected_data)
     result = getattr(df, method)(**kwargs)
     tm.assert_frame_equal(result, expected)
+
+
+@pytest.mark.parametrize("test_frame", [True, False])
+@pytest.mark.parametrize("dtype", ["float", "object"])
+def test_fillna_with_none_object(test_frame, dtype):
+    # GH#57723
+    obj = Series([1, np.nan, 3], dtype=dtype)
+    if test_frame:
+        obj = obj.to_frame()
+    result = obj.fillna(value=None)
+    expected = Series([1, None, 3], dtype=dtype)
+    if test_frame:
+        expected = expected.to_frame()
+    tm.assert_equal(result, expected)

From 0f171fac73fe7de07192d3314c3cb250e4846955 Mon Sep 17 00:00:00 2001
From: richard <rhshadrach@gmail.com>
Date: Sun, 31 Mar 2024 07:45:11 -0400
Subject: [PATCH 2/4] fixups

---
 pandas/core/generic.py                    | 14 +++++++-------
 pandas/tests/frame/methods/test_fillna.py |  2 +-
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index eed9adf7b59cd..7030a966ba08c 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -6752,7 +6752,7 @@ def _pad_or_backfill(
     @overload
     def fillna(
         self,
-        value: Hashable | Mapping | Series | DataFrame = ...,
+        value: Hashable | Mapping | Series | DataFrame,
         *,
         axis: Axis | None = ...,
         inplace: Literal[False] = ...,
@@ -6762,7 +6762,7 @@ def fillna(
     @overload
     def fillna(
         self,
-        value: Hashable | Mapping | Series | DataFrame = ...,
+        value: Hashable | Mapping | Series | DataFrame,
         *,
         axis: Axis | None = ...,
         inplace: Literal[True],
@@ -6772,7 +6772,7 @@ def fillna(
     @overload
     def fillna(
         self,
-        value: Hashable | Mapping | Series | DataFrame = ...,
+        value: Hashable | Mapping | Series | DataFrame,
         *,
         axis: Axis | None = ...,
         inplace: bool = ...,
@@ -6814,10 +6814,6 @@ def fillna(
             This is the maximum number of entries along the entire axis
             where NaNs will be filled. Must be greater than 0 if not None.
 
-        Notes
-        -----
-        For non-object dtype, ``value=None`` will use the NA value of the dtype.
-
         Returns
         -------
         {klass} or None
@@ -6831,6 +6827,10 @@ def fillna(
         reindex : Conform object to new index.
         asfreq : Convert TimeSeries to specified frequency.
 
+        Notes
+        -----
+        For non-object dtype, ``value=None`` will use the NA value of the dtype.
+
         Examples
         --------
         >>> df = pd.DataFrame(
diff --git a/pandas/tests/frame/methods/test_fillna.py b/pandas/tests/frame/methods/test_fillna.py
index 77df2ee67ce75..c89d0c8a0fc62 100644
--- a/pandas/tests/frame/methods/test_fillna.py
+++ b/pandas/tests/frame/methods/test_fillna.py
@@ -64,7 +64,7 @@ def test_fillna_datetime(self, datetime_frame):
             padded.loc[padded.index[-5:], "A"] == padded.loc[padded.index[-5], "A"]
         ).all()
 
-        msg = r"NDFrame.fillna\(\) missing 1 required positional argument: 'value'"
+        msg = r"missing 1 required positional argument: 'value'"
         with pytest.raises(TypeError, match=msg):
             datetime_frame.fillna()
 

From 80faa789e4579abfed51a79026a13452d85c99e4 Mon Sep 17 00:00:00 2001
From: richard <rhshadrach@gmail.com>
Date: Sun, 31 Mar 2024 09:20:48 -0400
Subject: [PATCH 3/4] fixup

---
 pandas/core/generic.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 7030a966ba08c..84bbc0961e846 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -6939,7 +6939,7 @@ def fillna(
         elif isinstance(value, (dict, ABCSeries)):
             if axis == 1:
                 raise NotImplementedError(
-                    "Currently only can fill " "with dict/Series column " "by column"
+                    "Currently only can fill with dict/Series column by column"
                 )
             result = self if inplace else self.copy(deep=False)
             for k, v in value.items():

From 9c8bdf9cfe721ade2fd2ea79cab58695087644f2 Mon Sep 17 00:00:00 2001
From: richard <rhshadrach@gmail.com>
Date: Fri, 12 Apr 2024 20:51:49 -0400
Subject: [PATCH 4/4] Improve docs

---
 doc/source/user_guide/missing_data.rst | 21 +++++++++++++++++++++
 pandas/core/generic.py                 |  2 ++
 2 files changed, 23 insertions(+)

diff --git a/doc/source/user_guide/missing_data.rst b/doc/source/user_guide/missing_data.rst
index 2e104ac06f9f4..5149bd30dbbef 100644
--- a/doc/source/user_guide/missing_data.rst
+++ b/doc/source/user_guide/missing_data.rst
@@ -386,6 +386,27 @@ Replace NA with a scalar value
    df
    df.fillna(0)
 
+When the data has object dtype, you can control what type of NA values are present.
+
+.. ipython:: python
+
+   df = pd.DataFrame({"a": [pd.NA, np.nan, None]}, dtype=object)
+   df
+   df.fillna(None)
+   df.fillna(np.nan)
+   df.fillna(pd.NA)
+
+However, when the dtype is not object, these will all be replaced with the proper NA value for the dtype.
+
+.. ipython:: python
+
+   data = {"np": [1.0, np.nan, np.nan, 2], "arrow": pd.array([1.0, pd.NA, pd.NA, 2], dtype="float64[pyarrow]")}
+   df = pd.DataFrame(data)
+   df
+   df.fillna(None)
+   df.fillna(np.nan)
+   df.fillna(pd.NA)
+
 Fill gaps forward or backward
 
 .. ipython:: python
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 7227cfa7615d3..523ca9de201bf 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -6830,6 +6830,8 @@ def fillna(
         Notes
         -----
         For non-object dtype, ``value=None`` will use the NA value of the dtype.
+        See more details in the :ref:`Filling missing data<missing_data.fillna>`
+        section.
 
         Examples
         --------