BUG/PERF: sparse min/max don't densify #43527

Merged: 7 commits, Sep 12, 2021
14 changes: 14 additions & 0 deletions asv_bench/benchmarks/sparse.py
@@ -166,4 +166,18 @@ def time_division(self, fill_value):
self.arr1 / self.arr2


class MinMax:

params = (["min", "max"], [0.0, np.nan])
param_names = ["func", "fill_value"]

def setup(self, func, fill_value):
N = 1_000_000
arr = make_array(N, 1e-5, fill_value, np.float64)
self.sp_arr = SparseArray(arr, fill_value=fill_value)

def time_min_max(self, func, fill_value):
getattr(self.sp_arr, func)()


from .pandas_vb_common import setup # noqa: F401 isort:skip
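For a sense of what this benchmark exercises outside the asv harness, here is a rough standalone sketch (illustrative only; the density, sizes, and timing approach are assumptions, and make_array is the helper already defined in this benchmark file, approximated inline here):

import timeit

import numpy as np
from pandas.arrays import SparseArray

# Build a mostly-NaN array with roughly 1e-5 density of stored values, similar
# in spirit to make_array(N, 1e-5, np.nan, np.float64) used in setup() above.
N = 1_000_000
rng = np.random.default_rng(0)
arr = np.full(N, np.nan)
idx = rng.choice(N, size=int(N * 1e-5), replace=False)
arr[idx] = rng.random(len(idx))
sp_arr = SparseArray(arr, fill_value=np.nan)

# With this patch, min/max operate on the stored sparse values directly
# instead of densifying the full array first.
print(timeit.timeit(sp_arr.max, number=100))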
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v1.4.0.rst
@@ -302,6 +302,7 @@ Performance improvements
- Performance improvement in :meth:`Series.sparse.to_coo` (:issue:`42880`)
- Performance improvement in indexing with a :class:`MultiIndex` indexer on another :class:`MultiIndex` (:issue:`43370`)
- Performance improvement in :meth:`GroupBy.quantile` (:issue:`43469`)
- :meth:`SparseArray.min` and :meth:`SparseArray.max` no longer require converting to a dense array (:issue:`43526`)
-

.. ---------------------------------------------------------------------------
@@ -437,6 +438,7 @@ Reshaping
Sparse
^^^^^^
- Bug in :meth:`DataFrame.sparse.to_coo` raising ``AttributeError`` when column names are not unique (:issue:`29564`)
- Bug in :meth:`SparseArray.max` and :meth:`SparseArray.min` raising ``ValueError`` for arrays with 0 non-null elements (:issue:`43527`)
-
-

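As an illustration of the Sparse entry above (a sketch, not part of this diff): reducing a SparseArray with zero non-null elements previously raised ValueError and now returns the NA value instead.

import numpy as np
from pandas.arrays import SparseArray

empty = SparseArray(np.array([]))
empty.max()  # nan (previously raised ValueError, see issue 43527)
empty.min()  # nan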
66 changes: 56 additions & 10 deletions pandas/core/arrays/sparse/array.py
@@ -1456,23 +1456,69 @@ def mean(self, axis=0, *args, **kwargs):
nsparse = self.sp_index.ngaps
return (sp_sum + self.fill_value * nsparse) / (ct + nsparse)

    def max(self, axis: int = 0, *args, **kwargs) -> Scalar:
        """
        Max of non-NA/null values

        Parameters
        ----------
        axis : int, default 0
            Not Used. NumPy compatibility.
        *args, **kwargs
            Not Used. NumPy compatibility.

        Returns
        -------
        scalar
        """
        nv.validate_max(args, kwargs)
        return self._min_max("max")

    def min(self, axis: int = 0, *args, **kwargs) -> Scalar:
        """
        Min of non-NA/null values

        Parameters
        ----------
        axis : int, default 0
            Not Used. NumPy compatibility.
        *args, **kwargs
            Not Used. NumPy compatibility.

        Returns
        -------
        scalar
        """
        nv.validate_min(args, kwargs)
        return self._min_max("min")

    def _min_max(self, kind: Literal["min", "max"]) -> Scalar:
        """
        Min/max of non-NA/null values

        Parameters
        ----------
        kind : {"min", "max"}

        Returns
        -------
        scalar
        """
        valid_vals = self._valid_sp_values
        has_nonnull_fill_vals = not self._null_fill_value and self.sp_index.ngaps > 0
        if len(valid_vals) > 0:
            sp_min_max = getattr(valid_vals, kind)()

            # If a non-null fill value is currently present, it might be the min/max
            if has_nonnull_fill_vals:
                func = max if kind == "max" else min
                return func(sp_min_max, self.fill_value)
            else:
                return sp_min_max
        elif has_nonnull_fill_vals:
            return self.fill_value
        else:
            return na_value_for_dtype(self.dtype.subtype)

Inline review comment (Contributor): might as well just do elif/else
Reply (Member Author): thanks, updated

# ------------------------------------------------------------------------
# Ufuncs
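To make the fill-value handling in _min_max concrete, here is a small usage sketch (illustrative only, not part of the diff; it mirrors the test_fill_value case added below). When the fill value is non-null and the array has gaps, the fill value competes with the stored sparse values for the min/max, so no densification is needed:

import numpy as np
from pandas import SparseDtype
from pandas.arrays import SparseArray

arr = SparseArray(np.array([100, 0, 1]), dtype=SparseDtype("int", 100))
arr.max()  # 100 -- the non-null fill value beats the stored values 0 and 1
arr.min()  # 0   -- the smallest stored sparse value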
56 changes: 42 additions & 14 deletions pandas/tests/arrays/sparse/test_array.py
@@ -1362,26 +1362,54 @@ def test_drop_duplicates_fill_value():


class TestMinMax:
@pytest.mark.parametrize(
"raw_data,max_expected,min_expected",
[
(np.arange(5.0), [4], [0]),
(-np.arange(5.0), [0], [-4]),
(np.array([0, 1, 2, np.nan, 4]), [4], [0]),
(np.array([np.nan] * 5), [np.nan], [np.nan]),
(np.array([]), [np.nan], [np.nan]),
],
)
def test_nan_fill_value(self, raw_data, max_expected, min_expected):
max_result = SparseArray(raw_data).max()
min_result = SparseArray(raw_data).min()
assert max_result in max_expected
assert min_result in min_expected

@pytest.mark.parametrize(
"fill_value,max_expected,min_expected",
[
(100, 100, 0),
(-100, 1, -100),
],
)
def test_fill_value(self, fill_value, max_expected, min_expected):
arr = SparseArray(
np.array([fill_value, 0, 1]), dtype=SparseDtype("int", fill_value)
)
max_result = arr.max()
assert max_result == max_expected

min_result = arr.min()
assert min_result == min_expected

@pytest.mark.parametrize("func", ["min", "max"])
@pytest.mark.parametrize("data", [np.array([]), np.array([np.nan, np.nan])])
@pytest.mark.parametrize(
"dtype,expected",
[
(SparseDtype(np.float64, np.nan), np.nan),
(SparseDtype(np.float64, 5.0), np.nan),
(SparseDtype("datetime64[ns]", pd.NaT), pd.NaT),
(SparseDtype("datetime64[ns]", pd.to_datetime("2018-05-05")), pd.NaT),
],
)
def test_na_value_if_no_valid_values(self, func, data, dtype, expected):
arr = SparseArray(data, dtype=dtype)
result = getattr(arr, func)()
        if expected is pd.NaT:
            assert result is pd.NaT
else:
assert np.isnan(result)
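As a brief interactive counterpart to test_na_value_if_no_valid_values (illustrative only, assuming the behavior added in this PR): with no valid values, the reduction returns the NA value for the subtype, so a datetime-backed sparse array yields NaT rather than nan.

import numpy as np
import pandas as pd
from pandas.arrays import SparseArray

dt_arr = SparseArray(np.array([]), dtype=pd.SparseDtype("datetime64[ns]", pd.NaT))
dt_arr.min()  # NaT

float_arr = SparseArray(np.array([np.nan, np.nan]), dtype=pd.SparseDtype(np.float64, np.nan))
float_arr.max()  # nan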