From 833ca2daffc8f6649c8aa04175f6d649af2ac4b5 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Sat, 30 Sep 2023 21:01:59 +0100
Subject: [PATCH 1/6] Implement masked algorithm for mode

---
 doc/source/whatsnew/v2.2.0.rst            |  2 +-
 pandas/_libs/hashtable_func_helper.pxi.in | 10 +++++++---
 pandas/core/algorithms.py                 |  5 ++++-
 pandas/core/arrays/masked.py              | 10 ++++++++++
 pandas/tests/libs/test_hashtable.py       | 10 +++++-----
 pandas/tests/series/test_reductions.py    | 22 ++++++++++++++++++++++
 6 files changed, 49 insertions(+), 10 deletions(-)

diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst
index 9dc095e6de6ff..81b6b42cd4c06 100644
--- a/doc/source/whatsnew/v2.2.0.rst
+++ b/doc/source/whatsnew/v2.2.0.rst
@@ -77,7 +77,7 @@ Other enhancements
 - :func:`read_csv` now supports ``on_bad_lines`` parameter with ``engine="pyarrow"``. (:issue:`54480`)
 - :meth:`ExtensionArray._explode` interface method added to allow extension type implementations of the ``explode`` method (:issue:`54833`)
 - DataFrame.apply now allows the usage of numba (via ``engine="numba"``) to JIT compile the passed function, allowing for potential speedups (:issue:`54666`)
-- Implement masked algorithms for :meth:`Series.value_counts` (:issue:`54984`)
+- Implement masked algorithms for :meth:`Series.value_counts` and :meth:`Series.mode` (:issue:`54984`, :issue:`55339`)
 -
 
 .. ---------------------------------------------------------------------------
diff --git a/pandas/_libs/hashtable_func_helper.pxi.in b/pandas/_libs/hashtable_func_helper.pxi.in
index 19acd4acbdee7..336af306d410f 100644
--- a/pandas/_libs/hashtable_func_helper.pxi.in
+++ b/pandas/_libs/hashtable_func_helper.pxi.in
@@ -404,12 +404,13 @@ def mode(ndarray[htfunc_t] values, bint dropna, const uint8_t[:] mask=None):
     cdef:
         ndarray[htfunc_t] keys
         ndarray[htfunc_t] modes
+        ndarray[uint8_t] res_mask = None
 
         int64_t[::1] counts
         int64_t count, _, max_count = -1
-        Py_ssize_t nkeys, k, j = 0
+        Py_ssize_t nkeys, k, na_counter, j = 0
 
-    keys, counts, _ = value_count(values, dropna, mask=mask)
+    keys, counts, na_counter = value_count(values, dropna, mask=mask)
     nkeys = len(keys)
 
     modes = np.empty(nkeys, dtype=values.dtype)
@@ -440,7 +441,10 @@ def mode(ndarray[htfunc_t] values, bint dropna, const uint8_t[:] mask=None):
 
             modes[j] = keys[k]
 
-    return modes[:j + 1]
+    if na_counter > 0:
+        res_mask = np.zeros(j+1, dtype=np.bool_)
+        res_mask[j] = (na_counter == max_count)  # slot j is NA only if NA ties the max
+    return modes[:j + 1], res_mask
 
 
 {{py:
diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
index c952178f4c998..81ae53bb8164f 100644
--- a/pandas/core/algorithms.py
+++ b/pandas/core/algorithms.py
@@ -1039,7 +1039,10 @@ def mode(
 
     values = _ensure_data(values)
 
-    npresult = htable.mode(values, dropna=dropna, mask=mask)
+    npresult, res_mask = htable.mode(values, dropna=dropna, mask=mask)
+    if res_mask is not None:
+        return npresult, res_mask
+
     try:
         npresult = np.sort(npresult)
     except TypeError as err:
diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py
index 9b85fb0477e6f..3b3073cd5f798 100644
--- a/pandas/core/arrays/masked.py
+++ b/pandas/core/arrays/masked.py
@@ -69,6 +69,7 @@
 from pandas.core.algorithms import (
     factorize_array,
     isin,
+    mode,
     take,
 )
 from pandas.core.array_algos import (
@@ -1061,6 +1062,15 @@ def value_counts(self, dropna: bool = True) -> Series:
         )
         return Series(arr, index=index, name="count", copy=False)
 
+    def _mode(self, dropna: bool = True) -> Self:
+        if dropna or not self._mask.any():  # without NAs mode() returns no mask
+            result = mode(self._data, dropna=dropna, mask=self._mask)
+            res_mask = np.zeros(result.shape, dtype=np.bool_)
+        else:
+            result, res_mask = mode(self._data, dropna=dropna, mask=self._mask)
+        result = type(self)(result, res_mask)
+        return result[result.argsort()]
+
     @doc(ExtensionArray.equals)
     def equals(self, other) -> bool:
         if type(self) != type(other):
diff --git a/pandas/tests/libs/test_hashtable.py b/pandas/tests/libs/test_hashtable.py
index 2c8f4c4149528..e54764f9ac4a6 100644
--- a/pandas/tests/libs/test_hashtable.py
+++ b/pandas/tests/libs/test_hashtable.py
@@ -644,13 +644,13 @@ def test_mode(self, dtype, writable):
         values = np.repeat(np.arange(N).astype(dtype), 5)
         values[0] = 42
         values.flags.writeable = writable
-        result = ht.mode(values, False)
+        result = ht.mode(values, False)[0]
         assert result == 42
 
     def test_mode_stable(self, dtype, writable):
         values = np.array([2, 1, 5, 22, 3, -1, 8]).astype(dtype)
         values.flags.writeable = writable
-        keys = ht.mode(values, False)
+        keys = ht.mode(values, False)[0]
         tm.assert_numpy_array_equal(keys, values)
 
 
@@ -658,7 +658,7 @@ def test_modes_with_nans():
     # GH42688, nans aren't mangled
     nulls = [pd.NA, np.nan, pd.NaT, None]
     values = np.array([True] + nulls * 2, dtype=np.object_)
-    modes = ht.mode(values, False)
+    modes = ht.mode(values, False)[0]
     assert modes.size == len(nulls)
 
 
@@ -724,8 +724,8 @@ def test_ismember_no(self, dtype):
 
     def test_mode(self, dtype):
         values = np.array([42, np.nan, np.nan, np.nan], dtype=dtype)
-        assert ht.mode(values, True) == 42
-        assert np.isnan(ht.mode(values, False))
+        assert ht.mode(values, True)[0] == 42
+        assert np.isnan(ht.mode(values, False)[0])
 
 
 def test_ismember_tuple_with_nans():
diff --git a/pandas/tests/series/test_reductions.py b/pandas/tests/series/test_reductions.py
index 1e1ac100b21bf..2dbe520672e35 100644
--- a/pandas/tests/series/test_reductions.py
+++ b/pandas/tests/series/test_reductions.py
@@ -29,6 +29,28 @@ def test_mode_extension_dtype(as_period):
     tm.assert_series_equal(res, ser)
 
 
+def test_mode_nullable_dtype(any_numeric_ea_dtype):
+    # GH#55339
+    ser = Series([1, 3, 2, pd.NA, 3, 2, pd.NA], dtype=any_numeric_ea_dtype)
+    result = ser.mode(dropna=False)
+    expected = Series([2, 3, pd.NA], dtype=any_numeric_ea_dtype)
+    tm.assert_series_equal(result, expected)
+
+    result = ser.mode(dropna=True)
+    expected = Series([2, 3], dtype=any_numeric_ea_dtype)
+    tm.assert_series_equal(result, expected)
+
+    ser[-1] = pd.NA  # NOTE(review): presumably adds a new label -1 (third NA) - confirm
+
+    result = ser.mode(dropna=True)
+    expected = Series([2, 3], dtype=any_numeric_ea_dtype)
+    tm.assert_series_equal(result, expected)
+
+    result = ser.mode(dropna=False)
+    expected = Series([pd.NA], dtype=any_numeric_ea_dtype)
+    tm.assert_series_equal(result, expected)
+
+
 def test_reductions_td64_with_nat():
     # GH#8617
     ser = Series([0, pd.NaT], dtype="m8[ns]")

From 3cf29305678d0a32e91d223d8259918ecb70a7fa Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Sat, 30 Sep 2023 21:07:27 +0100
Subject: [PATCH 2/6] Fix mypy

---
 pandas/core/algorithms.py    | 2 +-
 pandas/core/arrays/masked.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
index 81ae53bb8164f..ba939d93ff839 100644
--- a/pandas/core/algorithms.py
+++ b/pandas/core/algorithms.py
@@ -1041,7 +1041,7 @@ def mode(
 
     npresult, res_mask = htable.mode(values, dropna=dropna, mask=mask)
     if res_mask is not None:
-        return npresult, res_mask
+        return npresult, res_mask  # type: ignore[return-value]
 
     try:
         npresult = np.sort(npresult)
diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py
index 3b3073cd5f798..287dc7c86db31 100644
--- a/pandas/core/arrays/masked.py
+++ b/pandas/core/arrays/masked.py
@@ -1068,7 +1068,7 @@ def _mode(self, dropna: bool = True) -> Self:
             res_mask = np.zeros(result.shape, dtype=np.bool_)
         else:
             result, res_mask = mode(self._data, dropna=dropna, mask=self._mask)
-        result = type(self)(result, res_mask)
+        result = type(self)(result, res_mask)  # type: ignore[arg-type]
         return result[result.argsort()]
 
     @doc(ExtensionArray.equals)

From 5f6baafc7eff37f25d4a0e6a6e915948f8263cb8 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
Date: Sat, 30 Sep 2023 23:39:08 +0100
Subject: [PATCH 3/6] Update v2.2.0.rst

---
 doc/source/whatsnew/v2.2.0.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst
index 81b6b42cd4c06..caad970cc87a5 100644
--- a/doc/source/whatsnew/v2.2.0.rst
+++ b/doc/source/whatsnew/v2.2.0.rst
@@ -77,7 +77,7 @@ Other enhancements
 - :func:`read_csv` now supports ``on_bad_lines`` parameter with ``engine="pyarrow"``. (:issue:`54480`)
 - :meth:`ExtensionArray._explode` interface method added to allow extension type implementations of the ``explode`` method (:issue:`54833`)
 - DataFrame.apply now allows the usage of numba (via ``engine="numba"``) to JIT compile the passed function, allowing for potential speedups (:issue:`54666`)
-- Implement masked algorithms for :meth:`Series.value_counts` and :meth:`Series.mode` (:issue:`54984`, :issue:`55339`)
+- Implement masked algorithms for :meth:`Series.value_counts` and :meth:`Series.mode` (:issue:`54984`, :issue:`55340`)
 -
 
 .. ---------------------------------------------------------------------------

From 87e7a1ee662c28f6ec9a04bae5cb7a699f893028 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
Date: Sat, 30 Sep 2023 23:39:23 +0100
Subject: [PATCH 4/6] Update test_reductions.py

---
 pandas/tests/series/test_reductions.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/series/test_reductions.py b/pandas/tests/series/test_reductions.py
index 2dbe520672e35..13082ee99b47a 100644
--- a/pandas/tests/series/test_reductions.py
+++ b/pandas/tests/series/test_reductions.py
@@ -30,7 +30,7 @@ def test_mode_extension_dtype(as_period):
 
 
 def test_mode_nullable_dtype(any_numeric_ea_dtype):
-    # GH#55339
+    # GH#55340
     ser = Series([1, 3, 2, pd.NA, 3, 2, pd.NA], dtype=any_numeric_ea_dtype)
     result = ser.mode(dropna=False)
     expected = Series([2, 3, pd.NA], dtype=any_numeric_ea_dtype)

From eaf3d13e1456e180bd8df960105ed6fbc57cf85d Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
Date: Sun, 15 Oct 2023 15:45:35 +0200
Subject: [PATCH 5/6] Update v2.2.0.rst

---
 doc/source/whatsnew/v2.2.0.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst
index caad970cc87a5..310728e54132f 100644
--- a/doc/source/whatsnew/v2.2.0.rst
+++ b/doc/source/whatsnew/v2.2.0.rst
@@ -77,7 +77,6 @@ Other enhancements
 - :func:`read_csv` now supports ``on_bad_lines`` parameter with ``engine="pyarrow"``. (:issue:`54480`)
 - :meth:`ExtensionArray._explode` interface method added to allow extension type implementations of the ``explode`` method (:issue:`54833`)
 - DataFrame.apply now allows the usage of numba (via ``engine="numba"``) to JIT compile the passed function, allowing for potential speedups (:issue:`54666`)
-- Implement masked algorithms for :meth:`Series.value_counts` and :meth:`Series.mode` (:issue:`54984`, :issue:`55340`)
 -
 
 .. ---------------------------------------------------------------------------
@@ -236,6 +235,7 @@ Other Deprecations
 
 Performance improvements
 ~~~~~~~~~~~~~~~~~~~~~~~~
+- Performance improvement for :meth:`Series.value_counts` and :meth:`Series.mode` (:issue:`54984`, :issue:`55340`)
 - Performance improvement in :func:`concat` with ``axis=1`` and objects with unaligned indexes (:issue:`55084`)
 - Performance improvement in :func:`to_dict` on converting DataFrame to dictionary (:issue:`50990`)
 - Performance improvement in :meth:`DataFrame.groupby` when aggregating pyarrow timestamp and duration dtypes (:issue:`55031`)

From 0ed2479eb91d8ef025b3d306fe3e28b595daa67f Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
Date: Sun, 15 Oct 2023 15:59:42 +0200
Subject: [PATCH 6/6] Update v2.2.0.rst

---
 doc/source/whatsnew/v2.2.0.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst
index 1263373dbb8fa..c7c1bc3427618 100644
--- a/doc/source/whatsnew/v2.2.0.rst
+++ b/doc/source/whatsnew/v2.2.0.rst
@@ -272,7 +272,7 @@ Other Deprecations
 
 Performance improvements
 ~~~~~~~~~~~~~~~~~~~~~~~~
-- Performance improvement for :meth:`Series.value_counts` and :meth:`Series.mode` (:issue:`54984`, :issue:`55340`)
+- Performance improvement for :meth:`Series.value_counts` and :meth:`Series.mode` for masked dtypes (:issue:`54984`, :issue:`55340`)
 - Performance improvement in :func:`concat` with ``axis=1`` and objects with unaligned indexes (:issue:`55084`)
 - Performance improvement in :func:`to_dict` on converting DataFrame to dictionary (:issue:`50990`)
 - Performance improvement in :meth:`DataFrame.groupby` when aggregating pyarrow timestamp and duration dtypes (:issue:`55031`)