From dd2907a8a19d4e03bffdaa0007da2d98994788b7 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Sun, 26 Nov 2023 19:03:16 +0100 Subject: [PATCH 1/2] BUG: mode not preserving object dtype for string option --- doc/source/whatsnew/v2.1.4.rst | 2 +- pandas/core/series.py | 6 +++++- pandas/tests/series/test_reductions.py | 10 ++++++++++ 3 files changed, 16 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v2.1.4.rst b/doc/source/whatsnew/v2.1.4.rst index 543a9864ced26..c16883380b882 100644 --- a/doc/source/whatsnew/v2.1.4.rst +++ b/doc/source/whatsnew/v2.1.4.rst @@ -25,7 +25,7 @@ Bug fixes - Bug in :meth:`Index.__getitem__` returning wrong result for Arrow dtypes and negative stepsize (:issue:`55832`) - Fixed bug in :meth:`DataFrame.__setitem__` casting :class:`Index` with object-dtype to PyArrow backed strings when ``infer_string`` option is set (:issue:`55638`) - Fixed bug in :meth:`Index.insert` casting object-dtype to PyArrow backed strings when ``infer_string`` option is set (:issue:`55638`) -- +- Fixed bug in :meth:`Series.mode` not keeping object dtype when ``infer_string`` is set (:issue:`56183`) .. --------------------------------------------------------------------------- .. _whatsnew_214.other: diff --git a/pandas/core/series.py b/pandas/core/series.py index a9679f22f9933..2eb491b937871 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2287,7 +2287,11 @@ def mode(self, dropna: bool = True) -> Series: # Ensure index is type stable (should always use int index) return self._constructor( - res_values, index=range(len(res_values)), name=self.name, copy=False + res_values, + index=range(len(res_values)), + name=self.name, + copy=False, + dtype=self.dtype, ).__finalize__(self, method="mode") def unique(self) -> ArrayLike: # pylint: disable=useless-parent-delegation diff --git a/pandas/tests/series/test_reductions.py b/pandas/tests/series/test_reductions.py index 4bbbcf3bf54c2..76353ab25fca6 100644 --- a/pandas/tests/series/test_reductions.py +++ b/pandas/tests/series/test_reductions.py @@ -51,6 +51,16 @@ def test_mode_nullable_dtype(any_numeric_ea_dtype): tm.assert_series_equal(result, expected) +def test_mode_infer_string(): + # GH#56183 + pytest.importorskip("pyarrow") + ser = Series(["a", "b"], dtype=object) + with pd.option_context("future.infer_string", True): + result = ser.mode() + expected = Series(["a", "b"], dtype=object) + tm.assert_series_equal(result, expected) + + def test_reductions_td64_with_nat(): # GH#8617 ser = Series([0, pd.NaT], dtype="m8[ns]") From ff719b884438ee1740a41e7f5230d7c6fdbf6ad2 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Sun, 26 Nov 2023 19:05:41 +0100 Subject: [PATCH 2/2] Adjust tests in root directory for new string option --- pandas/tests/test_algos.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index a6e63dfd5f409..f4445073ad3a1 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -1675,7 +1675,7 @@ class TestHashTable: def test_hashtable_unique(self, htable, tm_dtype, writable): # output of maker has guaranteed unique elements maker = getattr(tm, "make" + tm_dtype + "Index") - s = Series(maker(1000)) + s = Series(maker(1000), dtype=None if tm_dtype != "String" else object) if htable == ht.Float64HashTable: # add NaN for float column s.loc[500] = np.nan @@ -1715,7 +1715,7 @@ def test_hashtable_unique(self, htable, tm_dtype, writable): def test_hashtable_factorize(self, htable, tm_dtype, writable): # output of maker has guaranteed unique elements maker = getattr(tm, "make" + tm_dtype + "Index") - s = Series(maker(1000)) + s = Series(maker(1000), dtype=None if tm_dtype != "String" else object) if htable == ht.Float64HashTable: # add NaN for float column s.loc[500] = np.nan @@ -1948,7 +1948,7 @@ def test_timedelta_mode(self): tm.assert_series_equal(ser.mode(), exp) def test_mixed_dtype(self): - exp = Series(["foo"]) + exp = Series(["foo"], dtype=object) ser = Series([1, "foo", "foo"]) tm.assert_numpy_array_equal(algos.mode(ser.values), exp.values) tm.assert_series_equal(ser.mode(), exp)