From ab47999ef1386d98644a707ecf9a52d41f1f7dd1 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Thu, 25 Aug 2022 14:22:23 +0000 Subject: [PATCH 1/5] REGR: iloc not possible for sparse DataFrame --- doc/source/whatsnew/v1.4.4.rst | 2 +- pandas/core/internals/managers.py | 11 +++++++++-- pandas/tests/indexing/test_iloc.py | 8 ++++++++ 3 files changed, 18 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v1.4.4.rst b/doc/source/whatsnew/v1.4.4.rst index deff6e194c3bd..23b470dc32d1c 100644 --- a/doc/source/whatsnew/v1.4.4.rst +++ b/doc/source/whatsnew/v1.4.4.rst @@ -31,7 +31,7 @@ Fixed regressions - Fixed regression in :meth:`DatetimeIndex.intersection` when the :class:`DatetimeIndex` has dates crossing daylight savings time (:issue:`46702`) - Fixed regression in :func:`merge` throwing an error when passing a :class:`Series` with a multi-level name (:issue:`47946`) - Fixed regression in :meth:`DataFrame.eval` creating a copy when updating inplace (:issue:`47449`) -- +- Fixed regression where getting a row using :meth:`DataFrame.iloc` with :class:`SparseDtype` would raise (:issue:`46406`) .. --------------------------------------------------------------------------- diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 3084bcea49f05..3597b0f4a393f 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1085,11 +1085,15 @@ def fast_xs(self, loc: int) -> SingleBlockManager: dtype = interleaved_dtype([blk.dtype for blk in self.blocks]) n = len(self) - if isinstance(dtype, ExtensionDtype): + + # GH#46406 + immutable_ea = isinstance(dtype, SparseDtype) + + if isinstance(dtype, ExtensionDtype) and not immutable_ea: cls = dtype.construct_array_type() result = cls._empty((n,), dtype=dtype) else: - result = np.empty(n, dtype=dtype) + result = np.empty(n, dtype=object if immutable_ea else dtype) result = ensure_wrapped_if_datetimelike(result) for blk in self.blocks: @@ -1098,6 +1102,9 @@ def fast_xs(self, loc: int) -> SingleBlockManager: for i, rl in enumerate(blk.mgr_locs): result[rl] = blk.iget((i, loc)) + if immutable_ea: + result = dtype.construct_array_type()._from_sequence(result, dtype=dtype) + block = new_block(result, placement=slice(0, len(result)), ndim=1) return SingleBlockManager(block, self.axes[0]) diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index 8cc6b6e73aaea..be28b388e5f21 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -22,6 +22,7 @@ Interval, NaT, Series, + SparseDtype, Timestamp, array, concat, @@ -428,6 +429,13 @@ def test_iloc_getitem_slice_dups(self): tm.assert_frame_equal(df.iloc[10:, :2], df2) tm.assert_frame_equal(df.iloc[10:, 2:], df1) + def test_iloc_getitem_sparse_df(self): + # GH#46406 + df = DataFrame([[1.0, 0.0, 1.5], [0.0, 2.0, 0.0]], dtype=SparseDtype(float)) + result = df.iloc[0] + expected = Series([1.0, 0.0, 1.5], dtype=SparseDtype(float), name=0) + tm.assert_series_equal(result, expected) + def test_iloc_setitem(self): df = DataFrame( np.random.randn(4, 4), index=np.arange(0, 8, 2), columns=np.arange(0, 12, 3) From ee3e4bfe07f2dbffacf138bb3b3b587f2c5263c1 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Thu, 25 Aug 2022 14:48:06 +0000 Subject: [PATCH 2/5] remove rogue trailing whitespace --- doc/source/whatsnew/v1.4.4.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.4.4.rst b/doc/source/whatsnew/v1.4.4.rst index 23b470dc32d1c..26199db9679e6 100644 --- a/doc/source/whatsnew/v1.4.4.rst +++ b/doc/source/whatsnew/v1.4.4.rst @@ -31,7 +31,7 @@ Fixed regressions - Fixed regression in :meth:`DatetimeIndex.intersection` when the :class:`DatetimeIndex` has dates crossing daylight savings time (:issue:`46702`) - Fixed regression in :func:`merge` throwing an error when passing a :class:`Series` with a multi-level name (:issue:`47946`) - Fixed regression in :meth:`DataFrame.eval` creating a copy when updating inplace (:issue:`47449`) -- Fixed regression where getting a row using :meth:`DataFrame.iloc` with :class:`SparseDtype` would raise (:issue:`46406`) +- Fixed regression where getting a row using :meth:`DataFrame.iloc` with :class:`SparseDtype` would raise (:issue:`46406`) .. --------------------------------------------------------------------------- From f58cdd946e98ba8a608f79c085a83da49af1fed9 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Fri, 26 Aug 2022 12:18:41 +0100 Subject: [PATCH 3/5] also test for loc --- pandas/tests/indexing/test_iloc.py | 7 ------- pandas/tests/indexing/test_loc.py | 8 ++++++++ 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index be28b388e5f21..c3962be1cbdd7 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -429,13 +429,6 @@ def test_iloc_getitem_slice_dups(self): tm.assert_frame_equal(df.iloc[10:, :2], df2) tm.assert_frame_equal(df.iloc[10:, 2:], df1) - def test_iloc_getitem_sparse_df(self): - # GH#46406 - df = DataFrame([[1.0, 0.0, 1.5], [0.0, 2.0, 0.0]], dtype=SparseDtype(float)) - result = df.iloc[0] - expected = Series([1.0, 0.0, 1.5], dtype=SparseDtype(float), name=0) - tm.assert_series_equal(result, expected) - def test_iloc_setitem(self): df = DataFrame( np.random.randn(4, 4), index=np.arange(0, 8, 2), columns=np.arange(0, 12, 3) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index cf7db65015fa7..4e5571c7087e7 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -1341,6 +1341,14 @@ def test_loc_getitem_sparse_series(self): expected = Series([1.0, 0.0], dtype=SparseDtype("float64", 0.0)) tm.assert_series_equal(result, expected) + @pytest.mark.parametrize("indexer", ["loc", "iloc"]) + def test_getitem_single_row_sparse_df(self, indexer): + # GH#46406 + df = DataFrame([[1.0, 0.0, 1.5], [0.0, 2.0, 0.0]], dtype=SparseDtype(float)) + result = getattr(df, indexer)[0] + expected = Series([1.0, 0.0, 1.5], dtype=SparseDtype(float), name=0) + tm.assert_series_equal(result, expected) + @pytest.mark.parametrize("key_type", [iter, np.array, Series, Index]) def test_loc_getitem_iterable(self, float_frame, key_type): idx = key_type(["A", "B", "C"]) From 8d90ba656edf385cb393c2e60c8c699bc1709b87 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Fri, 26 Aug 2022 12:30:25 +0100 Subject: [PATCH 4/5] mypy fixup --- pandas/core/internals/managers.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 3597b0f4a393f..9f4c799941afd 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1093,7 +1093,12 @@ def fast_xs(self, loc: int) -> SingleBlockManager: cls = dtype.construct_array_type() result = cls._empty((n,), dtype=dtype) else: - result = np.empty(n, dtype=object if immutable_ea else dtype) + # error: Argument "dtype" to "empty" has incompatible type + # "Union[Type[object], dtype[Any], ExtensionDtype, None]"; expected + # "None" + result = np.empty( + n, dtype=object if immutable_ea else dtype # type: ignore[arg-type] + ) result = ensure_wrapped_if_datetimelike(result) for blk in self.blocks: @@ -1103,6 +1108,7 @@ def fast_xs(self, loc: int) -> SingleBlockManager: result[rl] = blk.iget((i, loc)) if immutable_ea: + dtype = cast(ExtensionDtype, dtype) result = dtype.construct_array_type()._from_sequence(result, dtype=dtype) block = new_block(result, placement=slice(0, len(result)), ndim=1) From 13a2d9dbfb7310f6ef8f2bb7bb814b062f706642 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Fri, 26 Aug 2022 12:30:43 +0100 Subject: [PATCH 5/5] F401 'pandas.SparseDtype' imported but unused --- pandas/tests/indexing/test_iloc.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index c3962be1cbdd7..8cc6b6e73aaea 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -22,7 +22,6 @@ Interval, NaT, Series, - SparseDtype, Timestamp, array, concat,