From ee3062e188012dee131821c487e8d4c1c3ae40c2 Mon Sep 17 00:00:00 2001 From: Luke Manley Date: Tue, 6 Dec 2022 07:06:24 -0500 Subject: [PATCH 1/8] unwrap setitem indexer for 1D-only extension arrays --- doc/source/whatsnew/v2.0.0.rst | 2 ++ pandas/core/internals/blocks.py | 6 +++++- pandas/tests/extension/base/setitem.py | 8 ++++++++ 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 54c7a8697e23f..3f52352eedc03 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -710,6 +710,7 @@ Indexing - Bug in :meth:`DataFrame.reindex` filling with wrong values when indexing columns and index for ``uint`` dtypes (:issue:`48184`) - Bug in :meth:`DataFrame.loc` coercing dtypes when setting values with a list indexer (:issue:`49159`) - Bug in :meth:`DataFrame.loc` raising ``ValueError`` with ``bool`` indexer and :class:`MultiIndex` (:issue:`47687`) +- Bug in :meth:`DataFrame.loc` raising ``IndexError`` when setting values for a pyarrow-backed column with a non-scalar indexer (:issue:`#####`) - Bug in :meth:`DataFrame.__setitem__` raising ``ValueError`` when right hand side is :class:`DataFrame` with :class:`MultiIndex` columns (:issue:`49121`) - Bug in :meth:`DataFrame.reindex` casting dtype to ``object`` when :class:`DataFrame` has single extension array column when re-indexing ``columns`` and ``index`` (:issue:`48190`) - Bug in :func:`~DataFrame.describe` when formatting percentiles in the resulting index showed more decimals than needed (:issue:`46362`) @@ -797,6 +798,7 @@ ExtensionArray - Bug in :meth:`Series.mean` overflowing unnecessarily with nullable integers (:issue:`48378`) - Bug in :meth:`Series.tolist` for nullable dtypes returning numpy scalars instead of python scalars (:issue:`49890`) - Bug when concatenating an empty DataFrame with an ExtensionDtype to another DataFrame with the same ExtensionDtype, the resulting dtype turned into object (:issue:`48510`) +- Bug when concatenating an empty DataFrame with an ExtensionDtype to another DataFrame with the same ExtensionDtype, the resulting dtype turned into object (:issue:`48510`) - Styler diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 95300c888eede..51d5d120ad9c7 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1669,7 +1669,11 @@ def _unwrap_setitem_indexer(self, indexer): # Should never have length > 2. Caller is responsible for checking. # Length 1 is reached vis setitem_single_block and setitem_single_column # each of which pass indexer=(pi,) - if len(indexer) == 2: + if len(indexer) == 1: + # GH##### 1D-only non-ndarray may not expect a tuple (e.g. pyarrow) + indexer = indexer[0] + + elif len(indexer) == 2: if all(isinstance(x, np.ndarray) and x.ndim == 2 for x in indexer): # GH#44703 went through indexing.maybe_convert_ix diff --git a/pandas/tests/extension/base/setitem.py b/pandas/tests/extension/base/setitem.py index 8dbf7d47374a6..547c007af59f5 100644 --- a/pandas/tests/extension/base/setitem.py +++ b/pandas/tests/extension/base/setitem.py @@ -58,6 +58,14 @@ def test_setitem_sequence(self, data, box_in_series): assert data[0] == original[1] assert data[1] == original[0] + def test_setitem_sequence_frame(self, data): + # GH##### + original = data.copy() + data = pd.DataFrame({"a": data}) + data.loc[[0, 1], "a"] = [original[1], original[0]] + assert data.loc[0, "a"] == original[1] + assert data.loc[1, "a"] == original[0] + def test_setitem_sequence_mismatched_length_raises(self, data, as_array): ser = pd.Series(data) original = ser.copy() From 98d91b8ba40c6744d56fef7c0bcfb8f88f8beb47 Mon Sep 17 00:00:00 2001 From: Luke Manley Date: Tue, 6 Dec 2022 07:25:39 -0500 Subject: [PATCH 2/8] fix test --- pandas/tests/extension/base/setitem.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/tests/extension/base/setitem.py b/pandas/tests/extension/base/setitem.py index 547c007af59f5..bf888106f385d 100644 --- a/pandas/tests/extension/base/setitem.py +++ b/pandas/tests/extension/base/setitem.py @@ -61,10 +61,10 @@ def test_setitem_sequence(self, data, box_in_series): def test_setitem_sequence_frame(self, data): # GH##### original = data.copy() - data = pd.DataFrame({"a": data}) - data.loc[[0, 1], "a"] = [original[1], original[0]] - assert data.loc[0, "a"] == original[1] - assert data.loc[1, "a"] == original[0] + data = pd.DataFrame({"a": data.copy(), "b": data.copy()}) + data.loc[[0, 1], "b"] = [original[1], original[0]] + assert data.loc[0, "b"] == original[1] + assert data.loc[1, "b"] == original[0] def test_setitem_sequence_mismatched_length_raises(self, data, as_array): ser = pd.Series(data) From 458a3d490b868e55a1d3d5dc82f1337e1f1b19a0 Mon Sep 17 00:00:00 2001 From: Luke Manley Date: Tue, 6 Dec 2022 07:32:52 -0500 Subject: [PATCH 3/8] gh refs --- doc/source/whatsnew/v2.0.0.rst | 2 +- pandas/core/internals/blocks.py | 2 +- pandas/tests/extension/base/setitem.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 3f52352eedc03..79d71d77952f6 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -710,7 +710,7 @@ Indexing - Bug in :meth:`DataFrame.reindex` filling with wrong values when indexing columns and index for ``uint`` dtypes (:issue:`48184`) - Bug in :meth:`DataFrame.loc` coercing dtypes when setting values with a list indexer (:issue:`49159`) - Bug in :meth:`DataFrame.loc` raising ``ValueError`` with ``bool`` indexer and :class:`MultiIndex` (:issue:`47687`) -- Bug in :meth:`DataFrame.loc` raising ``IndexError`` when setting values for a pyarrow-backed column with a non-scalar indexer (:issue:`#####`) +- Bug in :meth:`DataFrame.loc` raising ``IndexError`` when setting values for a pyarrow-backed column with a non-scalar indexer (:issue:`50085`) - Bug in :meth:`DataFrame.__setitem__` raising ``ValueError`` when right hand side is :class:`DataFrame` with :class:`MultiIndex` columns (:issue:`49121`) - Bug in :meth:`DataFrame.reindex` casting dtype to ``object`` when :class:`DataFrame` has single extension array column when re-indexing ``columns`` and ``index`` (:issue:`48190`) - Bug in :func:`~DataFrame.describe` when formatting percentiles in the resulting index showed more decimals than needed (:issue:`46362`) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 51d5d120ad9c7..aae00af7cc8e8 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1670,7 +1670,7 @@ def _unwrap_setitem_indexer(self, indexer): # Length 1 is reached vis setitem_single_block and setitem_single_column # each of which pass indexer=(pi,) if len(indexer) == 1: - # GH##### 1D-only non-ndarray may not expect a tuple (e.g. pyarrow) + # GH50085 1D-only non-ndarray may not expect a tuple (e.g. pyarrow) indexer = indexer[0] elif len(indexer) == 2: diff --git a/pandas/tests/extension/base/setitem.py b/pandas/tests/extension/base/setitem.py index bf888106f385d..543eca4a2fd48 100644 --- a/pandas/tests/extension/base/setitem.py +++ b/pandas/tests/extension/base/setitem.py @@ -59,7 +59,7 @@ def test_setitem_sequence(self, data, box_in_series): assert data[1] == original[0] def test_setitem_sequence_frame(self, data): - # GH##### + # GH50085 original = data.copy() data = pd.DataFrame({"a": data.copy(), "b": data.copy()}) data.loc[[0, 1], "b"] = [original[1], original[0]] From 8b07ef2870fe7f40369ae5ba215dd7cb294b319d Mon Sep 17 00:00:00 2001 From: Luke Manley Date: Tue, 6 Dec 2022 07:54:41 -0500 Subject: [PATCH 4/8] fix whatsnew --- doc/source/whatsnew/v2.0.0.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 79d71d77952f6..12af6df6f8ed5 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -798,7 +798,6 @@ ExtensionArray - Bug in :meth:`Series.mean` overflowing unnecessarily with nullable integers (:issue:`48378`) - Bug in :meth:`Series.tolist` for nullable dtypes returning numpy scalars instead of python scalars (:issue:`49890`) - Bug when concatenating an empty DataFrame with an ExtensionDtype to another DataFrame with the same ExtensionDtype, the resulting dtype turned into object (:issue:`48510`) -- Bug when concatenating an empty DataFrame with an ExtensionDtype to another DataFrame with the same ExtensionDtype, the resulting dtype turned into object (:issue:`48510`) - Styler From 52d1e2bf99061af91dee212719d2b5160e602d52 Mon Sep 17 00:00:00 2001 From: Luke Manley Date: Wed, 7 Dec 2022 19:41:24 -0500 Subject: [PATCH 5/8] update test --- pandas/tests/extension/base/setitem.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/pandas/tests/extension/base/setitem.py b/pandas/tests/extension/base/setitem.py index 543eca4a2fd48..874bcc6a72c1a 100644 --- a/pandas/tests/extension/base/setitem.py +++ b/pandas/tests/extension/base/setitem.py @@ -58,14 +58,6 @@ def test_setitem_sequence(self, data, box_in_series): assert data[0] == original[1] assert data[1] == original[0] - def test_setitem_sequence_frame(self, data): - # GH50085 - original = data.copy() - data = pd.DataFrame({"a": data.copy(), "b": data.copy()}) - data.loc[[0, 1], "b"] = [original[1], original[0]] - assert data.loc[0, "b"] == original[1] - assert data.loc[1, "b"] == original[0] - def test_setitem_sequence_mismatched_length_raises(self, data, as_array): ser = pd.Series(data) original = ser.copy() @@ -449,3 +441,11 @@ def test_setitem_invalid(self, data, invalid_scalar): with pytest.raises((ValueError, TypeError), match=msg): data[:] = invalid_scalar + + def test_setitem_2d_values(self, data): + # GH50085 + original = data.copy() + df = pd.DataFrame({"a": data, "b": data}) + df.loc[[0, 1], :] = df.loc[[1, 0], :].values + assert (df.loc[0, :] == original[1]).all() + assert (df.loc[1, :] == original[0]).all() From f8957493380e90781b5c40f4341e9c6d6df7fbc6 Mon Sep 17 00:00:00 2001 From: Luke Manley Date: Sat, 17 Dec 2022 07:23:11 -0500 Subject: [PATCH 6/8] move fix to ArrowExtensionArray.__setitem__ --- pandas/core/arrays/arrow/array.py | 6 +++++- pandas/core/arrays/base.py | 2 +- pandas/core/internals/blocks.py | 6 +----- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 254ff8894b36c..fd0ae3605e502 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -896,10 +896,14 @@ def __setitem__(self, key: int | slice | np.ndarray, value: Any) -> None: ------- None """ + # GH50085: unwrap 1D indexers + if isinstance(key, tuple) and len(key) == 1: + key = key[0] + key = check_array_indexer(self, key) - indices = self._indexing_key_to_indices(key) value = self._maybe_convert_setitem_value(value) + indices = self._indexing_key_to_indices(key) argsort = np.argsort(indices) indices = indices[argsort] diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index a3c201b402b0f..6ce70be2bf824 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -346,7 +346,7 @@ def __getitem__( """ raise AbstractMethodError(self) - def __setitem__(self, key: int | slice | np.ndarray, value: Any) -> None: + def __setitem__(self, key, value) -> None: """ Set one or more values inplace. diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index aae00af7cc8e8..95300c888eede 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1669,11 +1669,7 @@ def _unwrap_setitem_indexer(self, indexer): # Should never have length > 2. Caller is responsible for checking. # Length 1 is reached vis setitem_single_block and setitem_single_column # each of which pass indexer=(pi,) - if len(indexer) == 1: - # GH50085 1D-only non-ndarray may not expect a tuple (e.g. pyarrow) - indexer = indexer[0] - - elif len(indexer) == 2: + if len(indexer) == 2: if all(isinstance(x, np.ndarray) and x.ndim == 2 for x in indexer): # GH#44703 went through indexing.maybe_convert_ix From 6b3c4df256e97e29b8b2f5b988bbd76ab11b85c5 Mon Sep 17 00:00:00 2001 From: Luke Manley Date: Sat, 17 Dec 2022 07:24:24 -0500 Subject: [PATCH 7/8] remove typing --- pandas/core/arrays/arrow/array.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index fd0ae3605e502..b9e9913b5a780 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -875,7 +875,7 @@ def pyarrow_meth(data, skip_nulls, **kwargs): return self.dtype.na_value return result.as_py() - def __setitem__(self, key: int | slice | np.ndarray, value: Any) -> None: + def __setitem__(self, key, value) -> None: """Set one or more values inplace. Parameters From 981bdff42316b7f061a677a55310cb9b36f85157 Mon Sep 17 00:00:00 2001 From: Luke Manley Date: Sun, 18 Dec 2022 08:46:04 -0500 Subject: [PATCH 8/8] remove typing override --- pandas/core/arrays/datetimelike.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 63940741c3fe3..50069b6e65cfa 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -407,7 +407,7 @@ def _get_getitem_freq(self, key) -> BaseOffset | None: # error: Argument 1 of "__setitem__" is incompatible with supertype # "ExtensionArray"; supertype defines the argument type as "Union[int, # ndarray]" - def __setitem__( # type: ignore[override] + def __setitem__( self, key: int | Sequence[int] | Sequence[bool] | slice, value: NaTType | Any | Sequence[Any],