From 44f5ae83d736fa99b2e2f6d7201bb9ff61957a51 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 9 Mar 2021 16:20:20 +0100 Subject: [PATCH 1/5] [ArrayManager] TST: run (+fix/skip) pandas/tests/series/indexing tests --- .github/workflows/ci.yml | 4 +--- pandas/core/internals/array_manager.py | 8 +++++++- pandas/core/internals/blocks.py | 1 - pandas/tests/series/indexing/test_getitem.py | 12 ++++++++---- pandas/tests/series/indexing/test_setitem.py | 7 ++++--- pandas/tests/series/indexing/test_where.py | 5 +++++ 6 files changed, 25 insertions(+), 12 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 6c60522092739..850498378057f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -165,9 +165,7 @@ jobs: pytest pandas/tests/groupby/ pytest pandas/tests/resample/ pytest pandas/tests/reshape/merge - - pytest pandas/tests/series/methods - pytest pandas/tests/series/test_* + pytest pandas/tests/series/ # indexing subset (temporary since other tests don't pass yet) pytest pandas/tests/frame/indexing/test_indexing.py::TestDataFrameIndexing::test_setitem_boolean diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py index 97a2d4037bf26..6f50790573b14 100644 --- a/pandas/core/internals/array_manager.py +++ b/pandas/core/internals/array_manager.py @@ -1153,7 +1153,13 @@ def __init__( def _verify_integrity(self) -> None: (n_rows,) = self.shape assert len(self.arrays) == 1 - assert len(self.arrays[0]) == n_rows + arr = self.arrays[0] + assert len(arr) == n_rows + if not arr.ndim == 1: + raise ValueError( + "Passed array should be 1-dimensional, got array with " + f"{arr.ndim} dimensions instead." + ) @staticmethod def _normalize_axis(axis): diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index e084db77692f5..869fcef6b42ef 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1745,7 +1745,6 @@ def shift(self, periods: int, axis: int = 0, fill_value: Any = None) -> List[Blo return [self.make_block_same_class(new_values)] def where(self, other, cond, errors="raise", axis: int = 0) -> List[Block]: - cond = extract_bool_array(cond) assert not isinstance(other, (ABCIndex, ABCSeries, ABCDataFrame)) diff --git a/pandas/tests/series/indexing/test_getitem.py b/pandas/tests/series/indexing/test_getitem.py index 7642ccff31c6a..9a166fc8057ed 100644 --- a/pandas/tests/series/indexing/test_getitem.py +++ b/pandas/tests/series/indexing/test_getitem.py @@ -234,10 +234,12 @@ def test_getitem_partial_str_slice_high_reso_with_timedeltaindex(self): result = ser["1 days, 10:11:12.001001"] assert result == ser.iloc[1001] - def test_getitem_slice_2d(self, datetime_series): + def test_getitem_slice_2d(self, datetime_series, using_array_manager): # GH#30588 multi-dimensional indexing deprecated - with tm.assert_produces_warning(FutureWarning): + with tm.assert_produces_warning( + FutureWarning, check_stacklevel=not using_array_manager + ): # GH#30867 Don't want to support this long-term, but # for now ensure that the warning from Index # doesn't comes through via Series.__getitem__. @@ -518,9 +520,11 @@ def test_getitem_generator(string_series): Series(date_range("2012-01-01", periods=2, tz="CET")), ], ) -def test_getitem_ndim_deprecated(series): +def test_getitem_ndim_deprecated(series, using_array_manager): with tm.assert_produces_warning( - FutureWarning, match="Support for multi-dimensional indexing" + FutureWarning, + match="Support for multi-dimensional indexing", + check_stacklevel=not using_array_manager, ): result = series[:, None] diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py index 36ade2c8b8b43..63f337693dbd5 100644 --- a/pandas/tests/series/indexing/test_setitem.py +++ b/pandas/tests/series/indexing/test_setitem.py @@ -299,15 +299,16 @@ def test_setitem_invalidates_datetime_index_freq(self): assert dti[1] == ts assert dti.freq == "D" - def test_dt64tz_setitem_does_not_mutate_dti(self): + def test_dt64tz_setitem_does_not_mutate_dti(self, using_array_manager): # GH#21907, GH#24096 dti = date_range("2016-01-01", periods=10, tz="US/Pacific") ts = dti[0] ser = Series(dti) assert ser._values is not dti assert ser._values._data.base is not dti._data._data.base - assert ser._mgr.blocks[0].values is not dti - assert ser._mgr.blocks[0].values._data.base is not dti._data._data.base + if not using_array_manager: + assert ser._mgr.blocks[0].values is not dti + assert ser._mgr.blocks[0].values._data.base is not dti._data._data.base ser[::3] = NaT assert ser[0] is NaT diff --git a/pandas/tests/series/indexing/test_where.py b/pandas/tests/series/indexing/test_where.py index 799f3d257434d..b13fd18405839 100644 --- a/pandas/tests/series/indexing/test_where.py +++ b/pandas/tests/series/indexing/test_where.py @@ -1,6 +1,8 @@ import numpy as np import pytest +import pandas.util._test_decorators as td + from pandas.core.dtypes.common import is_integer import pandas as pd @@ -471,6 +473,9 @@ def test_where_categorical(klass): tm.assert_equal(exp, res) +# TODO(ArrayManager) DataFrame.values not yet correctly returning datetime array +# for categorical with datetime categories +@td.skip_array_manager_not_yet_implemented def test_where_datetimelike_categorical(tz_naive_fixture): # GH#37682 tz = tz_naive_fixture From 182af4b2c22d0846015a7a750962f4230e0f9c33 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 10 Mar 2021 12:44:52 +0100 Subject: [PATCH 2/5] use .arrays[0] --- pandas/tests/series/indexing/test_setitem.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py index 63f337693dbd5..88e6f0aa0b887 100644 --- a/pandas/tests/series/indexing/test_setitem.py +++ b/pandas/tests/series/indexing/test_setitem.py @@ -299,16 +299,15 @@ def test_setitem_invalidates_datetime_index_freq(self): assert dti[1] == ts assert dti.freq == "D" - def test_dt64tz_setitem_does_not_mutate_dti(self, using_array_manager): + def test_dt64tz_setitem_does_not_mutate_dti(self): # GH#21907, GH#24096 dti = date_range("2016-01-01", periods=10, tz="US/Pacific") ts = dti[0] ser = Series(dti) assert ser._values is not dti assert ser._values._data.base is not dti._data._data.base - if not using_array_manager: - assert ser._mgr.blocks[0].values is not dti - assert ser._mgr.blocks[0].values._data.base is not dti._data._data.base + assert ser._mgr.arrays[0] is not dti + assert ser._mgr.arrays[0]._data.base is not dti._data._data.base ser[::3] = NaT assert ser[0] is NaT From 9143b258151e1468f1ae82920567b844a50af123 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 10 Mar 2021 16:26:00 +0100 Subject: [PATCH 3/5] undo whitespace change --- pandas/core/internals/blocks.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 727453139aeb4..a11ca0aa82b29 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1724,6 +1724,7 @@ def shift(self, periods: int, axis: int = 0, fill_value: Any = None) -> List[Blo return [self.make_block_same_class(new_values)] def where(self, other, cond, errors="raise", axis: int = 0) -> List[Block]: + cond = extract_bool_array(cond) assert not isinstance(other, (ABCIndex, ABCSeries, ABCDataFrame)) From 6afe8bf4add50d3d2435b8135c684a0dd2198e42 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 11 Mar 2021 11:01:16 +0100 Subject: [PATCH 4/5] fix matplotlib compat --- pandas/core/internals/array_manager.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py index f979402b42374..003e788d1c314 100644 --- a/pandas/core/internals/array_manager.py +++ b/pandas/core/internals/array_manager.py @@ -797,8 +797,6 @@ def get_slice(self, slobj: slice, axis: int = 0) -> ArrayManager: return type(self)(arrays, new_axes, verify_integrity=False) - getitem_mgr = get_slice - def fast_xs(self, loc: int) -> ArrayLike: """ Return the array corresponding to `frame.iloc[loc]`. @@ -1244,6 +1242,11 @@ def get_slice(self, slobj: slice, axis: int = 0) -> SingleArrayManager: new_index = self.index[slobj] return type(self)([new_array], [new_index]) + def getitem_mgr(self, indexer) -> SingleArrayManager: + new_array = self.array[indexer] + new_index = self.index[indexer] + return type(self)([new_array], [new_index]) + def apply(self, func, **kwargs): if callable(func): new_array = func(self.array, **kwargs) From 586c0442bfb0bc334b8c3d677f68c015ba6ff499 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 12 Mar 2021 10:14:54 +0100 Subject: [PATCH 5/5] fixup merge --- pandas/core/internals/array_manager.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py index 17588faf4f06f..bce364f2be4af 100644 --- a/pandas/core/internals/array_manager.py +++ b/pandas/core/internals/array_manager.py @@ -202,7 +202,7 @@ def get_dtypes(self): def __repr__(self) -> str: output = type(self).__name__ output += f"\nIndex: {self._axes[0]}" - if self.ndim == 1: + if self.ndim == 2: output += f"\nColumns: {self._axes[1]}" output += f"\n{len(self.arrays)} arrays:" for arr in self.arrays: @@ -1230,11 +1230,6 @@ def getitem_mgr(self, indexer) -> SingleArrayManager: new_index = self.index[indexer] return type(self)([new_array], [new_index]) - def getitem_mgr(self, indexer) -> SingleArrayManager: - new_array = self.array[indexer] - new_index = self.index[indexer] - return type(self)([new_array], [new_index]) - def apply(self, func, **kwargs): if callable(func): new_array = func(self.array, **kwargs)