From cc8e7385265b06cd4c4a67fc4536b012385187e1 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 21 May 2024 10:00:43 -0700 Subject: [PATCH 1/3] REF: Remove BlockManager.arrays in favor of BlockManager.blocks usage --- pandas/_testing/__init__.py | 4 +-- pandas/core/frame.py | 17 ++++----- pandas/core/generic.py | 6 ++-- pandas/core/indexing.py | 4 +-- pandas/core/internals/managers.py | 16 +-------- pandas/tests/apply/test_str.py | 2 +- pandas/tests/extension/base/casting.py | 5 +-- pandas/tests/extension/base/constructors.py | 4 +-- pandas/tests/extension/base/getitem.py | 2 +- pandas/tests/extension/base/reshaping.py | 2 +- pandas/tests/frame/indexing/test_setitem.py | 4 +-- pandas/tests/frame/methods/test_cov_corr.py | 2 +- pandas/tests/frame/methods/test_fillna.py | 2 +- pandas/tests/frame/methods/test_shift.py | 6 ++-- pandas/tests/frame/methods/test_values.py | 2 +- pandas/tests/frame/test_constructors.py | 36 +++++++++---------- pandas/tests/groupby/aggregate/test_cython.py | 2 +- pandas/tests/indexing/test_iloc.py | 2 +- pandas/tests/reshape/concat/test_concat.py | 25 +++++++------ pandas/tests/reshape/merge/test_merge.py | 4 +-- pandas/tests/series/indexing/test_setitem.py | 4 +-- 21 files changed, 72 insertions(+), 79 deletions(-) diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py index 12395b42bba19..099d1d2817f5b 100644 --- a/pandas/_testing/__init__.py +++ b/pandas/_testing/__init__.py @@ -538,8 +538,8 @@ def shares_memory(left, right) -> bool: left._mask, right._mask ) - if isinstance(left, DataFrame) and len(left._mgr.arrays) == 1: - arr = left._mgr.arrays[0] + if isinstance(left, DataFrame) and len(left._mgr.blocks) == 1: + arr = left._mgr.blocks[0].values return shares_memory(arr, right) raise NotImplementedError(type(left), type(right)) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index c875ec78891d6..724d4ff0408fa 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1062,7 +1062,7 @@ def _is_homogeneous_type(self) -> bool: False """ # The "<" part of "<=" here is for empty DataFrame cases - return len({arr.dtype for arr in self._mgr.arrays}) <= 1 + return len({block.values.dtype for block in self._mgr.blocks}) <= 1 @property def _can_fast_transpose(self) -> bool: @@ -5702,7 +5702,6 @@ def shift( periods = cast(int, periods) ncols = len(self.columns) - arrays = self._mgr.arrays if axis == 1 and periods != 0 and ncols > 0 and freq is None: if fill_value is lib.no_default: # We will infer fill_value to match the closest column @@ -5728,12 +5727,12 @@ def shift( result.columns = self.columns.copy() return result - elif len(arrays) > 1 or ( + elif len(self._mgr.blocks) > 1 or ( # If we only have one block and we know that we can't # keep the same dtype (i.e. the _can_hold_element check) # then we can go through the reindex_indexer path # (and avoid casting logic in the Block method). - not can_hold_element(arrays[0], fill_value) + not can_hold_element(self._mgr.blocks[0].values, fill_value) ): # GH#35488 we need to watch out for multi-block cases # We only get here with fill_value not-lib.no_default @@ -11429,7 +11428,7 @@ def _get_data() -> DataFrame: if numeric_only: df = _get_data() if axis is None: - dtype = find_common_type([arr.dtype for arr in df._mgr.arrays]) + dtype = find_common_type([block.values.dtype for block in df._mgr.blocks]) if isinstance(dtype, ExtensionDtype): df = df.astype(dtype) arr = concat_compat(list(df._iter_column_arrays())) @@ -11454,7 +11453,9 @@ def _get_data() -> DataFrame: # kurtosis excluded since groupby does not implement it if df.shape[1] and name != "kurt": - dtype = find_common_type([arr.dtype for arr in df._mgr.arrays]) + dtype = find_common_type( + [block.values.dtype for block in df._mgr.blocks] + ) if isinstance(dtype, ExtensionDtype): # GH 54341: fastpath for EA-backed axis=1 reductions # This flattens the frame into a single 1D array while keeping @@ -11528,8 +11529,8 @@ def _reduce_axis1(self, name: str, func, skipna: bool) -> Series: else: raise NotImplementedError(name) - for arr in self._mgr.arrays: - middle = func(arr, axis=0, skipna=skipna) + for blocks in self._mgr.blocks: + middle = func(blocks.values, axis=0, skipna=skipna) result = ufunc(result, middle) res_ser = self._constructor_sliced(result, index=self.index, copy=False) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 0e91aa23fcdb4..816e500381f2c 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6367,7 +6367,7 @@ def astype( # TODO(EA2D): special case not needed with 2D EAs dtype = pandas_dtype(dtype) if isinstance(dtype, ExtensionDtype) and all( - arr.dtype == dtype for arr in self._mgr.arrays + block.values.dtype == dtype for block in self._mgr.blocks ): return self.copy(deep=False) # GH 18099/22869: columnwise conversion to extension dtype @@ -11142,9 +11142,9 @@ def _logical_func( if ( self.ndim > 1 and axis == 1 - and len(self._mgr.arrays) > 1 + and len(self._mgr.blocks) > 1 # TODO(EA2D): special-case not needed - and all(x.ndim == 2 for x in self._mgr.arrays) + and all(block.values.ndim == 2 for block in self._mgr.blocks) and not kwargs ): # Fastpath avoiding potentially expensive transpose diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index e9bd3b389dd75..9140b1dbe9b33 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1804,10 +1804,10 @@ def _setitem_with_indexer(self, indexer, value, name: str = "iloc") -> None: # if there is only one block/type, still have to take split path # unless the block is one-dimensional or it can hold the value - if not take_split_path and len(self.obj._mgr.arrays) and self.ndim > 1: + if not take_split_path and len(self.obj._mgr.blocks) and self.ndim > 1: # in case of dict, keys are indices val = list(value.values()) if isinstance(value, dict) else value - arr = self.obj._mgr.arrays[0] + arr = self.obj._mgr.blocks[0].values take_split_path = not can_hold_element( arr, extract_array(val, extract_numpy=True) ) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 7c1bcbec1d3f2..3821c73f4d4be 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -341,20 +341,6 @@ def get_dtypes(self) -> npt.NDArray[np.object_]: dtypes = np.array([blk.dtype for blk in self.blocks], dtype=object) return dtypes.take(self.blknos) - @property - def arrays(self) -> list[ArrayLike]: - """ - Quick access to the backing arrays of the Blocks. - - Only for compatibility with ArrayManager for testing convenience. - Not to be used in actual code, and return value is not the same as the - ArrayManager method (list of 1D arrays vs iterator of 2D ndarrays / 1D EAs). - - Warning! The returned arrays don't handle Copy-on-Write, so this should - be used with caution (only in read-mode). - """ - return [blk.values for blk in self.blocks] - def __repr__(self) -> str: output = type(self).__name__ for i, ax in enumerate(self.axes): @@ -2068,7 +2054,7 @@ def array(self) -> ArrayLike: """ Quick access to the backing array of the Block. """ - return self.arrays[0] + return self.blocks[0].values # error: Cannot override writeable attribute with read-only property @property diff --git a/pandas/tests/apply/test_str.py b/pandas/tests/apply/test_str.py index e0c5e337fb746..e224b07a1097b 100644 --- a/pandas/tests/apply/test_str.py +++ b/pandas/tests/apply/test_str.py @@ -287,7 +287,7 @@ def test_transform_groupby_kernel_frame(request, float_frame, op): # same thing, but ensuring we have multiple blocks assert "E" not in float_frame.columns float_frame["E"] = float_frame["A"].copy() - assert len(float_frame._mgr.arrays) > 1 + assert len(float_frame._mgr.blocks) > 1 ones = np.ones(float_frame.shape[0]) gb2 = float_frame.groupby(ones) diff --git a/pandas/tests/extension/base/casting.py b/pandas/tests/extension/base/casting.py index 2bfe801c48a77..e924e38ee5030 100644 --- a/pandas/tests/extension/base/casting.py +++ b/pandas/tests/extension/base/casting.py @@ -30,8 +30,9 @@ def test_astype_object_frame(self, all_data): blk = result._mgr.blocks[0] assert isinstance(blk, NumpyBlock), type(blk) assert blk.is_object - assert isinstance(result._mgr.arrays[0], np.ndarray) - assert result._mgr.arrays[0].dtype == np.dtype(object) + arr = result._mgr.blocks[0].values + assert isinstance(arr, np.ndarray) + assert arr.dtype == np.dtype(object) # check that we can compare the dtypes comp = result.dtypes == df.dtypes diff --git a/pandas/tests/extension/base/constructors.py b/pandas/tests/extension/base/constructors.py index c32a6a6a115ac..639dc874c9fb9 100644 --- a/pandas/tests/extension/base/constructors.py +++ b/pandas/tests/extension/base/constructors.py @@ -69,7 +69,7 @@ def test_dataframe_constructor_from_dict(self, data, from_series): assert result.shape == (len(data), 1) if hasattr(result._mgr, "blocks"): assert isinstance(result._mgr.blocks[0], EABackedBlock) - assert isinstance(result._mgr.arrays[0], ExtensionArray) + assert isinstance(result._mgr.blocks[0].values, ExtensionArray) def test_dataframe_from_series(self, data): result = pd.DataFrame(pd.Series(data)) @@ -77,7 +77,7 @@ def test_dataframe_from_series(self, data): assert result.shape == (len(data), 1) if hasattr(result._mgr, "blocks"): assert isinstance(result._mgr.blocks[0], EABackedBlock) - assert isinstance(result._mgr.arrays[0], ExtensionArray) + assert isinstance(result._mgr.blocks[0].values, ExtensionArray) def test_series_given_mismatched_index_raises(self, data): msg = r"Length of values \(3\) does not match length of index \(5\)" diff --git a/pandas/tests/extension/base/getitem.py b/pandas/tests/extension/base/getitem.py index 935edce32a0ab..3fa2f50bf4930 100644 --- a/pandas/tests/extension/base/getitem.py +++ b/pandas/tests/extension/base/getitem.py @@ -450,7 +450,7 @@ def test_loc_len1(self, data): df = pd.DataFrame({"A": data}) res = df.loc[[0], "A"] assert res.ndim == 1 - assert res._mgr.arrays[0].ndim == 1 + assert res._mgr.blocks[0].ndim == 1 if hasattr(res._mgr, "blocks"): assert res._mgr._block.ndim == 1 diff --git a/pandas/tests/extension/base/reshaping.py b/pandas/tests/extension/base/reshaping.py index 489cd15644d04..24be94443c5ba 100644 --- a/pandas/tests/extension/base/reshaping.py +++ b/pandas/tests/extension/base/reshaping.py @@ -29,7 +29,7 @@ def test_concat(self, data, in_frame): assert dtype == data.dtype if hasattr(result._mgr, "blocks"): assert isinstance(result._mgr.blocks[0], EABackedBlock) - assert isinstance(result._mgr.arrays[0], ExtensionArray) + assert isinstance(result._mgr.blocks[0].values, ExtensionArray) @pytest.mark.parametrize("in_frame", [True, False]) def test_concat_all_na_block(self, data_missing, in_frame): diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index 1fe11c62188e8..6b3927c401395 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -340,8 +340,8 @@ def test_setitem_dt64tz(self, timezone_frame): # assert that A & C are not sharing the same base (e.g. they # are copies) # Note: This does not hold with Copy on Write (because of lazy copying) - v1 = df._mgr.arrays[1] - v2 = df._mgr.arrays[2] + v1 = df._mgr.blocks[1].values + v2 = df._mgr.blocks[2].values tm.assert_extension_array_equal(v1, v2) v1base = v1._ndarray.base v2base = v2._ndarray.base diff --git a/pandas/tests/frame/methods/test_cov_corr.py b/pandas/tests/frame/methods/test_cov_corr.py index 4151a1d27d06a..aeaf80f285f9d 100644 --- a/pandas/tests/frame/methods/test_cov_corr.py +++ b/pandas/tests/frame/methods/test_cov_corr.py @@ -214,7 +214,7 @@ def test_corr_item_cache(self): df["B"] = range(10)[::-1] ser = df["A"] # populate item_cache - assert len(df._mgr.arrays) == 2 # i.e. 2 blocks + assert len(df._mgr.blocks) == 2 _ = df.corr(numeric_only=True) diff --git a/pandas/tests/frame/methods/test_fillna.py b/pandas/tests/frame/methods/test_fillna.py index 2ef7780e9a6d5..1b852343266aa 100644 --- a/pandas/tests/frame/methods/test_fillna.py +++ b/pandas/tests/frame/methods/test_fillna.py @@ -47,7 +47,7 @@ def test_fillna_on_column_view(self): assert np.isnan(arr[:, 0]).all() # i.e. we didn't create a new 49-column block - assert len(df._mgr.arrays) == 1 + assert len(df._mgr.blocks) == 1 assert np.shares_memory(df.values, arr) def test_fillna_datetime(self, datetime_frame): diff --git a/pandas/tests/frame/methods/test_shift.py b/pandas/tests/frame/methods/test_shift.py index 72c1a123eac98..4e490e9e344ba 100644 --- a/pandas/tests/frame/methods/test_shift.py +++ b/pandas/tests/frame/methods/test_shift.py @@ -320,7 +320,7 @@ def test_shift_categorical1(self, frame_or_series): def get_cat_values(ndframe): # For Series we could just do ._values; for DataFrame # we may be able to do this if we ever have 2D Categoricals - return ndframe._mgr.arrays[0] + return ndframe._mgr.blocks[0].values cat = get_cat_values(obj) @@ -560,7 +560,7 @@ def test_shift_dt64values_int_fill_deprecated(self): # same thing but not consolidated; pre-2.0 we got different behavior df3 = DataFrame({"A": ser}) df3["B"] = ser - assert len(df3._mgr.arrays) == 2 + assert len(df3._mgr.blocks) == 2 result = df3.shift(1, axis=1, fill_value=0) tm.assert_frame_equal(result, expected) @@ -621,7 +621,7 @@ def test_shift_dt64values_axis1_invalid_fill(self, vals, as_cat): # same thing but not consolidated df3 = DataFrame({"A": ser}) df3["B"] = ser - assert len(df3._mgr.arrays) == 2 + assert len(df3._mgr.blocks) == 2 result = df3.shift(-1, axis=1, fill_value="foo") tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_values.py b/pandas/tests/frame/methods/test_values.py index dfece3fc7552b..2de2053bb705f 100644 --- a/pandas/tests/frame/methods/test_values.py +++ b/pandas/tests/frame/methods/test_values.py @@ -256,7 +256,7 @@ def test_private_values_dt64_multiblock(self): df = DataFrame({"A": dta[:4]}, copy=False) df["B"] = dta[4:] - assert len(df._mgr.arrays) == 2 + assert len(df._mgr.blocks) == 2 result = df._values expected = dta.reshape(2, 4).T diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index e2f12e6e459cb..9203b5bd9c585 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -180,24 +180,24 @@ def test_datetimelike_values_with_object_dtype(self, kind, frame_or_series): arr = arr[:, 0] obj = frame_or_series(arr, dtype=object) - assert obj._mgr.arrays[0].dtype == object - assert isinstance(obj._mgr.arrays[0].ravel()[0], scalar_type) + assert obj._mgr.blocks[0].values.dtype == object + assert isinstance(obj._mgr.blocks[0].values.ravel()[0], scalar_type) # go through a different path in internals.construction obj = frame_or_series(frame_or_series(arr), dtype=object) - assert obj._mgr.arrays[0].dtype == object - assert isinstance(obj._mgr.arrays[0].ravel()[0], scalar_type) + assert obj._mgr.blocks[0].values.dtype == object + assert isinstance(obj._mgr.blocks[0].values.ravel()[0], scalar_type) obj = frame_or_series(frame_or_series(arr), dtype=NumpyEADtype(object)) - assert obj._mgr.arrays[0].dtype == object - assert isinstance(obj._mgr.arrays[0].ravel()[0], scalar_type) + assert obj._mgr.blocks[0].values.dtype == object + assert isinstance(obj._mgr.blocks[0].values.ravel()[0], scalar_type) if frame_or_series is DataFrame: # other paths through internals.construction sers = [Series(x) for x in arr] obj = frame_or_series(sers, dtype=object) - assert obj._mgr.arrays[0].dtype == object - assert isinstance(obj._mgr.arrays[0].ravel()[0], scalar_type) + assert obj._mgr.blocks[0].values.dtype == object + assert isinstance(obj._mgr.blocks[0].values.ravel()[0], scalar_type) def test_series_with_name_not_matching_column(self): # GH#9232 @@ -297,7 +297,7 @@ def test_constructor_dtype_nocast_view_dataframe(self): def test_constructor_dtype_nocast_view_2d_array(self): df = DataFrame([[1, 2], [3, 4]], dtype="int64") df2 = DataFrame(df.values, dtype=df[0].dtype) - assert df2._mgr.arrays[0].flags.c_contiguous + assert df2._mgr.blocks[0].values.flags.c_contiguous @pytest.mark.xfail(using_pyarrow_string_dtype(), reason="conversion copies") def test_1d_object_array_does_not_copy(self): @@ -2484,9 +2484,9 @@ def get_base(obj): def check_views(c_only: bool = False): # Check that the underlying data behind df["c"] is still `c` # after setting with iloc. Since we don't know which entry in - # df._mgr.arrays corresponds to df["c"], we just check that exactly + # df._mgr.blocks corresponds to df["c"], we just check that exactly # one of these arrays is `c`. GH#38939 - assert sum(x is c for x in df._mgr.arrays) == 1 + assert sum(x.values is c for x in df._mgr.blocks) == 1 if c_only: # If we ever stop consolidating in setitem_with_indexer, # this will become unnecessary. @@ -2494,17 +2494,17 @@ def check_views(c_only: bool = False): assert ( sum( - get_base(x) is a - for x in df._mgr.arrays - if isinstance(x.dtype, np.dtype) + get_base(x.values) is a + for x in df._mgr.blocks + if isinstance(x.values.dtype, np.dtype) ) == 1 ) assert ( sum( - get_base(x) is b - for x in df._mgr.arrays - if isinstance(x.dtype, np.dtype) + get_base(x.values) is b + for x in df._mgr.blocks + if isinstance(x.values.dtype, np.dtype) ) == 1 ) @@ -3027,7 +3027,7 @@ def test_construction_from_ndarray_datetimelike(self): # constructed from 2D ndarray arr = np.arange(0, 12, dtype="datetime64[ns]").reshape(4, 3) df = DataFrame(arr) - assert all(isinstance(arr, DatetimeArray) for arr in df._mgr.arrays) + assert all(isinstance(block.values, DatetimeArray) for block in df._mgr.blocks) def test_construction_from_ndarray_with_eadtype_mismatched_columns(self): arr = np.random.default_rng(2).standard_normal((10, 2)) diff --git a/pandas/tests/groupby/aggregate/test_cython.py b/pandas/tests/groupby/aggregate/test_cython.py index aafd06e8f88cf..bf9e82480785c 100644 --- a/pandas/tests/groupby/aggregate/test_cython.py +++ b/pandas/tests/groupby/aggregate/test_cython.py @@ -285,7 +285,7 @@ def test_read_only_buffer_source_agg(agg): "species": ["setosa", "setosa", "setosa", "setosa", "setosa"], } ) - df._mgr.arrays[0].flags.writeable = False + df._mgr.blocks[0].values.flags.writeable = False result = df.groupby(["species"]).agg({"sepal_length": agg}) expected = df.copy().groupby(["species"]).agg({"sepal_length": agg}) diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index 172aa9878caec..8b90a6c32849d 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -114,7 +114,7 @@ def test_iloc_setitem_ea_inplace(self, frame_or_series, index_or_series_or_array if frame_or_series is Series: values = obj.values else: - values = obj._mgr.arrays[0] + values = obj._mgr.blocks[0].values if frame_or_series is Series: obj.iloc[:2] = index_or_series_or_array(arr[2:]) diff --git a/pandas/tests/reshape/concat/test_concat.py b/pandas/tests/reshape/concat/test_concat.py index f86cc0c69d363..550b424371a95 100644 --- a/pandas/tests/reshape/concat/test_concat.py +++ b/pandas/tests/reshape/concat/test_concat.py @@ -5,6 +5,7 @@ from collections.abc import Iterator from datetime import datetime from decimal import Decimal +import itertools import numpy as np import pytest @@ -51,35 +52,39 @@ def test_concat_copy(self): # These are actual copies. result = concat([df, df2, df3], axis=1) - for arr in result._mgr.arrays: - assert arr.base is not None + for block in result._mgr.blocks: + assert block.values.base is not None # These are the same. result = concat([df, df2, df3], axis=1) - for arr in result._mgr.arrays: + for block in result._mgr.blocks: + arr = block.values if arr.dtype.kind == "f": - assert arr.base is df._mgr.arrays[0].base + assert arr.base is df._mgr.blocks[0].values.base elif arr.dtype.kind in ["i", "u"]: - assert arr.base is df2._mgr.arrays[0].base + assert arr.base is df2._mgr.blocks[0].values.base elif arr.dtype == object: assert arr.base is not None # Float block was consolidated. df4 = DataFrame(np.random.default_rng(2).standard_normal((4, 1))) result = concat([df, df2, df3, df4], axis=1) - for arr in result._mgr.arrays: + for blocks in result._mgr.blocks: + arr = blocks.values if arr.dtype.kind == "f": # this is a view on some array in either df or df4 assert any( - np.shares_memory(arr, other) - for other in df._mgr.arrays + df4._mgr.arrays + np.shares_memory(arr, block.values) + for block in itertools.chain(df._mgr.blocks, df4._mgr.blocks) ) elif arr.dtype.kind in ["i", "u"]: - assert arr.base is df2._mgr.arrays[0].base + assert arr.base is df2._mgr.blocks[0].values.base elif arr.dtype == object: # this is a view on df3 - assert any(np.shares_memory(arr, other) for other in df3._mgr.arrays) + assert any( + np.shares_memory(arr, block.values) for block in df3._mgr.blocks + ) def test_concat_with_group_keys(self): # axis=0 diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index 5c5c06dea0008..0a5989e3c82e6 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -1451,8 +1451,8 @@ def test_merge_readonly(self): ) # make each underlying block array / column array read-only - for arr in data1._mgr.arrays: - arr.flags.writeable = False + for block in data1._mgr.blocks: + block.values.flags.writeable = False data1.merge(data2) # no error diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py index b94e6b6f0c6c8..69fba8925784e 100644 --- a/pandas/tests/series/indexing/test_setitem.py +++ b/pandas/tests/series/indexing/test_setitem.py @@ -461,9 +461,9 @@ def test_dt64tz_setitem_does_not_mutate_dti(self): ser = Series(dti) assert ser._values is not dti assert ser._values._ndarray.base is dti._data._ndarray.base - assert ser._mgr.arrays[0]._ndarray.base is dti._data._ndarray.base + assert ser._mgr.blocks[0].values._ndarray.base is dti._data._ndarray.base - assert ser._mgr.arrays[0] is not dti + assert ser._mgr.blocks[0].values is not dti ser[::3] = NaT assert ser[0] is NaT From fac9a99e69900c95715516f4aef683b65520bbcd Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 22 May 2024 10:30:08 -0700 Subject: [PATCH 2/3] Add back arrays --- pandas/core/internals/managers.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 3821c73f4d4be..9c57807e8a7bf 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -341,6 +341,20 @@ def get_dtypes(self) -> npt.NDArray[np.object_]: dtypes = np.array([blk.dtype for blk in self.blocks], dtype=object) return dtypes.take(self.blknos) + @property + def arrays(self) -> list[ArrayLike]: + """ + Quick access to the backing arrays of the Blocks. + Only for compatibility with ArrayManager for testing convenience. + Not to be used in actual code, and return value is not the same as the + ArrayManager method (list of 1D arrays vs iterator of 2D ndarrays / 1D EAs). + Warning! The returned arrays don't handle Copy-on-Write, so this should + be used with caution (only in read-mode). + """ + # TODO: Deprecate, usage in Dask + # https://github.com/dask/dask/blob/484fc3f1136827308db133cd256ba74df7a38d8c/dask/base.py#L1312 + return [blk.values for blk in self.blocks] + def __repr__(self) -> str: output = type(self).__name__ for i, ax in enumerate(self.axes): From e46d78bdbdc38df7ddc8c03e3c422e9148083d0b Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 22 May 2024 10:30:52 -0700 Subject: [PATCH 3/3] Whitespace --- pandas/core/internals/managers.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 9c57807e8a7bf..82b88d090f847 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -345,9 +345,11 @@ def get_dtypes(self) -> npt.NDArray[np.object_]: def arrays(self) -> list[ArrayLike]: """ Quick access to the backing arrays of the Blocks. + Only for compatibility with ArrayManager for testing convenience. Not to be used in actual code, and return value is not the same as the ArrayManager method (list of 1D arrays vs iterator of 2D ndarrays / 1D EAs). + Warning! The returned arrays don't handle Copy-on-Write, so this should be used with caution (only in read-mode). """