
REF: Remove BlockManager.arrays in favor of BlockManager.blocks usage #58804


Merged: 3 commits, Jun 3, 2024
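
The change is mechanical throughout the diff: every internal consumer of BlockManager.arrays (a freshly built list of each block's backing array) now iterates BlockManager.blocks and reads .values off each block. A minimal sketch of the two spellings, written against the private _mgr attribute purely for illustration and assuming default dtype inference (the arrays property itself is kept for now, see the managers.py hunk below):

import numpy as np
import pandas as pd

df = pd.DataFrame({"a": np.arange(3), "b": ["x", "y", "z"]})

# Old spelling (removed from pandas internals by this PR): the .arrays
# property builds a new list of block values on every access.
old = [arr.dtype for arr in df._mgr.arrays]

# New spelling: go through the blocks directly and take .values off each one.
new = [blk.values.dtype for blk in df._mgr.blocks]

assert old == new  # same values, without materializing the intermediate list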
4 changes: 2 additions & 2 deletions pandas/_testing/__init__.py
@@ -538,8 +538,8 @@ def shares_memory(left, right) -> bool:
            left._mask, right._mask
        )

-    if isinstance(left, DataFrame) and len(left._mgr.arrays) == 1:
-        arr = left._mgr.arrays[0]
+    if isinstance(left, DataFrame) and len(left._mgr.blocks) == 1:
+        arr = left._mgr.blocks[0].values
        return shares_memory(arr, right)

    raise NotImplementedError(type(left), type(right))
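
For context on the hunk above: shares_memory is the helper exposed as pandas._testing.shares_memory, and with a single-block DataFrame on the left it now unwraps the lone block's values before recursing. A rough usage sketch, assuming the constructor keeps a 2D ndarray input as a view (the default, non-copying behavior):

import numpy as np
import pandas as pd
import pandas._testing as tm

arr = np.arange(6, dtype=np.int64).reshape(2, 3)
df = pd.DataFrame(arr)  # a single int64 block, expected to view `arr`

# The left operand is a one-block DataFrame, so the helper compares
# df._mgr.blocks[0].values against `arr`.
assert tm.shares_memory(df, arr)
assert not tm.shares_memory(df, arr.copy())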
17 changes: 9 additions & 8 deletions pandas/core/frame.py
@@ -1062,7 +1062,7 @@ def _is_homogeneous_type(self) -> bool:
        False
        """
        # The "<" part of "<=" here is for empty DataFrame cases
-        return len({arr.dtype for arr in self._mgr.arrays}) <= 1
+        return len({block.values.dtype for block in self._mgr.blocks}) <= 1

    @property
    def _can_fast_transpose(self) -> bool:
@@ -5702,7 +5702,6 @@ def shift(
        periods = cast(int, periods)

        ncols = len(self.columns)
-        arrays = self._mgr.arrays
        if axis == 1 and periods != 0 and ncols > 0 and freq is None:
            if fill_value is lib.no_default:
                # We will infer fill_value to match the closest column
@@ -5728,12 +5727,12 @@

            result.columns = self.columns.copy()
            return result
-        elif len(arrays) > 1 or (
+        elif len(self._mgr.blocks) > 1 or (
            # If we only have one block and we know that we can't
            # keep the same dtype (i.e. the _can_hold_element check)
            # then we can go through the reindex_indexer path
            # (and avoid casting logic in the Block method).
-            not can_hold_element(arrays[0], fill_value)
+            not can_hold_element(self._mgr.blocks[0].values, fill_value)
        ):
            # GH#35488 we need to watch out for multi-block cases
            # We only get here with fill_value not-lib.no_default
@@ -11429,7 +11428,7 @@ def _get_data() -> DataFrame:
        if numeric_only:
            df = _get_data()
        if axis is None:
-            dtype = find_common_type([arr.dtype for arr in df._mgr.arrays])
+            dtype = find_common_type([block.values.dtype for block in df._mgr.blocks])
            if isinstance(dtype, ExtensionDtype):
                df = df.astype(dtype)
                arr = concat_compat(list(df._iter_column_arrays()))
@@ -11454,7 +11453,9 @@

        # kurtosis excluded since groupby does not implement it
        if df.shape[1] and name != "kurt":
-            dtype = find_common_type([arr.dtype for arr in df._mgr.arrays])
+            dtype = find_common_type(
+                [block.values.dtype for block in df._mgr.blocks]
+            )
            if isinstance(dtype, ExtensionDtype):
                # GH 54341: fastpath for EA-backed axis=1 reductions
                # This flattens the frame into a single 1D array while keeping
@@ -11528,8 +11529,8 @@ def _reduce_axis1(self, name: str, func, skipna: bool) -> Series:
        else:
            raise NotImplementedError(name)

-        for arr in self._mgr.arrays:
-            middle = func(arr, axis=0, skipna=skipna)
+        for blocks in self._mgr.blocks:
+            middle = func(blocks.values, axis=0, skipna=skipna)
            result = ufunc(result, middle)

        res_ser = self._constructor_sliced(result, index=self.index, copy=False)
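
The _reduce_axis1 loop in the last hunk reduces each block along axis 0 (blocks are stored transposed, one row per original column) and folds the per-block results together with a ufunc such as np.minimum. A standalone sketch of that fold with made-up block data, not the actual pandas implementation:

import numpy as np

# Two "blocks" for a 3-row frame: a two-column float block and a one-column
# int block, stored transposed (n_columns x n_rows) as the BlockManager keeps them.
blocks = [
    np.array([[1.0, 5.0, 3.0], [4.0, 2.0, 6.0]]),
    np.array([[7, 0, 9]]),
]

result = np.full(3, np.inf)  # identity for a running minimum
for values in blocks:
    middle = np.nanmin(values, axis=0)  # per-row minimum within this block
    result = np.minimum(result, middle)

print(result)  # row-wise min across all columns -> [1. 0. 3.]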
6 changes: 3 additions & 3 deletions pandas/core/generic.py
@@ -6367,7 +6367,7 @@ def astype(
            # TODO(EA2D): special case not needed with 2D EAs
            dtype = pandas_dtype(dtype)
            if isinstance(dtype, ExtensionDtype) and all(
-                arr.dtype == dtype for arr in self._mgr.arrays
+                block.values.dtype == dtype for block in self._mgr.blocks
            ):
                return self.copy(deep=False)
            # GH 18099/22869: columnwise conversion to extension dtype
@@ -11142,9 +11142,9 @@ def _logical_func(
        if (
            self.ndim > 1
            and axis == 1
-            and len(self._mgr.arrays) > 1
+            and len(self._mgr.blocks) > 1
            # TODO(EA2D): special-case not needed
-            and all(x.ndim == 2 for x in self._mgr.arrays)
+            and all(block.values.ndim == 2 for block in self._mgr.blocks)
            and not kwargs
        ):
            # Fastpath avoiding potentially expensive transpose
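
The first generic.py hunk is the astype no-op fastpath: when every block already carries the requested extension dtype, astype returns a shallow (Copy-on-Write) copy instead of converting column by column. A small illustration, assuming a nullable Int64 column:

import pandas as pd

df = pd.DataFrame({"a": pd.array([1, 2, None], dtype="Int64")})

# Every block already has the target ExtensionDtype, so this hits the
# copy(deep=False) fastpath rather than a per-column conversion.
out = df.astype("Int64")
assert out.dtypes["a"] == "Int64"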
4 changes: 2 additions & 2 deletions pandas/core/indexing.py
@@ -1804,10 +1804,10 @@ def _setitem_with_indexer(self, indexer, value, name: str = "iloc") -> None:

        # if there is only one block/type, still have to take split path
        # unless the block is one-dimensional or it can hold the value
-        if not take_split_path and len(self.obj._mgr.arrays) and self.ndim > 1:
+        if not take_split_path and len(self.obj._mgr.blocks) and self.ndim > 1:
            # in case of dict, keys are indices
            val = list(value.values()) if isinstance(value, dict) else value
-            arr = self.obj._mgr.arrays[0]
+            arr = self.obj._mgr.blocks[0].values
            take_split_path = not can_hold_element(
                arr, extract_array(val, extract_numpy=True)
            )
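
The indexing.py change keeps the same split-path heuristic: if the first block's values cannot hold the incoming value without casting, the setter takes the column-by-column (split) path. A rough sketch of what can_hold_element decides, using the internal helper from pandas.core.dtypes.cast purely to illustrate the check:

import numpy as np
from pandas.core.dtypes.cast import can_hold_element

int_block_values = np.array([1, 2, 3], dtype=np.int64)

assert can_hold_element(int_block_values, 7)        # fits losslessly, fast path is fine
assert not can_hold_element(int_block_values, 1.5)  # would require casting, take the split path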
4 changes: 3 additions & 1 deletion pandas/core/internals/managers.py
@@ -353,6 +353,8 @@ def arrays(self) -> list[ArrayLike]:
        Warning! The returned arrays don't handle Copy-on-Write, so this should
        be used with caution (only in read-mode).
        """
+        # TODO: Deprecate, usage in Dask
+        # https://github.com/dask/dask/blob/484fc3f1136827308db133cd256ba74df7a38d8c/dask/base.py#L1312
        return [blk.values for blk in self.blocks]

    def __repr__(self) -> str:
@@ -2068,7 +2070,7 @@ def array(self) -> ArrayLike:
        """
        Quick access to the backing array of the Block.
        """
-        return self.arrays[0]
+        return self.blocks[0].values

    # error: Cannot override writeable attribute with read-only property
    @property
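
The arrays property stays for now (the new TODO flags external usage in Dask), but its docstring warning still applies: the returned arrays bypass Copy-on-Write tracking, so they are only safe to read. A sketch of that distinction, assuming Copy-on-Write semantics:

import pandas as pd

df = pd.DataFrame({"a": [1, 2, 3]})
view = df[:]  # under Copy-on-Write, a lazy copy that still shares memory with df

# Read-only use of the internal arrays is fine:
dtypes = [arr.dtype for arr in df._mgr.arrays]

# Writing through them would skip the CoW bookkeeping and could silently
# mutate `view` as well, which is why the docstring restricts the property
# to read-mode:
# df._mgr.arrays[0][0, 0] = 99  # don't do this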
2 changes: 1 addition & 1 deletion pandas/tests/apply/test_str.py
@@ -287,7 +287,7 @@ def test_transform_groupby_kernel_frame(request, float_frame, op):
    # same thing, but ensuring we have multiple blocks
    assert "E" not in float_frame.columns
    float_frame["E"] = float_frame["A"].copy()
-    assert len(float_frame._mgr.arrays) > 1
+    assert len(float_frame._mgr.blocks) > 1

    ones = np.ones(float_frame.shape[0])
    gb2 = float_frame.groupby(ones)
5 changes: 3 additions & 2 deletions pandas/tests/extension/base/casting.py
@@ -30,8 +30,9 @@ def test_astype_object_frame(self, all_data):
        blk = result._mgr.blocks[0]
        assert isinstance(blk, NumpyBlock), type(blk)
        assert blk.is_object
-        assert isinstance(result._mgr.arrays[0], np.ndarray)
-        assert result._mgr.arrays[0].dtype == np.dtype(object)
+        arr = result._mgr.blocks[0].values
+        assert isinstance(arr, np.ndarray)
+        assert arr.dtype == np.dtype(object)

        # check that we can compare the dtypes
        comp = result.dtypes == df.dtypes
4 changes: 2 additions & 2 deletions pandas/tests/extension/base/constructors.py
@@ -69,15 +69,15 @@ def test_dataframe_constructor_from_dict(self, data, from_series):
        assert result.shape == (len(data), 1)
        if hasattr(result._mgr, "blocks"):
            assert isinstance(result._mgr.blocks[0], EABackedBlock)
-            assert isinstance(result._mgr.arrays[0], ExtensionArray)
+            assert isinstance(result._mgr.blocks[0].values, ExtensionArray)

    def test_dataframe_from_series(self, data):
        result = pd.DataFrame(pd.Series(data))
        assert result.dtypes[0] == data.dtype
        assert result.shape == (len(data), 1)
        if hasattr(result._mgr, "blocks"):
            assert isinstance(result._mgr.blocks[0], EABackedBlock)
-            assert isinstance(result._mgr.arrays[0], ExtensionArray)
+            assert isinstance(result._mgr.blocks[0].values, ExtensionArray)

    def test_series_given_mismatched_index_raises(self, data):
        msg = r"Length of values \(3\) does not match length of index \(5\)"
2 changes: 1 addition & 1 deletion pandas/tests/extension/base/getitem.py
@@ -450,7 +450,7 @@ def test_loc_len1(self, data):
        df = pd.DataFrame({"A": data})
        res = df.loc[[0], "A"]
        assert res.ndim == 1
-        assert res._mgr.arrays[0].ndim == 1
+        assert res._mgr.blocks[0].ndim == 1
        if hasattr(res._mgr, "blocks"):
            assert res._mgr._block.ndim == 1

2 changes: 1 addition & 1 deletion pandas/tests/extension/base/reshaping.py
@@ -29,7 +29,7 @@ def test_concat(self, data, in_frame):
        assert dtype == data.dtype
        if hasattr(result._mgr, "blocks"):
            assert isinstance(result._mgr.blocks[0], EABackedBlock)
-            assert isinstance(result._mgr.arrays[0], ExtensionArray)
+            assert isinstance(result._mgr.blocks[0].values, ExtensionArray)

    @pytest.mark.parametrize("in_frame", [True, False])
    def test_concat_all_na_block(self, data_missing, in_frame):
4 changes: 2 additions & 2 deletions pandas/tests/frame/indexing/test_setitem.py
@@ -340,8 +340,8 @@ def test_setitem_dt64tz(self, timezone_frame):
        # assert that A & C are not sharing the same base (e.g. they
        # are copies)
        # Note: This does not hold with Copy on Write (because of lazy copying)
-        v1 = df._mgr.arrays[1]
-        v2 = df._mgr.arrays[2]
+        v1 = df._mgr.blocks[1].values
+        v2 = df._mgr.blocks[2].values
        tm.assert_extension_array_equal(v1, v2)
        v1base = v1._ndarray.base
        v2base = v2._ndarray.base
2 changes: 1 addition & 1 deletion pandas/tests/frame/methods/test_cov_corr.py
@@ -214,7 +214,7 @@ def test_corr_item_cache(self):
        df["B"] = range(10)[::-1]

        ser = df["A"]  # populate item_cache
-        assert len(df._mgr.arrays) == 2  # i.e. 2 blocks
+        assert len(df._mgr.blocks) == 2

        _ = df.corr(numeric_only=True)

2 changes: 1 addition & 1 deletion pandas/tests/frame/methods/test_fillna.py
@@ -47,7 +47,7 @@ def test_fillna_on_column_view(self):
        assert np.isnan(arr[:, 0]).all()

        # i.e. we didn't create a new 49-column block
-        assert len(df._mgr.arrays) == 1
+        assert len(df._mgr.blocks) == 1
        assert np.shares_memory(df.values, arr)

    def test_fillna_datetime(self, datetime_frame):
6 changes: 3 additions & 3 deletions pandas/tests/frame/methods/test_shift.py
@@ -320,7 +320,7 @@ def test_shift_categorical1(self, frame_or_series):
        def get_cat_values(ndframe):
            # For Series we could just do ._values; for DataFrame
            # we may be able to do this if we ever have 2D Categoricals
-            return ndframe._mgr.arrays[0]
+            return ndframe._mgr.blocks[0].values

        cat = get_cat_values(obj)

@@ -560,7 +560,7 @@ def test_shift_dt64values_int_fill_deprecated(self):
        # same thing but not consolidated; pre-2.0 we got different behavior
        df3 = DataFrame({"A": ser})
        df3["B"] = ser
-        assert len(df3._mgr.arrays) == 2
+        assert len(df3._mgr.blocks) == 2
        result = df3.shift(1, axis=1, fill_value=0)
        tm.assert_frame_equal(result, expected)

@@ -621,7 +621,7 @@ def test_shift_dt64values_axis1_invalid_fill(self, vals, as_cat):
        # same thing but not consolidated
        df3 = DataFrame({"A": ser})
        df3["B"] = ser
-        assert len(df3._mgr.arrays) == 2
+        assert len(df3._mgr.blocks) == 2
        result = df3.shift(-1, axis=1, fill_value="foo")
        tm.assert_frame_equal(result, expected)

2 changes: 1 addition & 1 deletion pandas/tests/frame/methods/test_values.py
@@ -256,7 +256,7 @@ def test_private_values_dt64_multiblock(self):
        df = DataFrame({"A": dta[:4]}, copy=False)
        df["B"] = dta[4:]

-        assert len(df._mgr.arrays) == 2
+        assert len(df._mgr.blocks) == 2

        result = df._values
        expected = dta.reshape(2, 4).T
36 changes: 18 additions & 18 deletions pandas/tests/frame/test_constructors.py
@@ -180,24 +180,24 @@ def test_datetimelike_values_with_object_dtype(self, kind, frame_or_series):
            arr = arr[:, 0]

        obj = frame_or_series(arr, dtype=object)
-        assert obj._mgr.arrays[0].dtype == object
-        assert isinstance(obj._mgr.arrays[0].ravel()[0], scalar_type)
+        assert obj._mgr.blocks[0].values.dtype == object
+        assert isinstance(obj._mgr.blocks[0].values.ravel()[0], scalar_type)

        # go through a different path in internals.construction
        obj = frame_or_series(frame_or_series(arr), dtype=object)
-        assert obj._mgr.arrays[0].dtype == object
-        assert isinstance(obj._mgr.arrays[0].ravel()[0], scalar_type)
+        assert obj._mgr.blocks[0].values.dtype == object
+        assert isinstance(obj._mgr.blocks[0].values.ravel()[0], scalar_type)

        obj = frame_or_series(frame_or_series(arr), dtype=NumpyEADtype(object))
-        assert obj._mgr.arrays[0].dtype == object
-        assert isinstance(obj._mgr.arrays[0].ravel()[0], scalar_type)
+        assert obj._mgr.blocks[0].values.dtype == object
+        assert isinstance(obj._mgr.blocks[0].values.ravel()[0], scalar_type)

        if frame_or_series is DataFrame:
            # other paths through internals.construction
            sers = [Series(x) for x in arr]
            obj = frame_or_series(sers, dtype=object)
-            assert obj._mgr.arrays[0].dtype == object
-            assert isinstance(obj._mgr.arrays[0].ravel()[0], scalar_type)
+            assert obj._mgr.blocks[0].values.dtype == object
+            assert isinstance(obj._mgr.blocks[0].values.ravel()[0], scalar_type)

    def test_series_with_name_not_matching_column(self):
        # GH#9232
@@ -297,7 +297,7 @@ def test_constructor_dtype_nocast_view_dataframe(self):
    def test_constructor_dtype_nocast_view_2d_array(self):
        df = DataFrame([[1, 2], [3, 4]], dtype="int64")
        df2 = DataFrame(df.values, dtype=df[0].dtype)
-        assert df2._mgr.arrays[0].flags.c_contiguous
+        assert df2._mgr.blocks[0].values.flags.c_contiguous

    @pytest.mark.xfail(using_pyarrow_string_dtype(), reason="conversion copies")
    def test_1d_object_array_does_not_copy(self):
@@ -2484,27 +2484,27 @@ def get_base(obj):
        def check_views(c_only: bool = False):
            # Check that the underlying data behind df["c"] is still `c`
            # after setting with iloc. Since we don't know which entry in
-            # df._mgr.arrays corresponds to df["c"], we just check that exactly
+            # df._mgr.blocks corresponds to df["c"], we just check that exactly
            # one of these arrays is `c`. GH#38939
-            assert sum(x is c for x in df._mgr.arrays) == 1
+            assert sum(x.values is c for x in df._mgr.blocks) == 1
            if c_only:
                # If we ever stop consolidating in setitem_with_indexer,
                # this will become unnecessary.
                return

            assert (
                sum(
-                    get_base(x) is a
-                    for x in df._mgr.arrays
-                    if isinstance(x.dtype, np.dtype)
+                    get_base(x.values) is a
+                    for x in df._mgr.blocks
+                    if isinstance(x.values.dtype, np.dtype)
                )
                == 1
            )
            assert (
                sum(
-                    get_base(x) is b
-                    for x in df._mgr.arrays
-                    if isinstance(x.dtype, np.dtype)
+                    get_base(x.values) is b
+                    for x in df._mgr.blocks
+                    if isinstance(x.values.dtype, np.dtype)
                )
                == 1
            )
@@ -3027,7 +3027,7 @@ def test_construction_from_ndarray_datetimelike(self):
        # constructed from 2D ndarray
        arr = np.arange(0, 12, dtype="datetime64[ns]").reshape(4, 3)
        df = DataFrame(arr)
-        assert all(isinstance(arr, DatetimeArray) for arr in df._mgr.arrays)
+        assert all(isinstance(block.values, DatetimeArray) for block in df._mgr.blocks)

    def test_construction_from_ndarray_with_eadtype_mismatched_columns(self):
        arr = np.random.default_rng(2).standard_normal((10, 2))
2 changes: 1 addition & 1 deletion pandas/tests/groupby/aggregate/test_cython.py
@@ -285,7 +285,7 @@ def test_read_only_buffer_source_agg(agg):
            "species": ["setosa", "setosa", "setosa", "setosa", "setosa"],
        }
    )
-    df._mgr.arrays[0].flags.writeable = False
+    df._mgr.blocks[0].values.flags.writeable = False

    result = df.groupby(["species"]).agg({"sepal_length": agg})
    expected = df.copy().groupby(["species"]).agg({"sepal_length": agg})
2 changes: 1 addition & 1 deletion pandas/tests/indexing/test_iloc.py
@@ -114,7 +114,7 @@ def test_iloc_setitem_ea_inplace(self, frame_or_series, index_or_series_or_array
        if frame_or_series is Series:
            values = obj.values
        else:
-            values = obj._mgr.arrays[0]
+            values = obj._mgr.blocks[0].values

        if frame_or_series is Series:
            obj.iloc[:2] = index_or_series_or_array(arr[2:])