Skip to content

Commit 199bf20

Browse files
authored
REF: Remove BlockManager.arrays in favor of BlockManager.blocks usage (#58804)
* REF: Remove BlockManager.arrays in favor of BlockManager.blocks usage
* Add back arrays
* Whitespace
1 parent 9f71476 commit 199bf20

File tree

21 files changed

+74
-65
lines changed

21 files changed

+74
-65
lines changed

pandas/_testing/__init__.py

+2-2
Original file line number | Diff line number | Diff line change
@@ -531,8 +531,8 @@ def shares_memory(left, right) -> bool:
531531
left._mask, right._mask
532532
)
533533

534-
if isinstance(left, DataFrame) and len(left._mgr.arrays) == 1:
535-
arr = left._mgr.arrays[0]
534+
if isinstance(left, DataFrame) and len(left._mgr.blocks) == 1:
535+
arr = left._mgr.blocks[0].values
536536
return shares_memory(arr, right)
537537

538538
raise NotImplementedError(type(left), type(right))

pandas/core/frame.py

+9-8
Original file line number | Diff line number | Diff line change
@@ -1046,7 +1046,7 @@ def _is_homogeneous_type(self) -> bool:
10461046
False
10471047
"""
10481048
# The "<" part of "<=" here is for empty DataFrame cases
1049-
return len({arr.dtype for arr in self._mgr.arrays}) <= 1
1049+
return len({block.values.dtype for block in self._mgr.blocks}) <= 1
10501050

10511051
@property
10521052
def _can_fast_transpose(self) -> bool:
@@ -5726,7 +5726,6 @@ def shift(
57265726
periods = cast(int, periods)
57275727

57285728
ncols = len(self.columns)
5729-
arrays = self._mgr.arrays
57305729
if axis == 1 and periods != 0 and ncols > 0 and freq is None:
57315730
if fill_value is lib.no_default:
57325731
# We will infer fill_value to match the closest column
@@ -5752,12 +5751,12 @@ def shift(
57525751

57535752
result.columns = self.columns.copy()
57545753
return result
5755-
elif len(arrays) > 1 or (
5754+
elif len(self._mgr.blocks) > 1 or (
57565755
# If we only have one block and we know that we can't
57575756
# keep the same dtype (i.e. the _can_hold_element check)
57585757
# then we can go through the reindex_indexer path
57595758
# (and avoid casting logic in the Block method).
5760-
not can_hold_element(arrays[0], fill_value)
5759+
not can_hold_element(self._mgr.blocks[0].values, fill_value)
57615760
):
57625761
# GH#35488 we need to watch out for multi-block cases
57635762
# We only get here with fill_value not-lib.no_default
@@ -11453,7 +11452,7 @@ def _get_data() -> DataFrame:
1145311452
if numeric_only:
1145411453
df = _get_data()
1145511454
if axis is None:
11456-
dtype = find_common_type([arr.dtype for arr in df._mgr.arrays])
11455+
dtype = find_common_type([block.values.dtype for block in df._mgr.blocks])
1145711456
if isinstance(dtype, ExtensionDtype):
1145811457
df = df.astype(dtype)
1145911458
arr = concat_compat(list(df._iter_column_arrays()))
@@ -11478,7 +11477,9 @@ def _get_data() -> DataFrame:
1147811477

1147911478
# kurtosis excluded since groupby does not implement it
1148011479
if df.shape[1] and name != "kurt":
11481-
dtype = find_common_type([arr.dtype for arr in df._mgr.arrays])
11480+
dtype = find_common_type(
11481+
[block.values.dtype for block in df._mgr.blocks]
11482+
)
1148211483
if isinstance(dtype, ExtensionDtype):
1148311484
# GH 54341: fastpath for EA-backed axis=1 reductions
1148411485
# This flattens the frame into a single 1D array while keeping
@@ -11552,8 +11553,8 @@ def _reduce_axis1(self, name: str, func, skipna: bool) -> Series:
1155211553
else:
1155311554
raise NotImplementedError(name)
1155411555

11555-
for arr in self._mgr.arrays:
11556-
middle = func(arr, axis=0, skipna=skipna)
11556+
for blocks in self._mgr.blocks:
11557+
middle = func(blocks.values, axis=0, skipna=skipna)
1155711558
result = ufunc(result, middle)
1155811559

1155911560
res_ser = self._constructor_sliced(result, index=self.index, copy=False)

pandas/core/generic.py

+3-3
Original file line number | Diff line number | Diff line change
@@ -6373,7 +6373,7 @@ def astype(
63736373
# TODO(EA2D): special case not needed with 2D EAs
63746374
dtype = pandas_dtype(dtype)
63756375
if isinstance(dtype, ExtensionDtype) and all(
6376-
arr.dtype == dtype for arr in self._mgr.arrays
6376+
block.values.dtype == dtype for block in self._mgr.blocks
63776377
):
63786378
return self.copy(deep=False)
63796379
# GH 18099/22869: columnwise conversion to extension dtype
@@ -11148,9 +11148,9 @@ def _logical_func(
1114811148
if (
1114911149
self.ndim > 1
1115011150
and axis == 1
11151-
and len(self._mgr.arrays) > 1
11151+
and len(self._mgr.blocks) > 1
1115211152
# TODO(EA2D): special-case not needed
11153-
and all(x.ndim == 2 for x in self._mgr.arrays)
11153+
and all(block.values.ndim == 2 for block in self._mgr.blocks)
1115411154
and not kwargs
1115511155
):
1115611156
# Fastpath avoiding potentially expensive transpose

pandas/core/indexing.py

+2-2
Original file line number | Diff line number | Diff line change
@@ -1804,10 +1804,10 @@ def _setitem_with_indexer(self, indexer, value, name: str = "iloc") -> None:
18041804

18051805
# if there is only one block/type, still have to take split path
18061806
# unless the block is one-dimensional or it can hold the value
1807-
if not take_split_path and len(self.obj._mgr.arrays) and self.ndim > 1:
1807+
if not take_split_path and len(self.obj._mgr.blocks) and self.ndim > 1:
18081808
# in case of dict, keys are indices
18091809
val = list(value.values()) if isinstance(value, dict) else value
1810-
arr = self.obj._mgr.arrays[0]
1810+
arr = self.obj._mgr.blocks[0].values
18111811
take_split_path = not can_hold_element(
18121812
arr, extract_array(val, extract_numpy=True)
18131813
)

pandas/core/internals/managers.py

+3-1
Original file line number | Diff line number | Diff line change
@@ -353,6 +353,8 @@ def arrays(self) -> list[ArrayLike]:
353353
Warning! The returned arrays don't handle Copy-on-Write, so this should
354354
be used with caution (only in read-mode).
355355
"""
356+
# TODO: Deprecate, usage in Dask
357+
# https://github.com/dask/dask/blob/484fc3f1136827308db133cd256ba74df7a38d8c/dask/base.py#L1312
356358
return [blk.values for blk in self.blocks]
357359

358360
def __repr__(self) -> str:
@@ -2068,7 +2070,7 @@ def array(self) -> ArrayLike:
20682070
"""
20692071
Quick access to the backing array of the Block.
20702072
"""
2071-
return self.arrays[0]
2073+
return self.blocks[0].values
20722074

20732075
# error: Cannot override writeable attribute with read-only property
20742076
@property

pandas/tests/apply/test_str.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -287,7 +287,7 @@ def test_transform_groupby_kernel_frame(request, float_frame, op):
287287
# same thing, but ensuring we have multiple blocks
288288
assert "E" not in float_frame.columns
289289
float_frame["E"] = float_frame["A"].copy()
290-
assert len(float_frame._mgr.arrays) > 1
290+
assert len(float_frame._mgr.blocks) > 1
291291

292292
ones = np.ones(float_frame.shape[0])
293293
gb2 = float_frame.groupby(ones)

pandas/tests/extension/base/casting.py

+3-2
Original file line number | Diff line number | Diff line change
@@ -30,8 +30,9 @@ def test_astype_object_frame(self, all_data):
3030
blk = result._mgr.blocks[0]
3131
assert isinstance(blk, NumpyBlock), type(blk)
3232
assert blk.is_object
33-
assert isinstance(result._mgr.arrays[0], np.ndarray)
34-
assert result._mgr.arrays[0].dtype == np.dtype(object)
33+
arr = result._mgr.blocks[0].values
34+
assert isinstance(arr, np.ndarray)
35+
assert arr.dtype == np.dtype(object)
3536

3637
# check that we can compare the dtypes
3738
comp = result.dtypes == df.dtypes

pandas/tests/extension/base/constructors.py

+2-2
Original file line number | Diff line number | Diff line change
@@ -69,15 +69,15 @@ def test_dataframe_constructor_from_dict(self, data, from_series):
6969
assert result.shape == (len(data), 1)
7070
if hasattr(result._mgr, "blocks"):
7171
assert isinstance(result._mgr.blocks[0], EABackedBlock)
72-
assert isinstance(result._mgr.arrays[0], ExtensionArray)
72+
assert isinstance(result._mgr.blocks[0].values, ExtensionArray)
7373

7474
def test_dataframe_from_series(self, data):
7575
result = pd.DataFrame(pd.Series(data))
7676
assert result.dtypes[0] == data.dtype
7777
assert result.shape == (len(data), 1)
7878
if hasattr(result._mgr, "blocks"):
7979
assert isinstance(result._mgr.blocks[0], EABackedBlock)
80-
assert isinstance(result._mgr.arrays[0], ExtensionArray)
80+
assert isinstance(result._mgr.blocks[0].values, ExtensionArray)
8181

8282
def test_series_given_mismatched_index_raises(self, data):
8383
msg = r"Length of values \(3\) does not match length of index \(5\)"

pandas/tests/extension/base/getitem.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -450,7 +450,7 @@ def test_loc_len1(self, data):
450450
df = pd.DataFrame({"A": data})
451451
res = df.loc[[0], "A"]
452452
assert res.ndim == 1
453-
assert res._mgr.arrays[0].ndim == 1
453+
assert res._mgr.blocks[0].ndim == 1
454454
if hasattr(res._mgr, "blocks"):
455455
assert res._mgr._block.ndim == 1
456456

pandas/tests/extension/base/reshaping.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -29,7 +29,7 @@ def test_concat(self, data, in_frame):
2929
assert dtype == data.dtype
3030
if hasattr(result._mgr, "blocks"):
3131
assert isinstance(result._mgr.blocks[0], EABackedBlock)
32-
assert isinstance(result._mgr.arrays[0], ExtensionArray)
32+
assert isinstance(result._mgr.blocks[0].values, ExtensionArray)
3333

3434
@pytest.mark.parametrize("in_frame", [True, False])
3535
def test_concat_all_na_block(self, data_missing, in_frame):

pandas/tests/frame/indexing/test_setitem.py

+2-2
Original file line number | Diff line number | Diff line change
@@ -340,8 +340,8 @@ def test_setitem_dt64tz(self, timezone_frame):
340340
# assert that A & C are not sharing the same base (e.g. they
341341
# are copies)
342342
# Note: This does not hold with Copy on Write (because of lazy copying)
343-
v1 = df._mgr.arrays[1]
344-
v2 = df._mgr.arrays[2]
343+
v1 = df._mgr.blocks[1].values
344+
v2 = df._mgr.blocks[2].values
345345
tm.assert_extension_array_equal(v1, v2)
346346
v1base = v1._ndarray.base
347347
v2base = v2._ndarray.base

pandas/tests/frame/methods/test_cov_corr.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -214,7 +214,7 @@ def test_corr_item_cache(self):
214214
df["B"] = range(10)[::-1]
215215

216216
ser = df["A"] # populate item_cache
217-
assert len(df._mgr.arrays) == 2 # i.e. 2 blocks
217+
assert len(df._mgr.blocks) == 2
218218

219219
_ = df.corr(numeric_only=True)
220220

pandas/tests/frame/methods/test_fillna.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -47,7 +47,7 @@ def test_fillna_on_column_view(self):
4747
assert np.isnan(arr[:, 0]).all()
4848

4949
# i.e. we didn't create a new 49-column block
50-
assert len(df._mgr.arrays) == 1
50+
assert len(df._mgr.blocks) == 1
5151
assert np.shares_memory(df.values, arr)
5252

5353
def test_fillna_datetime(self, datetime_frame):

pandas/tests/frame/methods/test_shift.py

+3-3
Original file line number | Diff line number | Diff line change
@@ -320,7 +320,7 @@ def test_shift_categorical1(self, frame_or_series):
320320
def get_cat_values(ndframe):
321321
# For Series we could just do ._values; for DataFrame
322322
# we may be able to do this if we ever have 2D Categoricals
323-
return ndframe._mgr.arrays[0]
323+
return ndframe._mgr.blocks[0].values
324324

325325
cat = get_cat_values(obj)
326326

@@ -560,7 +560,7 @@ def test_shift_dt64values_int_fill_deprecated(self):
560560
# same thing but not consolidated; pre-2.0 we got different behavior
561561
df3 = DataFrame({"A": ser})
562562
df3["B"] = ser
563-
assert len(df3._mgr.arrays) == 2
563+
assert len(df3._mgr.blocks) == 2
564564
result = df3.shift(1, axis=1, fill_value=0)
565565
tm.assert_frame_equal(result, expected)
566566

@@ -621,7 +621,7 @@ def test_shift_dt64values_axis1_invalid_fill(self, vals, as_cat):
621621
# same thing but not consolidated
622622
df3 = DataFrame({"A": ser})
623623
df3["B"] = ser
624-
assert len(df3._mgr.arrays) == 2
624+
assert len(df3._mgr.blocks) == 2
625625
result = df3.shift(-1, axis=1, fill_value="foo")
626626
tm.assert_frame_equal(result, expected)
627627

pandas/tests/frame/methods/test_values.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -256,7 +256,7 @@ def test_private_values_dt64_multiblock(self):
256256
df = DataFrame({"A": dta[:4]}, copy=False)
257257
df["B"] = dta[4:]
258258

259-
assert len(df._mgr.arrays) == 2
259+
assert len(df._mgr.blocks) == 2
260260

261261
result = df._values
262262
expected = dta.reshape(2, 4).T

pandas/tests/frame/test_constructors.py

+18-18
Original file line number | Diff line number | Diff line change
@@ -180,24 +180,24 @@ def test_datetimelike_values_with_object_dtype(self, kind, frame_or_series):
180180
arr = arr[:, 0]
181181

182182
obj = frame_or_series(arr, dtype=object)
183-
assert obj._mgr.arrays[0].dtype == object
184-
assert isinstance(obj._mgr.arrays[0].ravel()[0], scalar_type)
183+
assert obj._mgr.blocks[0].values.dtype == object
184+
assert isinstance(obj._mgr.blocks[0].values.ravel()[0], scalar_type)
185185

186186
# go through a different path in internals.construction
187187
obj = frame_or_series(frame_or_series(arr), dtype=object)
188-
assert obj._mgr.arrays[0].dtype == object
189-
assert isinstance(obj._mgr.arrays[0].ravel()[0], scalar_type)
188+
assert obj._mgr.blocks[0].values.dtype == object
189+
assert isinstance(obj._mgr.blocks[0].values.ravel()[0], scalar_type)
190190

191191
obj = frame_or_series(frame_or_series(arr), dtype=NumpyEADtype(object))
192-
assert obj._mgr.arrays[0].dtype == object
193-
assert isinstance(obj._mgr.arrays[0].ravel()[0], scalar_type)
192+
assert obj._mgr.blocks[0].values.dtype == object
193+
assert isinstance(obj._mgr.blocks[0].values.ravel()[0], scalar_type)
194194

195195
if frame_or_series is DataFrame:
196196
# other paths through internals.construction
197197
sers = [Series(x) for x in arr]
198198
obj = frame_or_series(sers, dtype=object)
199-
assert obj._mgr.arrays[0].dtype == object
200-
assert isinstance(obj._mgr.arrays[0].ravel()[0], scalar_type)
199+
assert obj._mgr.blocks[0].values.dtype == object
200+
assert isinstance(obj._mgr.blocks[0].values.ravel()[0], scalar_type)
201201

202202
def test_series_with_name_not_matching_column(self):
203203
# GH#9232
@@ -297,7 +297,7 @@ def test_constructor_dtype_nocast_view_dataframe(self):
297297
def test_constructor_dtype_nocast_view_2d_array(self):
298298
df = DataFrame([[1, 2], [3, 4]], dtype="int64")
299299
df2 = DataFrame(df.values, dtype=df[0].dtype)
300-
assert df2._mgr.arrays[0].flags.c_contiguous
300+
assert df2._mgr.blocks[0].values.flags.c_contiguous
301301

302302
@pytest.mark.xfail(using_pyarrow_string_dtype(), reason="conversion copies")
303303
def test_1d_object_array_does_not_copy(self):
@@ -2493,27 +2493,27 @@ def get_base(obj):
24932493
def check_views(c_only: bool = False):
24942494
# Check that the underlying data behind df["c"] is still `c`
24952495
# after setting with iloc. Since we don't know which entry in
2496-
# df._mgr.arrays corresponds to df["c"], we just check that exactly
2496+
# df._mgr.blocks corresponds to df["c"], we just check that exactly
24972497
# one of these arrays is `c`. GH#38939
2498-
assert sum(x is c for x in df._mgr.arrays) == 1
2498+
assert sum(x.values is c for x in df._mgr.blocks) == 1
24992499
if c_only:
25002500
# If we ever stop consolidating in setitem_with_indexer,
25012501
# this will become unnecessary.
25022502
return
25032503

25042504
assert (
25052505
sum(
2506-
get_base(x) is a
2507-
for x in df._mgr.arrays
2508-
if isinstance(x.dtype, np.dtype)
2506+
get_base(x.values) is a
2507+
for x in df._mgr.blocks
2508+
if isinstance(x.values.dtype, np.dtype)
25092509
)
25102510
== 1
25112511
)
25122512
assert (
25132513
sum(
2514-
get_base(x) is b
2515-
for x in df._mgr.arrays
2516-
if isinstance(x.dtype, np.dtype)
2514+
get_base(x.values) is b
2515+
for x in df._mgr.blocks
2516+
if isinstance(x.values.dtype, np.dtype)
25172517
)
25182518
== 1
25192519
)
@@ -3045,7 +3045,7 @@ def test_construction_from_ndarray_datetimelike(self):
30453045
# constructed from 2D ndarray
30463046
arr = np.arange(0, 12, dtype="datetime64[ns]").reshape(4, 3)
30473047
df = DataFrame(arr)
3048-
assert all(isinstance(arr, DatetimeArray) for arr in df._mgr.arrays)
3048+
assert all(isinstance(block.values, DatetimeArray) for block in df._mgr.blocks)
30493049

30503050
def test_construction_from_ndarray_with_eadtype_mismatched_columns(self):
30513051
arr = np.random.default_rng(2).standard_normal((10, 2))

pandas/tests/groupby/aggregate/test_cython.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -285,7 +285,7 @@ def test_read_only_buffer_source_agg(agg):
285285
"species": ["setosa", "setosa", "setosa", "setosa", "setosa"],
286286
}
287287
)
288-
df._mgr.arrays[0].flags.writeable = False
288+
df._mgr.blocks[0].values.flags.writeable = False
289289

290290
result = df.groupby(["species"]).agg({"sepal_length": agg})
291291
expected = df.copy().groupby(["species"]).agg({"sepal_length": agg})

pandas/tests/indexing/test_iloc.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -114,7 +114,7 @@ def test_iloc_setitem_ea_inplace(self, frame_or_series, index_or_series_or_array
114114
if frame_or_series is Series:
115115
values = obj.values
116116
else:
117-
values = obj._mgr.arrays[0]
117+
values = obj._mgr.blocks[0].values
118118

119119
if frame_or_series is Series:
120120
obj.iloc[:2] = index_or_series_or_array(arr[2:])

0 commit comments

Comments
 (0)