Skip to content

Commit 6423561

Browse files
authored
API: pseudo-public internals API for downstream libraries (#40182)
1 parent 04a0b86 commit 6423561

File tree

9 files changed

+169
-47
lines changed

9 files changed

+169
-47
lines changed

pandas/core/internals/__init__.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
from pandas.core.internals.api import make_block # pseudo-public version
12
from pandas.core.internals.array_manager import (
23
ArrayManager,
34
SingleArrayManager,
@@ -16,7 +17,6 @@
1617
NumericBlock,
1718
ObjectBlock,
1819
TimeDeltaBlock,
19-
make_block,
2020
)
2121
from pandas.core.internals.concat import concatenate_managers
2222
from pandas.core.internals.managers import (

pandas/core/internals/api.py

+61
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
"""
2+
This is a pseudo-public API for downstream libraries. We ask that downstream
3+
authors
4+
5+
1) Try to avoid using internals directly altogether, and failing that,
6+
2) Use only functions exposed here (or in core.internals)
7+
8+
"""
9+
from typing import Optional
10+
11+
import numpy as np
12+
13+
from pandas._typing import Dtype
14+
15+
from pandas.core.dtypes.common import is_datetime64tz_dtype
16+
from pandas.core.dtypes.dtypes import PandasDtype
17+
from pandas.core.dtypes.generic import ABCPandasArray
18+
19+
from pandas.core.arrays import DatetimeArray
20+
from pandas.core.internals.blocks import (
21+
Block,
22+
DatetimeTZBlock,
23+
get_block_type,
24+
)
25+
26+
27+
def make_block(
    values, placement, klass=None, ndim=None, dtype: Optional[Dtype] = None
) -> Block:
    """
    This is a pseudo-public analogue to blocks.new_block.

    We ask that downstream libraries use this rather than any fully-internal
    APIs, including but not limited to:

    - core.internals.blocks.new_block
    - Block.make_block
    - Block.make_block_same_class
    - Block.__init__

    Parameters
    ----------
    values : array-like
        Data to wrap. A PandasArray is unwrapped via ``to_numpy()`` first.
    placement
        Forwarded unchanged to the block constructor.
    klass : type, optional
        Block class to instantiate. When None, the class is chosen by
        ``get_block_type(values, dtype)``.
    ndim : int, optional
        Forwarded to the block constructor. When greater than 1, values
        unwrapped from a PandasArray are made at least 2D.
    dtype : Dtype, optional
        Used to select the block class when ``klass`` is None, and passed to
        ``DatetimeArray._simple_new`` in the DatetimeTZBlock special case.
        A PandasDtype is replaced by its ``numpy_dtype``. Falls back to
        ``values.dtype`` only when it is None.

    Returns
    -------
    Block
        An instance of ``klass`` (or of the inferred block class).
    """
    if isinstance(values, ABCPandasArray):
        # Ensure that we don't allow PandasArray / PandasDtype in internals.
        # For now, blocks should be backed by ndarrays when possible.
        values = values.to_numpy()
        if ndim and ndim > 1:
            # TODO(EA2D): special case not needed with 2D EAs
            values = np.atleast_2d(values)

    if isinstance(dtype, PandasDtype):
        dtype = dtype.numpy_dtype

    if klass is None:
        # Compare against None explicitly: a plain (non-structured) np.dtype
        # has len() == 0 and is therefore falsy, so ``dtype or values.dtype``
        # would silently discard a caller-supplied numpy dtype.
        if dtype is None:
            dtype = values.dtype
        klass = get_block_type(values, dtype)

    elif klass is DatetimeTZBlock and not is_datetime64tz_dtype(values.dtype):
        # TODO: This is no longer hit internally; does it need to be retained
        #  for e.g. pyarrow?
        values = DatetimeArray._simple_new(values, dtype=dtype)

    return klass(values, ndim=ndim, placement=placement)

pandas/core/internals/array_manager.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@
8585
DataManager,
8686
SingleDataManager,
8787
)
88-
from pandas.core.internals.blocks import make_block
88+
from pandas.core.internals.blocks import new_block
8989

9090
if TYPE_CHECKING:
9191
from pandas import Float64Index
@@ -466,9 +466,9 @@ def apply_with_block(self: T, f, align_keys=None, **kwargs) -> T:
466466
if self.ndim == 2:
467467
if isinstance(arr, np.ndarray):
468468
arr = np.atleast_2d(arr)
469-
block = make_block(arr, placement=slice(0, 1, 1), ndim=2)
469+
block = new_block(arr, placement=slice(0, 1, 1), ndim=2)
470470
else:
471-
block = make_block(arr, placement=slice(0, len(self), 1), ndim=1)
471+
block = new_block(arr, placement=slice(0, len(self), 1), ndim=1)
472472

473473
applied = getattr(block, f)(**kwargs)
474474
if isinstance(applied, list):

pandas/core/internals/blocks.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -316,7 +316,7 @@ def make_block(self, values, placement=None) -> Block:
316316
if self.is_extension:
317317
values = ensure_block_shape(values, ndim=self.ndim)
318318

319-
return make_block(values, placement=placement, ndim=self.ndim)
319+
return new_block(values, placement=placement, ndim=self.ndim)
320320

321321
@final
322322
def make_block_same_class(self, values, placement=None) -> Block:
@@ -1431,7 +1431,7 @@ def _unstack(self, unstacker, fill_value, new_placement):
14311431
new_values = new_values.T[mask]
14321432
new_placement = new_placement[mask]
14331433

1434-
blocks = [make_block(new_values, placement=new_placement, ndim=2)]
1434+
blocks = [new_block(new_values, placement=new_placement, ndim=2)]
14351435
return blocks, mask
14361436

14371437
def quantile(
@@ -1460,7 +1460,7 @@ def quantile(
14601460

14611461
result = quantile_compat(self.values, qs, interpolation, axis)
14621462

1463-
return make_block(result, placement=self.mgr_locs, ndim=2)
1463+
return new_block(result, placement=self.mgr_locs, ndim=2)
14641464

14651465

14661466
class ExtensionBlock(Block):
@@ -2301,7 +2301,7 @@ def get_block_type(values, dtype: Optional[Dtype] = None):
23012301
return cls
23022302

23032303

2304-
def make_block(
2304+
def new_block(
23052305
values, placement, klass=None, ndim=None, dtype: Optional[Dtype] = None
23062306
) -> Block:
23072307
# Ensure that we don't allow PandasArray / PandasDtype in internals.

pandas/core/internals/concat.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@
4242
ExtensionArray,
4343
)
4444
from pandas.core.internals.array_manager import ArrayManager
45-
from pandas.core.internals.blocks import make_block
45+
from pandas.core.internals.blocks import new_block
4646
from pandas.core.internals.managers import BlockManager
4747

4848
if TYPE_CHECKING:
@@ -144,10 +144,10 @@ def concatenate_managers(
144144
# Fast-path
145145
b = blk.make_block_same_class(values, placement=placement)
146146
else:
147-
b = make_block(values, placement=placement, ndim=blk.ndim)
147+
b = new_block(values, placement=placement, ndim=blk.ndim)
148148
else:
149149
new_values = _concatenate_join_units(join_units, concat_axis, copy=copy)
150-
b = make_block(new_values, placement=placement, ndim=len(axes))
150+
b = new_block(new_values, placement=placement, ndim=len(axes))
151151
blocks.append(b)
152152

153153
return BlockManager(blocks, axes)

pandas/core/internals/construction.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@
7575
from pandas.core.internals.array_manager import ArrayManager
7676
from pandas.core.internals.blocks import (
7777
ensure_block_shape,
78-
make_block,
78+
new_block,
7979
)
8080
from pandas.core.internals.managers import (
8181
BlockManager,
@@ -300,7 +300,7 @@ def ndarray_to_mgr(
300300
# TODO: What about re-joining object columns?
301301
dvals_list = [maybe_squeeze_dt64tz(x) for x in dvals_list]
302302
block_values = [
303-
make_block(dvals_list[n], placement=[n], ndim=2)
303+
new_block(dvals_list[n], placement=n, ndim=2)
304304
for n in range(len(dvals_list))
305305
]
306306

pandas/core/internals/managers.py

+16-16
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@
7474
ensure_block_shape,
7575
extend_blocks,
7676
get_block_type,
77-
make_block,
77+
new_block,
7878
)
7979
from pandas.core.internals.ops import (
8080
blockwise_all,
@@ -322,7 +322,7 @@ def unpickle_block(values, mgr_locs, ndim: int):
322322
# TODO(EA2D): ndim would be unnecessary with 2D EAs
323323
# older pickles may store e.g. DatetimeIndex instead of DatetimeArray
324324
values = extract_array(values, extract_numpy=True)
325-
return make_block(values, placement=mgr_locs, ndim=ndim)
325+
return new_block(values, placement=mgr_locs, ndim=ndim)
326326

327327
if isinstance(state, tuple) and len(state) >= 4 and "0.14.1" in state[3]:
328328
state = state[3]["0.14.1"]
@@ -1148,7 +1148,7 @@ def value_getitem(placement):
11481148
# one item.
11491149
# TODO(EA2D): special casing unnecessary with 2D EAs
11501150
new_blocks.extend(
1151-
make_block(
1151+
new_block(
11521152
values=value,
11531153
ndim=self.ndim,
11541154
placement=slice(mgr_loc, mgr_loc + 1),
@@ -1164,7 +1164,7 @@ def value_getitem(placement):
11641164
unfit_val_items = unfit_val_locs[0].append(unfit_val_locs[1:])
11651165

11661166
new_blocks.append(
1167-
make_block(
1167+
new_block(
11681168
values=value_getitem(unfit_val_items),
11691169
ndim=self.ndim,
11701170
placement=unfit_mgr_locs,
@@ -1209,7 +1209,7 @@ def insert(self, loc: int, item: Hashable, value, allow_duplicates: bool = False
12091209
value = ensure_block_shape(value, ndim=2)
12101210

12111211
# TODO: type value as ArrayLike
1212-
block = make_block(values=value, ndim=self.ndim, placement=slice(loc, loc + 1))
1212+
block = new_block(values=value, ndim=self.ndim, placement=slice(loc, loc + 1))
12131213

12141214
for blkno, count in _fast_count_smallints(self.blknos[loc:]):
12151215
blk = self.blocks[blkno]
@@ -1436,7 +1436,7 @@ def _make_na_block(self, placement, fill_value=None):
14361436
dtype, fill_value = infer_dtype_from_scalar(fill_value)
14371437
block_values = np.empty(block_shape, dtype=dtype)
14381438
block_values.fill(fill_value)
1439-
return make_block(block_values, placement=placement, ndim=block_values.ndim)
1439+
return new_block(block_values, placement=placement, ndim=block_values.ndim)
14401440

14411441
def take(self, indexer, axis: int = 1, verify: bool = True, convert: bool = True):
14421442
"""
@@ -1562,7 +1562,7 @@ def from_array(cls, array: ArrayLike, index: Index) -> SingleBlockManager:
15621562
"""
15631563
Constructor for if we have an array that is not yet a Block.
15641564
"""
1565-
block = make_block(array, placement=slice(0, len(index)), ndim=1)
1565+
block = new_block(array, placement=slice(0, len(index)), ndim=1)
15661566
return cls(block, index)
15671567

15681568
def _post_setstate(self):
@@ -1669,7 +1669,7 @@ def create_block_manager_from_blocks(blocks, axes: List[Index]) -> BlockManager:
16691669
# is basically "all items", but if there're many, don't bother
16701670
# converting, it's an error anyway.
16711671
blocks = [
1672-
make_block(
1672+
new_block(
16731673
values=blocks[0], placement=slice(0, len(axes[0])), ndim=2
16741674
)
16751675
]
@@ -1780,7 +1780,7 @@ def _form_blocks(
17801780

17811781
if len(items_dict["DatetimeTZBlock"]):
17821782
dttz_blocks = [
1783-
make_block(array, klass=DatetimeTZBlock, placement=i, ndim=2)
1783+
new_block(array, klass=DatetimeTZBlock, placement=i, ndim=2)
17841784
for i, array in items_dict["DatetimeTZBlock"]
17851785
]
17861786
blocks.extend(dttz_blocks)
@@ -1791,22 +1791,22 @@ def _form_blocks(
17911791

17921792
if len(items_dict["CategoricalBlock"]) > 0:
17931793
cat_blocks = [
1794-
make_block(array, klass=CategoricalBlock, placement=i, ndim=2)
1794+
new_block(array, klass=CategoricalBlock, placement=i, ndim=2)
17951795
for i, array in items_dict["CategoricalBlock"]
17961796
]
17971797
blocks.extend(cat_blocks)
17981798

17991799
if len(items_dict["ExtensionBlock"]):
18001800
external_blocks = [
1801-
make_block(array, klass=ExtensionBlock, placement=i, ndim=2)
1801+
new_block(array, klass=ExtensionBlock, placement=i, ndim=2)
18021802
for i, array in items_dict["ExtensionBlock"]
18031803
]
18041804

18051805
blocks.extend(external_blocks)
18061806

18071807
if len(items_dict["ObjectValuesExtensionBlock"]):
18081808
external_blocks = [
1809-
make_block(array, klass=ObjectValuesExtensionBlock, placement=i, ndim=2)
1809+
new_block(array, klass=ObjectValuesExtensionBlock, placement=i, ndim=2)
18101810
for i, array in items_dict["ObjectValuesExtensionBlock"]
18111811
]
18121812

@@ -1819,7 +1819,7 @@ def _form_blocks(
18191819
block_values = np.empty(shape, dtype=object)
18201820
block_values.fill(np.nan)
18211821

1822-
na_block = make_block(block_values, placement=extra_locs, ndim=2)
1822+
na_block = new_block(block_values, placement=extra_locs, ndim=2)
18231823
blocks.append(na_block)
18241824

18251825
return blocks
@@ -1836,7 +1836,7 @@ def _simple_blockify(tuples, dtype) -> List[Block]:
18361836
if dtype is not None and values.dtype != dtype: # pragma: no cover
18371837
values = values.astype(dtype)
18381838

1839-
block = make_block(values, placement=placement, ndim=2)
1839+
block = new_block(values, placement=placement, ndim=2)
18401840
return [block]
18411841

18421842

@@ -1850,7 +1850,7 @@ def _multi_blockify(tuples, dtype: Optional[Dtype] = None):
18501850

18511851
values, placement = _stack_arrays(list(tup_block), dtype)
18521852

1853-
block = make_block(values, placement=placement, ndim=2)
1853+
block = new_block(values, placement=placement, ndim=2)
18541854
new_blocks.append(block)
18551855

18561856
return new_blocks
@@ -1928,7 +1928,7 @@ def _merge_blocks(
19281928
new_values = new_values[argsort]
19291929
new_mgr_locs = new_mgr_locs[argsort]
19301930

1931-
return [make_block(new_values, placement=new_mgr_locs, ndim=2)]
1931+
return [new_block(new_values, placement=new_mgr_locs, ndim=2)]
19321932

19331933
# can't consolidate --> no merge
19341934
return blocks

pandas/tests/internals/test_api.py

+50
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
"""
2+
Tests for the pseudo-public API implemented in internals/api.py and exposed
3+
in core.internals
4+
"""
5+
6+
from pandas.core import internals
7+
from pandas.core.internals import api
8+
9+
10+
def test_internals_api():
    """``core.internals`` must re-export the pseudo-public ``api.make_block``."""
    exported = internals.make_block
    assert exported is api.make_block
12+
13+
14+
def test_namespace():
    """Pin the exact set of non-dunder names visible on ``core.internals``."""
    # SUBJECT TO CHANGE

    # Submodules that show up as attributes of the package once imported.
    submodules = {
        "blocks",
        "concat",
        "managers",
        "construction",
        "array_manager",
        "base",
        "api",
        "ops",
    }
    # Classes and functions deliberately exposed in the package namespace.
    exposed = {
        "Block",
        "CategoricalBlock",
        "NumericBlock",
        "DatetimeBlock",
        "DatetimeTZBlock",
        "ExtensionBlock",
        "FloatBlock",
        "ObjectBlock",
        "TimeDeltaBlock",
        "make_block",
        "DataManager",
        "ArrayManager",
        "BlockManager",
        "SingleDataManager",
        "SingleBlockManager",
        "SingleArrayManager",
        "concatenate_managers",
        "create_block_manager_from_arrays",
        "create_block_manager_from_blocks",
    }

    visible = {name for name in dir(internals) if not name.startswith("__")}
    assert visible == submodules | exposed

0 commit comments

Comments
 (0)