Skip to content

Commit 5e21be0

Browse files
authored
TYP: require Index objects earlier in internals (#33100)
1 parent d88b90d commit 5e21be0

File tree

3 files changed

+34
-28
lines changed

3 files changed

+34
-28
lines changed

pandas/core/internals/__init__.py

-4
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,6 @@
1818
BlockManager,
1919
SingleBlockManager,
2020
concatenate_block_managers,
21-
create_block_manager_from_arrays,
22-
create_block_manager_from_blocks,
2321
)
2422

2523
__all__ = [
@@ -40,6 +38,4 @@
4038
"BlockManager",
4139
"SingleBlockManager",
4240
"concatenate_block_managers",
43-
"create_block_manager_from_arrays",
44-
"create_block_manager_from_blocks",
4541
]

pandas/core/internals/construction.py

+14 -11
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
constructors before passing them to a BlockManager.
44
"""
55
from collections import abc
6+
from typing import Tuple
67

78
import numpy as np
89
import numpy.ma as ma
@@ -29,7 +30,6 @@
2930
ABCDataFrame,
3031
ABCDatetimeIndex,
3132
ABCIndexClass,
32-
ABCPeriodIndex,
3333
ABCSeries,
3434
ABCTimedeltaIndex,
3535
)
@@ -44,7 +44,7 @@
4444
get_objs_combined_axis,
4545
union_indexes,
4646
)
47-
from pandas.core.internals import (
47+
from pandas.core.internals.managers import (
4848
create_block_manager_from_arrays,
4949
create_block_manager_from_blocks,
5050
)
@@ -53,12 +53,16 @@
5353
# BlockManager Interface
5454

5555

56-
def arrays_to_mgr(arrays, arr_names, index, columns, dtype=None, verify_integrity=True):
56+
def arrays_to_mgr(
57+
arrays, arr_names, index, columns, dtype=None, verify_integrity: bool = True
58+
):
5759
"""
5860
Segregate Series based on type and coerce into matrices.
5961
6062
Needs to handle a lot of exceptional cases.
6163
"""
64+
arr_names = ensure_index(arr_names)
65+
6266
if verify_integrity:
6367
# figure out the index, if necessary
6468
if index is None:
@@ -70,6 +74,9 @@ def arrays_to_mgr(arrays, arr_names, index, columns, dtype=None, verify_integrit
7074
arrays = _homogenize(arrays, index, dtype)
7175

7276
columns = ensure_index(columns)
77+
else:
78+
columns = ensure_index(columns)
79+
index = ensure_index(index)
7380

7481
# from BlockManager perspective
7582
axes = [columns, index]
@@ -163,7 +170,8 @@ def init_ndarray(values, index, columns, dtype=None, copy=False):
163170
values = [values]
164171

165172
if columns is None:
166-
columns = list(range(len(values)))
173+
columns = Index(range(len(values)))
174+
167175
return arrays_to_mgr(values, columns, index, columns, dtype=dtype)
168176

169177
# by definition an array here
@@ -416,7 +424,7 @@ def get_names_from_index(data):
416424
return index
417425

418426

419-
def _get_axes(N, K, index, columns):
427+
def _get_axes(N, K, index, columns) -> Tuple[Index, Index]:
420428
# helper to create the axes as indexes
421429
# return axes or defaults
422430

@@ -635,12 +643,7 @@ def sanitize_index(data, index: Index):
635643
if len(data) != len(index):
636644
raise ValueError("Length of values does not match length of index")
637645

638-
if isinstance(data, ABCIndexClass):
639-
pass
640-
elif isinstance(data, (ABCPeriodIndex, ABCDatetimeIndex)):
641-
data = data._values
642-
643-
elif isinstance(data, np.ndarray):
646+
if isinstance(data, np.ndarray):
644647

645648
# coerce datetimelike types
646649
if data.dtype.kind in ["M", "m"]:

pandas/core/internals/managers.py

+20 -13
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
import itertools
33
import operator
44
import re
5-
from typing import Dict, List, Optional, Sequence, Tuple, TypeVar, Union
5+
from typing import DefaultDict, Dict, List, Optional, Sequence, Tuple, TypeVar, Union
66
import warnings
77

88
import numpy as np
@@ -341,7 +341,7 @@ def _verify_integrity(self) -> None:
341341
tot_items = sum(len(x.mgr_locs) for x in self.blocks)
342342
for block in self.blocks:
343343
if block._verify_integrity and block.shape[1:] != mgr_shape[1:]:
344-
construction_error(tot_items, block.shape[1:], self.axes)
344+
raise construction_error(tot_items, block.shape[1:], self.axes)
345345
if len(self.items) != tot_items:
346346
raise AssertionError(
347347
"Number of manager items must equal union of "
@@ -1648,7 +1648,7 @@ def concat(
16481648
# Constructor Helpers
16491649

16501650

1651-
def create_block_manager_from_blocks(blocks, axes):
1651+
def create_block_manager_from_blocks(blocks, axes: List[Index]) -> BlockManager:
16521652
try:
16531653
if len(blocks) == 1 and not isinstance(blocks[0], Block):
16541654
# if blocks[0] is of length 0, return empty blocks
@@ -1669,18 +1669,23 @@ def create_block_manager_from_blocks(blocks, axes):
16691669
except ValueError as e:
16701670
blocks = [getattr(b, "values", b) for b in blocks]
16711671
tot_items = sum(b.shape[0] for b in blocks)
1672-
construction_error(tot_items, blocks[0].shape[1:], axes, e)
1672+
raise construction_error(tot_items, blocks[0].shape[1:], axes, e)
16731673

16741674

1675-
def create_block_manager_from_arrays(arrays, names, axes):
1675+
def create_block_manager_from_arrays(
1676+
arrays, names: Index, axes: List[Index]
1677+
) -> BlockManager:
1678+
assert isinstance(names, Index)
1679+
assert isinstance(axes, list)
1680+
assert all(isinstance(x, Index) for x in axes)
16761681

16771682
try:
16781683
blocks = form_blocks(arrays, names, axes)
16791684
mgr = BlockManager(blocks, axes)
16801685
mgr._consolidate_inplace()
16811686
return mgr
16821687
except ValueError as e:
1683-
construction_error(len(arrays), arrays[0].shape, axes, e)
1688+
raise construction_error(len(arrays), arrays[0].shape, axes, e)
16841689

16851690

16861691
def construction_error(tot_items, block_shape, axes, e=None):
@@ -1695,23 +1700,25 @@ def construction_error(tot_items, block_shape, axes, e=None):
16951700
if len(implied) <= 2:
16961701
implied = implied[::-1]
16971702

1703+
# We return the exception object instead of raising it so that we
1704+
# can raise it in the caller; mypy plays better with that
16981705
if passed == implied and e is not None:
1699-
raise e
1706+
return e
17001707
if block_shape[0] == 0:
1701-
raise ValueError("Empty data passed with indices specified.")
1702-
raise ValueError(f"Shape of passed values is {passed}, indices imply {implied}")
1708+
return ValueError("Empty data passed with indices specified.")
1709+
return ValueError(f"Shape of passed values is {passed}, indices imply {implied}")
17031710

17041711

17051712
# -----------------------------------------------------------------------
17061713

17071714

1708-
def form_blocks(arrays, names, axes):
1715+
def form_blocks(arrays, names: Index, axes) -> List[Block]:
17091716
# put "leftover" items in float bucket, where else?
17101717
# generalize?
1711-
items_dict = defaultdict(list)
1718+
items_dict: DefaultDict[str, List] = defaultdict(list)
17121719
extra_locs = []
17131720

1714-
names_idx = ensure_index(names)
1721+
names_idx = names
17151722
if names_idx.equals(axes[0]):
17161723
names_indexer = np.arange(len(names_idx))
17171724
else:
@@ -1729,7 +1736,7 @@ def form_blocks(arrays, names, axes):
17291736
block_type = get_block_type(v)
17301737
items_dict[block_type.__name__].append((i, k, v))
17311738

1732-
blocks = []
1739+
blocks: List[Block] = []
17331740
if len(items_dict["FloatBlock"]):
17341741
float_blocks = _multi_blockify(items_dict["FloatBlock"])
17351742
blocks.extend(float_blocks)

0 commit comments

Comments
 (0)