diff --git a/pandas/core/internals/__init__.py b/pandas/core/internals/__init__.py index e70652b81c42f..bc45b7c74ecc1 100644 --- a/pandas/core/internals/__init__.py +++ b/pandas/core/internals/__init__.py @@ -18,8 +18,6 @@ BlockManager, SingleBlockManager, concatenate_block_managers, - create_block_manager_from_arrays, - create_block_manager_from_blocks, ) __all__ = [ @@ -40,6 +38,4 @@ "BlockManager", "SingleBlockManager", "concatenate_block_managers", - "create_block_manager_from_arrays", - "create_block_manager_from_blocks", ] diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index 3e0fb8455884a..fc7da4155db36 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -3,6 +3,7 @@ constructors before passing them to a BlockManager. """ from collections import abc +from typing import Tuple import numpy as np import numpy.ma as ma @@ -29,7 +30,6 @@ ABCDataFrame, ABCDatetimeIndex, ABCIndexClass, - ABCPeriodIndex, ABCSeries, ABCTimedeltaIndex, ) @@ -44,7 +44,7 @@ get_objs_combined_axis, union_indexes, ) -from pandas.core.internals import ( +from pandas.core.internals.managers import ( create_block_manager_from_arrays, create_block_manager_from_blocks, ) @@ -53,12 +53,16 @@ # BlockManager Interface -def arrays_to_mgr(arrays, arr_names, index, columns, dtype=None, verify_integrity=True): +def arrays_to_mgr( + arrays, arr_names, index, columns, dtype=None, verify_integrity: bool = True +): """ Segregate Series based on type and coerce into matrices. Needs to handle a lot of exceptional cases. """ + arr_names = ensure_index(arr_names) + if verify_integrity: # figure out the index, if necessary if index is None: @@ -70,6 +74,9 @@ def arrays_to_mgr(arrays, arr_names, index, columns, dtype=None, verify_integrit arrays = _homogenize(arrays, index, dtype) columns = ensure_index(columns) + else: + columns = ensure_index(columns) + index = ensure_index(index) # from BlockManager perspective axes = [columns, index] @@ -163,7 +170,8 @@ def init_ndarray(values, index, columns, dtype=None, copy=False): values = [values] if columns is None: - columns = list(range(len(values))) + columns = Index(range(len(values))) + return arrays_to_mgr(values, columns, index, columns, dtype=dtype) # by definition an array here @@ -416,7 +424,7 @@ def get_names_from_index(data): return index -def _get_axes(N, K, index, columns): +def _get_axes(N, K, index, columns) -> Tuple[Index, Index]: # helper to create the axes as indexes # return axes or defaults @@ -635,12 +643,7 @@ def sanitize_index(data, index: Index): if len(data) != len(index): raise ValueError("Length of values does not match length of index") - if isinstance(data, ABCIndexClass): - pass - elif isinstance(data, (ABCPeriodIndex, ABCDatetimeIndex)): - data = data._values - - elif isinstance(data, np.ndarray): + if isinstance(data, np.ndarray): # coerce datetimelike types if data.dtype.kind in ["M", "m"]: diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 182a5b14a1242..07e78cf48d33d 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -2,7 +2,7 @@ import itertools import operator import re -from typing import Dict, List, Optional, Sequence, Tuple, TypeVar, Union +from typing import DefaultDict, Dict, List, Optional, Sequence, Tuple, TypeVar, Union import warnings import numpy as np @@ -342,7 +342,7 @@ def _verify_integrity(self) -> None: tot_items = sum(len(x.mgr_locs) for x in self.blocks) for block in self.blocks: if block._verify_integrity and block.shape[1:] != mgr_shape[1:]: - construction_error(tot_items, block.shape[1:], self.axes) + raise construction_error(tot_items, block.shape[1:], self.axes) if len(self.items) != tot_items: raise AssertionError( "Number of manager items must equal union of " @@ -1649,7 +1649,7 @@ def concat( # Constructor Helpers -def create_block_manager_from_blocks(blocks, axes): +def create_block_manager_from_blocks(blocks, axes: List[Index]) -> BlockManager: try: if len(blocks) == 1 and not isinstance(blocks[0], Block): # if blocks[0] is of length 0, return empty blocks @@ -1670,10 +1670,15 @@ def create_block_manager_from_blocks(blocks, axes): except ValueError as e: blocks = [getattr(b, "values", b) for b in blocks] tot_items = sum(b.shape[0] for b in blocks) - construction_error(tot_items, blocks[0].shape[1:], axes, e) + raise construction_error(tot_items, blocks[0].shape[1:], axes, e) -def create_block_manager_from_arrays(arrays, names, axes): +def create_block_manager_from_arrays( + arrays, names: Index, axes: List[Index] +) -> BlockManager: + assert isinstance(names, Index) + assert isinstance(axes, list) + assert all(isinstance(x, Index) for x in axes) try: blocks = form_blocks(arrays, names, axes) @@ -1681,7 +1686,7 @@ def create_block_manager_from_arrays(arrays, names, axes): mgr._consolidate_inplace() return mgr except ValueError as e: - construction_error(len(arrays), arrays[0].shape, axes, e) + raise construction_error(len(arrays), arrays[0].shape, axes, e) def construction_error(tot_items, block_shape, axes, e=None): @@ -1696,23 +1701,25 @@ def construction_error(tot_items, block_shape, axes, e=None): if len(implied) <= 2: implied = implied[::-1] + # We return the exception object instead of raising it so that we + # can raise it in the caller; mypy plays better with that if passed == implied and e is not None: - raise e + return e if block_shape[0] == 0: - raise ValueError("Empty data passed with indices specified.") - raise ValueError(f"Shape of passed values is {passed}, indices imply {implied}") + return ValueError("Empty data passed with indices specified.") + return ValueError(f"Shape of passed values is {passed}, indices imply {implied}") # ----------------------------------------------------------------------- -def form_blocks(arrays, names, axes): +def form_blocks(arrays, names: Index, axes) -> List[Block]: # put "leftover" items in float bucket, where else? # generalize? - items_dict = defaultdict(list) + items_dict: DefaultDict[str, List] = defaultdict(list) extra_locs = [] - names_idx = ensure_index(names) + names_idx = names if names_idx.equals(axes[0]): names_indexer = np.arange(len(names_idx)) else: @@ -1730,7 +1737,7 @@ def form_blocks(arrays, names, axes): block_type = get_block_type(v) items_dict[block_type.__name__].append((i, k, v)) - blocks = [] + blocks: List[Block] = [] if len(items_dict["FloatBlock"]): float_blocks = _multi_blockify(items_dict["FloatBlock"]) blocks.extend(float_blocks)