Skip to content

TYP: enforce annotation on SingleBlockManager.__init__ #32421

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Mar 11, 2020
95 changes: 51 additions & 44 deletions pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,12 @@
import itertools
import operator
import re
from typing import Dict, List, Optional, Sequence, Tuple, Union
from typing import Dict, List, Optional, Sequence, Tuple, TypeVar, Union

import numpy as np

from pandas._libs import Timedelta, Timestamp, internals as libinternals, lib
from pandas._typing import DtypeObj, Label
from pandas._typing import ArrayLike, DtypeObj, Label
from pandas.util._validators import validate_bool_kwarg

from pandas.core.dtypes.cast import (
Expand Down Expand Up @@ -58,6 +58,8 @@

# TODO: flexible with index=None and/or items=None

T = TypeVar("T", bound="BlockManager")


class BlockManager(PandasObject):
"""
Expand Down Expand Up @@ -149,6 +151,13 @@ def __init__(
self._blknos = None
self._blklocs = None

@classmethod
def from_blocks(cls, blocks: List[Block], axes: List[Index]):
"""
Constructor for BlockManager and SingleBlockManager with same signature.
"""
return cls(blocks, axes, do_integrity_check=False)

@property
def blknos(self):
"""
Expand Down Expand Up @@ -176,18 +185,19 @@ def blklocs(self):

return self._blklocs

def make_empty(self, axes=None) -> "BlockManager":
def make_empty(self: T, axes=None) -> T:
""" return an empty BlockManager with the items axis of len 0 """
if axes is None:
axes = [Index([])] + self.axes[1:]

# preserve dtype if possible
if self.ndim == 1:
assert isinstance(self, SingleBlockManager) # for mypy
blocks = np.array([], dtype=self.array_dtype)
arr = np.array([], dtype=self.array_dtype)
blocks = [make_block(arr, placement=slice(0, 0), ndim=1)]
else:
blocks = []
return type(self)(blocks, axes)
return type(self).from_blocks(blocks, axes)

def __nonzero__(self) -> bool:
return True
Expand Down Expand Up @@ -380,7 +390,7 @@ def reduce(self, func, *args, **kwargs):

return res

def apply(self, f, filter=None, **kwargs) -> "BlockManager":
def apply(self: T, f, filter=None, **kwargs) -> T:
"""
Iterate over the blocks, collect and create a new BlockManager.

Expand Down Expand Up @@ -458,8 +468,8 @@ def apply(self, f, filter=None, **kwargs) -> "BlockManager":

if len(result_blocks) == 0:
return self.make_empty(self.axes)
bm = type(self)(result_blocks, self.axes, do_integrity_check=False)
return bm

return type(self).from_blocks(result_blocks, self.axes)

def quantile(
self,
Expand Down Expand Up @@ -658,7 +668,7 @@ def comp(s, regex=False):
rb = new_rb
result_blocks.extend(rb)

bm = type(self)(result_blocks, self.axes)
bm = type(self).from_blocks(result_blocks, self.axes)
bm._consolidate_inplace()
return bm

Expand Down Expand Up @@ -747,7 +757,7 @@ def combine(self, blocks: List[Block], copy: bool = True) -> "BlockManager":
axes = list(self.axes)
axes[0] = self.items.take(indexer)

return type(self)(new_blocks, axes, do_integrity_check=False)
return type(self).from_blocks(new_blocks, axes)

def get_slice(self, slobj: slice, axis: int = 0) -> "BlockManager":

Expand All @@ -774,7 +784,7 @@ def __contains__(self, item) -> bool:
def nblocks(self) -> int:
return len(self.blocks)

def copy(self, deep=True) -> "BlockManager":
def copy(self: T, deep=True) -> T:
"""
Make deep or shallow copy of BlockManager

Expand Down Expand Up @@ -1244,14 +1254,14 @@ def reindex_axis(
)

def reindex_indexer(
self,
self: T,
new_axis,
indexer,
axis: int,
fill_value=None,
allow_dups=False,
allow_dups: bool = False,
copy: bool = True,
):
) -> T:
"""
Parameters
----------
Expand Down Expand Up @@ -1299,7 +1309,8 @@ def reindex_indexer(

new_axes = list(self.axes)
new_axes[axis] = new_axis
return type(self)(new_blocks, new_axes)

return type(self).from_blocks(new_blocks, new_axes)

def _slice_take_blocks_ax0(self, slice_or_indexer, fill_tuple=None):
"""
Expand Down Expand Up @@ -1500,6 +1511,8 @@ def __init__(
do_integrity_check: bool = False,
fastpath: bool = False,
):
assert isinstance(block, Block), type(block)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@simonjayhawkins is mypy reliable enough that we can remove this assertion? these add up in things like groupby.apply when we create many many Series objects

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is mypy reliable enough that we can remove this assertion?

not really as we don't have very strict type checking.

we could call the constructor with a variable that has type Any, or we could call the constructor from a function that is not currently type checked (still don't have check_untyped_defs for many modules)

to prevent calling the constructor with a variable with type Any, we would need disallow dynamic typing completely, https://mypy.readthedocs.io/en/stable/config_file.html#disallow-dynamic-typing


if isinstance(axis, list):
if len(axis) != 1:
raise ValueError(
Expand All @@ -1510,38 +1523,29 @@ def __init__(
# passed from constructor, single block, single axis
if fastpath:
self.axes = [axis]
if isinstance(block, list):

# empty block
if len(block) == 0:
block = [np.array([])]
elif len(block) != 1:
raise ValueError(
"Cannot create SingleBlockManager with more than 1 block"
)
block = block[0]
else:
self.axes = [ensure_index(axis)]

# create the block here
if isinstance(block, list):

# provide consolidation to the interleaved_dtype
if len(block) > 1:
dtype = _interleaved_dtype(block)
block = [b.astype(dtype) for b in block]
block = _consolidate(block)

if len(block) != 1:
raise ValueError(
"Cannot create SingleBlockManager with more than 1 block"
)
block = block[0]
self.blocks = tuple([block])

if not isinstance(block, Block):
block = make_block(block, placement=slice(0, len(axis)), ndim=1)
@classmethod
def from_blocks(
cls, blocks: List[Block], axes: List[Index]
) -> "SingleBlockManager":
"""
Constructor for BlockManager and SingleBlockManager with same signature.
"""
assert len(blocks) == 1
assert len(axes) == 1
return cls(blocks[0], axes[0], do_integrity_check=False, fastpath=True)

self.blocks = tuple([block])
@classmethod
def from_array(cls, array: ArrayLike, index: Index) -> "SingleBlockManager":
"""
Constructor for if we have an array that is not yet a Block.
"""
block = make_block(array, placement=slice(0, len(index)), ndim=1)
return cls(block, index, fastpath=True)

def _post_setstate(self):
pass
Expand All @@ -1568,7 +1572,10 @@ def get_slice(self, slobj: slice, axis: int = 0) -> "SingleBlockManager":
if axis >= self.ndim:
raise IndexError("Requested axis not found in manager")

return type(self)(self._block._slice(slobj), self.index[slobj], fastpath=True)
blk = self._block
array = blk._slice(slobj)
block = blk.make_block_same_class(array, placement=range(len(array)))
return type(self)(block, self.index[slobj], fastpath=True)

@property
def index(self) -> Index:
Expand Down Expand Up @@ -1630,7 +1637,7 @@ def fast_xs(self, loc):
"""
raise NotImplementedError("Use series._values[loc] instead")

def concat(self, to_concat, new_axis) -> "SingleBlockManager":
def concat(self, to_concat, new_axis: Index) -> "SingleBlockManager":
"""
Concatenate a list of SingleBlockManagers into a single
SingleBlockManager.
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,7 @@ def __init__(

# data is an ndarray, index is defined
if not isinstance(data, SingleBlockManager):
data = SingleBlockManager(data, index, fastpath=True)
data = SingleBlockManager.from_array(data, index)
if copy:
data = data.copy()
if index is None:
Expand Down Expand Up @@ -317,7 +317,7 @@ def __init__(
else:
data = sanitize_array(data, index, dtype, copy, raise_cast_failure=True)

data = SingleBlockManager(data, index, fastpath=True)
data = SingleBlockManager.from_array(data, index)

generic.NDFrame.__init__(self, data)
self.name = name
Expand Down
5 changes: 3 additions & 2 deletions pandas/tests/extension/test_external_block.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import pytest

import pandas as pd
from pandas.core.internals import BlockManager
from pandas.core.internals import BlockManager, SingleBlockManager
from pandas.core.internals.blocks import Block, NonConsolidatableMixIn


Expand Down Expand Up @@ -36,7 +36,8 @@ def test_concat_series():
# GH17728
values = np.arange(3, dtype="int64")
block = CustomBlock(values, placement=slice(0, 3))
s = pd.Series(block, pd.RangeIndex(3), fastpath=True)
mgr = SingleBlockManager(block, pd.RangeIndex(3))
s = pd.Series(mgr, pd.RangeIndex(3), fastpath=True)

res = pd.concat([s, s])
assert isinstance(res._data.blocks[0], CustomBlock)
Expand Down