Skip to content

TYP: enforce annotation on SingleBlockManager.__init__ #32421

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Mar 11, 2020
68 changes: 34 additions & 34 deletions pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import numpy as np

from pandas._libs import Timedelta, Timestamp, internals as libinternals, lib
from pandas._typing import DtypeObj, Label
from pandas._typing import ArrayLike, DtypeObj, Label
from pandas.util._validators import validate_bool_kwarg

from pandas.core.dtypes.cast import (
Expand Down Expand Up @@ -154,6 +154,7 @@ def make_empty(self, axes=None) -> "BlockManager":
if self.ndim == 1:
assert isinstance(self, SingleBlockManager) # for mypy
blocks = np.array([], dtype=self.array_dtype)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

blocks -> arr

blocks = make_block(blocks, placement=slice(0, 0), ndim=1)
else:
blocks = []
return type(self)(blocks, axes)
Expand Down Expand Up @@ -426,7 +427,11 @@ def apply(self, f, filter=None, **kwargs) -> "BlockManager":

if len(result_blocks) == 0:
return self.make_empty(self.axes)
bm = type(self)(result_blocks, self.axes, do_integrity_check=False)
if self.ndim == 1:
assert len(result_blocks) == 1
bm = type(self)(result_blocks[0], self.axes) # type: ignore
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why are you splitting this constructions? much harder to grok this code

else:
bm = type(self)(result_blocks, self.axes, do_integrity_check=False)
return bm

def quantile(
Expand Down Expand Up @@ -626,8 +631,12 @@ def comp(s, regex=False):
rb = new_rb
result_blocks.extend(rb)

bm = type(self)(result_blocks, self.axes)
bm._consolidate_inplace()
if self.ndim == 1:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i would make a function or method to do this, you seem to be repeating this code

assert len(result_blocks) == 1
bm = type(self)(result_blocks[0], self.axes)
else:
bm = type(self)(result_blocks, self.axes)
bm._consolidate_inplace()
return bm

def is_consolidated(self) -> bool:
Expand Down Expand Up @@ -715,6 +724,9 @@ def combine(self, blocks: List[Block], copy: bool = True) -> "BlockManager":
axes = list(self.axes)
axes[0] = self.items.take(indexer)

if self.ndim == 1:
assert len(new_blocks) == 1
return type(self)(new_blocks[0], axes) # type: ignore
return type(self)(new_blocks, axes, do_integrity_check=False)

def get_slice(self, slobj: slice, axis: int = 0) -> "BlockManager":
Expand Down Expand Up @@ -1263,6 +1275,9 @@ def reindex_indexer(

new_axes = list(self.axes)
new_axes[axis] = new_axis
if self.ndim == 1:
assert len(new_blocks) == 1
return type(self)(new_blocks[0], new_axes)
return type(self)(new_blocks, new_axes)

def _slice_take_blocks_ax0(self, slice_or_indexer, fill_tuple=None):
Expand Down Expand Up @@ -1464,6 +1479,8 @@ def __init__(
do_integrity_check: bool = False,
fastpath: bool = False,
):
assert isinstance(block, Block), type(block)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@simonjayhawkins is mypy reliable enough that we can remove this assertion? these add up in things like groupby.apply when we create many many Series objects

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is mypy reliable enough that we can remove this assertion?

not really as we don't have very strict type checking.

we could call the constructor with a variable that has type Any, or we could call the constructor from a function that is not currently type checked (still don't have check_untyped_defs for many modules)

to prevent calling the constructor with a variable with type Any, we would need disallow dynamic typing completely, https://mypy.readthedocs.io/en/stable/config_file.html#disallow-dynamic-typing


if isinstance(axis, list):
if len(axis) != 1:
raise ValueError(
Expand All @@ -1474,39 +1491,19 @@ def __init__(
# passed from constructor, single block, single axis
if fastpath:
self.axes = [axis]
if isinstance(block, list):

# empty block
if len(block) == 0:
block = [np.array([])]
elif len(block) != 1:
raise ValueError(
"Cannot create SingleBlockManager with more than 1 block"
)
block = block[0]
else:
self.axes = [ensure_index(axis)]

# create the block here
if isinstance(block, list):

# provide consolidation to the interleaved_dtype
if len(block) > 1:
dtype = _interleaved_dtype(block)
block = [b.astype(dtype) for b in block]
block = _consolidate(block)

if len(block) != 1:
raise ValueError(
"Cannot create SingleBlockManager with more than 1 block"
)
block = block[0]

if not isinstance(block, Block):
block = make_block(block, placement=slice(0, len(axis)), ndim=1)

self.blocks = tuple([block])

@classmethod
def from_array(cls, array: ArrayLike, index: Index) -> "SingleBlockManager":
"""
Constructor for if we have an array that is not yet a Block.
"""
block = make_block(array, placement=slice(0, len(index)), ndim=1)
return cls(block, index, fastpath=True)

def _post_setstate(self):
pass

Expand All @@ -1532,7 +1529,10 @@ def get_slice(self, slobj: slice, axis: int = 0) -> "SingleBlockManager":
if axis >= self.ndim:
raise IndexError("Requested axis not found in manager")

return type(self)(self._block._slice(slobj), self.index[slobj], fastpath=True)
blk = self._block
array = blk._slice(slobj)
block = blk.make_block_same_class(array, placement=range(len(array)))
return type(self)(block, self.index[slobj], fastpath=True)

@property
def index(self) -> Index:
Expand Down Expand Up @@ -1594,7 +1594,7 @@ def fast_xs(self, loc):
"""
raise NotImplementedError("Use series._values[loc] instead")

def concat(self, to_concat, new_axis) -> "SingleBlockManager":
def concat(self, to_concat, new_axis: Index) -> "SingleBlockManager":
"""
Concatenate a list of SingleBlockManagers into a single
SingleBlockManager.
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,7 @@ def __init__(

# data is an ndarray, index is defined
if not isinstance(data, SingleBlockManager):
data = SingleBlockManager(data, index, fastpath=True)
data = SingleBlockManager.from_array(data, index)
if copy:
data = data.copy()
if index is None:
Expand Down Expand Up @@ -321,7 +321,7 @@ def __init__(
else:
data = sanitize_array(data, index, dtype, copy, raise_cast_failure=True)

data = SingleBlockManager(data, index, fastpath=True)
data = SingleBlockManager.from_array(data, index)

generic.NDFrame.__init__(self, data)
self.name = name
Expand Down
5 changes: 3 additions & 2 deletions pandas/tests/extension/test_external_block.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import pytest

import pandas as pd
from pandas.core.internals import BlockManager
from pandas.core.internals import BlockManager, SingleBlockManager
from pandas.core.internals.blocks import Block, NonConsolidatableMixIn


Expand Down Expand Up @@ -36,7 +36,8 @@ def test_concat_series():
# GH17728
values = np.arange(3, dtype="int64")
block = CustomBlock(values, placement=slice(0, 3))
s = pd.Series(block, pd.RangeIndex(3), fastpath=True)
mgr = SingleBlockManager(block, pd.RangeIndex(3))
s = pd.Series(mgr, pd.RangeIndex(3), fastpath=True)

res = pd.concat([s, s])
assert isinstance(res._data.blocks[0], CustomBlock)
Expand Down