From 619594e3394b45188681e2172c65c0e96a330696 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 2 Mar 2020 15:39:23 -0800 Subject: [PATCH 1/2] annotate --- pandas/core/internals/blocks.py | 84 +++++++++++++++++++++------------ 1 file changed, 54 insertions(+), 30 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 34fa4c0e6544e..7592fbdc93af4 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -11,6 +11,7 @@ import pandas._libs.internals as libinternals from pandas._libs.tslibs import Timedelta, conversion from pandas._libs.tslibs.timezones import tz_compare +from pandas._typing import DtypeObj from pandas.util._validators import validate_bool_kwarg from pandas.core.dtypes.cast import ( @@ -170,20 +171,20 @@ def _consolidate_key(self): return (self._can_consolidate, self.dtype.name) @property - def _is_single_block(self): + def _is_single_block(self) -> bool: return self.ndim == 1 @property - def is_view(self): + def is_view(self) -> bool: """ return a boolean if I am possibly a view """ return self.values.base is not None @property - def is_datelike(self): + def is_datelike(self) -> bool: """ return True if I am a non-datelike """ return self.is_datetime or self.is_timedelta - def is_categorical_astype(self, dtype): + def is_categorical_astype(self, dtype) -> bool: """ validate that we have a astypeable to categorical, returns a boolean if we are a categorical @@ -255,7 +256,7 @@ def mgr_locs(self, new_mgr_locs): self._mgr_locs = new_mgr_locs @property - def array_dtype(self): + def array_dtype(self) -> DtypeObj: """ the dtype to return if I want to construct this block as an array @@ -333,7 +334,7 @@ def dtype(self): return self.values.dtype @property - def ftype(self): + def ftype(self) -> str: if getattr(self.values, "_pandas_ftype", False): dtype = self.dtype.subtype else: @@ -367,7 +368,7 @@ def set(self, locs, values): """ self.values[locs] = values - def delete(self, loc): + def delete(self, loc) -> None: """ Delete given loc(-s) from block in-place. """ @@ -401,7 +402,7 @@ def _split_op_result(self, result) -> List["Block"]: return [result] - def fillna(self, value, limit=None, inplace=False, downcast=None): + def fillna(self, value, limit=None, inplace: bool = False, downcast=None): """ fillna on the block with the value. If we fail, then convert to ObjectBlock and try again @@ -687,7 +688,7 @@ def to_native_types(self, slicer=None, na_rep="nan", quoting=None, **kwargs): return values # block actions # - def copy(self, deep=True): + def copy(self, deep: bool = True): """ copy constructor """ values = self.values if deep: @@ -695,7 +696,13 @@ def copy(self, deep=True): return self.make_block_same_class(values, ndim=self.ndim) def replace( - self, to_replace, value, inplace=False, filter=None, regex=False, convert=True + self, + to_replace, + value, + inplace: bool = False, + filter=None, + regex: bool = False, + convert: bool = True, ): """ replace the to_replace value with value, possible to create new @@ -917,7 +924,15 @@ def setitem(self, indexer, value): block = self.make_block(values) return block - def putmask(self, mask, new, align=True, inplace=False, axis=0, transpose=False): + def putmask( + self, + mask, + new, + align: bool = True, + inplace: bool = False, + axis: int = 0, + transpose: bool = False, + ): """ putmask the data to the block; it is possible that we may create a new dtype of block @@ -1264,7 +1279,7 @@ def func(x): blocks = [self.make_block_same_class(interp_values)] return self._maybe_downcast(blocks, downcast) - def take_nd(self, indexer, axis, new_mgr_locs=None, fill_tuple=None): + def take_nd(self, indexer, axis: int, new_mgr_locs=None, fill_tuple=None): """ Take values according to indexer and return them as a block.bb @@ -1305,7 +1320,7 @@ def diff(self, n: int, axis: int = 1) -> List["Block"]: new_values = _block_shape(new_values, ndim=self.ndim) return [self.make_block(values=new_values)] - def shift(self, periods, axis=0, fill_value=None): + def shift(self, periods, axis: int = 0, fill_value=None): """ shift the block by periods, possibly upcast """ # convert integer to float if necessary. need to do a lot more than # that, handle boolean etc also @@ -1337,7 +1352,7 @@ def where( self, other, cond, - align=True, + align: bool = True, errors="raise", try_cast: bool = False, axis: int = 0, @@ -1349,11 +1364,12 @@ def where( ---------- other : a ndarray/object cond : the condition to respect - align : boolean, perform alignment on other/cond + align : bool, default True + Perform alignment on other/cond. errors : str, {'raise', 'ignore'}, default 'raise' - ``raise`` : allow exceptions to be raised - ``ignore`` : suppress exceptions. On error return original object - axis : int + axis : int, default 0 Returns ------- @@ -1485,7 +1501,7 @@ def _unstack(self, unstacker_func, new_columns, n_rows, fill_value): blocks = [make_block(new_values, placement=new_placement)] return blocks, mask - def quantile(self, qs, interpolation="linear", axis=0): + def quantile(self, qs, interpolation="linear", axis: int = 0): """ compute the quantiles of the @@ -1542,7 +1558,13 @@ def quantile(self, qs, interpolation="linear", axis=0): return make_block(result, placement=np.arange(len(result)), ndim=ndim) def _replace_coerce( - self, to_replace, value, inplace=True, regex=False, convert=False, mask=None + self, + to_replace, + value, + inplace: bool = True, + regex: bool = False, + convert: bool = False, + mask=None, ): """ Replace value corresponding to the given boolean array with another @@ -1554,7 +1576,7 @@ def _replace_coerce( Scalar to replace or regular expression to match. value : object Replacement object. - inplace : bool, default False + inplace : bool, default True Perform inplace modification. regex : bool, default False If true, perform regular expression substitution. @@ -1641,7 +1663,9 @@ def set(self, locs, values, check=False): assert locs.tolist() == [0] self.values = values - def putmask(self, mask, new, align=True, inplace=False, axis=0, transpose=False): + def putmask( + self, mask, new, align=True, inplace=False, axis=0, transpose=False, + ): """ putmask the data to the block; we must be a single block and not generate other blocks @@ -1757,7 +1781,7 @@ def _can_hold_na(self): return self._holder._can_hold_na @property - def is_view(self): + def is_view(self) -> bool: """Extension arrays are never treated as views.""" return False @@ -1822,7 +1846,7 @@ def to_native_types(self, slicer=None, na_rep="nan", quoting=None, **kwargs): # we are expected to return a 2-d ndarray return values.reshape(1, len(values)) - def take_nd(self, indexer, axis=0, new_mgr_locs=None, fill_tuple=None): + def take_nd(self, indexer, axis: int = 0, new_mgr_locs=None, fill_tuple=None): """ Take values according to indexer and return them as a block. """ @@ -2083,7 +2107,7 @@ def to_native_types( ) return formatter.get_result_as_array() - def should_store(self, value): + def should_store(self, value) -> bool: # when inserting a column should not coerce integers to floats # unnecessarily return issubclass(value.dtype.type, np.floating) and value.dtype == self.dtype @@ -2101,7 +2125,7 @@ def _can_hold_element(self, element: Any) -> bool: element, (float, int, complex, np.float_, np.int_) ) and not isinstance(element, (bool, np.bool_)) - def should_store(self, value): + def should_store(self, value) -> bool: return issubclass(value.dtype.type, np.complexfloating) @@ -2120,7 +2144,7 @@ def _can_hold_element(self, element: Any) -> bool: ) return is_integer(element) - def should_store(self, value): + def should_store(self, value) -> bool: return is_integer_dtype(value) and value.dtype == self.dtype @@ -2258,7 +2282,7 @@ def to_native_types( ).reshape(i8values.shape) return np.atleast_2d(result) - def should_store(self, value): + def should_store(self, value) -> bool: return ( issubclass(value.dtype.type, np.datetime64) and not is_datetime64tz_dtype(value) @@ -2323,7 +2347,7 @@ def _maybe_coerce_values(self, values): return values @property - def is_view(self): + def is_view(self) -> bool: """ return a boolean if I am possibly a view """ # check the ndarray values of the DatetimeIndex values return self.values._data.base is not None @@ -2510,7 +2534,7 @@ def fillna(self, value, **kwargs): ) return super().fillna(value, **kwargs) - def should_store(self, value): + def should_store(self, value) -> bool: return issubclass( value.dtype.type, np.timedelta64 ) and not is_extension_array_dtype(value) @@ -2556,7 +2580,7 @@ def _can_hold_element(self, element: Any) -> bool: return issubclass(tipo.type, np.bool_) return isinstance(element, (bool, np.bool_)) - def should_store(self, value): + def should_store(self, value) -> bool: return issubclass(value.dtype.type, np.bool_) and not is_extension_array_dtype( value ) @@ -2648,7 +2672,7 @@ def _maybe_downcast(self, blocks: List["Block"], downcast=None) -> List["Block"] def _can_hold_element(self, element: Any) -> bool: return True - def should_store(self, value): + def should_store(self, value) -> bool: return not ( issubclass( value.dtype.type, From 3ff0656f91ef81db6360cbe996b93b057fcf2a71 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 2 Mar 2020 15:41:41 -0800 Subject: [PATCH 2/2] annotate --- pandas/core/internals/managers.py | 101 ++++++++++++++++++------------ 1 file changed, 60 insertions(+), 41 deletions(-) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 9c90b20fc0f16..64896e2cac311 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -145,19 +145,20 @@ def __init__( self._rebuild_blknos_and_blklocs() - def make_empty(self, axes=None): + def make_empty(self, axes=None) -> "BlockManager": """ return an empty BlockManager with the items axis of len 0 """ if axes is None: - axes = [ensure_index([])] + [ensure_index(a) for a in self.axes[1:]] + axes = [Index([])] + self.axes[1:] # preserve dtype if possible if self.ndim == 1: + assert isinstance(self, SingleBlockManager) # for mypy blocks = np.array([], dtype=self.array_dtype) else: blocks = [] return type(self)(blocks, axes) - def __nonzero__(self): + def __nonzero__(self) -> bool: return True # Python3 compat @@ -171,7 +172,7 @@ def shape(self) -> Tuple[int, ...]: def ndim(self) -> int: return len(self.axes) - def set_axis(self, axis: int, new_labels: Index): + def set_axis(self, axis: int, new_labels: Index) -> None: # Caller is responsible for ensuring we have an Index object. old_len = len(self.axes[axis]) new_len = len(new_labels) @@ -214,7 +215,7 @@ def _is_single_block(self) -> bool: 0, len(self), 1 ) - def _rebuild_blknos_and_blklocs(self): + def _rebuild_blknos_and_blklocs(self) -> None: """ Update mgr._blknos / mgr._blklocs. """ @@ -288,7 +289,7 @@ def unpickle_block(values, mgr_locs): self._post_setstate() - def _post_setstate(self): + def _post_setstate(self) -> None: self._is_consolidated = False self._known_consolidated = False self._rebuild_blknos_and_blklocs() @@ -308,7 +309,7 @@ def __repr__(self) -> str: output += f"\n{pprint_thing(block)}" return output - def _verify_integrity(self): + def _verify_integrity(self) -> None: mgr_shape = self.shape tot_items = sum(len(x.mgr_locs) for x in self.blocks) for block in self.blocks: @@ -347,7 +348,7 @@ def reduce(self, func, *args, **kwargs): return res - def apply(self, f, filter=None, **kwargs): + def apply(self, f, filter=None, **kwargs) -> "BlockManager": """ Iterate over the blocks, collect and create a new BlockManager. @@ -430,13 +431,13 @@ def apply(self, f, filter=None, **kwargs): def quantile( self, - axis=0, - consolidate=True, - transposed=False, + axis: int = 0, + consolidate: bool = True, + transposed: bool = False, interpolation="linear", qs=None, numeric_only=None, - ): + ) -> "BlockManager": """ Iterate over blocks applying quantile reduction. This routine is intended for reduction type operations and @@ -455,7 +456,7 @@ def quantile( Returns ------- - Block Manager (new object) + BlockManager """ # Series dispatches to DataFrame for quantile, which allows us to # simplify some of the code here and in the blocks @@ -533,44 +534,48 @@ def get_axe(block, qs, axes): fastpath=True, ) - def isna(self, func): + def isna(self, func) -> "BlockManager": return self.apply("apply", func=func) - def where(self, **kwargs): + def where(self, **kwargs) -> "BlockManager": return self.apply("where", **kwargs) - def setitem(self, **kwargs): + def setitem(self, **kwargs) -> "BlockManager": return self.apply("setitem", **kwargs) def putmask(self, **kwargs): return self.apply("putmask", **kwargs) - def diff(self, **kwargs): + def diff(self, **kwargs) -> "BlockManager": return self.apply("diff", **kwargs) - def interpolate(self, **kwargs): + def interpolate(self, **kwargs) -> "BlockManager": return self.apply("interpolate", **kwargs) - def shift(self, **kwargs): + def shift(self, **kwargs) -> "BlockManager": return self.apply("shift", **kwargs) - def fillna(self, **kwargs): + def fillna(self, **kwargs) -> "BlockManager": return self.apply("fillna", **kwargs) - def downcast(self, **kwargs): + def downcast(self, **kwargs) -> "BlockManager": return self.apply("downcast", **kwargs) - def astype(self, dtype, copy: bool = False, errors: str = "raise"): + def astype( + self, dtype, copy: bool = False, errors: str = "raise" + ) -> "BlockManager": return self.apply("astype", dtype=dtype, copy=copy, errors=errors) - def convert(self, **kwargs): + def convert(self, **kwargs) -> "BlockManager": return self.apply("convert", **kwargs) - def replace(self, value, **kwargs): + def replace(self, value, **kwargs) -> "BlockManager": assert np.ndim(value) == 0, value return self.apply("replace", value=value, **kwargs) - def replace_list(self, src_list, dest_list, inplace=False, regex=False): + def replace_list( + self, src_list, dest_list, inplace: bool = False, regex: bool = False + ) -> "BlockManager": """ do a list replace """ inplace = validate_bool_kwarg(inplace, "inplace") @@ -602,7 +607,7 @@ def comp(s, regex=False): rb = [blk if inplace else blk.copy()] for i, (s, d) in enumerate(zip(src_list, dest_list)): # TODO: assert/validate that `d` is always a scalar? - new_rb = [] + new_rb: List[Block] = [] for b in rb: m = masks[i][b.mgr_locs.indexer] convert = i == src_len @@ -633,7 +638,7 @@ def is_consolidated(self) -> bool: self._consolidate_check() return self._is_consolidated - def _consolidate_check(self): + def _consolidate_check(self) -> None: ftypes = [blk.ftype for blk in self.blocks] self._is_consolidated = len(ftypes) == len(set(ftypes)) self._known_consolidated = True @@ -670,7 +675,7 @@ def is_view(self) -> bool: return False - def get_bool_data(self, copy: bool = False): + def get_bool_data(self, copy: bool = False) -> "BlockManager": """ Parameters ---------- @@ -680,7 +685,7 @@ def get_bool_data(self, copy: bool = False): self._consolidate_inplace() return self.combine([b for b in self.blocks if b.is_bool], copy) - def get_numeric_data(self, copy: bool = False): + def get_numeric_data(self, copy: bool = False) -> "BlockManager": """ Parameters ---------- @@ -712,7 +717,7 @@ def combine(self, blocks: List[Block], copy: bool = True) -> "BlockManager": return type(self)(new_blocks, axes, do_integrity_check=False) - def get_slice(self, slobj: slice, axis: int = 0): + def get_slice(self, slobj: slice, axis: int = 0) -> "BlockManager": if axis == 0: new_blocks = self._slice_take_blocks_ax0(slobj) @@ -737,7 +742,7 @@ def __contains__(self, item) -> bool: def nblocks(self) -> int: return len(self.blocks) - def copy(self, deep=True): + def copy(self, deep=True) -> "BlockManager": """ Make deep or shallow copy of BlockManager @@ -797,7 +802,7 @@ def as_array(self, transpose: bool = False) -> np.ndarray: return arr.transpose() if transpose else arr - def _interleave(self): + def _interleave(self) -> np.ndarray: """ Return ndarray from blocks with specified item order Items must be contained in the blocks @@ -807,7 +812,7 @@ def _interleave(self): # TODO: https://github.com/pandas-dev/pandas/issues/22791 # Give EAs some input on what happens here. Sparse needs this. if is_sparse(dtype): - dtype = dtype.subtype + dtype = dtype.subtype # type: ignore elif is_extension_array_dtype(dtype): dtype = "object" @@ -906,7 +911,7 @@ def consolidate(self) -> "BlockManager": bm._consolidate_inplace() return bm - def _consolidate_inplace(self): + def _consolidate_inplace(self) -> None: if not self.is_consolidated(): self.blocks = tuple(_consolidate(self.blocks)) self._is_consolidated = True @@ -1168,7 +1173,13 @@ def insert(self, loc: int, item, value, allow_duplicates: bool = False): self._consolidate_inplace() def reindex_axis( - self, new_index, axis, method=None, limit=None, fill_value=None, copy=True + self, + new_index, + axis: int, + method=None, + limit=None, + fill_value=None, + copy: bool = True, ): """ Conform block manager to new index. @@ -1183,7 +1194,13 @@ def reindex_axis( ) def reindex_indexer( - self, new_axis, indexer, axis, fill_value=None, allow_dups=False, copy=True + self, + new_axis, + indexer, + axis: int, + fill_value=None, + allow_dups=False, + copy: bool = True, ): """ Parameters @@ -1191,8 +1208,10 @@ def reindex_indexer( new_axis : Index indexer : ndarray of int64 or None axis : int - fill_value : object - allow_dups : bool + fill_value : object, default None + allow_dups : bool, default False + copy : bool, default True + pandas-indexer with -1's only. """ @@ -1329,7 +1348,7 @@ def _make_na_block(self, placement, fill_value=None): block_values.fill(fill_value) return make_block(block_values, placement=placement) - def take(self, indexer, axis=1, verify=True, convert=True): + def take(self, indexer, axis: int = 1, verify: bool = True, convert: bool = True): """ Take items along any axis. """ @@ -1506,11 +1525,11 @@ def index(self) -> Index: return self.axes[0] @property - def dtype(self): + def dtype(self) -> DtypeObj: return self._block.dtype @property - def array_dtype(self): + def array_dtype(self) -> DtypeObj: return self._block.array_dtype def get_dtype_counts(self):