From e3b709104c37107955b8bcd881609101eb88a844 Mon Sep 17 00:00:00 2001 From: Avinash Pancham Date: Fri, 8 Jan 2021 00:09:23 +0100 Subject: [PATCH] CLN: add typing to dtype arg in core/internals, core/reshape and core (GH38808) --- pandas/core/base.py | 10 ++++++++-- pandas/core/generic.py | 11 ++++++++--- pandas/core/internals/blocks.py | 16 ++++++++-------- pandas/core/internals/managers.py | 10 ++++++---- pandas/core/reshape/reshape.py | 11 ++++++----- pandas/core/series.py | 16 ++++++++++++---- 6 files changed, 48 insertions(+), 26 deletions(-) diff --git a/pandas/core/base.py b/pandas/core/base.py index afc22a8446dce..b603ba31f51dd 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -19,7 +19,7 @@ import numpy as np import pandas._libs.lib as lib -from pandas._typing import DtypeObj, IndexLabel +from pandas._typing import Dtype, DtypeObj, IndexLabel from pandas.compat import PYPY from pandas.compat.numpy import function as nv from pandas.errors import AbstractMethodError @@ -500,7 +500,13 @@ def array(self) -> ExtensionArray: """ raise AbstractMethodError(self) - def to_numpy(self, dtype=None, copy=False, na_value=lib.no_default, **kwargs): + def to_numpy( + self, + dtype: Optional[Dtype] = None, + copy: bool = False, + na_value=lib.no_default, + **kwargs, + ): """ A NumPy ndarray representing the values in this Series or Index. diff --git a/pandas/core/generic.py b/pandas/core/generic.py index fe86bf3f582ca..2f4340c17c5a7 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -37,6 +37,8 @@ from pandas._typing import ( Axis, CompressionOptions, + Dtype, + DtypeArg, FilePathOrBuffer, FrameOrSeries, IndexKeyFunc, @@ -44,6 +46,7 @@ JSONSerializable, Label, Level, + NpDtype, Renamer, StorageOptions, TimedeltaConvertibleTypes, @@ -210,7 +213,9 @@ def __init__( object.__setattr__(self, "_flags", Flags(self, allows_duplicate_labels=True)) @classmethod - def _init_mgr(cls, mgr, axes, dtype=None, copy: bool = False) -> BlockManager: + def _init_mgr( + cls, mgr, axes, dtype: Optional[Dtype] = None, copy: bool = False + ) -> BlockManager: """ passed a manager and a axes dict """ for a, axe in axes.items(): if axe is not None: @@ -1901,7 +1906,7 @@ def empty(self) -> bool_t: # GH#23114 Ensure ndarray.__op__(DataFrame) returns NotImplemented __array_priority__ = 1000 - def __array__(self, dtype=None) -> np.ndarray: + def __array__(self, dtype: Optional[NpDtype] = None) -> np.ndarray: return np.asarray(self._values, dtype=dtype) def __array_wrap__( @@ -2642,7 +2647,7 @@ def to_sql( index: bool_t = True, index_label=None, chunksize=None, - dtype=None, + dtype: Optional[DtypeArg] = None, method=None, ) -> None: """ diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 94c7d325d0bc8..06ed64401a38f 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -15,7 +15,7 @@ ) from pandas._libs.internals import BlockPlacement from pandas._libs.tslibs import conversion -from pandas._typing import ArrayLike, DtypeObj, Scalar, Shape +from pandas._typing import ArrayLike, Dtype, DtypeObj, Scalar, Shape from pandas.util._validators import validate_bool_kwarg from pandas.core.dtypes.cast import ( @@ -241,7 +241,7 @@ def array_values(self) -> ExtensionArray: """ return PandasArray(self.values) - def get_values(self, dtype=None): + def get_values(self, dtype: Optional[Dtype] = None): """ return an internal format, currently just the ndarray this is often overridden to handle to_dense like operations @@ -1669,7 +1669,7 @@ def setitem(self, indexer, value): self.values[indexer] = value return self - def get_values(self, dtype=None): + def get_values(self, dtype: Optional[Dtype] = None): # ExtensionArrays must be iterable, so this works. # TODO(EA2D): reshape not needed with 2D EAs return np.asarray(self.values).reshape(self.shape) @@ -1990,7 +1990,7 @@ class DatetimeLikeBlockMixin(Block): _can_hold_na = True - def get_values(self, dtype=None): + def get_values(self, dtype: Optional[Dtype] = None): """ return object dtype as boxed values, such as Timestamps/Timedelta """ @@ -2168,7 +2168,7 @@ def is_view(self) -> bool: # check the ndarray values of the DatetimeIndex values return self.values._data.base is not None - def get_values(self, dtype=None): + def get_values(self, dtype: Optional[Dtype] = None): """ Returns an ndarray of values. @@ -2449,7 +2449,7 @@ def replace( # Constructor Helpers -def get_block_type(values, dtype=None): +def get_block_type(values, dtype: Optional[Dtype] = None): """ Find the appropriate Block subclass to use for the given values and dtype. @@ -2464,7 +2464,7 @@ def get_block_type(values, dtype=None): """ # We use vtype and kind checks because they are much more performant # than is_foo_dtype - dtype = dtype or values.dtype + dtype = cast(np.dtype, pandas_dtype(dtype) if dtype else values.dtype) vtype = dtype.type kind = dtype.kind @@ -2500,7 +2500,7 @@ def get_block_type(values, dtype=None): return cls -def make_block(values, placement, klass=None, ndim=None, dtype=None): +def make_block(values, placement, klass=None, ndim=None, dtype: Optional[Dtype] = None): # Ensure that we don't allow PandasArray / PandasDtype in internals. # For now, blocks should be backed by ndarrays when possible. if isinstance(values, ABCPandasArray): diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index d44a3df45587a..d27efd98ab079 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -17,7 +17,7 @@ import numpy as np from pandas._libs import internals as libinternals, lib -from pandas._typing import ArrayLike, DtypeObj, Label, Shape +from pandas._typing import ArrayLike, Dtype, DtypeObj, Label, Shape from pandas.errors import PerformanceWarning from pandas.util._validators import validate_bool_kwarg @@ -816,7 +816,7 @@ def copy_func(ax): def as_array( self, transpose: bool = False, - dtype=None, + dtype: Optional[Dtype] = None, copy: bool = False, na_value=lib.no_default, ) -> np.ndarray: @@ -872,7 +872,9 @@ def as_array( return arr.transpose() if transpose else arr - def _interleave(self, dtype=None, na_value=lib.no_default) -> np.ndarray: + def _interleave( + self, dtype: Optional[Dtype] = None, na_value=lib.no_default + ) -> np.ndarray: """ Return ndarray from blocks with specified item order Items must be contained in the blocks @@ -1842,7 +1844,7 @@ def _simple_blockify(tuples, dtype) -> List[Block]: return [block] -def _multi_blockify(tuples, dtype=None): +def _multi_blockify(tuples, dtype: Optional[Dtype] = None): """ return an array of blocks that potentially have different dtypes """ # group by dtype grouper = itertools.groupby(tuples, lambda x: x[2].dtype) diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index d855886fb725f..fc8d2aee1e6cd 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -6,6 +6,7 @@ import pandas._libs.algos as libalgos import pandas._libs.reshape as libreshape from pandas._libs.sparse import IntIndex +from pandas._typing import Dtype from pandas.util._decorators import cache_readonly from pandas.core.dtypes.cast import maybe_promote @@ -732,11 +733,11 @@ def get_dummies( data, prefix=None, prefix_sep="_", - dummy_na=False, + dummy_na: bool = False, columns=None, - sparse=False, - drop_first=False, - dtype=None, + sparse: bool = False, + drop_first: bool = False, + dtype: Optional[Dtype] = None, ) -> "DataFrame": """ Convert categorical variable into dummy/indicator variables. @@ -921,7 +922,7 @@ def _get_dummies_1d( dummy_na=False, sparse=False, drop_first=False, - dtype=None, + dtype: Optional[Dtype] = None, ): from pandas.core.reshape.concat import concat diff --git a/pandas/core/series.py b/pandas/core/series.py index f3f80677d0fe4..668cad4f64ac3 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -28,10 +28,12 @@ AggFuncType, ArrayLike, Axis, + Dtype, DtypeObj, FrameOrSeriesUnion, IndexKeyFunc, Label, + NpDtype, StorageOptions, ValueKeyFunc, ) @@ -214,7 +216,13 @@ class Series(base.IndexOpsMixin, generic.NDFrame): # Constructors def __init__( - self, data=None, index=None, dtype=None, name=None, copy=False, fastpath=False + self, + data=None, + index=None, + dtype: Optional[Dtype] = None, + name=None, + copy: bool = False, + fastpath: bool = False, ): if ( @@ -337,7 +345,7 @@ def __init__( self.name = name self._set_axis(0, index, fastpath=True) - def _init_dict(self, data, index=None, dtype=None): + def _init_dict(self, data, index=None, dtype: Optional[Dtype] = None): """ Derive the "_mgr" and "index" attributes of a new Series from a dictionary input. @@ -612,7 +620,7 @@ def __len__(self) -> int: """ return len(self._mgr) - def view(self, dtype=None) -> "Series": + def view(self, dtype: Optional[Dtype] = None) -> "Series": """ Create a new view of the Series. @@ -686,7 +694,7 @@ def view(self, dtype=None) -> "Series": # NDArray Compat _HANDLED_TYPES = (Index, ExtensionArray, np.ndarray) - def __array__(self, dtype=None) -> np.ndarray: + def __array__(self, dtype: Optional[NpDtype] = None) -> np.ndarray: """ Return the values as a NumPy array.