diff --git a/pandas/core/base.py b/pandas/core/base.py index ef7e59c9e19d7..d38dbec684f35 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -8,6 +8,7 @@ import numpy as np import pandas._libs.lib as lib +from pandas._typing import T from pandas.compat import PYPY from pandas.compat.numpy import function as nv from pandas.errors import AbstractMethodError @@ -87,6 +88,14 @@ def __sizeof__(self): # object's 'sizeof' return super().__sizeof__() + def _ensure_type(self: T, obj) -> T: + """Ensure that an object has same type as self. + + Used by type checkers. + """ + assert isinstance(obj, type(self)), type(obj) + return obj + class NoNewAttributesMixin: """Mixin which prevents adding new attributes. diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 85bbf9b553b0a..d4e36a1894873 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -19,6 +19,7 @@ Sequence, Set, Tuple, + Type, Union, ) import warnings @@ -262,7 +263,7 @@ def _validate_dtype(self, dtype): # Construction @property - def _constructor(self): + def _constructor(self: FrameOrSeries) -> Type[FrameOrSeries]: """Used when a manipulation result has the same dimensions as the original. """ @@ -298,7 +299,7 @@ def _constructor_expanddim(self): _AXIS_LEN: int @classmethod - def _setup_axes(cls, axes: List[str], docs: Dict[str, str]): + def _setup_axes(cls, axes: List[str], docs: Dict[str, str]) -> None: """ Provide axes setup for the major PandasObjects. @@ -373,7 +374,7 @@ def _construct_axes_from_arguments( return axes, kwargs @classmethod - def _from_axes(cls, data, axes, **kwargs): + def _from_axes(cls: Type[FrameOrSeries], data, axes, **kwargs) -> FrameOrSeries: # for construction from BlockManager if isinstance(data, BlockManager): return cls(data, **kwargs) @@ -486,7 +487,7 @@ def shape(self) -> Tuple[int, ...]: return tuple(len(self._get_axis(a)) for a in self._AXIS_ORDERS) @property - def axes(self): + def axes(self) -> List[Index]: """ Return index label(s) of the internal NDFrame """ @@ -639,11 +640,11 @@ def set_axis(self, labels, axis=0, inplace=False): obj.set_axis(labels, axis=axis, inplace=True) return obj - def _set_axis(self, axis, labels): + def _set_axis(self, axis, labels) -> None: self._data.set_axis(axis, labels) self._clear_item_cache() - def swapaxes(self, axis1, axis2, copy=True): + def swapaxes(self: FrameOrSeries, axis1, axis2, copy=True) -> FrameOrSeries: """ Interchange axes and swap values axes appropriately. @@ -668,7 +669,7 @@ def swapaxes(self, axis1, axis2, copy=True): return self._constructor(new_values, *new_axes).__finalize__(self) - def droplevel(self, level, axis=0): + def droplevel(self: FrameOrSeries, level, axis=0) -> FrameOrSeries: """ Return DataFrame with requested index / column level(s) removed. @@ -728,7 +729,7 @@ def droplevel(self, level, axis=0): result = self.set_axis(new_labels, axis=axis, inplace=False) return result - def pop(self, item): + def pop(self: FrameOrSeries, item) -> FrameOrSeries: """ Return item and drop from frame. Raise KeyError if not found. @@ -889,7 +890,7 @@ def squeeze(self, axis=None): ) ] - def swaplevel(self, i=-2, j=-1, axis=0): + def swaplevel(self: FrameOrSeries, i=-2, j=-1, axis=0) -> FrameOrSeries: """ Swap levels i and j in a MultiIndex on a particular axis @@ -1473,10 +1474,10 @@ def bool(self): self.__nonzero__() - def __abs__(self): + def __abs__(self: FrameOrSeries) -> FrameOrSeries: return self.abs() - def __round__(self, decimals=0): + def __round__(self: FrameOrSeries, decimals: int = 0) -> FrameOrSeries: return self.round(decimals) # ------------------------------------------------------------------------- @@ -2123,7 +2124,7 @@ def to_excel( inf_rep="inf", verbose=True, freeze_panes=None, - ): + ) -> None: df = self if isinstance(self, ABCDataFrame) else self.to_frame() from pandas.io.formats.excel import ExcelFormatter @@ -2347,7 +2348,7 @@ def to_hdf( data_columns: Optional[List[str]] = None, errors: str = "strict", encoding: str = "UTF-8", - ): + ) -> None: """ Write the contained data to an HDF5 file using HDFStore. @@ -2691,7 +2692,9 @@ def to_pickle( to_pickle(self, path, compression=compression, protocol=protocol) - def to_clipboard(self, excel: bool_t = True, sep: Optional[str] = None, **kwargs): + def to_clipboard( + self, excel: bool_t = True, sep: Optional[str] = None, **kwargs + ) -> None: r""" Copy object to the system clipboard. @@ -3259,7 +3262,9 @@ def _clear_item_cache(self) -> None: # ---------------------------------------------------------------------- # Indexing Methods - def take(self, indices, axis=0, is_copy: bool_t = True, **kwargs): + def take( + self: FrameOrSeries, indices, axis=0, is_copy: bool_t = True, **kwargs + ) -> FrameOrSeries: """ Return the elements in the given *positional* indices along an axis. @@ -3542,7 +3547,7 @@ def _iget_item_cache(self, item): def _box_item_values(self, key, values): raise AbstractMethodError(self) - def _slice(self, slobj: slice, axis=0, kind=None): + def _slice(self: FrameOrSeries, slobj: slice, axis=0, kind=None) -> FrameOrSeries: """ Construct a slice of this container. @@ -3668,7 +3673,7 @@ def _check_setitem_copy(self, stacklevel=4, t="setting", force=False): elif value == "warn": warnings.warn(t, com.SettingWithCopyWarning, stacklevel=stacklevel) - def __delitem__(self, key): + def __delitem__(self, key) -> None: """ Delete item """ @@ -3730,13 +3735,13 @@ def _is_view(self): return self._data.is_view def reindex_like( - self, + self: FrameOrSeries, other, method: Optional[str] = None, copy: bool_t = True, limit=None, tolerance=None, - ): + ) -> FrameOrSeries: """ Return an object with matching indices as other object. @@ -3878,7 +3883,9 @@ def drop( else: return obj - def _drop_axis(self, labels, axis, level=None, errors: str = "raise"): + def _drop_axis( + self: FrameOrSeries, labels, axis, level=None, errors: str = "raise" + ) -> FrameOrSeries: """ Drop labels from specified axis. Used in the ``drop`` method internally. @@ -3948,7 +3955,7 @@ def _update_inplace(self, result, verify_is_copy: bool_t = True) -> None: self._data = getattr(result, "_data", result) self._maybe_update_cacher(verify_is_copy=verify_is_copy) - def add_prefix(self, prefix: str): + def add_prefix(self: FrameOrSeries, prefix: str) -> FrameOrSeries: """ Prefix labels with string `prefix`. @@ -4007,7 +4014,7 @@ def add_prefix(self, prefix: str): mapper = {self._info_axis_name: f} return self.rename(**mapper) - def add_suffix(self, suffix: str): + def add_suffix(self: FrameOrSeries, suffix: str) -> FrameOrSeries: """ Suffix labels with string `suffix`. @@ -4227,7 +4234,7 @@ def sort_index( new_axis = labels.take(sort_index) return self.reindex(**{axis_name: new_axis}) - def reindex(self, *args, **kwargs): + def reindex(self: FrameOrSeries, *args, **kwargs) -> FrameOrSeries: """ Conform %(klass)s to new index with optional filling logic. @@ -4475,7 +4482,9 @@ def reindex(self, *args, **kwargs): axes, level, limit, tolerance, method, fill_value, copy ).__finalize__(self) - def _reindex_axes(self, axes, level, limit, tolerance, method, fill_value, copy): + def _reindex_axes( + self: FrameOrSeries, axes, level, limit, tolerance, method, fill_value, copy + ) -> FrameOrSeries: """Perform the reindex for all the axes.""" obj = self for a in self._AXIS_ORDERS: @@ -4511,12 +4520,12 @@ def _reindex_multi(self, axes, copy, fill_value): raise AbstractMethodError(self) def _reindex_with_indexers( - self, + self: FrameOrSeries, reindexers, fill_value=None, copy: bool_t = False, allow_dups: bool_t = False, - ): + ) -> FrameOrSeries: """allow_dups indicates an internal call here """ # reindex doing multiple operations on different axes if indicated @@ -4548,12 +4557,12 @@ def _reindex_with_indexers( return self._constructor(new_data).__finalize__(self) def filter( - self, + self: FrameOrSeries, items=None, like: Optional[str] = None, regex: Optional[str] = None, axis=None, - ): + ) -> FrameOrSeries: """ Subset the dataframe rows or columns according to the specified index labels. @@ -4793,14 +4802,14 @@ def tail(self: FrameOrSeries, n: int = 5) -> FrameOrSeries: return self.iloc[-n:] def sample( - self, + self: FrameOrSeries, n=None, frac=None, replace=False, weights=None, random_state=None, axis=None, - ): + ) -> FrameOrSeries: """ Return a random sample of items from an axis of object. @@ -5339,7 +5348,7 @@ def _get_bool_data(self): # Internal Interface Methods @property - def values(self): + def values(self) -> np.ndarray: """ Return a Numpy representation of the DataFrame. @@ -5416,16 +5425,16 @@ def values(self): return self._data.as_array(transpose=self._AXIS_REVERSED) @property - def _values(self): + def _values(self) -> np.ndarray: """internal implementation""" return self.values @property - def _get_values(self): + def _get_values(self) -> np.ndarray: # compat return self.values - def _internal_get_values(self): + def _internal_get_values(self) -> np.ndarray: """ Return an ndarray after converting sparse values to dense. @@ -5489,7 +5498,9 @@ def _to_dict_of_blocks(self, copy: bool_t = True): for k, v, in self._data.to_dict(copy=copy).items() } - def astype(self, dtype, copy: bool_t = True, errors: str = "raise"): + def astype( + self: FrameOrSeries, dtype, copy: bool_t = True, errors: str = "raise" + ) -> FrameOrSeries: """ Cast a pandas object to a specified dtype ``dtype``. @@ -5797,7 +5808,7 @@ def _convert( ) ).__finalize__(self) - def infer_objects(self): + def infer_objects(self: FrameOrSeries) -> FrameOrSeries: """ Attempt to infer better dtypes for object columns. @@ -7054,11 +7065,11 @@ def asof(self, where, subset=None): """ @Appender(_shared_docs["isna"] % _shared_doc_kwargs) - def isna(self): + def isna(self: FrameOrSeries) -> FrameOrSeries: return isna(self).__finalize__(self) @Appender(_shared_docs["isna"] % _shared_doc_kwargs) - def isnull(self): + def isnull(self: FrameOrSeries) -> FrameOrSeries: return isna(self).__finalize__(self) _shared_docs[ @@ -7124,11 +7135,11 @@ def isnull(self): """ @Appender(_shared_docs["notna"] % _shared_doc_kwargs) - def notna(self): + def notna(self: FrameOrSeries) -> FrameOrSeries: return notna(self).__finalize__(self) @Appender(_shared_docs["notna"] % _shared_doc_kwargs) - def notnull(self): + def notnull(self: FrameOrSeries) -> FrameOrSeries: return notna(self).__finalize__(self) def _clip_with_scalar(self, lower, upper, inplace: bool_t = False): @@ -7180,14 +7191,14 @@ def _clip_with_one_bound(self, threshold, method, axis, inplace): return self.where(subset, threshold, axis=axis, inplace=inplace) def clip( - self, + self: FrameOrSeries, lower=None, upper=None, axis=None, inplace: bool_t = False, *args, **kwargs, - ): + ) -> FrameOrSeries: """ Trim values at input threshold(s). @@ -7364,13 +7375,13 @@ def clip( """ def asfreq( - self, + self: FrameOrSeries, freq, method=None, how: Optional[str] = None, normalize: bool_t = False, fill_value=None, - ): + ) -> FrameOrSeries: """ Convert TimeSeries to specified frequency. @@ -7473,7 +7484,9 @@ def asfreq( fill_value=fill_value, ) - def at_time(self, time, asof: bool_t = False, axis=None): + def at_time( + self: FrameOrSeries, time, asof: bool_t = False, axis=None + ) -> FrameOrSeries: """ Select values at particular time of day (e.g. 9:30AM). @@ -7530,13 +7543,13 @@ def at_time(self, time, asof: bool_t = False, axis=None): return self.take(indexer, axis=axis) def between_time( - self, + self: FrameOrSeries, start_time, end_time, include_start: bool_t = True, include_end: bool_t = True, axis=None, - ): + ) -> FrameOrSeries: """ Select values between particular times of the day (e.g., 9:00-9:30 AM). @@ -7905,7 +7918,7 @@ def resample( level=level, ) - def first(self, offset): + def first(self: FrameOrSeries, offset) -> FrameOrSeries: """ Method to subset initial periods of time series data based on a date offset. @@ -7967,7 +7980,7 @@ def first(self, offset): return self.loc[:end] - def last(self, offset): + def last(self: FrameOrSeries, offset) -> FrameOrSeries: """ Method to subset final periods of time series data based on a date offset. @@ -8326,8 +8339,12 @@ def _align_frame( ) if method is not None: - left = left.fillna(axis=fill_axis, method=method, limit=limit) - right = right.fillna(axis=fill_axis, method=method, limit=limit) + left = self._ensure_type( + left.fillna(method=method, axis=fill_axis, limit=limit) + ) + right = self._ensure_type( + right.fillna(method=method, axis=fill_axis, limit=limit) + ) # if DatetimeIndex have different tz, convert to UTC if is_datetime64tz_dtype(left.index): @@ -8820,7 +8837,9 @@ def mask( """ @Appender(_shared_docs["shift"] % _shared_doc_kwargs) - def shift(self, periods=1, freq=None, axis=0, fill_value=None): + def shift( + self: FrameOrSeries, periods=1, freq=None, axis=0, fill_value=None + ) -> FrameOrSeries: if periods == 0: return self.copy() @@ -8871,7 +8890,9 @@ def slice_shift(self: FrameOrSeries, periods: int = 1, axis=0) -> FrameOrSeries: return new_obj.__finalize__(self) - def tshift(self, periods: int = 1, freq=None, axis=0): + def tshift( + self: FrameOrSeries, periods: int = 1, freq=None, axis=0 + ) -> FrameOrSeries: """ Shift the time index, using the index's frequency if available. @@ -9314,7 +9335,7 @@ def _tz_localize(ax, tz, ambiguous, nonexistent): # ---------------------------------------------------------------------- # Numeric Methods - def abs(self): + def abs(self: FrameOrSeries) -> FrameOrSeries: """ Return a Series/DataFrame with absolute numeric value of each element. @@ -9383,7 +9404,9 @@ def abs(self): """ return np.abs(self) - def describe(self, percentiles=None, include=None, exclude=None): + def describe( + self: FrameOrSeries, percentiles=None, include=None, exclude=None + ) -> FrameOrSeries: """ Generate descriptive statistics. @@ -9719,7 +9742,7 @@ def describe_1d(data): ldesc = [describe_1d(s) for _, s in data.items()] # set a convenient order for rows - names = [] + names: List[Optional[Hashable]] = [] ldesc_indexes = sorted((x.index for x in ldesc), key=len) for idxnames in ldesc_indexes: for name in idxnames: @@ -9848,13 +9871,22 @@ def describe_1d(data): """ @Appender(_shared_docs["pct_change"] % _shared_doc_kwargs) - def pct_change(self, periods=1, fill_method="pad", limit=None, freq=None, **kwargs): + def pct_change( + self: FrameOrSeries, + periods=1, + fill_method="pad", + limit=None, + freq=None, + **kwargs, + ) -> FrameOrSeries: # TODO: Not sure if above is correct - need someone to confirm. axis = self._get_axis_number(kwargs.pop("axis", self._stat_axis_name)) if fill_method is None: data = self else: - data = self.fillna(method=fill_method, limit=limit, axis=axis) + data = self._ensure_type( + self.fillna(method=fill_method, axis=axis, limit=limit) + ) rs = data.div(data.shift(periods=periods, freq=freq, axis=axis, **kwargs)) - 1 rs = rs.loc[~rs.index.duplicated()] diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index 2eb2990bd58c4..c544c132d6921 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -148,9 +148,7 @@ def pivot_table( table = table.sort_index(axis=1) if fill_value is not None: - filled = table.fillna(value=fill_value, downcast="infer") - assert filled is not None # needed for mypy - table = filled + table = table._ensure_type(table.fillna(fill_value, downcast="infer")) if margins: if dropna: