From 3ee69ba5c1028a31c2df3f3f15a1450f0e9db1b0 Mon Sep 17 00:00:00 2001 From: tp Date: Tue, 31 Dec 2019 13:26:49 +0000 Subject: [PATCH 1/6] Add types to more top-level funcs --- pandas/core/algorithms.py | 9 ++++++--- pandas/core/frame.py | 4 ++-- pandas/core/reshape/concat.py | 4 ++-- pandas/core/reshape/melt.py | 4 +++- pandas/core/reshape/pivot.py | 6 ++++-- pandas/core/reshape/reshape.py | 9 ++++++--- pandas/io/formats/format.py | 12 +++++++++--- pandas/io/pytables.py | 4 +++- 8 files changed, 35 insertions(+), 17 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 42cfd9d54ac19..6edc235f8877b 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -3,7 +3,7 @@ intended for public consumption """ from textwrap import dedent -from typing import Dict, Optional, Tuple, Union +from typing import Dict, Optional, TYPE_CHECKING, Tuple, Union from warnings import catch_warnings, simplefilter, warn import numpy as np @@ -50,6 +50,9 @@ from pandas.core.construction import array, extract_array from pandas.core.indexers import validate_indices +if TYPE_CHECKING: + from pandas import Series + _shared_docs: Dict[str, str] = {} @@ -651,7 +654,7 @@ def value_counts( normalize: bool = False, bins=None, dropna: bool = True, -) -> ABCSeries: +) -> "Series": """ Compute a histogram of the counts of non-null values. @@ -793,7 +796,7 @@ def duplicated(values, keep="first") -> np.ndarray: return f(values, keep=keep) -def mode(values, dropna: bool = True) -> ABCSeries: +def mode(values, dropna: bool = True) -> "Series": """ Returns the mode(s) of an array. diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 97b218878f4cc..ba0c0e7d66b1d 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5878,7 +5878,7 @@ def groupby( @Substitution("") @Appender(_shared_docs["pivot"]) - def pivot(self, index=None, columns=None, values=None): + def pivot(self, index=None, columns=None, values=None) -> "DataFrame": from pandas.core.reshape.pivot import pivot return pivot(self, index=index, columns=columns, values=values) @@ -6025,7 +6025,7 @@ def pivot_table( dropna=True, margins_name="All", observed=False, - ): + ) -> "DataFrame": from pandas.core.reshape.pivot import pivot_table return pivot_table( diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index 8886732fc8d79..ccc1e86657801 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -2,7 +2,7 @@ concat routines """ -from typing import Hashable, List, Optional +from typing import Hashable, List, Optional, Union import numpy as np @@ -37,7 +37,7 @@ def concat( verify_integrity: bool = False, sort: bool = False, copy: bool = True, -): +) -> Union["DataFrame", "Series"]: """ Concatenate pandas objects along a particular axis with optional set logic along the other axes. diff --git a/pandas/core/reshape/melt.py b/pandas/core/reshape/melt.py index 38bda94489d01..722dd8751dfad 100644 --- a/pandas/core/reshape/melt.py +++ b/pandas/core/reshape/melt.py @@ -192,7 +192,9 @@ def lreshape(data: DataFrame, groups, dropna: bool = True, label=None) -> DataFr return data._constructor(mdata, columns=id_cols + pivot_cols) -def wide_to_long(df: DataFrame, stubnames, i, j, sep: str = "", suffix: str = r"\d+"): +def wide_to_long( + df: DataFrame, stubnames, i, j, sep: str = "", suffix: str = r"\d+" +) -> DataFrame: r""" Wide panel to long format. Less flexible but more user-friendly than melt. diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index c544c132d6921..3b68a141ad630 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -58,7 +58,9 @@ def pivot_table( pieces.append(table) keys.append(getattr(func, "__name__", func)) - return concat(pieces, keys=keys, axis=1) + result = concat(pieces, keys=keys, axis=1) + assert isinstance(result, DataFrame) + return result keys = index + columns @@ -459,7 +461,7 @@ def crosstab( margins_name: str = "All", dropna: bool = True, normalize=False, -): +) -> "DataFrame": """ Compute a simple cross tabulation of two (or more) factors. By default computes a frequency table of the factors unless an array of values and an diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index 004bd0199eb58..c86df4c8d29b6 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -1,5 +1,6 @@ from functools import partial import itertools +from typing import List import numpy as np @@ -755,7 +756,7 @@ def get_dummies( sparse=False, drop_first=False, dtype=None, -): +) -> "DataFrame": """ Convert categorical variable into dummy/indicator variables. @@ -899,7 +900,7 @@ def check_len(item, name): if data_to_encode.shape == data.shape: # Encoding the entire df, do not prepend any dropped columns - with_dummies = [] + with_dummies: List[DataFrame] = [] elif columns is not None: # Encoding only cols specified in columns. Get all cols not in # columns to prepend to result. @@ -921,7 +922,9 @@ def check_len(item, name): dtype=dtype, ) with_dummies.append(dummy) - result = concat(with_dummies, axis=1) + concatted = concat(with_dummies, axis=1) + assert isinstance(concatted, DataFrame) + result = concatted else: result = _get_dummies_1d( data, diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index ea22999470102..ff6ea8dc79957 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -281,7 +281,9 @@ def _chk_truncate(self) -> None: series = series.iloc[:max_rows] else: row_num = max_rows // 2 - series = concat((series.iloc[:row_num], series.iloc[-row_num:])) + concatted = concat((series.iloc[:row_num], series.iloc[-row_num:])) + assert isinstance(concatted, Series) + series = concatted self.tr_row_num = row_num else: self.tr_row_num = None @@ -676,9 +678,11 @@ def _chk_truncate(self) -> None: col_num = max_cols else: col_num = max_cols_adj // 2 - frame = concat( + concatted = concat( (frame.iloc[:, :col_num], frame.iloc[:, -col_num:]), axis=1 ) + assert isinstance(concatted, DataFrame) + frame = concatted # truncate formatter if isinstance(self.formatters, (list, tuple)): truncate_fmt = self.formatters @@ -695,7 +699,9 @@ def _chk_truncate(self) -> None: frame = frame.iloc[:max_rows, :] else: row_num = max_rows_adj // 2 - frame = concat((frame.iloc[:row_num, :], frame.iloc[-row_num:, :])) + concatted = concat((frame.iloc[:row_num, :], frame.iloc[-row_num:, :])) + assert isinstance(concatted, DataFrame) + frame = concatted self.tr_row_num = row_num else: self.tr_row_num = None diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 3d2c2159bfbdd..d0e5a87c01fe6 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -4432,7 +4432,9 @@ def read( if len(frames) == 1: df = frames[0] else: - df = concat(frames, axis=1) + concatted = concat(frames, axis=1) + assert isinstance(concatted, DataFrame) + df = concatted selection = Selection(self, where=where, start=start, stop=stop) # apply the selection filters & axis orderings From b8370e3a6bd0fbbb863c936e71cfa77f7d078e5b Mon Sep 17 00:00:00 2001 From: tp Date: Tue, 31 Dec 2019 14:29:22 +0000 Subject: [PATCH 2/6] overload concat function --- pandas/core/algorithms.py | 2 +- pandas/core/reshape/concat.py | 38 ++++++++++++++++++++++++++++++++-- pandas/core/reshape/pivot.py | 8 +++---- pandas/core/reshape/reshape.py | 4 +--- pandas/io/formats/format.py | 9 +++----- pandas/io/pytables.py | 4 +--- 6 files changed, 45 insertions(+), 20 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 6edc235f8877b..39e8e9008a844 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -3,7 +3,7 @@ intended for public consumption """ from textwrap import dedent -from typing import Dict, Optional, TYPE_CHECKING, Tuple, Union +from typing import TYPE_CHECKING, Dict, Optional, Tuple, Union from warnings import catch_warnings, simplefilter, warn import numpy as np diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index ccc1e86657801..4d4c147b063f8 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -2,7 +2,7 @@ concat routines """ -from typing import Hashable, List, Optional, Union +from typing import Hashable, List, Mapping, Optional, Sequence, Union, overload import numpy as np @@ -25,9 +25,28 @@ # --------------------------------------------------------------------- # Concatenate DataFrame objects +FrameOrSeriesUnion = Union["DataFrame", "Series"] + +@overload +def concat( + objs: Union[Sequence["DataFrame"], Mapping[str, "DataFrame"]], + axis=0, + join: str = "outer", + ignore_index: bool = False, + keys=None, + levels=None, + names=None, + verify_integrity: bool = False, + sort: bool = False, + copy: bool = True, +) -> "DataFrame": + ... + + +@overload def concat( - objs, + objs: Union[Sequence[FrameOrSeriesUnion], Mapping[str, FrameOrSeriesUnion]], axis=0, join: str = "outer", ignore_index: bool = False, @@ -37,6 +56,21 @@ def concat( verify_integrity: bool = False, sort: bool = False, copy: bool = True, +) -> Union["DataFrame", "Series"]: + ... + + +def concat( + objs: Union[Sequence[FrameOrSeriesUnion], Mapping[str, FrameOrSeriesUnion]], + axis=0, + join="outer", + ignore_index: bool = False, + keys=None, + levels=None, + names=None, + verify_integrity: bool = False, + sort: bool = False, + copy: bool = True, ) -> Union["DataFrame", "Series"]: """ Concatenate pandas objects along a particular axis with optional set logic diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index 3b68a141ad630..b443ba142369c 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -1,4 +1,4 @@ -from typing import TYPE_CHECKING, Callable, Dict, Tuple, Union +from typing import TYPE_CHECKING, Callable, Dict, List, Tuple, Union import numpy as np @@ -40,7 +40,7 @@ def pivot_table( columns = _convert_by(columns) if isinstance(aggfunc, list): - pieces = [] + pieces: List[DataFrame] = [] keys = [] for func in aggfunc: table = pivot_table( @@ -58,9 +58,7 @@ def pivot_table( pieces.append(table) keys.append(getattr(func, "__name__", func)) - result = concat(pieces, keys=keys, axis=1) - assert isinstance(result, DataFrame) - return result + return concat(pieces, keys=keys, axis=1) keys = index + columns diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index c86df4c8d29b6..da92e1154556a 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -922,9 +922,7 @@ def check_len(item, name): dtype=dtype, ) with_dummies.append(dummy) - concatted = concat(with_dummies, axis=1) - assert isinstance(concatted, DataFrame) - result = concatted + result = concat(with_dummies, axis=1) else: result = _get_dummies_1d( data, diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index ff6ea8dc79957..94259c5ecdba1 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -260,6 +260,7 @@ def __init__( self._chk_truncate() def _chk_truncate(self) -> None: + from pandas.core.series import Series from pandas.core.reshape.concat import concat self.tr_row_num: Optional[int] @@ -678,11 +679,9 @@ def _chk_truncate(self) -> None: col_num = max_cols else: col_num = max_cols_adj // 2 - concatted = concat( + frame = concat( (frame.iloc[:, :col_num], frame.iloc[:, -col_num:]), axis=1 ) - assert isinstance(concatted, DataFrame) - frame = concatted # truncate formatter if isinstance(self.formatters, (list, tuple)): truncate_fmt = self.formatters @@ -699,9 +698,7 @@ def _chk_truncate(self) -> None: frame = frame.iloc[:max_rows, :] else: row_num = max_rows_adj // 2 - concatted = concat((frame.iloc[:row_num, :], frame.iloc[-row_num:, :])) - assert isinstance(concatted, DataFrame) - frame = concatted + frame = concat((frame.iloc[:row_num, :], frame.iloc[-row_num:, :])) self.tr_row_num = row_num else: self.tr_row_num = None diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index d0e5a87c01fe6..3d2c2159bfbdd 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -4432,9 +4432,7 @@ def read( if len(frames) == 1: df = frames[0] else: - concatted = concat(frames, axis=1) - assert isinstance(concatted, DataFrame) - df = concatted + df = concat(frames, axis=1) selection = Selection(self, where=where, start=start, stop=stop) # apply the selection filters & axis orderings From 0b935ce9442647ec2a8a5b7812a3f27fa46d5902 Mon Sep 17 00:00:00 2001 From: tp Date: Tue, 31 Dec 2019 17:58:45 +0000 Subject: [PATCH 3/6] Add _typing.FrameOrSeries --- pandas/_testing.py | 6 ++-- pandas/_typing.py | 6 ++-- pandas/core/generic.py | 54 +++++++++++++++++----------------- pandas/core/groupby/generic.py | 6 ++-- pandas/core/groupby/groupby.py | 6 ++-- pandas/core/groupby/grouper.py | 10 +++---- pandas/core/groupby/ops.py | 18 ++++++------ pandas/core/reshape/concat.py | 14 ++++----- pandas/core/reshape/merge.py | 4 +-- pandas/core/window/rolling.py | 4 +-- pandas/io/pytables.py | 12 ++++---- 11 files changed, 72 insertions(+), 68 deletions(-) diff --git a/pandas/_testing.py b/pandas/_testing.py index 2ebebc5d5e10a..4032e2d1b4cbe 100644 --- a/pandas/_testing.py +++ b/pandas/_testing.py @@ -22,7 +22,7 @@ ) import pandas._libs.testing as _testing -from pandas._typing import FrameOrSeries +from pandas._typing import FrameOrSeriesT from pandas.compat import _get_lzma_file, _import_lzma from pandas.core.dtypes.common import ( @@ -101,7 +101,9 @@ def reset_display_options(): pd.reset_option("^display.", silent=True) -def round_trip_pickle(obj: FrameOrSeries, path: Optional[str] = None) -> FrameOrSeries: +def round_trip_pickle( + obj: FrameOrSeriesT, path: Optional[str] = None +) -> FrameOrSeriesT: """ Pickle an object and then read it again. diff --git a/pandas/_typing.py b/pandas/_typing.py index 7b89486751f12..f615e6e50ff10 100644 --- a/pandas/_typing.py +++ b/pandas/_typing.py @@ -21,9 +21,10 @@ from pandas.core.arrays.base import ExtensionArray # noqa: F401 from pandas.core.dtypes.dtypes import ExtensionDtype # noqa: F401 from pandas.core.indexes.base import Index # noqa: F401 - from pandas.core.series import Series # noqa: F401 from pandas.core.generic import NDFrame # noqa: F401 from pandas import Interval # noqa: F401 + from pandas.core.series import Series # noqa: F401 + from pandas.core.frame import DataFrame # noqa: F401 # array-like @@ -41,7 +42,8 @@ Dtype = Union[str, np.dtype, "ExtensionDtype"] FilePathOrBuffer = Union[str, Path, IO[AnyStr]] -FrameOrSeries = TypeVar("FrameOrSeries", bound="NDFrame") +FrameOrSeries = Union["DataFrame", "Series"] +FrameOrSeriesT = TypeVar("FrameOrSeriesT", bound="NDFrame") Axis = Union[str, int] Ordered = Optional[bool] JSONSerializable = Union[PythonScalar, List, Dict] diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 3b8e9cf82f08c..c4ba8216710e0 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -30,7 +30,7 @@ from pandas._config import config from pandas._libs import Timestamp, iNaT, lib, properties -from pandas._typing import Dtype, FilePathOrBuffer, FrameOrSeries, JSONSerializable +from pandas._typing import Dtype, FilePathOrBuffer, FrameOrSeriesT, JSONSerializable from pandas.compat import set_function_name from pandas.compat._optional import import_optional_dependency from pandas.compat.numpy import function as nv @@ -552,12 +552,12 @@ def size(self): return np.prod(self.shape) @property - def _selected_obj(self: FrameOrSeries) -> FrameOrSeries: + def _selected_obj(self: FrameOrSeriesT) -> FrameOrSeriesT: """ internal compat with SelectionMixin """ return self @property - def _obj_with_exclusions(self: FrameOrSeries) -> FrameOrSeries: + def _obj_with_exclusions(self: FrameOrSeriesT) -> FrameOrSeriesT: """ internal compat with SelectionMixin """ return self @@ -4670,7 +4670,7 @@ def f(x): else: raise TypeError("Must pass either `items`, `like`, or `regex`") - def head(self: FrameOrSeries, n: int = 5) -> FrameOrSeries: + def head(self: FrameOrSeriesT, n: int = 5) -> FrameOrSeriesT: """ Return the first `n` rows. @@ -4743,7 +4743,7 @@ def head(self: FrameOrSeries, n: int = 5) -> FrameOrSeries: return self.iloc[:n] - def tail(self: FrameOrSeries, n: int = 5) -> FrameOrSeries: + def tail(self: FrameOrSeriesT, n: int = 5) -> FrameOrSeriesT: """ Return the last `n` rows. @@ -5188,8 +5188,8 @@ def pipe(self, func, *args, **kwargs): # Attribute access def __finalize__( - self: FrameOrSeries, other, method=None, **kwargs - ) -> FrameOrSeries: + self: FrameOrSeriesT, other, method=None, **kwargs + ) -> FrameOrSeriesT: """ Propagate metadata from other to self. @@ -5658,7 +5658,7 @@ def astype( result.columns = self.columns return result - def copy(self: FrameOrSeries, deep: bool_t = True) -> FrameOrSeries: + def copy(self: FrameOrSeriesT, deep: bool_t = True) -> FrameOrSeriesT: """ Make a copy of this object's indices and data. @@ -5766,10 +5766,10 @@ def copy(self: FrameOrSeries, deep: bool_t = True) -> FrameOrSeries: data = self._data.copy(deep=deep) return self._constructor(data).__finalize__(self) - def __copy__(self: FrameOrSeries, deep: bool_t = True) -> FrameOrSeries: + def __copy__(self: FrameOrSeriesT, deep: bool_t = True) -> FrameOrSeriesT: return self.copy(deep=deep) - def __deepcopy__(self: FrameOrSeries, memo=None) -> FrameOrSeries: + def __deepcopy__(self: FrameOrSeriesT, memo=None) -> FrameOrSeriesT: """ Parameters ---------- @@ -5779,13 +5779,13 @@ def __deepcopy__(self: FrameOrSeries, memo=None) -> FrameOrSeries: return self.copy(deep=True) def _convert( - self: FrameOrSeries, + self: FrameOrSeriesT, datetime: bool_t = False, numeric: bool_t = False, timedelta: bool_t = False, coerce: bool_t = False, copy: bool_t = True, - ) -> FrameOrSeries: + ) -> FrameOrSeriesT: """ Attempt to infer better dtype for object columns @@ -5877,14 +5877,14 @@ def infer_objects(self: FrameOrSeries) -> FrameOrSeries: # Filling NA's def fillna( - self: FrameOrSeries, + self: FrameOrSeriesT, value=None, method=None, axis=None, inplace: bool_t = False, limit=None, downcast=None, - ) -> Optional[FrameOrSeries]: + ) -> Optional[FrameOrSeriesT]: """ Fill NA/NaN values using the specified method. @@ -6066,12 +6066,12 @@ def fillna( return self._constructor(new_data).__finalize__(self) def ffill( - self: FrameOrSeries, + self: FrameOrSeriesT, axis=None, inplace: bool_t = False, limit=None, downcast=None, - ) -> Optional[FrameOrSeries]: + ) -> Optional[FrameOrSeriesT]: """ Synonym for :meth:`DataFrame.fillna` with ``method='ffill'``. @@ -6085,12 +6085,12 @@ def ffill( ) def bfill( - self: FrameOrSeries, + self: FrameOrSeriesT, axis=None, inplace: bool_t = False, limit=None, downcast=None, - ) -> Optional[FrameOrSeries]: + ) -> Optional[FrameOrSeriesT]: """ Synonym for :meth:`DataFrame.fillna` with ``method='bfill'``. @@ -8055,14 +8055,14 @@ def last(self: FrameOrSeries, offset) -> FrameOrSeries: return self.iloc[start:] def rank( - self: FrameOrSeries, + self: FrameOrSeriesT, axis=0, method: str = "average", numeric_only: Optional[bool_t] = None, na_option: str = "keep", ascending: bool_t = True, pct: bool_t = False, - ) -> FrameOrSeries: + ) -> FrameOrSeriesT: """ Compute numerical data ranks (1 through n) along axis. @@ -8870,7 +8870,7 @@ def shift( return self._constructor(new_data).__finalize__(self) - def slice_shift(self: FrameOrSeries, periods: int = 1, axis=0) -> FrameOrSeries: + def slice_shift(self: FrameOrSeriesT, periods: int = 1, axis=0) -> FrameOrSeriesT: """ Equivalent to `shift` without copying data. @@ -8970,8 +8970,8 @@ def tshift( return self._constructor(new_data).__finalize__(self) def truncate( - self: FrameOrSeries, before=None, after=None, axis=None, copy: bool_t = True - ) -> FrameOrSeries: + self: FrameOrSeriesT, before=None, after=None, axis=None, copy: bool_t = True + ) -> FrameOrSeriesT: """ Truncate a Series or DataFrame before and after some index value. @@ -9124,8 +9124,8 @@ def truncate( return result def tz_convert( - self: FrameOrSeries, tz, axis=0, level=None, copy: bool_t = True - ) -> FrameOrSeries: + self: FrameOrSeriesT, tz, axis=0, level=None, copy: bool_t = True + ) -> FrameOrSeriesT: """ Convert tz-aware axis to target time zone. @@ -9181,14 +9181,14 @@ def _tz_convert(ax, tz): return result.__finalize__(self) def tz_localize( - self: FrameOrSeries, + self: FrameOrSeriesT, tz, axis=0, level=None, copy: bool_t = True, ambiguous="raise", nonexistent: str = "raise", - ) -> FrameOrSeries: + ) -> FrameOrSeriesT: """ Localize tz-naive index of a Series or DataFrame to target time zone. diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index c49677fa27a31..26c0b12b6e854 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -30,7 +30,7 @@ import numpy as np from pandas._libs import Timestamp, lib -from pandas._typing import FrameOrSeries +from pandas._typing import FrameOrSeriesT from pandas.util._decorators import Appender, Substitution from pandas.core.dtypes.cast import ( @@ -86,7 +86,7 @@ ScalarResult = typing.TypeVar("ScalarResult") -def generate_property(name: str, klass: Type[FrameOrSeries]): +def generate_property(name: str, klass: Type[FrameOrSeriesT]): """ Create a property for a GroupBy subclass to dispatch to DataFrame/Series. @@ -109,7 +109,7 @@ def prop(self): return property(prop) -def pin_whitelisted_properties(klass: Type[FrameOrSeries], whitelist: FrozenSet[str]): +def pin_whitelisted_properties(klass: Type[FrameOrSeriesT], whitelist: FrozenSet[str]): """ Create GroupBy member defs for DataFrame/Series names in a whitelist. diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 1ba4938d45fc9..ab8df19a5031d 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -33,7 +33,7 @@ class providing the base-class of operations. from pandas._libs import Timestamp import pandas._libs.groupby as libgroupby -from pandas._typing import FrameOrSeries, Scalar +from pandas._typing import FrameOrSeriesT, Scalar from pandas.compat import set_function_name from pandas.compat.numpy import function as nv from pandas.errors import AbstractMethodError @@ -2439,8 +2439,8 @@ def tail(self, n=5): return self._selected_obj[mask] def _reindex_output( - self, output: FrameOrSeries, fill_value: Scalar = np.NaN - ) -> FrameOrSeries: + self, output: FrameOrSeriesT, fill_value: Scalar = np.NaN + ) -> FrameOrSeriesT: """ If we have categorical groupers, then we might want to make sure that we have a fully re-indexed output to the levels. This means expanding diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index 7e7261130ff4a..a57014bab5892 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -7,7 +7,7 @@ import numpy as np -from pandas._typing import FrameOrSeries +from pandas._typing import FrameOrSeriesT from pandas.util._decorators import cache_readonly from pandas.core.dtypes.common import ( @@ -141,7 +141,7 @@ def _get_grouper(self, obj, validate: bool = True): ) return self.binner, self.grouper, self.obj - def _set_grouper(self, obj: FrameOrSeries, sort: bool = False): + def _set_grouper(self, obj: FrameOrSeriesT, sort: bool = False): """ given an object and the specifications, setup the internal grouper for this particular specification @@ -244,7 +244,7 @@ def __init__( self, index: Index, grouper=None, - obj: Optional[FrameOrSeries] = None, + obj: Optional[FrameOrSeriesT] = None, name=None, level=None, sort: bool = True, @@ -424,7 +424,7 @@ def groups(self) -> Dict[Hashable, np.ndarray]: def get_grouper( - obj: FrameOrSeries, + obj: FrameOrSeriesT, key=None, axis: int = 0, level=None, @@ -432,7 +432,7 @@ def get_grouper( observed: bool = False, mutated: bool = False, validate: bool = True, -) -> "Tuple[ops.BaseGrouper, List[Hashable], FrameOrSeries]": +) -> "Tuple[ops.BaseGrouper, List[Hashable], FrameOrSeriesT]": """ Create and return a BaseGrouper, which is an internal mapping of how to create the grouper indexers. diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 37067a1897a52..f357a9d4a5950 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -14,7 +14,7 @@ from pandas._libs import NaT, iNaT, lib import pandas._libs.groupby as libgroupby import pandas._libs.reduction as libreduction -from pandas._typing import FrameOrSeries +from pandas._typing import FrameOrSeriesT from pandas.errors import AbstractMethodError from pandas.util._decorators import cache_readonly @@ -111,7 +111,7 @@ def __iter__(self): def nkeys(self) -> int: return len(self.groupings) - def get_iterator(self, data: FrameOrSeries, axis: int = 0): + def get_iterator(self, data: FrameOrSeriesT, axis: int = 0): """ Groupby iterator @@ -125,7 +125,7 @@ def get_iterator(self, data: FrameOrSeries, axis: int = 0): for key, (i, group) in zip(keys, splitter): yield key, group - def _get_splitter(self, data: FrameOrSeries, axis: int = 0) -> "DataSplitter": + def _get_splitter(self, data: FrameOrSeriesT, axis: int = 0) -> "DataSplitter": comp_ids, _, ngroups = self.group_info return get_splitter(data, comp_ids, ngroups, axis=axis) @@ -147,13 +147,13 @@ def _get_group_keys(self): # provide "flattened" iterator for multi-group setting return get_flattened_iterator(comp_ids, ngroups, self.levels, self.codes) - def apply(self, f, data: FrameOrSeries, axis: int = 0): + def apply(self, f, data: FrameOrSeriesT, axis: int = 0): mutated = self.mutated splitter = self._get_splitter(data, axis=axis) group_keys = self._get_group_keys() result_values = None - sdata: FrameOrSeries = splitter._get_sorted_data() + sdata: FrameOrSeriesT = splitter._get_sorted_data() if sdata.ndim == 2 and np.any(sdata.dtypes.apply(is_extension_array_dtype)): # calling splitter.fast_apply will raise TypeError via apply_frame_axis0 # if we pass EA instead of ndarray @@ -754,7 +754,7 @@ def _get_grouper(self): """ return self - def get_iterator(self, data: FrameOrSeries, axis: int = 0): + def get_iterator(self, data: FrameOrSeriesT, axis: int = 0): """ Groupby iterator @@ -862,7 +862,7 @@ def _is_indexed_like(obj, axes) -> bool: class DataSplitter: - def __init__(self, data: FrameOrSeries, labels, ngroups: int, axis: int = 0): + def __init__(self, data: FrameOrSeriesT, labels, ngroups: int, axis: int = 0): self.data = data self.labels = ensure_int64(labels) self.ngroups = ngroups @@ -893,7 +893,7 @@ def __iter__(self): for i, (start, end) in enumerate(zip(starts, ends)): yield i, self._chop(sdata, slice(start, end)) - def _get_sorted_data(self) -> FrameOrSeries: + def _get_sorted_data(self) -> FrameOrSeriesT: return self.data.take(self.sort_idx, axis=self.axis) def _chop(self, sdata, slice_obj: slice) -> NDFrame: @@ -920,7 +920,7 @@ def _chop(self, sdata: DataFrame, slice_obj: slice) -> DataFrame: return sdata._slice(slice_obj, axis=1) -def get_splitter(data: FrameOrSeries, *args, **kwargs) -> DataSplitter: +def get_splitter(data: FrameOrSeriesT, *args, **kwargs) -> DataSplitter: if isinstance(data, Series): klass: Type[DataSplitter] = SeriesSplitter else: diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index 4d4c147b063f8..afbdef7c8f132 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -6,6 +6,8 @@ import numpy as np +from pandas._typing import FrameOrSeries + from pandas import DataFrame, Index, MultiIndex, Series from pandas.core.arrays.categorical import ( factorize_from_iterable, @@ -25,12 +27,10 @@ # --------------------------------------------------------------------- # Concatenate DataFrame objects -FrameOrSeriesUnion = Union["DataFrame", "Series"] - @overload def concat( - objs: Union[Sequence["DataFrame"], Mapping[str, "DataFrame"]], + objs: Union[Sequence["DataFrame"], Mapping[Hashable, "DataFrame"]], axis=0, join: str = "outer", ignore_index: bool = False, @@ -46,7 +46,7 @@ def concat( @overload def concat( - objs: Union[Sequence[FrameOrSeriesUnion], Mapping[str, FrameOrSeriesUnion]], + objs: Union[Sequence[FrameOrSeries], Mapping[Hashable, FrameOrSeries]], axis=0, join: str = "outer", ignore_index: bool = False, @@ -56,12 +56,12 @@ def concat( verify_integrity: bool = False, sort: bool = False, copy: bool = True, -) -> Union["DataFrame", "Series"]: +) -> FrameOrSeries: ... def concat( - objs: Union[Sequence[FrameOrSeriesUnion], Mapping[str, FrameOrSeriesUnion]], + objs: Union[Sequence[FrameOrSeries], Mapping[Hashable, FrameOrSeries]], axis=0, join="outer", ignore_index: bool = False, @@ -71,7 +71,7 @@ def concat( verify_integrity: bool = False, sort: bool = False, copy: bool = True, -) -> Union["DataFrame", "Series"]: +) -> FrameOrSeries: """ Concatenate pandas objects along a particular axis with optional set logic along the other axes. diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 6fe2287923fcb..0e9a3fa0ffcde 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -13,7 +13,7 @@ from pandas._libs import Timedelta, hashtable as libhashtable, lib import pandas._libs.join as libjoin -from pandas._typing import FrameOrSeries +from pandas._typing import FrameOrSeriesT from pandas.errors import MergeError from pandas.util._decorators import Appender, Substitution @@ -1994,7 +1994,7 @@ def _any(x) -> bool: return x is not None and com.any_not_none(*x) -def _validate_operand(obj: FrameOrSeries) -> "DataFrame": +def _validate_operand(obj: FrameOrSeriesT) -> "DataFrame": if isinstance(obj, ABCDataFrame): return obj elif isinstance(obj, ABCSeries): diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index c3c3e61f222df..eb0e53d6a1c59 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -11,7 +11,7 @@ import numpy as np import pandas._libs.window.aggregations as window_aggregations -from pandas._typing import Axis, FrameOrSeries, Scalar +from pandas._typing import Axis, FrameOrSeriesT, Scalar from pandas.compat._optional import import_optional_dependency from pandas.compat.numpy import function as nv from pandas.util._decorators import Appender, Substitution, cache_readonly @@ -324,7 +324,7 @@ def _wrap_result(self, result, block=None, obj=None): return type(obj)(result, index=index, columns=block.columns) return result - def _wrap_results(self, results, blocks, obj, exclude=None) -> FrameOrSeries: + def _wrap_results(self, results, blocks, obj, exclude=None) -> FrameOrSeriesT: """ Wrap the results. diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 3d2c2159bfbdd..6f554c69dd1ee 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -27,7 +27,7 @@ from pandas._libs import lib, writers as libwriters from pandas._libs.tslibs import timezones -from pandas._typing import ArrayLike, FrameOrSeries +from pandas._typing import ArrayLike, FrameOrSeriesT from pandas.compat._optional import import_optional_dependency from pandas.errors import PerformanceWarning from pandas.util._decorators import cache_readonly @@ -231,7 +231,7 @@ def _tables(): def to_hdf( path_or_buf, key: str, - value: FrameOrSeries, + value: FrameOrSeriesT, mode: str = "a", complevel: Optional[int] = None, complib: Optional[str] = None, @@ -986,7 +986,7 @@ def func(_start, _stop, _where): def put( self, key: str, - value: FrameOrSeries, + value: FrameOrSeriesT, format=None, index=True, append=False, @@ -1102,7 +1102,7 @@ def remove(self, key: str, where=None, start=None, stop=None): def append( self, key: str, - value: FrameOrSeries, + value: FrameOrSeriesT, format=None, axes=None, index=True, @@ -1537,7 +1537,7 @@ def _create_storer( self, group, format=None, - value: Optional[FrameOrSeries] = None, + value: Optional[FrameOrSeriesT] = None, encoding: str = "UTF-8", errors: str = "strict", ) -> Union["GenericFixed", "Table"]: @@ -1630,7 +1630,7 @@ def error(t): def _write_to_group( self, key: str, - value: FrameOrSeries, + value: FrameOrSeriesT, format, axes=None, index=True, From 2926047e859f8a65e647d425dcbdd728ecd6514c Mon Sep 17 00:00:00 2001 From: tp Date: Wed, 1 Jan 2020 21:23:44 +0000 Subject: [PATCH 4/6] Explain typed etc. --- pandas/_testing.py | 6 ++-- pandas/_typing.py | 10 +++++- pandas/core/generic.py | 56 ++++++++++++++++++---------------- pandas/core/groupby/generic.py | 8 +++-- pandas/core/groupby/groupby.py | 6 ++-- pandas/core/groupby/grouper.py | 10 +++--- pandas/core/groupby/ops.py | 18 +++++------ pandas/core/reshape/concat.py | 6 ++-- pandas/core/reshape/merge.py | 4 +-- pandas/core/window/rolling.py | 4 +-- pandas/io/pytables.py | 12 ++++---- 11 files changed, 76 insertions(+), 64 deletions(-) diff --git a/pandas/_testing.py b/pandas/_testing.py index 4032e2d1b4cbe..c59c1fb7626e7 100644 --- a/pandas/_testing.py +++ b/pandas/_testing.py @@ -22,7 +22,7 @@ ) import pandas._libs.testing as _testing -from pandas._typing import FrameOrSeriesT +from pandas._typing import SameFrameOrSeries from pandas.compat import _get_lzma_file, _import_lzma from pandas.core.dtypes.common import ( @@ -102,8 +102,8 @@ def reset_display_options(): def round_trip_pickle( - obj: FrameOrSeriesT, path: Optional[str] = None -) -> FrameOrSeriesT: + obj: SameFrameOrSeries, path: Optional[str] = None +) -> SameFrameOrSeries: """ Pickle an object and then read it again. diff --git a/pandas/_typing.py b/pandas/_typing.py index f615e6e50ff10..0f1be4517b201 100644 --- a/pandas/_typing.py +++ b/pandas/_typing.py @@ -42,8 +42,16 @@ Dtype = Union[str, np.dtype, "ExtensionDtype"] FilePathOrBuffer = Union[str, Path, IO[AnyStr]] +# FrameOrSeries means either a DataFrame or a Series. E.g. +# `def func(a: FrameOrSeries) -> FrameOrSeries: ...` means that if a Series is passed +# in, either a Series or DataFrame is returned, and if a DataFrame is passed in, either +# a DataFrame or a Series is returned. FrameOrSeries = Union["DataFrame", "Series"] -FrameOrSeriesT = TypeVar("FrameOrSeriesT", bound="NDFrame") +# SameFrameOrSeries is stricter and ensures that the same subclass of NDFrame always is +# used. E.g. `def func(a: SameFrameOrSeries) -> SameFrameOrSeries: ...` means that if a +# Series is passed into a function, a Series is always returned and if a DataFrame is +# passed in, a DataFrame is always returned. +SameFrameOrSeries = TypeVar("SameFrameOrSeries", bound="NDFrame") Axis = Union[str, int] Ordered = Optional[bool] JSONSerializable = Union[PythonScalar, List, Dict] diff --git a/pandas/core/generic.py b/pandas/core/generic.py index c4ba8216710e0..f41a6b6447ecd 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -30,7 +30,7 @@ from pandas._config import config from pandas._libs import Timestamp, iNaT, lib, properties -from pandas._typing import Dtype, FilePathOrBuffer, FrameOrSeriesT, JSONSerializable +from pandas._typing import Dtype, FilePathOrBuffer, JSONSerializable, SameFrameOrSeries from pandas.compat import set_function_name from pandas.compat._optional import import_optional_dependency from pandas.compat.numpy import function as nv @@ -552,12 +552,12 @@ def size(self): return np.prod(self.shape) @property - def _selected_obj(self: FrameOrSeriesT) -> FrameOrSeriesT: + def _selected_obj(self: SameFrameOrSeries) -> SameFrameOrSeries: """ internal compat with SelectionMixin """ return self @property - def _obj_with_exclusions(self: FrameOrSeriesT) -> FrameOrSeriesT: + def _obj_with_exclusions(self: SameFrameOrSeries) -> SameFrameOrSeries: """ internal compat with SelectionMixin """ return self @@ -4670,7 +4670,7 @@ def f(x): else: raise TypeError("Must pass either `items`, `like`, or `regex`") - def head(self: FrameOrSeriesT, n: int = 5) -> FrameOrSeriesT: + def head(self: SameFrameOrSeries, n: int = 5) -> SameFrameOrSeries: """ Return the first `n` rows. @@ -4743,7 +4743,7 @@ def head(self: FrameOrSeriesT, n: int = 5) -> FrameOrSeriesT: return self.iloc[:n] - def tail(self: FrameOrSeriesT, n: int = 5) -> FrameOrSeriesT: + def tail(self: SameFrameOrSeries, n: int = 5) -> SameFrameOrSeries: """ Return the last `n` rows. @@ -5188,8 +5188,8 @@ def pipe(self, func, *args, **kwargs): # Attribute access def __finalize__( - self: FrameOrSeriesT, other, method=None, **kwargs - ) -> FrameOrSeriesT: + self: SameFrameOrSeries, other, method=None, **kwargs + ) -> SameFrameOrSeries: """ Propagate metadata from other to self. @@ -5658,7 +5658,7 @@ def astype( result.columns = self.columns return result - def copy(self: FrameOrSeriesT, deep: bool_t = True) -> FrameOrSeriesT: + def copy(self: SameFrameOrSeries, deep: bool_t = True) -> SameFrameOrSeries: """ Make a copy of this object's indices and data. @@ -5766,10 +5766,10 @@ def copy(self: FrameOrSeriesT, deep: bool_t = True) -> FrameOrSeriesT: data = self._data.copy(deep=deep) return self._constructor(data).__finalize__(self) - def __copy__(self: FrameOrSeriesT, deep: bool_t = True) -> FrameOrSeriesT: + def __copy__(self: SameFrameOrSeries, deep: bool_t = True) -> SameFrameOrSeries: return self.copy(deep=deep) - def __deepcopy__(self: FrameOrSeriesT, memo=None) -> FrameOrSeriesT: + def __deepcopy__(self: SameFrameOrSeries, memo=None) -> SameFrameOrSeries: """ Parameters ---------- @@ -5779,13 +5779,13 @@ def __deepcopy__(self: FrameOrSeriesT, memo=None) -> FrameOrSeriesT: return self.copy(deep=True) def _convert( - self: FrameOrSeriesT, + self: SameFrameOrSeries, datetime: bool_t = False, numeric: bool_t = False, timedelta: bool_t = False, coerce: bool_t = False, copy: bool_t = True, - ) -> FrameOrSeriesT: + ) -> SameFrameOrSeries: """ Attempt to infer better dtype for object columns @@ -5877,14 +5877,14 @@ def infer_objects(self: FrameOrSeries) -> FrameOrSeries: # Filling NA's def fillna( - self: FrameOrSeriesT, + self: SameFrameOrSeries, value=None, method=None, axis=None, inplace: bool_t = False, limit=None, downcast=None, - ) -> Optional[FrameOrSeriesT]: + ) -> Optional[SameFrameOrSeries]: """ Fill NA/NaN values using the specified method. @@ -6066,12 +6066,12 @@ def fillna( return self._constructor(new_data).__finalize__(self) def ffill( - self: FrameOrSeriesT, + self: SameFrameOrSeries, axis=None, inplace: bool_t = False, limit=None, downcast=None, - ) -> Optional[FrameOrSeriesT]: + ) -> Optional[SameFrameOrSeries]: """ Synonym for :meth:`DataFrame.fillna` with ``method='ffill'``. @@ -6085,12 +6085,12 @@ def ffill( ) def bfill( - self: FrameOrSeriesT, + self: SameFrameOrSeries, axis=None, inplace: bool_t = False, limit=None, downcast=None, - ) -> Optional[FrameOrSeriesT]: + ) -> Optional[SameFrameOrSeries]: """ Synonym for :meth:`DataFrame.fillna` with ``method='bfill'``. @@ -8055,14 +8055,14 @@ def last(self: FrameOrSeries, offset) -> FrameOrSeries: return self.iloc[start:] def rank( - self: FrameOrSeriesT, + self: SameFrameOrSeries, axis=0, method: str = "average", numeric_only: Optional[bool_t] = None, na_option: str = "keep", ascending: bool_t = True, pct: bool_t = False, - ) -> FrameOrSeriesT: + ) -> SameFrameOrSeries: """ Compute numerical data ranks (1 through n) along axis. @@ -8870,7 +8870,9 @@ def shift( return self._constructor(new_data).__finalize__(self) - def slice_shift(self: FrameOrSeriesT, periods: int = 1, axis=0) -> FrameOrSeriesT: + def slice_shift( + self: SameFrameOrSeries, periods: int = 1, axis=0 + ) -> SameFrameOrSeries: """ Equivalent to `shift` without copying data. @@ -8970,8 +8972,8 @@ def tshift( return self._constructor(new_data).__finalize__(self) def truncate( - self: FrameOrSeriesT, before=None, after=None, axis=None, copy: bool_t = True - ) -> FrameOrSeriesT: + self: SameFrameOrSeries, before=None, after=None, axis=None, copy: bool_t = True + ) -> SameFrameOrSeries: """ Truncate a Series or DataFrame before and after some index value. @@ -9124,8 +9126,8 @@ def truncate( return result def tz_convert( - self: FrameOrSeriesT, tz, axis=0, level=None, copy: bool_t = True - ) -> FrameOrSeriesT: + self: SameFrameOrSeries, tz, axis=0, level=None, copy: bool_t = True + ) -> SameFrameOrSeries: """ Convert tz-aware axis to target time zone. @@ -9181,14 +9183,14 @@ def _tz_convert(ax, tz): return result.__finalize__(self) def tz_localize( - self: FrameOrSeriesT, + self: SameFrameOrSeries, tz, axis=0, level=None, copy: bool_t = True, ambiguous="raise", nonexistent: str = "raise", - ) -> FrameOrSeriesT: + ) -> SameFrameOrSeries: """ Localize tz-naive index of a Series or DataFrame to target time zone. diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 26c0b12b6e854..42dbacca12847 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -30,7 +30,7 @@ import numpy as np from pandas._libs import Timestamp, lib -from pandas._typing import FrameOrSeriesT +from pandas._typing import SameFrameOrSeries from pandas.util._decorators import Appender, Substitution from pandas.core.dtypes.cast import ( @@ -86,7 +86,7 @@ ScalarResult = typing.TypeVar("ScalarResult") -def generate_property(name: str, klass: Type[FrameOrSeriesT]): +def generate_property(name: str, klass: Type[SameFrameOrSeries]): """ Create a property for a GroupBy subclass to dispatch to DataFrame/Series. @@ -109,7 +109,9 @@ def prop(self): return property(prop) -def pin_whitelisted_properties(klass: Type[FrameOrSeriesT], whitelist: FrozenSet[str]): +def pin_whitelisted_properties( + klass: Type[SameFrameOrSeries], whitelist: FrozenSet[str] +): """ Create GroupBy member defs for DataFrame/Series names in a whitelist. diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index ab8df19a5031d..c0b9576150b6d 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -33,7 +33,7 @@ class providing the base-class of operations. from pandas._libs import Timestamp import pandas._libs.groupby as libgroupby -from pandas._typing import FrameOrSeriesT, Scalar +from pandas._typing import SameFrameOrSeries, Scalar from pandas.compat import set_function_name from pandas.compat.numpy import function as nv from pandas.errors import AbstractMethodError @@ -2439,8 +2439,8 @@ def tail(self, n=5): return self._selected_obj[mask] def _reindex_output( - self, output: FrameOrSeriesT, fill_value: Scalar = np.NaN - ) -> FrameOrSeriesT: + self, output: SameFrameOrSeries, fill_value: Scalar = np.NaN + ) -> SameFrameOrSeries: """ If we have categorical groupers, then we might want to make sure that we have a fully re-indexed output to the levels. This means expanding diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index a57014bab5892..a1dbef6ab289f 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -7,7 +7,7 @@ import numpy as np -from pandas._typing import FrameOrSeriesT +from pandas._typing import SameFrameOrSeries from pandas.util._decorators import cache_readonly from pandas.core.dtypes.common import ( @@ -141,7 +141,7 @@ def _get_grouper(self, obj, validate: bool = True): ) return self.binner, self.grouper, self.obj - def _set_grouper(self, obj: FrameOrSeriesT, sort: bool = False): + def _set_grouper(self, obj: SameFrameOrSeries, sort: bool = False): """ given an object and the specifications, setup the internal grouper for this particular specification @@ -244,7 +244,7 @@ def __init__( self, index: Index, grouper=None, - obj: Optional[FrameOrSeriesT] = None, + obj: Optional[SameFrameOrSeries] = None, name=None, level=None, sort: bool = True, @@ -424,7 +424,7 @@ def groups(self) -> Dict[Hashable, np.ndarray]: def get_grouper( - obj: FrameOrSeriesT, + obj: SameFrameOrSeries, key=None, axis: int = 0, level=None, @@ -432,7 +432,7 @@ def get_grouper( observed: bool = False, mutated: bool = False, validate: bool = True, -) -> "Tuple[ops.BaseGrouper, List[Hashable], FrameOrSeriesT]": +) -> "Tuple[ops.BaseGrouper, List[Hashable], SameFrameOrSeries]": """ Create and return a BaseGrouper, which is an internal mapping of how to create the grouper indexers. diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index f357a9d4a5950..728cd23208af4 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -14,7 +14,7 @@ from pandas._libs import NaT, iNaT, lib import pandas._libs.groupby as libgroupby import pandas._libs.reduction as libreduction -from pandas._typing import FrameOrSeriesT +from pandas._typing import SameFrameOrSeries from pandas.errors import AbstractMethodError from pandas.util._decorators import cache_readonly @@ -111,7 +111,7 @@ def __iter__(self): def nkeys(self) -> int: return len(self.groupings) - def get_iterator(self, data: FrameOrSeriesT, axis: int = 0): + def get_iterator(self, data: SameFrameOrSeries, axis: int = 0): """ Groupby iterator @@ -125,7 +125,7 @@ def get_iterator(self, data: FrameOrSeriesT, axis: int = 0): for key, (i, group) in zip(keys, splitter): yield key, group - def _get_splitter(self, data: FrameOrSeriesT, axis: int = 0) -> "DataSplitter": + def _get_splitter(self, data: SameFrameOrSeries, axis: int = 0) -> "DataSplitter": comp_ids, _, ngroups = self.group_info return get_splitter(data, comp_ids, ngroups, axis=axis) @@ -147,13 +147,13 @@ def _get_group_keys(self): # provide "flattened" iterator for multi-group setting return get_flattened_iterator(comp_ids, ngroups, self.levels, self.codes) - def apply(self, f, data: FrameOrSeriesT, axis: int = 0): + def apply(self, f, data: SameFrameOrSeries, axis: int = 0): mutated = self.mutated splitter = self._get_splitter(data, axis=axis) group_keys = self._get_group_keys() result_values = None - sdata: FrameOrSeriesT = splitter._get_sorted_data() + sdata: SameFrameOrSeries = splitter._get_sorted_data() if sdata.ndim == 2 and np.any(sdata.dtypes.apply(is_extension_array_dtype)): # calling splitter.fast_apply will raise TypeError via apply_frame_axis0 # if we pass EA instead of ndarray @@ -754,7 +754,7 @@ def _get_grouper(self): """ return self - def get_iterator(self, data: FrameOrSeriesT, axis: int = 0): + def get_iterator(self, data: SameFrameOrSeries, axis: int = 0): """ Groupby iterator @@ -862,7 +862,7 @@ def _is_indexed_like(obj, axes) -> bool: class DataSplitter: - def __init__(self, data: FrameOrSeriesT, labels, ngroups: int, axis: int = 0): + def __init__(self, data: SameFrameOrSeries, labels, ngroups: int, axis: int = 0): self.data = data self.labels = ensure_int64(labels) self.ngroups = ngroups @@ -893,7 +893,7 @@ def __iter__(self): for i, (start, end) in enumerate(zip(starts, ends)): yield i, self._chop(sdata, slice(start, end)) - def _get_sorted_data(self) -> FrameOrSeriesT: + def _get_sorted_data(self) -> SameFrameOrSeries: return self.data.take(self.sort_idx, axis=self.axis) def _chop(self, sdata, slice_obj: slice) -> NDFrame: @@ -920,7 +920,7 @@ def _chop(self, sdata: DataFrame, slice_obj: slice) -> DataFrame: return sdata._slice(slice_obj, axis=1) -def get_splitter(data: FrameOrSeriesT, *args, **kwargs) -> DataSplitter: +def get_splitter(data: SameFrameOrSeries, *args, **kwargs) -> DataSplitter: if isinstance(data, Series): klass: Type[DataSplitter] = SeriesSplitter else: diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index afbdef7c8f132..77d692c20b87e 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -30,7 +30,7 @@ @overload def concat( - objs: Union[Sequence["DataFrame"], Mapping[Hashable, "DataFrame"]], + objs: Union[Sequence["DataFrame"], Mapping[Optional[Hashable], "DataFrame"]], axis=0, join: str = "outer", ignore_index: bool = False, @@ -46,7 +46,7 @@ def concat( @overload def concat( - objs: Union[Sequence[FrameOrSeries], Mapping[Hashable, FrameOrSeries]], + objs: Union[Sequence[FrameOrSeries], Mapping[Optional[Hashable], FrameOrSeries]], axis=0, join: str = "outer", ignore_index: bool = False, @@ -61,7 +61,7 @@ def concat( def concat( - objs: Union[Sequence[FrameOrSeries], Mapping[Hashable, FrameOrSeries]], + objs: Union[Sequence[FrameOrSeries], Mapping[Optional[Hashable], FrameOrSeries]], axis=0, join="outer", ignore_index: bool = False, diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 0e9a3fa0ffcde..25a39027b52cd 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -13,7 +13,7 @@ from pandas._libs import Timedelta, hashtable as libhashtable, lib import pandas._libs.join as libjoin -from pandas._typing import FrameOrSeriesT +from pandas._typing import SameFrameOrSeries from pandas.errors import MergeError from pandas.util._decorators import Appender, Substitution @@ -1994,7 +1994,7 @@ def _any(x) -> bool: return x is not None and com.any_not_none(*x) -def _validate_operand(obj: FrameOrSeriesT) -> "DataFrame": +def _validate_operand(obj: SameFrameOrSeries) -> "DataFrame": if isinstance(obj, ABCDataFrame): return obj elif isinstance(obj, ABCSeries): diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index eb0e53d6a1c59..2fcc2603f4218 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -11,7 +11,7 @@ import numpy as np import pandas._libs.window.aggregations as window_aggregations -from pandas._typing import Axis, FrameOrSeriesT, Scalar +from pandas._typing import Axis, SameFrameOrSeries, Scalar from pandas.compat._optional import import_optional_dependency from pandas.compat.numpy import function as nv from pandas.util._decorators import Appender, Substitution, cache_readonly @@ -324,7 +324,7 @@ def _wrap_result(self, result, block=None, obj=None): return type(obj)(result, index=index, columns=block.columns) return result - def _wrap_results(self, results, blocks, obj, exclude=None) -> FrameOrSeriesT: + def _wrap_results(self, results, blocks, obj, exclude=None) -> SameFrameOrSeries: """ Wrap the results. diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 6f554c69dd1ee..03c7710ec3339 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -27,7 +27,7 @@ from pandas._libs import lib, writers as libwriters from pandas._libs.tslibs import timezones -from pandas._typing import ArrayLike, FrameOrSeriesT +from pandas._typing import ArrayLike, SameFrameOrSeries from pandas.compat._optional import import_optional_dependency from pandas.errors import PerformanceWarning from pandas.util._decorators import cache_readonly @@ -231,7 +231,7 @@ def _tables(): def to_hdf( path_or_buf, key: str, - value: FrameOrSeriesT, + value: SameFrameOrSeries, mode: str = "a", complevel: Optional[int] = None, complib: Optional[str] = None, @@ -986,7 +986,7 @@ def func(_start, _stop, _where): def put( self, key: str, - value: FrameOrSeriesT, + value: SameFrameOrSeries, format=None, index=True, append=False, @@ -1102,7 +1102,7 @@ def remove(self, key: str, where=None, start=None, stop=None): def append( self, key: str, - value: FrameOrSeriesT, + value: SameFrameOrSeries, format=None, axes=None, index=True, @@ -1537,7 +1537,7 @@ def _create_storer( self, group, format=None, - value: Optional[FrameOrSeriesT] = None, + value: Optional[SameFrameOrSeries] = None, encoding: str = "UTF-8", errors: str = "strict", ) -> Union["GenericFixed", "Table"]: @@ -1630,7 +1630,7 @@ def error(t): def _write_to_group( self, key: str, - value: FrameOrSeriesT, + value: SameFrameOrSeries, format, axes=None, index=True, From 0d49daa0882de7e251b491014185f08fa59300a3 Mon Sep 17 00:00:00 2001 From: tp Date: Thu, 2 Jan 2020 02:55:41 +0000 Subject: [PATCH 5/6] linebreaks --- pandas/_typing.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/_typing.py b/pandas/_typing.py index 0f1be4517b201..7a0c620a7b188 100644 --- a/pandas/_typing.py +++ b/pandas/_typing.py @@ -42,16 +42,19 @@ Dtype = Union[str, np.dtype, "ExtensionDtype"] FilePathOrBuffer = Union[str, Path, IO[AnyStr]] + # FrameOrSeries means either a DataFrame or a Series. E.g. # `def func(a: FrameOrSeries) -> FrameOrSeries: ...` means that if a Series is passed # in, either a Series or DataFrame is returned, and if a DataFrame is passed in, either # a DataFrame or a Series is returned. FrameOrSeries = Union["DataFrame", "Series"] + # SameFrameOrSeries is stricter and ensures that the same subclass of NDFrame always is # used. E.g. `def func(a: SameFrameOrSeries) -> SameFrameOrSeries: ...` means that if a # Series is passed into a function, a Series is always returned and if a DataFrame is # passed in, a DataFrame is always returned. SameFrameOrSeries = TypeVar("SameFrameOrSeries", bound="NDFrame") + Axis = Union[str, int] Ordered = Optional[bool] JSONSerializable = Union[PythonScalar, List, Dict] From 84d1c37a3cba6434eeb0610207942f8170ccf55f Mon Sep 17 00:00:00 2001 From: tp Date: Fri, 3 Jan 2020 08:11:04 +0000 Subject: [PATCH 6/6] SameFrameOrSeries -> FrameOrSeries --- pandas/_testing.py | 6 ++-- pandas/_typing.py | 16 +++++----- pandas/core/generic.py | 56 ++++++++++++++++------------------ pandas/core/groupby/generic.py | 8 ++--- pandas/core/groupby/groupby.py | 6 ++-- pandas/core/groupby/grouper.py | 10 +++--- pandas/core/groupby/ops.py | 18 +++++------ pandas/core/reshape/concat.py | 14 ++++++--- pandas/core/reshape/merge.py | 4 +-- pandas/core/window/rolling.py | 4 +-- pandas/io/formats/format.py | 7 ++--- pandas/io/pytables.py | 12 ++++---- 12 files changed, 79 insertions(+), 82 deletions(-) diff --git a/pandas/_testing.py b/pandas/_testing.py index c59c1fb7626e7..2ebebc5d5e10a 100644 --- a/pandas/_testing.py +++ b/pandas/_testing.py @@ -22,7 +22,7 @@ ) import pandas._libs.testing as _testing -from pandas._typing import SameFrameOrSeries +from pandas._typing import FrameOrSeries from pandas.compat import _get_lzma_file, _import_lzma from pandas.core.dtypes.common import ( @@ -101,9 +101,7 @@ def reset_display_options(): pd.reset_option("^display.", silent=True) -def round_trip_pickle( - obj: SameFrameOrSeries, path: Optional[str] = None -) -> SameFrameOrSeries: +def round_trip_pickle(obj: FrameOrSeries, path: Optional[str] = None) -> FrameOrSeries: """ Pickle an object and then read it again. diff --git a/pandas/_typing.py b/pandas/_typing.py index 7a0c620a7b188..14cf5157cea1d 100644 --- a/pandas/_typing.py +++ b/pandas/_typing.py @@ -43,17 +43,17 @@ Dtype = Union[str, np.dtype, "ExtensionDtype"] FilePathOrBuffer = Union[str, Path, IO[AnyStr]] -# FrameOrSeries means either a DataFrame or a Series. E.g. -# `def func(a: FrameOrSeries) -> FrameOrSeries: ...` means that if a Series is passed -# in, either a Series or DataFrame is returned, and if a DataFrame is passed in, either -# a DataFrame or a Series is returned. -FrameOrSeries = Union["DataFrame", "Series"] +# FrameOrSeriesUnion means either a DataFrame or a Series. E.g. +# `def func(a: FrameOrSeriesUnion) -> FrameOrSeriesUnion: ...` means that if a Series +# is passed in, either a Series or DataFrame is returned, and if a DataFrame is passed +# in, either a DataFrame or a Series is returned. +FrameOrSeriesUnion = Union["DataFrame", "Series"] -# SameFrameOrSeries is stricter and ensures that the same subclass of NDFrame always is -# used. E.g. `def func(a: SameFrameOrSeries) -> SameFrameOrSeries: ...` means that if a +# FrameOrSeries is stricter and ensures that the same subclass of NDFrame always is +# used. E.g. `def func(a: FrameOrSeries) -> FrameOrSeries: ...` means that if a # Series is passed into a function, a Series is always returned and if a DataFrame is # passed in, a DataFrame is always returned. -SameFrameOrSeries = TypeVar("SameFrameOrSeries", bound="NDFrame") +FrameOrSeries = TypeVar("FrameOrSeries", bound="NDFrame") Axis = Union[str, int] Ordered = Optional[bool] diff --git a/pandas/core/generic.py b/pandas/core/generic.py index f41a6b6447ecd..3b8e9cf82f08c 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -30,7 +30,7 @@ from pandas._config import config from pandas._libs import Timestamp, iNaT, lib, properties -from pandas._typing import Dtype, FilePathOrBuffer, JSONSerializable, SameFrameOrSeries +from pandas._typing import Dtype, FilePathOrBuffer, FrameOrSeries, JSONSerializable from pandas.compat import set_function_name from pandas.compat._optional import import_optional_dependency from pandas.compat.numpy import function as nv @@ -552,12 +552,12 @@ def size(self): return np.prod(self.shape) @property - def _selected_obj(self: SameFrameOrSeries) -> SameFrameOrSeries: + def _selected_obj(self: FrameOrSeries) -> FrameOrSeries: """ internal compat with SelectionMixin """ return self @property - def _obj_with_exclusions(self: SameFrameOrSeries) -> SameFrameOrSeries: + def _obj_with_exclusions(self: FrameOrSeries) -> FrameOrSeries: """ internal compat with SelectionMixin """ return self @@ -4670,7 +4670,7 @@ def f(x): else: raise TypeError("Must pass either `items`, `like`, or `regex`") - def head(self: SameFrameOrSeries, n: int = 5) -> SameFrameOrSeries: + def head(self: FrameOrSeries, n: int = 5) -> FrameOrSeries: """ Return the first `n` rows. @@ -4743,7 +4743,7 @@ def head(self: SameFrameOrSeries, n: int = 5) -> SameFrameOrSeries: return self.iloc[:n] - def tail(self: SameFrameOrSeries, n: int = 5) -> SameFrameOrSeries: + def tail(self: FrameOrSeries, n: int = 5) -> FrameOrSeries: """ Return the last `n` rows. @@ -5188,8 +5188,8 @@ def pipe(self, func, *args, **kwargs): # Attribute access def __finalize__( - self: SameFrameOrSeries, other, method=None, **kwargs - ) -> SameFrameOrSeries: + self: FrameOrSeries, other, method=None, **kwargs + ) -> FrameOrSeries: """ Propagate metadata from other to self. @@ -5658,7 +5658,7 @@ def astype( result.columns = self.columns return result - def copy(self: SameFrameOrSeries, deep: bool_t = True) -> SameFrameOrSeries: + def copy(self: FrameOrSeries, deep: bool_t = True) -> FrameOrSeries: """ Make a copy of this object's indices and data. @@ -5766,10 +5766,10 @@ def copy(self: SameFrameOrSeries, deep: bool_t = True) -> SameFrameOrSeries: data = self._data.copy(deep=deep) return self._constructor(data).__finalize__(self) - def __copy__(self: SameFrameOrSeries, deep: bool_t = True) -> SameFrameOrSeries: + def __copy__(self: FrameOrSeries, deep: bool_t = True) -> FrameOrSeries: return self.copy(deep=deep) - def __deepcopy__(self: SameFrameOrSeries, memo=None) -> SameFrameOrSeries: + def __deepcopy__(self: FrameOrSeries, memo=None) -> FrameOrSeries: """ Parameters ---------- @@ -5779,13 +5779,13 @@ def __deepcopy__(self: SameFrameOrSeries, memo=None) -> SameFrameOrSeries: return self.copy(deep=True) def _convert( - self: SameFrameOrSeries, + self: FrameOrSeries, datetime: bool_t = False, numeric: bool_t = False, timedelta: bool_t = False, coerce: bool_t = False, copy: bool_t = True, - ) -> SameFrameOrSeries: + ) -> FrameOrSeries: """ Attempt to infer better dtype for object columns @@ -5877,14 +5877,14 @@ def infer_objects(self: FrameOrSeries) -> FrameOrSeries: # Filling NA's def fillna( - self: SameFrameOrSeries, + self: FrameOrSeries, value=None, method=None, axis=None, inplace: bool_t = False, limit=None, downcast=None, - ) -> Optional[SameFrameOrSeries]: + ) -> Optional[FrameOrSeries]: """ Fill NA/NaN values using the specified method. @@ -6066,12 +6066,12 @@ def fillna( return self._constructor(new_data).__finalize__(self) def ffill( - self: SameFrameOrSeries, + self: FrameOrSeries, axis=None, inplace: bool_t = False, limit=None, downcast=None, - ) -> Optional[SameFrameOrSeries]: + ) -> Optional[FrameOrSeries]: """ Synonym for :meth:`DataFrame.fillna` with ``method='ffill'``. @@ -6085,12 +6085,12 @@ def ffill( ) def bfill( - self: SameFrameOrSeries, + self: FrameOrSeries, axis=None, inplace: bool_t = False, limit=None, downcast=None, - ) -> Optional[SameFrameOrSeries]: + ) -> Optional[FrameOrSeries]: """ Synonym for :meth:`DataFrame.fillna` with ``method='bfill'``. @@ -8055,14 +8055,14 @@ def last(self: FrameOrSeries, offset) -> FrameOrSeries: return self.iloc[start:] def rank( - self: SameFrameOrSeries, + self: FrameOrSeries, axis=0, method: str = "average", numeric_only: Optional[bool_t] = None, na_option: str = "keep", ascending: bool_t = True, pct: bool_t = False, - ) -> SameFrameOrSeries: + ) -> FrameOrSeries: """ Compute numerical data ranks (1 through n) along axis. @@ -8870,9 +8870,7 @@ def shift( return self._constructor(new_data).__finalize__(self) - def slice_shift( - self: SameFrameOrSeries, periods: int = 1, axis=0 - ) -> SameFrameOrSeries: + def slice_shift(self: FrameOrSeries, periods: int = 1, axis=0) -> FrameOrSeries: """ Equivalent to `shift` without copying data. @@ -8972,8 +8970,8 @@ def tshift( return self._constructor(new_data).__finalize__(self) def truncate( - self: SameFrameOrSeries, before=None, after=None, axis=None, copy: bool_t = True - ) -> SameFrameOrSeries: + self: FrameOrSeries, before=None, after=None, axis=None, copy: bool_t = True + ) -> FrameOrSeries: """ Truncate a Series or DataFrame before and after some index value. @@ -9126,8 +9124,8 @@ def truncate( return result def tz_convert( - self: SameFrameOrSeries, tz, axis=0, level=None, copy: bool_t = True - ) -> SameFrameOrSeries: + self: FrameOrSeries, tz, axis=0, level=None, copy: bool_t = True + ) -> FrameOrSeries: """ Convert tz-aware axis to target time zone. @@ -9183,14 +9181,14 @@ def _tz_convert(ax, tz): return result.__finalize__(self) def tz_localize( - self: SameFrameOrSeries, + self: FrameOrSeries, tz, axis=0, level=None, copy: bool_t = True, ambiguous="raise", nonexistent: str = "raise", - ) -> SameFrameOrSeries: + ) -> FrameOrSeries: """ Localize tz-naive index of a Series or DataFrame to target time zone. diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 42dbacca12847..c49677fa27a31 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -30,7 +30,7 @@ import numpy as np from pandas._libs import Timestamp, lib -from pandas._typing import SameFrameOrSeries +from pandas._typing import FrameOrSeries from pandas.util._decorators import Appender, Substitution from pandas.core.dtypes.cast import ( @@ -86,7 +86,7 @@ ScalarResult = typing.TypeVar("ScalarResult") -def generate_property(name: str, klass: Type[SameFrameOrSeries]): +def generate_property(name: str, klass: Type[FrameOrSeries]): """ Create a property for a GroupBy subclass to dispatch to DataFrame/Series. @@ -109,9 +109,7 @@ def prop(self): return property(prop) -def pin_whitelisted_properties( - klass: Type[SameFrameOrSeries], whitelist: FrozenSet[str] -): +def pin_whitelisted_properties(klass: Type[FrameOrSeries], whitelist: FrozenSet[str]): """ Create GroupBy member defs for DataFrame/Series names in a whitelist. diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index c0b9576150b6d..1ba4938d45fc9 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -33,7 +33,7 @@ class providing the base-class of operations. from pandas._libs import Timestamp import pandas._libs.groupby as libgroupby -from pandas._typing import SameFrameOrSeries, Scalar +from pandas._typing import FrameOrSeries, Scalar from pandas.compat import set_function_name from pandas.compat.numpy import function as nv from pandas.errors import AbstractMethodError @@ -2439,8 +2439,8 @@ def tail(self, n=5): return self._selected_obj[mask] def _reindex_output( - self, output: SameFrameOrSeries, fill_value: Scalar = np.NaN - ) -> SameFrameOrSeries: + self, output: FrameOrSeries, fill_value: Scalar = np.NaN + ) -> FrameOrSeries: """ If we have categorical groupers, then we might want to make sure that we have a fully re-indexed output to the levels. This means expanding diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index a1dbef6ab289f..7e7261130ff4a 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -7,7 +7,7 @@ import numpy as np -from pandas._typing import SameFrameOrSeries +from pandas._typing import FrameOrSeries from pandas.util._decorators import cache_readonly from pandas.core.dtypes.common import ( @@ -141,7 +141,7 @@ def _get_grouper(self, obj, validate: bool = True): ) return self.binner, self.grouper, self.obj - def _set_grouper(self, obj: SameFrameOrSeries, sort: bool = False): + def _set_grouper(self, obj: FrameOrSeries, sort: bool = False): """ given an object and the specifications, setup the internal grouper for this particular specification @@ -244,7 +244,7 @@ def __init__( self, index: Index, grouper=None, - obj: Optional[SameFrameOrSeries] = None, + obj: Optional[FrameOrSeries] = None, name=None, level=None, sort: bool = True, @@ -424,7 +424,7 @@ def groups(self) -> Dict[Hashable, np.ndarray]: def get_grouper( - obj: SameFrameOrSeries, + obj: FrameOrSeries, key=None, axis: int = 0, level=None, @@ -432,7 +432,7 @@ def get_grouper( observed: bool = False, mutated: bool = False, validate: bool = True, -) -> "Tuple[ops.BaseGrouper, List[Hashable], SameFrameOrSeries]": +) -> "Tuple[ops.BaseGrouper, List[Hashable], FrameOrSeries]": """ Create and return a BaseGrouper, which is an internal mapping of how to create the grouper indexers. diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 728cd23208af4..37067a1897a52 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -14,7 +14,7 @@ from pandas._libs import NaT, iNaT, lib import pandas._libs.groupby as libgroupby import pandas._libs.reduction as libreduction -from pandas._typing import SameFrameOrSeries +from pandas._typing import FrameOrSeries from pandas.errors import AbstractMethodError from pandas.util._decorators import cache_readonly @@ -111,7 +111,7 @@ def __iter__(self): def nkeys(self) -> int: return len(self.groupings) - def get_iterator(self, data: SameFrameOrSeries, axis: int = 0): + def get_iterator(self, data: FrameOrSeries, axis: int = 0): """ Groupby iterator @@ -125,7 +125,7 @@ def get_iterator(self, data: SameFrameOrSeries, axis: int = 0): for key, (i, group) in zip(keys, splitter): yield key, group - def _get_splitter(self, data: SameFrameOrSeries, axis: int = 0) -> "DataSplitter": + def _get_splitter(self, data: FrameOrSeries, axis: int = 0) -> "DataSplitter": comp_ids, _, ngroups = self.group_info return get_splitter(data, comp_ids, ngroups, axis=axis) @@ -147,13 +147,13 @@ def _get_group_keys(self): # provide "flattened" iterator for multi-group setting return get_flattened_iterator(comp_ids, ngroups, self.levels, self.codes) - def apply(self, f, data: SameFrameOrSeries, axis: int = 0): + def apply(self, f, data: FrameOrSeries, axis: int = 0): mutated = self.mutated splitter = self._get_splitter(data, axis=axis) group_keys = self._get_group_keys() result_values = None - sdata: SameFrameOrSeries = splitter._get_sorted_data() + sdata: FrameOrSeries = splitter._get_sorted_data() if sdata.ndim == 2 and np.any(sdata.dtypes.apply(is_extension_array_dtype)): # calling splitter.fast_apply will raise TypeError via apply_frame_axis0 # if we pass EA instead of ndarray @@ -754,7 +754,7 @@ def _get_grouper(self): """ return self - def get_iterator(self, data: SameFrameOrSeries, axis: int = 0): + def get_iterator(self, data: FrameOrSeries, axis: int = 0): """ Groupby iterator @@ -862,7 +862,7 @@ def _is_indexed_like(obj, axes) -> bool: class DataSplitter: - def __init__(self, data: SameFrameOrSeries, labels, ngroups: int, axis: int = 0): + def __init__(self, data: FrameOrSeries, labels, ngroups: int, axis: int = 0): self.data = data self.labels = ensure_int64(labels) self.ngroups = ngroups @@ -893,7 +893,7 @@ def __iter__(self): for i, (start, end) in enumerate(zip(starts, ends)): yield i, self._chop(sdata, slice(start, end)) - def _get_sorted_data(self) -> SameFrameOrSeries: + def _get_sorted_data(self) -> FrameOrSeries: return self.data.take(self.sort_idx, axis=self.axis) def _chop(self, sdata, slice_obj: slice) -> NDFrame: @@ -920,7 +920,7 @@ def _chop(self, sdata: DataFrame, slice_obj: slice) -> DataFrame: return sdata._slice(slice_obj, axis=1) -def get_splitter(data: SameFrameOrSeries, *args, **kwargs) -> DataSplitter: +def get_splitter(data: FrameOrSeries, *args, **kwargs) -> DataSplitter: if isinstance(data, Series): klass: Type[DataSplitter] = SeriesSplitter else: diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index 77d692c20b87e..2007f6aa32a57 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -6,7 +6,7 @@ import numpy as np -from pandas._typing import FrameOrSeries +from pandas._typing import FrameOrSeriesUnion from pandas import DataFrame, Index, MultiIndex, Series from pandas.core.arrays.categorical import ( @@ -46,7 +46,9 @@ def concat( @overload def concat( - objs: Union[Sequence[FrameOrSeries], Mapping[Optional[Hashable], FrameOrSeries]], + objs: Union[ + Sequence[FrameOrSeriesUnion], Mapping[Optional[Hashable], FrameOrSeriesUnion] + ], axis=0, join: str = "outer", ignore_index: bool = False, @@ -56,12 +58,14 @@ def concat( verify_integrity: bool = False, sort: bool = False, copy: bool = True, -) -> FrameOrSeries: +) -> FrameOrSeriesUnion: ... def concat( - objs: Union[Sequence[FrameOrSeries], Mapping[Optional[Hashable], FrameOrSeries]], + objs: Union[ + Sequence[FrameOrSeriesUnion], Mapping[Optional[Hashable], FrameOrSeriesUnion] + ], axis=0, join="outer", ignore_index: bool = False, @@ -71,7 +75,7 @@ def concat( verify_integrity: bool = False, sort: bool = False, copy: bool = True, -) -> FrameOrSeries: +) -> FrameOrSeriesUnion: """ Concatenate pandas objects along a particular axis with optional set logic along the other axes. diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 25a39027b52cd..6fe2287923fcb 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -13,7 +13,7 @@ from pandas._libs import Timedelta, hashtable as libhashtable, lib import pandas._libs.join as libjoin -from pandas._typing import SameFrameOrSeries +from pandas._typing import FrameOrSeries from pandas.errors import MergeError from pandas.util._decorators import Appender, Substitution @@ -1994,7 +1994,7 @@ def _any(x) -> bool: return x is not None and com.any_not_none(*x) -def _validate_operand(obj: SameFrameOrSeries) -> "DataFrame": +def _validate_operand(obj: FrameOrSeries) -> "DataFrame": if isinstance(obj, ABCDataFrame): return obj elif isinstance(obj, ABCSeries): diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 2fcc2603f4218..c3c3e61f222df 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -11,7 +11,7 @@ import numpy as np import pandas._libs.window.aggregations as window_aggregations -from pandas._typing import Axis, SameFrameOrSeries, Scalar +from pandas._typing import Axis, FrameOrSeries, Scalar from pandas.compat._optional import import_optional_dependency from pandas.compat.numpy import function as nv from pandas.util._decorators import Appender, Substitution, cache_readonly @@ -324,7 +324,7 @@ def _wrap_result(self, result, block=None, obj=None): return type(obj)(result, index=index, columns=block.columns) return result - def _wrap_results(self, results, blocks, obj, exclude=None) -> SameFrameOrSeries: + def _wrap_results(self, results, blocks, obj, exclude=None) -> FrameOrSeries: """ Wrap the results. diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 94259c5ecdba1..3020ac421fc2f 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -260,7 +260,6 @@ def __init__( self._chk_truncate() def _chk_truncate(self) -> None: - from pandas.core.series import Series from pandas.core.reshape.concat import concat self.tr_row_num: Optional[int] @@ -282,9 +281,9 @@ def _chk_truncate(self) -> None: series = series.iloc[:max_rows] else: row_num = max_rows // 2 - concatted = concat((series.iloc[:row_num], series.iloc[-row_num:])) - assert isinstance(concatted, Series) - series = concatted + series = series._ensure_type( + concat((series.iloc[:row_num], series.iloc[-row_num:])) + ) self.tr_row_num = row_num else: self.tr_row_num = None diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 03c7710ec3339..3d2c2159bfbdd 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -27,7 +27,7 @@ from pandas._libs import lib, writers as libwriters from pandas._libs.tslibs import timezones -from pandas._typing import ArrayLike, SameFrameOrSeries +from pandas._typing import ArrayLike, FrameOrSeries from pandas.compat._optional import import_optional_dependency from pandas.errors import PerformanceWarning from pandas.util._decorators import cache_readonly @@ -231,7 +231,7 @@ def _tables(): def to_hdf( path_or_buf, key: str, - value: SameFrameOrSeries, + value: FrameOrSeries, mode: str = "a", complevel: Optional[int] = None, complib: Optional[str] = None, @@ -986,7 +986,7 @@ def func(_start, _stop, _where): def put( self, key: str, - value: SameFrameOrSeries, + value: FrameOrSeries, format=None, index=True, append=False, @@ -1102,7 +1102,7 @@ def remove(self, key: str, where=None, start=None, stop=None): def append( self, key: str, - value: SameFrameOrSeries, + value: FrameOrSeries, format=None, axes=None, index=True, @@ -1537,7 +1537,7 @@ def _create_storer( self, group, format=None, - value: Optional[SameFrameOrSeries] = None, + value: Optional[FrameOrSeries] = None, encoding: str = "UTF-8", errors: str = "strict", ) -> Union["GenericFixed", "Table"]: @@ -1630,7 +1630,7 @@ def error(t): def _write_to_group( self, key: str, - value: SameFrameOrSeries, + value: FrameOrSeries, format, axes=None, index=True,