Skip to content

TYP: Add types to top-level funcs, step 2 #30582

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Jan 5, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 14 additions & 1 deletion pandas/_typing.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,10 @@
from pandas.core.arrays.base import ExtensionArray # noqa: F401
from pandas.core.dtypes.dtypes import ExtensionDtype # noqa: F401
from pandas.core.indexes.base import Index # noqa: F401
from pandas.core.series import Series # noqa: F401
from pandas.core.generic import NDFrame # noqa: F401
from pandas import Interval # noqa: F401
from pandas.core.series import Series # noqa: F401
from pandas.core.frame import DataFrame # noqa: F401

# array-like

Expand All @@ -41,7 +42,19 @@

Dtype = Union[str, np.dtype, "ExtensionDtype"]
FilePathOrBuffer = Union[str, Path, IO[AnyStr]]

# FrameOrSeriesUnion means either a DataFrame or a Series. E.g.
# `def func(a: FrameOrSeriesUnion) -> FrameOrSeriesUnion: ...` means that if a Series
# is passed in, either a Series or DataFrame is returned, and if a DataFrame is passed
# in, either a DataFrame or a Series is returned.
FrameOrSeriesUnion = Union["DataFrame", "Series"]

# FrameOrSeries is stricter and ensures that the same subclass of NDFrame always is
# used. E.g. `def func(a: FrameOrSeries) -> FrameOrSeries: ...` means that if a
# Series is passed into a function, a Series is always returned and if a DataFrame is
# passed in, a DataFrame is always returned.
FrameOrSeries = TypeVar("FrameOrSeries", bound="NDFrame")

Axis = Union[str, int]
Ordered = Optional[bool]
JSONSerializable = Union[PythonScalar, List, Dict]
Expand Down
9 changes: 6 additions & 3 deletions pandas/core/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
intended for public consumption
"""
from textwrap import dedent
from typing import Dict, Optional, Tuple, Union
from typing import TYPE_CHECKING, Dict, Optional, Tuple, Union
from warnings import catch_warnings, simplefilter, warn

import numpy as np
Expand Down Expand Up @@ -50,6 +50,9 @@
from pandas.core.construction import array, extract_array
from pandas.core.indexers import validate_indices

if TYPE_CHECKING:
from pandas import Series

_shared_docs: Dict[str, str] = {}


Expand Down Expand Up @@ -651,7 +654,7 @@ def value_counts(
normalize: bool = False,
bins=None,
dropna: bool = True,
) -> ABCSeries:
) -> "Series":
Copy link
Contributor Author

@topper-123 topper-123 Dec 31, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ABCSeries doesn't work as a return type, as far as I can see: doing `x = pd.value_counts(x)` followed by `reveal_type(x)` reports `Any` if the return type is annotated as `ABCSeries`.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, all ABCs used for type annotations need to be replaced. We need a custom script or a clever regex to avoid this in the future.

"""
Compute a histogram of the counts of non-null values.

Expand Down Expand Up @@ -793,7 +796,7 @@ def duplicated(values, keep="first") -> np.ndarray:
return f(values, keep=keep)


def mode(values, dropna: bool = True) -> ABCSeries:
def mode(values, dropna: bool = True) -> "Series":
"""
Returns the mode(s) of an array.

Expand Down
4 changes: 2 additions & 2 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -5878,7 +5878,7 @@ def groupby(

@Substitution("")
@Appender(_shared_docs["pivot"])
def pivot(self, index=None, columns=None, values=None):
def pivot(self, index=None, columns=None, values=None) -> "DataFrame":
from pandas.core.reshape.pivot import pivot

return pivot(self, index=index, columns=columns, values=values)
Expand Down Expand Up @@ -6025,7 +6025,7 @@ def pivot_table(
dropna=True,
margins_name="All",
observed=False,
):
) -> "DataFrame":
from pandas.core.reshape.pivot import pivot_table

return pivot_table(
Expand Down
44 changes: 41 additions & 3 deletions pandas/core/reshape/concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,12 @@
concat routines
"""

from typing import Hashable, List, Optional
from typing import Hashable, List, Mapping, Optional, Sequence, Union, overload

import numpy as np

from pandas._typing import FrameOrSeriesUnion

from pandas import DataFrame, Index, MultiIndex, Series
from pandas.core.arrays.categorical import (
factorize_from_iterable,
Expand All @@ -26,8 +28,27 @@
# Concatenate DataFrame objects


@overload
def concat(
objs: Union[Sequence["DataFrame"], Mapping[Optional[Hashable], "DataFrame"]],
axis=0,
join: str = "outer",
ignore_index: bool = False,
keys=None,
levels=None,
names=None,
verify_integrity: bool = False,
sort: bool = False,
copy: bool = True,
) -> "DataFrame":
...


@overload
def concat(
objs,
objs: Union[
Sequence[FrameOrSeriesUnion], Mapping[Optional[Hashable], FrameOrSeriesUnion]
],
axis=0,
join: str = "outer",
ignore_index: bool = False,
Expand All @@ -37,7 +58,24 @@ def concat(
verify_integrity: bool = False,
sort: bool = False,
copy: bool = True,
):
) -> FrameOrSeriesUnion:
...


def concat(
objs: Union[
Sequence[FrameOrSeriesUnion], Mapping[Optional[Hashable], FrameOrSeriesUnion]
],
axis=0,
join="outer",
ignore_index: bool = False,
keys=None,
levels=None,
names=None,
verify_integrity: bool = False,
sort: bool = False,
copy: bool = True,
) -> FrameOrSeriesUnion:
"""
Concatenate pandas objects along a particular axis with optional set logic
along the other axes.
Expand Down
4 changes: 3 additions & 1 deletion pandas/core/reshape/melt.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,9 @@ def lreshape(data: DataFrame, groups, dropna: bool = True, label=None) -> DataFr
return data._constructor(mdata, columns=id_cols + pivot_cols)


def wide_to_long(df: DataFrame, stubnames, i, j, sep: str = "", suffix: str = r"\d+"):
def wide_to_long(
df: DataFrame, stubnames, i, j, sep: str = "", suffix: str = r"\d+"
) -> DataFrame:
r"""
Wide panel to long format. Less flexible but more user-friendly than melt.

Expand Down
6 changes: 3 additions & 3 deletions pandas/core/reshape/pivot.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import TYPE_CHECKING, Callable, Dict, Tuple, Union
from typing import TYPE_CHECKING, Callable, Dict, List, Tuple, Union

import numpy as np

Expand Down Expand Up @@ -40,7 +40,7 @@ def pivot_table(
columns = _convert_by(columns)

if isinstance(aggfunc, list):
pieces = []
pieces: List[DataFrame] = []
keys = []
for func in aggfunc:
table = pivot_table(
Expand Down Expand Up @@ -459,7 +459,7 @@ def crosstab(
margins_name: str = "All",
dropna: bool = True,
normalize=False,
):
) -> "DataFrame":
"""
Compute a simple cross tabulation of two (or more) factors. By default
computes a frequency table of the factors unless an array of values and an
Expand Down
5 changes: 3 additions & 2 deletions pandas/core/reshape/reshape.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from functools import partial
import itertools
from typing import List

import numpy as np

Expand Down Expand Up @@ -755,7 +756,7 @@ def get_dummies(
sparse=False,
drop_first=False,
dtype=None,
):
) -> "DataFrame":
"""
Convert categorical variable into dummy/indicator variables.

Expand Down Expand Up @@ -899,7 +900,7 @@ def check_len(item, name):

if data_to_encode.shape == data.shape:
# Encoding the entire df, do not prepend any dropped columns
with_dummies = []
with_dummies: List[DataFrame] = []
elif columns is not None:
# Encoding only cols specified in columns. Get all cols not in
# columns to prepend to result.
Expand Down
4 changes: 3 additions & 1 deletion pandas/io/formats/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -281,7 +281,9 @@ def _chk_truncate(self) -> None:
series = series.iloc[:max_rows]
else:
row_num = max_rows // 2
series = concat((series.iloc[:row_num], series.iloc[-row_num:]))
series = series._ensure_type(
concat((series.iloc[:row_num], series.iloc[-row_num:]))
)
self.tr_row_num = row_num
else:
self.tr_row_num = None
Expand Down