Skip to content

TYP: pandas/core/frame.py #38416

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 35 commits into from
Mar 15, 2021
Merged
Changes from 14 commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
46bf318
typing
arw2019 Dec 9, 2020
eba7251
typing
arw2019 Dec 11, 2020
1a45267
typing
arw2019 Dec 11, 2020
f5cb185
typing
arw2019 Dec 11, 2020
4c9b764
review comments
arw2019 Dec 11, 2020
a273e5c
review comment
arw2019 Dec 13, 2020
e0558c5
review comment
arw2019 Dec 13, 2020
e156caa
merge master
arw2019 Dec 14, 2020
13ff245
merge master
arw2019 Dec 14, 2020
617e228
revert hints to pivot_table
arw2019 Dec 14, 2020
9a4d187
take out merge
arw2019 Dec 14, 2020
77e1142
minimize diff
arw2019 Dec 14, 2020
2352e45
fix eval return type annotation
arw2019 Dec 14, 2020
a3b7b1b
merge master
arw2019 Dec 14, 2020
1c942fd
review comments
arw2019 Dec 15, 2020
0e747dd
Merge branch 'master' of https://github.com/pandas-dev/pandas into ty…
arw2019 Dec 15, 2020
11247d9
review comments
arw2019 Dec 21, 2020
d82d81e
merge master
arw2019 Dec 21, 2020
56ee7c4
Merge branch 'master' of https://github.com/pandas-dev/pandas into ty…
arw2019 Dec 27, 2020
168cad2
review comments
arw2019 Jan 5, 2021
e12ed2e
Merge branch 'master' of https://github.com/pandas-dev/pandas into ty…
arw2019 Jan 5, 2021
6c876ff
review comments
arw2019 Jan 5, 2021
0e780ce
review comments
arw2019 Jan 5, 2021
a93489a
review: remove NpDtype usage
arw2019 Jan 6, 2021
2cd6823
merge master
arw2019 Jan 6, 2021
3e39e44
Merge branch 'master' of https://github.com/pandas-dev/pandas into ty…
arw2019 Jan 10, 2021
a28deaf
merge master
arw2019 Feb 11, 2021
9243c6b
merge master
arw2019 Feb 21, 2021
58fcad8
typing
arw2019 Feb 21, 2021
ff317d3
restore return type for memory_usage
arw2019 Feb 22, 2021
02647b5
remove return type for _series
arw2019 Feb 22, 2021
cc517a5
rogue comma
arw2019 Feb 22, 2021
666533c
Merge branch 'master' of https://github.com/pandas-dev/pandas into ty…
arw2019 Feb 22, 2021
5d08d71
Merge remote-tracking branch 'upstream/master' into typing-frame
simonjayhawkins Mar 14, 2021
3b52fc9
Merge remote-tracking branch 'upstream/master' into typing-frame
simonjayhawkins Mar 15, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
92 changes: 64 additions & 28 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,14 @@
TYPE_CHECKING,
Any,
AnyStr,
Callable,
Dict,
FrozenSet,
Hashable,
Iterable,
Iterator,
List,
Mapping,
Optional,
Sequence,
Set,
Expand All @@ -48,6 +50,7 @@
from pandas._libs.lib import no_default
from pandas._typing import (
AggFuncType,
AnyArrayLike,
ArrayLike,
Axes,
Axis,
Expand All @@ -62,6 +65,7 @@
Label,
Level,
Renamer,
Scalar,
StorageOptions,
ValueKeyFunc,
)
Expand Down Expand Up @@ -1069,7 +1073,9 @@ def iterrows(self) -> Iterable[Tuple[Label, Series]]:
s = klass(v, index=columns, name=k)
yield k, s

def itertuples(self, index: bool = True, name: Optional[str] = "Pandas"):
def itertuples(
self, index: bool = True, name: Optional[str] = "Pandas"
) -> Iterable[Tuple]:
"""
Iterate over DataFrame rows as namedtuples.

Expand Down Expand Up @@ -1159,7 +1165,7 @@ def __len__(self) -> int:
"""
return len(self.index)

def dot(self, other):
def dot(self, other: Union[AnyArrayLike, FrameOrSeriesUnion]) -> FrameOrSeriesUnion:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

AnyArrayLike includes Series, so this could probably use Union[AnyArrayLike, DataFrame].

will probably want to overload here in a future pass to refine the return type and avoid union return types

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

opened #38765 to add the overload, will revert here to orthogonalize the PRs

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Reverted

"""
Compute the matrix multiplication between the DataFrame and other.

Expand Down Expand Up @@ -1269,13 +1275,17 @@ def dot(self, other):
else: # pragma: no cover
raise TypeError(f"unsupported type: {type(other)}")

def __matmul__(self, other):
def __matmul__(
self, other: Union[AnyArrayLike, FrameOrSeriesUnion]
) -> FrameOrSeriesUnion:
"""
Matrix multiplication using binary `@` operator in Python>=3.5.
"""
return self.dot(other)

def __rmatmul__(self, other):
def __rmatmul__(
self, other: Union[AnyArrayLike, FrameOrSeriesUnion]
) -> FrameOrSeriesUnion:
"""
Matrix multiplication using binary `@` operator in Python>=3.5.
"""
Expand All @@ -1292,7 +1302,13 @@ def __rmatmul__(self, other):
# IO methods (to / from other formats)

@classmethod
def from_dict(cls, data, orient="columns", dtype=None, columns=None) -> DataFrame:
def from_dict(
cls,
data,
orient: str = "columns",
dtype: Optional[Dtype] = None,
columns: Optional[List[Label]] = None,
) -> DataFrame:
"""
Construct DataFrame from dict of array-like or dicts.

Expand Down Expand Up @@ -1371,7 +1387,10 @@ def from_dict(cls, data, orient="columns", dtype=None, columns=None) -> DataFram
return cls(data, index=index, columns=columns, dtype=dtype)

def to_numpy(
self, dtype=None, copy: bool = False, na_value=lib.no_default
self,
dtype: Optional[Dtype] = None,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

dtype is passed on to the np.array constructor. Dtype includes ExtensionDtype, which I think would probably raise at runtime and create mypy errors when we add numpy types shortly.

maybe best to leave untyped for now pending being able to use a dtype alias from numpy shortly.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see we recently added NpDtype to pd._typing. Shall I use that here?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have reservations on using NpDtype (and adding types to dtype parameters in general, such as #38808) as while these types resolve to Any, mypy will not report any inconsistencies.

The issue here will be picked up when we have the numpy types and therefore the burden on reviewers is reduced.

My reservation about adding these ahead of time is that there will likely be more errors to resolve when we transition to using numpy types, although that boat has already sailed (xref #36092).

so rather than doing anything more here now, I'm happy to accept that mypy is green.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok. Removed NpDtype usage here (much easier to revisit after #36092, where there seems to be a lot to do already, has gone in)

copy: bool = False,
na_value: Scalar = lib.no_default,
) -> np.ndarray:
"""
Convert the DataFrame to a NumPy array.
Expand Down Expand Up @@ -1438,7 +1457,7 @@ def to_numpy(

return result

def to_dict(self, orient: str = "dict", into=dict):
def to_dict(self, orient: str = "dict", into=dict) -> Union[Dict, List, Mapping]:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Mapping should be enough here, why did you add List?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

with orient="records" we return a list:

pandas/pandas/core/frame.py

Lines 1589 to 1599 in 36c4d5c

elif orient == "records":
columns = self.columns.tolist()
rows = (
dict(zip(columns, row))
for row in self.itertuples(index=False, name=None)
)
return [
into_c((k, maybe_box_datetimelike(v)) for k, v in row.items())
for row in rows
]

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can at least get rid of Dict then - no reason to include Dict and Mapping unless I am overlooking something

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, that's right - done

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

with orient="records" we return a list:

we should probably overload using Literal to avoid Union return types. but not in this pass.

"""
Convert the DataFrame to a dictionary.

Expand Down Expand Up @@ -1727,7 +1746,7 @@ def from_records(
exclude=None,
columns=None,
coerce_float: bool = False,
nrows=None,
nrows: Optional[bool] = None,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

int?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes (made a mistake)

) -> DataFrame:
"""
Convert structured or record ndarray to DataFrame.
Expand Down Expand Up @@ -2710,7 +2729,7 @@ def info(
show_counts=show_counts,
)

def memory_usage(self, index=True, deep=False) -> Series:
def memory_usage(self, index: bool = True, deep: bool = False) -> Series:
"""
Return the memory usage of each column in bytes.

Expand Down Expand Up @@ -2933,7 +2952,7 @@ def T(self) -> DataFrame:
# ----------------------------------------------------------------------
# Indexing Methods

def _ixs(self, i: int, axis: int = 0):
def _ixs(self, i: int, axis: Axis = 0):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Axis includes str; we can only use the Axis alias if we call axis = self._get_axis_number(axis) or pass it on to a method that does so. Here we don't do that lookup, so we should use int.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right, fixed

"""
Parameters
----------
Expand Down Expand Up @@ -3110,7 +3129,7 @@ def _getitem_multilevel(self, key):
# loc is neither a slice nor ndarray, so must be an int
return self._ixs(loc, axis=1)

def _get_value(self, index, col, takeable: bool = False):
def _get_value(self, index, col, takeable: bool = False) -> Scalar:
"""
Quickly retrieve single value at passed column and index.

Expand Down Expand Up @@ -3264,7 +3283,7 @@ def _iset_item(self, loc: int, value):
if len(self):
self._check_setitem_copy()

def _set_item(self, key, value):
def _set_item(self, key, value) -> None:
"""
Add series to DataFrame in specified column.

Expand All @@ -3289,7 +3308,9 @@ def _set_item(self, key, value):

self._set_item_mgr(key, value)

def _set_value(self, index, col, value, takeable: bool = False):
def _set_value(
self, index: int, col, value: Scalar, takeable: bool = False
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the docstring states index : row label, is index: int correct here?

Maybe we need to update the docstring to use the following format:

parameter: type
    description

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

right, so should be Label I think (again, my bad)

Docstring updated.

) -> None:
"""
Put single value at passed column and index.

Expand Down Expand Up @@ -3322,7 +3343,7 @@ def _set_value(self, index, col, value, takeable: bool = False):
self.loc[index, col] = value
self._item_cache.pop(col, None)

def _ensure_valid_index(self, value):
def _ensure_valid_index(self, value) -> None:
"""
Ensure that if we don't have an index, that we can create one from the
passed value.
Expand Down Expand Up @@ -3357,7 +3378,7 @@ def _box_col_values(self, values, loc: int) -> Series:
# ----------------------------------------------------------------------
# Unsorted

def query(self, expr: str, inplace: bool = False, **kwargs):
def query(self, expr: str, inplace: bool = False, **kwargs) -> Optional[DataFrame]:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i don't think this is correct for the output type

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looking

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this is right. From docstring:

DataFrame resulting from the provided query expression or None if ``inplace=True``.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, my point is that the docstring is wrong: this can be a Series or a Scalar; basically, this can be anything. I would remove it for now.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok - removed (and will fix the docstring in follow-on, too)

"""
Query the columns of a DataFrame with a boolean expression.

Expand Down Expand Up @@ -3518,10 +3539,13 @@ def query(self, expr: str, inplace: bool = False, **kwargs):

if inplace:
self._update_inplace(result)
return None
else:
return result

def eval(self, expr: str, inplace: bool = False, **kwargs):
def eval(
self, expr: str, inplace: bool = False, **kwargs
) -> Optional[Union[AnyArrayLike, DataFrame, Scalar]]:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same here: remove this for now and leave it for another pass.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Removed

"""
Evaluate a string describing operations on DataFrame columns.

Expand Down Expand Up @@ -3946,15 +3970,17 @@ def _sanitize_column(self, value):
return value

@property
def _series(self):
def _series(self) -> "Dict[int, Series]":
return {
item: Series(
self._mgr.iget(idx), index=self.index, name=item, fastpath=True
)
for idx, item in enumerate(self.columns)
}

def lookup(self, row_labels, col_labels) -> np.ndarray:
def lookup(
self, row_labels: Sequence[Label], col_labels: Sequence[Label]
) -> np.ndarray:
"""
Label-based "fancy indexing" function for DataFrame.
Given equal-length arrays of row and column labels, return an
Expand Down Expand Up @@ -6012,7 +6038,7 @@ def _arith_method(self, other, op):

_logical_method = _arith_method

def _dispatch_frame_op(self, right, func, axis: Optional[int] = None):
def _dispatch_frame_op(self, right, func: Callable, axis: Optional[int] = None):
"""
Evaluate the frame operation func(left, right) by evaluating
column-by-column, dispatching to the Series implementation.
Expand Down Expand Up @@ -6093,7 +6119,7 @@ def _arith_op(left, right):
new_data = self._dispatch_frame_op(other, _arith_op)
return new_data

def _construct_result(self, result) -> DataFrame:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why was this deleted? self._constructor is typed as Type[DataFrame]. while not 100% correct, this should not be giving mypy errors.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think by mistake, reverting

def _construct_result(self, result: DataFrame) -> DataFrame:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is not complete, this would be an ndarray, scalar or DataFrame

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

essentially anything that can be input to the constructor. leave it out for now.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

"""
Wrap the result of an arithmetic, comparison, or logical operation.

Expand Down Expand Up @@ -7323,7 +7349,7 @@ def explode(

return result

def unstack(self, level=-1, fill_value=None):
def unstack(self, level: Level = -1, fill_value=None):
"""
Pivot a level of the (necessarily hierarchical) index labels.

Expand Down Expand Up @@ -7394,7 +7420,7 @@ def melt(
var_name=None,
value_name="value",
col_level: Optional[Level] = None,
ignore_index=True,
ignore_index: bool = True,
) -> DataFrame:

return melt(
Expand Down Expand Up @@ -7802,7 +7828,7 @@ def apply(
)
return op.get_result()

def applymap(self, func, na_action: Optional[str] = None) -> DataFrame:
def applymap(self, func: Callable, na_action: Optional[str] = None) -> DataFrame:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can this be typed more strictly than Callable? Subscripting Callable is much more useful to a reader if possible

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes - @jreback asked for this too. I'll put up a follow-on for this (to avoid expanding the scope of this PR)

"""
Apply a function to a Dataframe elementwise.

Expand Down Expand Up @@ -8253,7 +8279,9 @@ def merge(
validate=validate,
)

def round(self, decimals=0, *args, **kwargs) -> DataFrame:
def round(
self, decimals: Union[int, Dict, Series] = 0, *args, **kwargs
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

try to avoid generics without type parameters. Dict[Label, int]?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes - added

) -> DataFrame:
"""
Round a DataFrame to a variable number of decimal places.

Expand Down Expand Up @@ -8367,7 +8395,9 @@ def _series_round(s, decimals):
# ----------------------------------------------------------------------
# Statistical methods, etc.

def corr(self, method="pearson", min_periods=1) -> DataFrame:
def corr(
self, method: Union[str, Callable] = "pearson", min_periods: int = 1
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Callable[[np.ndarray,np.ndarray], float]?

) -> DataFrame:
"""
Compute pairwise correlation of columns, excluding NA/null values.

Expand Down Expand Up @@ -9331,7 +9361,11 @@ def quantile(
return result

def to_timestamp(
self, freq=None, how: str = "start", axis: Axis = 0, copy: bool = True
self,
freq: Optional[str] = None,
how: str = "start",
axis: Axis = 0,
copy: bool = True,
) -> DataFrame:
"""
Cast to DatetimeIndex of timestamps, at *beginning* of period.
Expand Down Expand Up @@ -9364,7 +9398,9 @@ def to_timestamp(
setattr(new_obj, axis_name, new_ax)
return new_obj

def to_period(self, freq=None, axis: Axis = 0, copy: bool = True) -> DataFrame:
def to_period(
self, freq: Optional[str] = None, axis: Axis = 0, copy: bool = True
) -> DataFrame:
"""
Convert DataFrame from DatetimeIndex to PeriodIndex.

Expand Down Expand Up @@ -9396,7 +9432,7 @@ def to_period(self, freq=None, axis: Axis = 0, copy: bool = True) -> DataFrame:
setattr(new_obj, axis_name, new_ax)
return new_obj

def isin(self, values) -> DataFrame:
def isin(self, values: Union[FrameOrSeriesUnion, Dict, Iterable]) -> DataFrame:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

type parameters for Dict and Iterable?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added

"""
Whether each element in the DataFrame is contained in values.

Expand Down