added f strings and typing to frame.py #30021
Merged
Commits — changes from all 22 commits
f5b303e
added f strings and typing to frame.py
c3fd308
minor fix
7602461
Merge branch 'master' of https://github.com/pandas-dev/pandas into fr…
3a4c244
cleaned up f strings, and flack 8 errors per PR comments
ef87c64
fixed return annotation of functions that return a DataFrame
fde23a9
fixed annotation of functions that return a Series
cf33998
Update pandas/core/frame.py
mck619 d223c88
Update pandas/core/frame.py
mck619 188410c
Update pandas/core/frame.py
mck619 bfdf696
Update pandas/core/frame.py
mck619 0ecb000
Update pandas/core/frame.py
mck619 7b52345
typing syntax fix
5e7d915
more typing syntax fixes
70ef860
Update pandas/core/frame.py
mck619 997a2e3
fixed fstring with err_msg
2e05e01
Merge branch 'frame_typing_fstring' of https://github.com/mck619/pand…
a00c34d
Update pandas/core/frame.py
mck619 099feb6
fstring clean up
85909ea
Merge branch 'frame_typing_fstring' of https://github.com/mck618/pand…
18fed32
black formatting
17444ec
mypy fixes per Simon's comments
1a9c6f0
doc string fix
pandas/core/frame.py
@@ -15,6 +15,7 @@
import sys
from textwrap import dedent
from typing import (
+ Any,
FrozenSet,
Hashable,
Iterable,

@@ -25,6 +26,7 @@
Tuple,
Type,
Union,
+ cast,
)
import warnings
@@ -475,7 +477,7 @@ def __init__(
except (ValueError, TypeError) as e:
exc = TypeError(
"DataFrame constructor called with "
- "incompatible data and dtype: {e}".format(e=e)
+ f"incompatible data and dtype: {e}"
)
raise exc from e

@@ -1112,8 +1114,7 @@ def dot(self, other):
rvals = np.asarray(other)
if lvals.shape[1] != rvals.shape[0]:
raise ValueError(
- "Dot product shape mismatch, "
- "{s} vs {r}".format(s=lvals.shape, r=rvals.shape)
+ f"Dot product shape mismatch, {lvals.shape} vs {rvals.shape}"
)

if isinstance(other, DataFrame):

@@ -1129,7 +1130,7 @@ def dot(self, other):
else:
return Series(result, index=left.index)
else: # pragma: no cover
- raise TypeError("unsupported type: {oth}".format(oth=type(other)))
+ raise TypeError(f"unsupported type: {type(other)}")

def __matmul__(self, other):
"""
@@ -1417,7 +1418,7 @@ def to_dict(self, orient="dict", into=dict):
for t in self.itertuples(name=None)
)
else:
- raise ValueError("orient '{o}' not understood".format(o=orient))
+ raise ValueError(f"orient '{orient}' not understood")

def to_gbq(
self,

@@ -1836,9 +1837,7 @@ def to_records(self, index=True, column_dtypes=None, index_dtypes=None):
formats.append(dtype_mapping)
else:
element = "row" if i < index_len else "column"
- msg = ("Invalid dtype {dtype} specified for {element} {name}").format(
- dtype=dtype_mapping, element=element, name=name
- )
+ msg = f"Invalid dtype {dtype_mapping} specified for {element} {name}"
raise ValueError(msg)

return np.rec.fromarrays(arrays, dtype={"names": names, "formats": formats})

@@ -2307,7 +2306,7 @@ def info(
lines.append(self.index._summary())

if len(self.columns) == 0:
- lines.append("Empty {name}".format(name=type(self).__name__))
+ lines.append(f"Empty {type(self).__name__}")
fmt.buffer_put_lines(buf, lines)
return
@@ -2335,10 +2334,7 @@ def _verbose_repr():
counts = self.count()
if len(cols) != len(counts): # pragma: no cover
raise AssertionError(
- "Columns must equal counts "
- "({cols:d} != {counts:d})".format(
- cols=len(cols), counts=len(counts)
- )
+ f"Columns must equal counts ({len(cols)} != {len(counts)})"
)
tmpl = "{count} non-null {dtype}"

@@ -2382,7 +2378,7 @@ def _sizeof_fmt(num, size_qualifier):
counts = self._data.get_dtype_counts()
dtypes = ["{k}({kk:d})".format(k=k[0], kk=k[1]) for k in sorted(counts.items())]
- lines.append("dtypes: {types}".format(types=", ".join(dtypes)))
+ lines.append(f"dtypes: {', '.join(dtypes)}")

if memory_usage is None:
memory_usage = get_option("display.memory_usage")
@@ -2399,12 +2395,7 @@ def _sizeof_fmt(num, size_qualifier):
if "object" in counts or self.index._is_memory_usage_qualified():
size_qualifier = "+"
mem_usage = self.memory_usage(index=True, deep=deep).sum()
- lines.append(
- "memory usage: {mem}\n".format(
- mem=_sizeof_fmt(mem_usage, size_qualifier)
- )
- )
-
+ lines.append(f"memory usage: {_sizeof_fmt(mem_usage, size_qualifier)}\n")
fmt.buffer_put_lines(buf, lines)

def memory_usage(self, index=True, deep=False):
@@ -3069,8 +3060,8 @@ def query(self, expr, inplace=False, **kwargs):
"""
inplace = validate_bool_kwarg(inplace, "inplace")
if not isinstance(expr, str):
- msg = "expr must be a string to be evaluated, {0} given"
- raise ValueError(msg.format(type(expr)))
+ msg = f"expr must be a string to be evaluated, {type(expr)} given"
+ raise ValueError(msg)
kwargs["level"] = kwargs.pop("level", 0) + 1
kwargs["target"] = None
res = self.eval(expr, **kwargs)

@@ -3287,11 +3278,7 @@ def select_dtypes(self, include=None, exclude=None):
# can't both include AND exclude!
if not include.isdisjoint(exclude):
- raise ValueError(
- "include and exclude overlap on {inc_ex}".format(
- inc_ex=(include & exclude)
- )
- )
+ raise ValueError(f"include and exclude overlap on {(include & exclude)}")

# We raise when both include and exclude are empty
# Hence, we can just shrink the columns we want to keep
@@ -4128,15 +4115,13 @@ def set_index(
try:
found = col in self.columns
except TypeError:
- raise TypeError(
- err_msg + " Received column of type {}".format(type(col))
- )
+ raise TypeError(f"{err_msg}. Received column of type {type(col)}")
else:
if not found:
missing.append(col)

if missing:
- raise KeyError("None of {} are in the columns".format(missing))
+ raise KeyError(f"None of {missing} are in the columns")

if inplace:
frame = self
@@ -4180,17 +4165,15 @@ def set_index(
# check newest element against length of calling frame, since
# ensure_index_from_sequences would not raise for append=False.
raise ValueError(
- "Length mismatch: Expected {len_self} rows, "
- "received array of length {len_col}".format(
- len_self=len(self), len_col=len(arrays[-1])
- )
+ f"Length mismatch: Expected {len(self)} rows, "
+ f"received array of length {len(arrays[-1])}"
)

index = ensure_index_from_sequences(arrays, names)

if verify_integrity and not index.is_unique:
duplicates = index[index.duplicated()].unique()
- raise ValueError("Index has duplicate keys: {dup}".format(dup=duplicates))
+ raise ValueError(f"Index has duplicate keys: {duplicates}")

# use set to handle duplicate column names gracefully in case of drop
for c in set(to_remove):
@@ -4205,8 +4188,13 @@ def set_index(
return frame

def reset_index(
- self, level=None, drop=False, inplace=False, col_level=0, col_fill=""
- ):
+ self,
+ level: Optional[Union[Hashable, Sequence[Hashable]]] = None,
+ drop: bool = False,
+ inplace: bool = False,
+ col_level: Hashable = 0,
+ col_fill: Optional[Hashable] = "",
+ ) -> Optional["DataFrame"]:
"""
Reset the index, or a level of it.

@@ -4234,8 +4222,8 @@ def reset_index(
Returns
-------
- DataFrame
- DataFrame with the new index.
+ DataFrame or None
+ DataFrame with the new index or None if ``inplace=True``.

See Also
--------
@@ -4400,6 +4388,7 @@ def _maybe_casted_values(index, labels=None):
new_index = self.index.droplevel(level)

if not drop:
+ to_insert: Iterable[Tuple[Any, Optional[Any]]]
if isinstance(self.index, ABCMultiIndex):
names = [
(n if n is not None else f"level_{i}")

@@ -4422,8 +4411,7 @@ def _maybe_casted_values(index, labels=None):
if len(col_name) not in (1, self.columns.nlevels):
raise ValueError(
"col_fill=None is incompatible "
- "with incomplete column name "
- "{}".format(name)
+ f"with incomplete column name {name}"
)
col_fill = col_name[0]

@@ -4440,6 +4428,8 @@ def _maybe_casted_values(index, labels=None):
if not inplace:
return new_obj

+ return None
+
# ----------------------------------------------------------------------
# Reindex-based selection methods
@@ -4589,7 +4579,7 @@ def dropna(self, axis=0, how="any", thresh=None, subset=None, inplace=False):
mask = count > 0
else:
if how is not None:
- raise ValueError("invalid how option: {h}".format(h=how))
+ raise ValueError(f"invalid how option: {how}")
else:
raise TypeError("must specify how or thresh")

@@ -4600,7 +4590,12 @@ def dropna(self, axis=0, how="any", thresh=None, subset=None, inplace=False):
else:
return result

- def drop_duplicates(self, subset=None, keep="first", inplace=False):
+ def drop_duplicates(
+ self,
+ subset: Optional[Union[Hashable, Sequence[Hashable]]] = None,
+ keep: Union[str, bool] = "first",
+ inplace: bool = False,
+ ) -> Optional["DataFrame"]:
"""
Return DataFrame with duplicate rows removed.
@@ -4623,6 +4618,7 @@ def drop_duplicates(self, subset=None, keep="first", inplace=False):
Returns
-------
DataFrame
+ DataFrame with duplicates removed or None if ``inplace=True``.
"""
if self.empty:
return self.copy()

@@ -4637,7 +4633,13 @@ def drop_duplicates(self, subset=None, keep="first", inplace=False):
else:
return self[-duplicated]

- def duplicated(self, subset=None, keep="first"):
+ return None
+
+ def duplicated(
+ self,
+ subset: Optional[Union[Hashable, Sequence[Hashable]]] = None,
+ keep: Union[str, bool] = "first",
+ ) -> "Series":
"""
Return boolean Series denoting duplicate rows.
@@ -4681,6 +4683,9 @@ def f(vals):
):
subset = (subset,)

+ # needed for mypy since can't narrow types using np.iterable
+ subset = cast(Iterable, subset)
+
# Verify all columns in subset exist in the queried dataframe
# Otherwise, raise a KeyError, same as if you try to __getitem__ with a
# key that doesn't exist.

@@ -6030,6 +6035,8 @@ def explode(self, column: Union[str, Tuple]) -> "DataFrame":
raise ValueError("columns must be unique")

df = self.reset_index(drop=True)
+ # TODO: use overload to refine return type of reset_index
+ assert df is not None # needed for mypy
result = df[column].explode()
result = df.drop([column], axis=1).join(result)
result.index = self.index.take(result.index)
@@ -7208,7 +7215,7 @@ def corr(self, method="pearson", min_periods=1):
raise ValueError(
"method must be either 'pearson', "
"'spearman', 'kendall', or a callable, "
- "'{method}' was supplied".format(method=method)
+ f"'{method}' was supplied"
)

return self._constructor(correl, index=idx, columns=cols)

@@ -7399,9 +7406,9 @@ def c(x):
else:
raise ValueError(
- "Invalid method {method} was passed, "
+ f"Invalid method {method} was passed, "
"valid methods are: 'pearson', 'kendall', "
- "'spearman', or callable".format(method=method)
+ "'spearman', or callable"
)

if not drop:
@@ -7531,8 +7538,7 @@ def _count_level(self, level, axis=0, numeric_only=False):
if not isinstance(count_axis, ABCMultiIndex):
raise TypeError(
- "Can only count levels on hierarchical "
- "{ax}.".format(ax=self._get_axis_name(axis))
+ f"Can only count levels on hierarchical {self._get_axis_name(axis)}."
)

if frame._is_mixed_type:
@@ -7590,8 +7596,8 @@ def _get_data(axis_matters):
data = self._get_bool_data()
else: # pragma: no cover
msg = (
- "Generating numeric_only data with filter_type {f}"
- "not supported.".format(f=filter_type)
+ f"Generating numeric_only data with filter_type {filter_type} "
+ "not supported."
)
raise NotImplementedError(msg)
return data

Review comment: this appears to have a missing space in the message. do we have a test for this message?
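Such a message is usually pinned down with pytest.raises(match=...). A minimal, hypothetical sketch of that pattern — exercised here against the reachable "Dot product shape mismatch" message from this same diff, since the filter_type branch above is marked pragma: no cover:

import numpy as np
import pandas as pd
import pytest

def test_dot_shape_mismatch_message():
    # A (1, 3) frame cannot be multiplied by a length-2 vector, so DataFrame.dot
    # raises ValueError with the f-string message introduced in this PR.
    with pytest.raises(ValueError, match="Dot product shape mismatch"):
        pd.DataFrame([[1, 2, 3]]).dot(np.array([1, 2]))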
@@ -8000,7 +8006,7 @@ def to_timestamp(self, freq=None, how="start", axis=0, copy=True):
elif axis == 1:
new_data.set_axis(0, self.columns.to_timestamp(freq=freq, how=how))
else: # pragma: no cover
- raise AssertionError("Axis must be 0 or 1. Got {ax!s}".format(ax=axis))
+ raise AssertionError(f"Axis must be 0 or 1. Got {axis}")

return self._constructor(new_data)

@@ -8034,7 +8040,7 @@ def to_period(self, freq=None, axis=0, copy=True):
elif axis == 1:
new_data.set_axis(0, self.columns.to_period(freq=freq))
else: # pragma: no cover
- raise AssertionError("Axis must be 0 or 1. Got {ax!s}".format(ax=axis))
+ raise AssertionError(f"Axis must be 0 or 1. Got {axis}")

return self._constructor(new_data)

@@ -8123,8 +8129,8 @@ def isin(self, values):
else:
if not is_list_like(values):
raise TypeError(
- f"only list-like or dict-like objects are allowed "
- f"to be passed to DataFrame.isin(), "
+ "only list-like or dict-like objects are allowed "
+ "to be passed to DataFrame.isin(), "
f"you passed a {repr(type(values).__name__)}"
)
return DataFrame(
@@ -8170,4 +8176,4 @@ def _from_nested_dict(data):

def _put_str(s, space):
- return "{s}".format(s=s)[:space].ljust(space)
+ return str(s)[:space].ljust(space)
Review comment: is None a valid value in a sequence of labels?
Reply: It is, check this out:

outputs:
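The snippet and its output did not survive this capture; a minimal sketch of the point being made — assuming a frame whose index name is None — might look like this (hypothetical, not the original example):

import pandas as pd

# An unnamed index has name None, and None is hashable, so it is a legal
# entry in the sequence of labels passed to reset_index(level=...).
df = pd.DataFrame({"a": [1, 2]}, index=pd.Index([10, 20], name=None))
print(df.reset_index(level=[None]))
# roughly:
#    index  a
# 0     10  1
# 1     20  2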
Reply: should probably be Sequence[Optional[Hashable]] in that case.
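For illustration (not part of the PR): both spellings accept None at runtime, since None is hashable and so already satisfies Hashable; the explicit Optional form mainly documents the intent.

from typing import Hashable, Optional, Sequence

# Both annotations type-check and run; the second makes "None allowed" explicit.
labels_implicit: Sequence[Hashable] = [None, "a", ("b", 1)]
labels_explicit: Sequence[Optional[Hashable]] = [None, "a", ("b", 1)]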