Skip to content

Commit 282a0e4

Browse files
mck619 authored and WillAyd committed
added f strings and typing to frame.py (#30021)
1 parent 5db4097 commit 282a0e4

File tree

2 files changed

+67
-58
lines changed

2 files changed

+67
-58
lines changed

pandas/core/frame.py

+63-57
Original file line number | Diff line number | Diff line change
@@ -15,6 +15,7 @@
1515
import sys
1616
from textwrap import dedent
1717
from typing import (
18+
Any,
1819
FrozenSet,
1920
Hashable,
2021
Iterable,
@@ -25,6 +26,7 @@
2526
Tuple,
2627
Type,
2728
Union,
29+
cast,
2830
)
2931
import warnings
3032

@@ -477,7 +479,7 @@ def __init__(
477479
except (ValueError, TypeError) as e:
478480
exc = TypeError(
479481
"DataFrame constructor called with "
480-
"incompatible data and dtype: {e}".format(e=e)
482+
f"incompatible data and dtype: {e}"
481483
)
482484
raise exc from e
483485

@@ -1114,8 +1116,7 @@ def dot(self, other):
11141116
rvals = np.asarray(other)
11151117
if lvals.shape[1] != rvals.shape[0]:
11161118
raise ValueError(
1117-
"Dot product shape mismatch, "
1118-
"{s} vs {r}".format(s=lvals.shape, r=rvals.shape)
1119+
f"Dot product shape mismatch, {lvals.shape} vs {rvals.shape}"
11191120
)
11201121

11211122
if isinstance(other, DataFrame):
@@ -1131,7 +1132,7 @@ def dot(self, other):
11311132
else:
11321133
return Series(result, index=left.index)
11331134
else: # pragma: no cover
1134-
raise TypeError("unsupported type: {oth}".format(oth=type(other)))
1135+
raise TypeError(f"unsupported type: {type(other)}")
11351136

11361137
def __matmul__(self, other):
11371138
"""
@@ -1419,7 +1420,7 @@ def to_dict(self, orient="dict", into=dict):
14191420
for t in self.itertuples(name=None)
14201421
)
14211422
else:
1422-
raise ValueError("orient '{o}' not understood".format(o=orient))
1423+
raise ValueError(f"orient '{orient}' not understood")
14231424

14241425
def to_gbq(
14251426
self,
@@ -1838,9 +1839,7 @@ def to_records(self, index=True, column_dtypes=None, index_dtypes=None):
18381839
formats.append(dtype_mapping)
18391840
else:
18401841
element = "row" if i < index_len else "column"
1841-
msg = ("Invalid dtype {dtype} specified for {element} {name}").format(
1842-
dtype=dtype_mapping, element=element, name=name
1843-
)
1842+
msg = f"Invalid dtype {dtype_mapping} specified for {element} {name}"
18441843
raise ValueError(msg)
18451844

18461845
return np.rec.fromarrays(arrays, dtype={"names": names, "formats": formats})
@@ -2309,7 +2308,7 @@ def info(
23092308
lines.append(self.index._summary())
23102309

23112310
if len(self.columns) == 0:
2312-
lines.append("Empty {name}".format(name=type(self).__name__))
2311+
lines.append(f"Empty {type(self).__name__}")
23132312
fmt.buffer_put_lines(buf, lines)
23142313
return
23152314

@@ -2337,10 +2336,7 @@ def _verbose_repr():
23372336
counts = self.count()
23382337
if len(cols) != len(counts): # pragma: no cover
23392338
raise AssertionError(
2340-
"Columns must equal counts "
2341-
"({cols:d} != {counts:d})".format(
2342-
cols=len(cols), counts=len(counts)
2343-
)
2339+
f"Columns must equal counts ({len(cols)} != {len(counts)})"
23442340
)
23452341
tmpl = "{count} non-null {dtype}"
23462342

@@ -2384,7 +2380,7 @@ def _sizeof_fmt(num, size_qualifier):
23842380

23852381
counts = self._data.get_dtype_counts()
23862382
dtypes = ["{k}({kk:d})".format(k=k[0], kk=k[1]) for k in sorted(counts.items())]
2387-
lines.append("dtypes: {types}".format(types=", ".join(dtypes)))
2383+
lines.append(f"dtypes: {', '.join(dtypes)}")
23882384

23892385
if memory_usage is None:
23902386
memory_usage = get_option("display.memory_usage")
@@ -2401,12 +2397,7 @@ def _sizeof_fmt(num, size_qualifier):
24012397
if "object" in counts or self.index._is_memory_usage_qualified():
24022398
size_qualifier = "+"
24032399
mem_usage = self.memory_usage(index=True, deep=deep).sum()
2404-
lines.append(
2405-
"memory usage: {mem}\n".format(
2406-
mem=_sizeof_fmt(mem_usage, size_qualifier)
2407-
)
2408-
)
2409-
2400+
lines.append(f"memory usage: {_sizeof_fmt(mem_usage, size_qualifier)}\n")
24102401
fmt.buffer_put_lines(buf, lines)
24112402

24122403
def memory_usage(self, index=True, deep=False):
@@ -3071,8 +3062,8 @@ def query(self, expr, inplace=False, **kwargs):
30713062
"""
30723063
inplace = validate_bool_kwarg(inplace, "inplace")
30733064
if not isinstance(expr, str):
3074-
msg = "expr must be a string to be evaluated, {0} given"
3075-
raise ValueError(msg.format(type(expr)))
3065+
msg = f"expr must be a string to be evaluated, {type(expr)} given"
3066+
raise ValueError(msg)
30763067
kwargs["level"] = kwargs.pop("level", 0) + 1
30773068
kwargs["target"] = None
30783069
res = self.eval(expr, **kwargs)
@@ -3289,11 +3280,7 @@ def select_dtypes(self, include=None, exclude=None):
32893280

32903281
# can't both include AND exclude!
32913282
if not include.isdisjoint(exclude):
3292-
raise ValueError(
3293-
"include and exclude overlap on {inc_ex}".format(
3294-
inc_ex=(include & exclude)
3295-
)
3296-
)
3283+
raise ValueError(f"include and exclude overlap on {(include & exclude)}")
32973284

32983285
# We raise when both include and exclude are empty
32993286
# Hence, we can just shrink the columns we want to keep
@@ -4130,15 +4117,13 @@ def set_index(
41304117
try:
41314118
found = col in self.columns
41324119
except TypeError:
4133-
raise TypeError(
4134-
err_msg + " Received column of type {}".format(type(col))
4135-
)
4120+
raise TypeError(f"{err_msg}. Received column of type {type(col)}")
41364121
else:
41374122
if not found:
41384123
missing.append(col)
41394124

41404125
if missing:
4141-
raise KeyError("None of {} are in the columns".format(missing))
4126+
raise KeyError(f"None of {missing} are in the columns")
41424127

41434128
if inplace:
41444129
frame = self
@@ -4182,17 +4167,15 @@ def set_index(
41824167
# check newest element against length of calling frame, since
41834168
# ensure_index_from_sequences would not raise for append=False.
41844169
raise ValueError(
4185-
"Length mismatch: Expected {len_self} rows, "
4186-
"received array of length {len_col}".format(
4187-
len_self=len(self), len_col=len(arrays[-1])
4188-
)
4170+
f"Length mismatch: Expected {len(self)} rows, "
4171+
f"received array of length {len(arrays[-1])}"
41894172
)
41904173

41914174
index = ensure_index_from_sequences(arrays, names)
41924175

41934176
if verify_integrity and not index.is_unique:
41944177
duplicates = index[index.duplicated()].unique()
4195-
raise ValueError("Index has duplicate keys: {dup}".format(dup=duplicates))
4178+
raise ValueError(f"Index has duplicate keys: {duplicates}")
41964179

41974180
# use set to handle duplicate column names gracefully in case of drop
41984181
for c in set(to_remove):
@@ -4207,8 +4190,13 @@ def set_index(
42074190
return frame
42084191

42094192
def reset_index(
4210-
self, level=None, drop=False, inplace=False, col_level=0, col_fill=""
4211-
):
4193+
self,
4194+
level: Optional[Union[Hashable, Sequence[Hashable]]] = None,
4195+
drop: bool = False,
4196+
inplace: bool = False,
4197+
col_level: Hashable = 0,
4198+
col_fill: Optional[Hashable] = "",
4199+
) -> Optional["DataFrame"]:
42124200
"""
42134201
Reset the index, or a level of it.
42144202
@@ -4236,8 +4224,8 @@ def reset_index(
42364224
42374225
Returns
42384226
-------
4239-
DataFrame
4240-
DataFrame with the new index.
4227+
DataFrame or None
4228+
DataFrame with the new index or None if ``inplace=True``.
42414229
42424230
See Also
42434231
--------
@@ -4402,6 +4390,7 @@ def _maybe_casted_values(index, labels=None):
44024390
new_index = self.index.droplevel(level)
44034391

44044392
if not drop:
4393+
to_insert: Iterable[Tuple[Any, Optional[Any]]]
44054394
if isinstance(self.index, ABCMultiIndex):
44064395
names = [
44074396
(n if n is not None else f"level_{i}")
@@ -4424,8 +4413,7 @@ def _maybe_casted_values(index, labels=None):
44244413
if len(col_name) not in (1, self.columns.nlevels):
44254414
raise ValueError(
44264415
"col_fill=None is incompatible "
4427-
"with incomplete column name "
4428-
"{}".format(name)
4416+
f"with incomplete column name {name}"
44294417
)
44304418
col_fill = col_name[0]
44314419

@@ -4442,6 +4430,8 @@ def _maybe_casted_values(index, labels=None):
44424430
if not inplace:
44434431
return new_obj
44444432

4433+
return None
4434+
44454435
# ----------------------------------------------------------------------
44464436
# Reindex-based selection methods
44474437

@@ -4591,7 +4581,7 @@ def dropna(self, axis=0, how="any", thresh=None, subset=None, inplace=False):
45914581
mask = count > 0
45924582
else:
45934583
if how is not None:
4594-
raise ValueError("invalid how option: {h}".format(h=how))
4584+
raise ValueError(f"invalid how option: {how}")
45954585
else:
45964586
raise TypeError("must specify how or thresh")
45974587

@@ -4602,7 +4592,12 @@ def dropna(self, axis=0, how="any", thresh=None, subset=None, inplace=False):
46024592
else:
46034593
return result
46044594

4605-
def drop_duplicates(self, subset=None, keep="first", inplace=False):
4595+
def drop_duplicates(
4596+
self,
4597+
subset: Optional[Union[Hashable, Sequence[Hashable]]] = None,
4598+
keep: Union[str, bool] = "first",
4599+
inplace: bool = False,
4600+
) -> Optional["DataFrame"]:
46064601
"""
46074602
Return DataFrame with duplicate rows removed.
46084603
@@ -4625,6 +4620,7 @@ def drop_duplicates(self, subset=None, keep="first", inplace=False):
46254620
Returns
46264621
-------
46274622
DataFrame
4623+
DataFrame with duplicates removed or None if ``inplace=True``.
46284624
"""
46294625
if self.empty:
46304626
return self.copy()
@@ -4639,7 +4635,13 @@ def drop_duplicates(self, subset=None, keep="first", inplace=False):
46394635
else:
46404636
return self[-duplicated]
46414637

4642-
def duplicated(self, subset=None, keep="first"):
4638+
return None
4639+
4640+
def duplicated(
4641+
self,
4642+
subset: Optional[Union[Hashable, Sequence[Hashable]]] = None,
4643+
keep: Union[str, bool] = "first",
4644+
) -> "Series":
46434645
"""
46444646
Return boolean Series denoting duplicate rows.
46454647
@@ -4683,6 +4685,9 @@ def f(vals):
46834685
):
46844686
subset = (subset,)
46854687

4688+
# needed for mypy since can't narrow types using np.iterable
4689+
subset = cast(Iterable, subset)
4690+
46864691
# Verify all columns in subset exist in the queried dataframe
46874692
# Otherwise, raise a KeyError, same as if you try to __getitem__ with a
46884693
# key that doesn't exist.
@@ -6032,6 +6037,8 @@ def explode(self, column: Union[str, Tuple]) -> "DataFrame":
60326037
raise ValueError("columns must be unique")
60336038

60346039
df = self.reset_index(drop=True)
6040+
# TODO: use overload to refine return type of reset_index
6041+
assert df is not None # needed for mypy
60356042
result = df[column].explode()
60366043
result = df.drop([column], axis=1).join(result)
60376044
result.index = self.index.take(result.index)
@@ -7210,7 +7217,7 @@ def corr(self, method="pearson", min_periods=1):
72107217
raise ValueError(
72117218
"method must be either 'pearson', "
72127219
"'spearman', 'kendall', or a callable, "
7213-
"'{method}' was supplied".format(method=method)
7220+
f"'{method}' was supplied"
72147221
)
72157222

72167223
return self._constructor(correl, index=idx, columns=cols)
@@ -7401,9 +7408,9 @@ def c(x):
74017408

74027409
else:
74037410
raise ValueError(
7404-
"Invalid method {method} was passed, "
7411+
f"Invalid method {method} was passed, "
74057412
"valid methods are: 'pearson', 'kendall', "
7406-
"'spearman', or callable".format(method=method)
7413+
"'spearman', or callable"
74077414
)
74087415

74097416
if not drop:
@@ -7533,8 +7540,7 @@ def _count_level(self, level, axis=0, numeric_only=False):
75337540

75347541
if not isinstance(count_axis, ABCMultiIndex):
75357542
raise TypeError(
7536-
"Can only count levels on hierarchical "
7537-
"{ax}.".format(ax=self._get_axis_name(axis))
7543+
f"Can only count levels on hierarchical {self._get_axis_name(axis)}."
75387544
)
75397545

75407546
if frame._is_mixed_type:
@@ -7592,8 +7598,8 @@ def _get_data(axis_matters):
75927598
data = self._get_bool_data()
75937599
else: # pragma: no cover
75947600
msg = (
7595-
"Generating numeric_only data with filter_type {f}"
7596-
"not supported.".format(f=filter_type)
7601+
f"Generating numeric_only data with filter_type {filter_type} "
7602+
"not supported."
75977603
)
75987604
raise NotImplementedError(msg)
75997605
return data
@@ -8002,7 +8008,7 @@ def to_timestamp(self, freq=None, how="start", axis=0, copy=True):
80028008
elif axis == 1:
80038009
new_data.set_axis(0, self.columns.to_timestamp(freq=freq, how=how))
80048010
else: # pragma: no cover
8005-
raise AssertionError("Axis must be 0 or 1. Got {ax!s}".format(ax=axis))
8011+
raise AssertionError(f"Axis must be 0 or 1. Got {axis}")
80068012

80078013
return self._constructor(new_data)
80088014

@@ -8036,7 +8042,7 @@ def to_period(self, freq=None, axis=0, copy=True):
80368042
elif axis == 1:
80378043
new_data.set_axis(0, self.columns.to_period(freq=freq))
80388044
else: # pragma: no cover
8039-
raise AssertionError("Axis must be 0 or 1. Got {ax!s}".format(ax=axis))
8045+
raise AssertionError(f"Axis must be 0 or 1. Got {axis}")
80408046

80418047
return self._constructor(new_data)
80428048

@@ -8125,8 +8131,8 @@ def isin(self, values):
81258131
else:
81268132
if not is_list_like(values):
81278133
raise TypeError(
8128-
f"only list-like or dict-like objects are allowed "
8129-
f"to be passed to DataFrame.isin(), "
8134+
"only list-like or dict-like objects are allowed "
8135+
"to be passed to DataFrame.isin(), "
81308136
f"you passed a {repr(type(values).__name__)}"
81318137
)
81328138
return DataFrame(
@@ -8168,4 +8174,4 @@ def _from_nested_dict(data):
81688174

81698175

81708176
def _put_str(s, space):
8171-
return "{s}".format(s=s)[:space].ljust(space)
8177+
return str(s)[:space].ljust(space)

pandas/core/reshape/merge.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,10 @@ def _groupby_and_merge(
126126
on = [on]
127127

128128
if right.duplicated(by + on).any():
129-
right = right.drop_duplicates(by + on, keep="last")
129+
_right = right.drop_duplicates(by + on, keep="last")
130+
# TODO: use overload to refine return type of drop_duplicates
131+
assert _right is not None # needed for mypy
132+
right = _right
130133
rby = right.groupby(by, sort=False)
131134
except KeyError:
132135
rby = None

0 commit comments

Comments (0)