 import sys
 from textwrap import dedent
 from typing import (
+    Any,
     FrozenSet,
     Hashable,
     Iterable,
     Tuple,
     Type,
     Union,
+    cast,
 )
 import warnings

@@ -477,7 +479,7 @@ def __init__(
             except (ValueError, TypeError) as e:
                 exc = TypeError(
                     "DataFrame constructor called with "
-                    "incompatible data and dtype: {e}".format(e=e)
+                    f"incompatible data and dtype: {e}"
                 )
                 raise exc from e

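This hunk is representative of most of the patch: str.format calls are rewritten as f-strings, which evaluate the embedded expression in place and produce the same text. A quick illustration, independent of pandas:

    e = TypeError("bad dtype")
    # Old style: a named placeholder filled in by .format()
    old = "incompatible data and dtype: {e}".format(e=e)
    # New style: the f-string evaluates the expression directly
    new = f"incompatible data and dtype: {e}"
    assert old == new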
@@ -1114,8 +1116,7 @@ def dot(self, other):
             rvals = np.asarray(other)
             if lvals.shape[1] != rvals.shape[0]:
                 raise ValueError(
-                    "Dot product shape mismatch, "
-                    "{s} vs {r}".format(s=lvals.shape, r=rvals.shape)
+                    f"Dot product shape mismatch, {lvals.shape} vs {rvals.shape}"
                 )

         if isinstance(other, DataFrame):
@@ -1131,7 +1132,7 @@ def dot(self, other):
             else:
                 return Series(result, index=left.index)
         else:  # pragma: no cover
-            raise TypeError("unsupported type: {oth}".format(oth=type(other)))
+            raise TypeError(f"unsupported type: {type(other)}")

     def __matmul__(self, other):
         """
@@ -1419,7 +1420,7 @@ def to_dict(self, orient="dict", into=dict):
                 for t in self.itertuples(name=None)
             )
         else:
-            raise ValueError("orient '{o}' not understood".format(o=orient))
+            raise ValueError(f"orient '{orient}' not understood")

     def to_gbq(
         self,
@@ -1838,9 +1839,7 @@ def to_records(self, index=True, column_dtypes=None, index_dtypes=None):
                 formats.append(dtype_mapping)
             else:
                 element = "row" if i < index_len else "column"
-                msg = ("Invalid dtype {dtype} specified for {element} {name}").format(
-                    dtype=dtype_mapping, element=element, name=name
-                )
+                msg = f"Invalid dtype {dtype_mapping} specified for {element} {name}"
                 raise ValueError(msg)

         return np.rec.fromarrays(arrays, dtype={"names": names, "formats": formats})
@@ -2309,7 +2308,7 @@ def info(
         lines.append(self.index._summary())

         if len(self.columns) == 0:
-            lines.append("Empty {name}".format(name=type(self).__name__))
+            lines.append(f"Empty {type(self).__name__}")
             fmt.buffer_put_lines(buf, lines)
             return

@@ -2337,10 +2336,7 @@ def _verbose_repr():
                 counts = self.count()
                 if len(cols) != len(counts):  # pragma: no cover
                     raise AssertionError(
-                        "Columns must equal counts "
-                        "({cols:d} != {counts:d})".format(
-                            cols=len(cols), counts=len(counts)
-                        )
+                        f"Columns must equal counts ({len(cols)} != {len(counts)})"
                     )
                 tmpl = "{count} non-null {dtype}"

@@ -2384,7 +2380,7 @@ def _sizeof_fmt(num, size_qualifier):

         counts = self._data.get_dtype_counts()
         dtypes = ["{k}({kk:d})".format(k=k[0], kk=k[1]) for k in sorted(counts.items())]
-        lines.append("dtypes: {types}".format(types=", ".join(dtypes)))
+        lines.append(f"dtypes: {', '.join(dtypes)}")

         if memory_usage is None:
             memory_usage = get_option("display.memory_usage")
@@ -2401,12 +2397,7 @@ def _sizeof_fmt(num, size_qualifier):
                 if "object" in counts or self.index._is_memory_usage_qualified():
                     size_qualifier = "+"
             mem_usage = self.memory_usage(index=True, deep=deep).sum()
-            lines.append(
-                "memory usage: {mem}\n".format(
-                    mem=_sizeof_fmt(mem_usage, size_qualifier)
-                )
-            )
-
+            lines.append(f"memory usage: {_sizeof_fmt(mem_usage, size_qualifier)}\n")
         fmt.buffer_put_lines(buf, lines)

     def memory_usage(self, index=True, deep=False):
@@ -3071,8 +3062,8 @@ def query(self, expr, inplace=False, **kwargs):
         """
         inplace = validate_bool_kwarg(inplace, "inplace")
         if not isinstance(expr, str):
-            msg = "expr must be a string to be evaluated, {0} given"
-            raise ValueError(msg.format(type(expr)))
+            msg = f"expr must be a string to be evaluated, {type(expr)} given"
+            raise ValueError(msg)
         kwargs["level"] = kwargs.pop("level", 0) + 1
         kwargs["target"] = None
         res = self.eval(expr, **kwargs)
@@ -3289,11 +3280,7 @@ def select_dtypes(self, include=None, exclude=None):

         # can't both include AND exclude!
         if not include.isdisjoint(exclude):
-            raise ValueError(
-                "include and exclude overlap on {inc_ex}".format(
-                    inc_ex=(include & exclude)
-                )
-            )
+            raise ValueError(f"include and exclude overlap on {(include & exclude)}")

         # We raise when both include and exclude are empty
         # Hence, we can just shrink the columns we want to keep
@@ -4130,15 +4117,13 @@ def set_index(
                 try:
                     found = col in self.columns
                 except TypeError:
-                    raise TypeError(
-                        err_msg + " Received column of type {}".format(type(col))
-                    )
+                    raise TypeError(f"{err_msg}. Received column of type {type(col)}")
                 else:
                     if not found:
                         missing.append(col)

         if missing:
-            raise KeyError("None of {} are in the columns".format(missing))
+            raise KeyError(f"None of {missing} are in the columns")

         if inplace:
             frame = self
@@ -4182,17 +4167,15 @@ def set_index(
                 # check newest element against length of calling frame, since
                 # ensure_index_from_sequences would not raise for append=False.
                 raise ValueError(
-                    "Length mismatch: Expected {len_self} rows, "
-                    "received array of length {len_col}".format(
-                        len_self=len(self), len_col=len(arrays[-1])
-                    )
+                    f"Length mismatch: Expected {len(self)} rows, "
+                    f"received array of length {len(arrays[-1])}"
                 )

         index = ensure_index_from_sequences(arrays, names)

         if verify_integrity and not index.is_unique:
             duplicates = index[index.duplicated()].unique()
-            raise ValueError("Index has duplicate keys: {dup}".format(dup=duplicates))
+            raise ValueError(f"Index has duplicate keys: {duplicates}")

         # use set to handle duplicate column names gracefully in case of drop
         for c in set(to_remove):
@@ -4207,8 +4190,13 @@ def set_index(
         return frame

     def reset_index(
-        self, level=None, drop=False, inplace=False, col_level=0, col_fill=""
-    ):
+        self,
+        level: Optional[Union[Hashable, Sequence[Hashable]]] = None,
+        drop: bool = False,
+        inplace: bool = False,
+        col_level: Hashable = 0,
+        col_fill: Optional[Hashable] = "",
+    ) -> Optional["DataFrame"]:
         """
         Reset the index, or a level of it.

@@ -4236,8 +4224,8 @@ def reset_index(

         Returns
         -------
-        DataFrame
-            DataFrame with the new index.
+        DataFrame or None
+            DataFrame with the new index or None if ``inplace=True``.

         See Also
         --------
@@ -4402,6 +4390,7 @@ def _maybe_casted_values(index, labels=None):
                 new_index = self.index.droplevel(level)

         if not drop:
+            to_insert: Iterable[Tuple[Any, Optional[Any]]]
             if isinstance(self.index, ABCMultiIndex):
                 names = [
                     (n if n is not None else f"level_{i}")
@@ -4424,8 +4413,7 @@ def _maybe_casted_values(index, labels=None):
                         if len(col_name) not in (1, self.columns.nlevels):
                             raise ValueError(
                                 "col_fill=None is incompatible "
-                                "with incomplete column name "
-                                "{}".format(name)
+                                f"with incomplete column name {name}"
                             )
                         col_fill = col_name[0]

@@ -4442,6 +4430,8 @@ def _maybe_casted_values(index, labels=None):
         if not inplace:
             return new_obj

+        return None
+
     # ----------------------------------------------------------------------
     # Reindex-based selection methods

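The annotated signature and the explicit ``return None`` above follow a common pattern for inplace-style methods: typing the return as Optional["DataFrame"] lets mypy check both the copying branch and the in-place branch. A minimal sketch of the pattern, using a hypothetical Frame class rather than the real pandas code:

    from typing import List, Optional

    class Frame:
        def __init__(self, rows: List[list]) -> None:
            self.rows = rows

        def drop_empty(self, inplace: bool = False) -> Optional["Frame"]:
            # Keep only non-empty rows, mimicking the inplace convention.
            kept = [row for row in self.rows if row]
            if not inplace:
                return Frame(kept)
            self.rows = kept
            # Explicit return so the type checker sees a value on every path.
            return None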
@@ -4591,7 +4581,7 @@ def dropna(self, axis=0, how="any", thresh=None, subset=None, inplace=False):
                 mask = count > 0
         else:
             if how is not None:
-                raise ValueError("invalid how option: {h}".format(h=how))
+                raise ValueError(f"invalid how option: {how}")
             else:
                 raise TypeError("must specify how or thresh")

@@ -4602,7 +4592,12 @@ def dropna(self, axis=0, how="any", thresh=None, subset=None, inplace=False):
         else:
             return result

-    def drop_duplicates(self, subset=None, keep="first", inplace=False):
+    def drop_duplicates(
+        self,
+        subset: Optional[Union[Hashable, Sequence[Hashable]]] = None,
+        keep: Union[str, bool] = "first",
+        inplace: bool = False,
+    ) -> Optional["DataFrame"]:
         """
         Return DataFrame with duplicate rows removed.

@@ -4625,6 +4620,7 @@ def drop_duplicates(self, subset=None, keep="first", inplace=False):
         Returns
         -------
         DataFrame
+            DataFrame with duplicates removed or None if ``inplace=True``.
         """
         if self.empty:
             return self.copy()
@@ -4639,7 +4635,13 @@ def drop_duplicates(self, subset=None, keep="first", inplace=False):
         else:
             return self[-duplicated]

-    def duplicated(self, subset=None, keep="first"):
+        return None
+
+    def duplicated(
+        self,
+        subset: Optional[Union[Hashable, Sequence[Hashable]]] = None,
+        keep: Union[str, bool] = "first",
+    ) -> "Series":
         """
         Return boolean Series denoting duplicate rows.

@@ -4683,6 +4685,9 @@ def f(vals):
         ):
             subset = (subset,)

+        # needed for mypy since can't narrow types using np.iterable
+        subset = cast(Iterable, subset)
+
         # Verify all columns in subset exist in the queried dataframe
         # Otherwise, raise a KeyError, same as if you try to __getitem__ with a
         # key that doesn't exist.
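The ``cast(Iterable, subset)`` line added above is purely a typing aid: typing.cast does nothing at runtime, it only tells mypy to treat ``subset`` as an Iterable, because a runtime check with np.iterable does not narrow the declared type. A small self-contained sketch of the same idea, with a hypothetical ``first_two`` helper:

    from typing import Iterable, Union, cast

    import numpy as np

    def first_two(subset: Union[int, Iterable[int]]) -> list:
        # Runtime check: wrap a scalar into a one-element tuple.
        if not np.iterable(subset):
            subset = (subset,)
        # No-op at runtime; tells the type checker subset is now iterable.
        subset = cast(Iterable, subset)
        return list(subset)[:2]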
@@ -6032,6 +6037,8 @@ def explode(self, column: Union[str, Tuple]) -> "DataFrame":
             raise ValueError("columns must be unique")

         df = self.reset_index(drop=True)
+        # TODO: use overload to refine return type of reset_index
+        assert df is not None  # needed for mypy
         result = df[column].explode()
         result = df.drop([column], axis=1).join(result)
         result.index = self.index.take(result.index)
@@ -7210,7 +7217,7 @@ def corr(self, method="pearson", min_periods=1):
             raise ValueError(
                 "method must be either 'pearson', "
                 "'spearman', 'kendall', or a callable, "
-                "'{method}' was supplied".format(method=method)
+                f"'{method}' was supplied"
             )

         return self._constructor(correl, index=idx, columns=cols)
@@ -7401,9 +7408,9 @@ def c(x):

         else:
             raise ValueError(
-                "Invalid method {method} was passed, "
+                f"Invalid method {method} was passed, "
                 "valid methods are: 'pearson', 'kendall', "
-                "'spearman', or callable".format(method=method)
+                "'spearman', or callable"
             )

         if not drop:
@@ -7533,8 +7540,7 @@ def _count_level(self, level, axis=0, numeric_only=False):

         if not isinstance(count_axis, ABCMultiIndex):
             raise TypeError(
-                "Can only count levels on hierarchical "
-                "{ax}.".format(ax=self._get_axis_name(axis))
+                f"Can only count levels on hierarchical {self._get_axis_name(axis)}."
             )

         if frame._is_mixed_type:
@@ -7592,8 +7598,8 @@ def _get_data(axis_matters):
                     data = self._get_bool_data()
             else:  # pragma: no cover
                 msg = (
-                    "Generating numeric_only data with filter_type {f} "
-                    "not supported.".format(f=filter_type)
+                    f"Generating numeric_only data with filter_type {filter_type} "
+                    "not supported."
                 )
                 raise NotImplementedError(msg)
             return data
@@ -8002,7 +8008,7 @@ def to_timestamp(self, freq=None, how="start", axis=0, copy=True):
         elif axis == 1:
             new_data.set_axis(0, self.columns.to_timestamp(freq=freq, how=how))
         else:  # pragma: no cover
-            raise AssertionError("Axis must be 0 or 1. Got {ax!s}".format(ax=axis))
+            raise AssertionError(f"Axis must be 0 or 1. Got {axis}")

         return self._constructor(new_data)

@@ -8036,7 +8042,7 @@ def to_period(self, freq=None, axis=0, copy=True):
         elif axis == 1:
             new_data.set_axis(0, self.columns.to_period(freq=freq))
         else:  # pragma: no cover
-            raise AssertionError("Axis must be 0 or 1. Got {ax!s}".format(ax=axis))
+            raise AssertionError(f"Axis must be 0 or 1. Got {axis}")

         return self._constructor(new_data)

@@ -8125,8 +8131,8 @@ def isin(self, values):
         else:
             if not is_list_like(values):
                 raise TypeError(
-                    f"only list-like or dict-like objects are allowed "
-                    f"to be passed to DataFrame.isin(), "
+                    "only list-like or dict-like objects are allowed "
+                    "to be passed to DataFrame.isin(), "
                     f"you passed a {repr(type(values).__name__)}"
                 )
             return DataFrame(
@@ -8168,4 +8174,4 @@ def _from_nested_dict(data):
8168
8174
8169
8175
8170
8176
def _put_str (s , space ):
8171
- return "{s}" . format ( s = s )[:space ].ljust (space )
8177
+ return str ( s )[:space ].ljust (space )