Skip to content

Commit 359cc5c

Browse files
twoertwein authored and phofl committed
TYP: type all arguments with str default values (pandas-dev#48508)
* TYP: type all arguments with str default values
* na_rep: back to str
* na(t)_rep is always a string
* add float for some functions
* and the same for the few float default arguments
* define a few more literal constants
* avoid itertools.cycle mypy error
* revert mistake
1 parent c6b0f0f commit 359cc5c

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

53 files changed

+419
-207
lines changed

.pre-commit-config.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -268,7 +268,7 @@ repos:
268268
|/_testing/
269269
- id: autotyping
270270
name: autotyping
271-
entry: python -m libcst.tool codemod autotyping.AutotypeCommand --none-return --scalar-return --annotate-magics --annotate-imprecise-magics --bool-param
271+
entry: python -m libcst.tool codemod autotyping.AutotypeCommand --none-return --scalar-return --annotate-magics --annotate-imprecise-magics --bool-param --bytes-param --str-param --float-param
272272
types_or: [python, pyi]
273273
files: ^pandas
274274
exclude: ^(pandas/tests|pandas/_version.py|pandas/io/clipboard)

pandas/_testing/__init__.py

+16-9
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,10 @@
2525
set_locale,
2626
)
2727

28-
from pandas._typing import Dtype
28+
from pandas._typing import (
29+
Dtype,
30+
Frequency,
31+
)
2932
from pandas.compat import pa_version_under1p01
3033

3134
from pandas.core.dtypes.common import (
@@ -401,13 +404,17 @@ def makeFloatIndex(k=10, name=None) -> Float64Index:
401404
return Float64Index(base_idx)
402405

403406

404-
def makeDateIndex(k: int = 10, freq="B", name=None, **kwargs) -> DatetimeIndex:
407+
def makeDateIndex(
408+
k: int = 10, freq: Frequency = "B", name=None, **kwargs
409+
) -> DatetimeIndex:
405410
dt = datetime(2000, 1, 1)
406411
dr = bdate_range(dt, periods=k, freq=freq, name=name)
407412
return DatetimeIndex(dr, name=name, **kwargs)
408413

409414

410-
def makeTimedeltaIndex(k: int = 10, freq="D", name=None, **kwargs) -> TimedeltaIndex:
415+
def makeTimedeltaIndex(
416+
k: int = 10, freq: Frequency = "D", name=None, **kwargs
417+
) -> TimedeltaIndex:
411418
return pd.timedelta_range(start="1 day", periods=k, freq=freq, name=name, **kwargs)
412419

413420

@@ -484,7 +491,7 @@ def getSeriesData() -> dict[str, Series]:
484491
return {c: Series(np.random.randn(_N), index=index) for c in getCols(_K)}
485492

486493

487-
def makeTimeSeries(nper=None, freq="B", name=None) -> Series:
494+
def makeTimeSeries(nper=None, freq: Frequency = "B", name=None) -> Series:
488495
if nper is None:
489496
nper = _N
490497
return Series(
@@ -498,7 +505,7 @@ def makePeriodSeries(nper=None, name=None) -> Series:
498505
return Series(np.random.randn(nper), index=makePeriodIndex(nper), name=name)
499506

500507

501-
def getTimeSeriesData(nper=None, freq="B") -> dict[str, Series]:
508+
def getTimeSeriesData(nper=None, freq: Frequency = "B") -> dict[str, Series]:
502509
return {c: makeTimeSeries(nper, freq) for c in getCols(_K)}
503510

504511

@@ -507,7 +514,7 @@ def getPeriodData(nper=None) -> dict[str, Series]:
507514

508515

509516
# make frame
510-
def makeTimeDataFrame(nper=None, freq="B") -> DataFrame:
517+
def makeTimeDataFrame(nper=None, freq: Frequency = "B") -> DataFrame:
511518
data = getTimeSeriesData(nper, freq)
512519
return DataFrame(data)
513520

@@ -542,7 +549,7 @@ def makePeriodFrame(nper=None) -> DataFrame:
542549
def makeCustomIndex(
543550
nentries,
544551
nlevels,
545-
prefix="#",
552+
prefix: str = "#",
546553
names: bool | str | list[str] | None = False,
547554
ndupe_l=None,
548555
idx_type=None,
@@ -760,7 +767,7 @@ def makeCustomDataframe(
760767
return DataFrame(data, index, columns, dtype=dtype)
761768

762769

763-
def _create_missing_idx(nrows, ncols, density, random_state=None):
770+
def _create_missing_idx(nrows, ncols, density: float, random_state=None):
764771
if random_state is None:
765772
random_state = np.random
766773
else:
@@ -787,7 +794,7 @@ def _gen_unique_rand(rng, _extra_size):
787794
return i.tolist(), j.tolist()
788795

789796

790-
def makeMissingDataframe(density=0.9, random_state=None) -> DataFrame:
797+
def makeMissingDataframe(density: float = 0.9, random_state=None) -> DataFrame:
791798
df = makeDataFrame()
792799
i, j = _create_missing_idx(*df.shape, density=density, random_state=random_state)
793800
df.values[i, j] = np.nan

pandas/_testing/_io.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@ def dec(f):
117117
@optional_args # type: ignore[misc]
118118
def network(
119119
t,
120-
url="https://www.google.com",
120+
url: str = "https://www.google.com",
121121
raise_on_error: bool = False,
122122
check_before_test: bool = False,
123123
error_classes=None,
@@ -369,7 +369,7 @@ def round_trip_localpath(writer, reader, path: str | None = None):
369369
return obj
370370

371371

372-
def write_to_compressed(compression, path, data, dest="test"):
372+
def write_to_compressed(compression, path, data, dest: str = "test"):
373373
"""
374374
Write data to a compressed file.
375375

pandas/_testing/_random.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22

33
import numpy as np
44

5+
from pandas._typing import NpDtype
6+
57

68
def randbool(size=(), p: float = 0.5):
79
return np.random.rand(*size) <= p
@@ -14,7 +16,7 @@ def randbool(size=(), p: float = 0.5):
1416
)
1517

1618

17-
def rands_array(nchars, size, dtype="O", replace: bool = True) -> np.ndarray:
19+
def rands_array(nchars, size, dtype: NpDtype = "O", replace: bool = True) -> np.ndarray:
1820
"""
1921
Generate an array of byte strings.
2022
"""

pandas/_testing/asserters.py

+12-10
Original file line numberDiff line numberDiff line change
@@ -311,7 +311,7 @@ def assert_index_equal(
311311
"""
312312
__tracebackhide__ = True
313313

314-
def _check_types(left, right, obj="Index") -> None:
314+
def _check_types(left, right, obj: str = "Index") -> None:
315315
if not exact:
316316
return
317317

@@ -429,7 +429,9 @@ def _get_ilevel_values(index, level):
429429
assert_categorical_equal(left._values, right._values, obj=f"{obj} category")
430430

431431

432-
def assert_class_equal(left, right, exact: bool | str = True, obj="Input") -> None:
432+
def assert_class_equal(
433+
left, right, exact: bool | str = True, obj: str = "Input"
434+
) -> None:
433435
"""
434436
Checks classes are equal.
435437
"""
@@ -527,7 +529,7 @@ def assert_categorical_equal(
527529
right,
528530
check_dtype: bool = True,
529531
check_category_order: bool = True,
530-
obj="Categorical",
532+
obj: str = "Categorical",
531533
) -> None:
532534
"""
533535
Test that Categoricals are equivalent.
@@ -584,7 +586,7 @@ def assert_categorical_equal(
584586

585587

586588
def assert_interval_array_equal(
587-
left, right, exact="equiv", obj="IntervalArray"
589+
left, right, exact: bool | Literal["equiv"] = "equiv", obj: str = "IntervalArray"
588590
) -> None:
589591
"""
590592
Test that two IntervalArrays are equivalent.
@@ -614,15 +616,15 @@ def assert_interval_array_equal(
614616
assert_attr_equal("closed", left, right, obj=obj)
615617

616618

617-
def assert_period_array_equal(left, right, obj="PeriodArray") -> None:
619+
def assert_period_array_equal(left, right, obj: str = "PeriodArray") -> None:
618620
_check_isinstance(left, right, PeriodArray)
619621

620622
assert_numpy_array_equal(left._data, right._data, obj=f"{obj}._data")
621623
assert_attr_equal("freq", left, right, obj=obj)
622624

623625

624626
def assert_datetime_array_equal(
625-
left, right, obj="DatetimeArray", check_freq: bool = True
627+
left, right, obj: str = "DatetimeArray", check_freq: bool = True
626628
) -> None:
627629
__tracebackhide__ = True
628630
_check_isinstance(left, right, DatetimeArray)
@@ -634,7 +636,7 @@ def assert_datetime_array_equal(
634636

635637

636638
def assert_timedelta_array_equal(
637-
left, right, obj="TimedeltaArray", check_freq: bool = True
639+
left, right, obj: str = "TimedeltaArray", check_freq: bool = True
638640
) -> None:
639641
__tracebackhide__ = True
640642
_check_isinstance(left, right, TimedeltaArray)
@@ -693,7 +695,7 @@ def assert_numpy_array_equal(
693695
check_dtype: bool | Literal["equiv"] = True,
694696
err_msg=None,
695697
check_same=None,
696-
obj="numpy array",
698+
obj: str = "numpy array",
697699
index_values=None,
698700
) -> None:
699701
"""
@@ -887,7 +889,7 @@ def assert_series_equal(
887889
check_flags: bool = True,
888890
rtol: float = 1.0e-5,
889891
atol: float = 1.0e-8,
890-
obj="Series",
892+
obj: str = "Series",
891893
*,
892894
check_index: bool = True,
893895
check_like: bool = False,
@@ -1157,7 +1159,7 @@ def assert_frame_equal(
11571159
check_flags: bool = True,
11581160
rtol: float = 1.0e-5,
11591161
atol: float = 1.0e-8,
1160-
obj="DataFrame",
1162+
obj: str = "DataFrame",
11611163
) -> None:
11621164
"""
11631165
Check that left and right DataFrame are equal.

pandas/_typing.py

+14
Original file line numberDiff line numberDiff line change
@@ -332,3 +332,17 @@ def closed(self) -> bool:
332332

333333
# dropna
334334
AnyAll = Literal["any", "all"]
335+
336+
MatplotlibColor = Union[str, Sequence[float]]
337+
TimeGrouperOrigin = Union[
338+
"Timestamp", Literal["epoch", "start", "start_day", "end", "end_day"]
339+
]
340+
TimeAmbiguous = Union[Literal["infer", "NaT", "raise"], "npt.NDArray[np.bool_]"]
341+
TimeNonexistent = Union[
342+
Literal["shift_forward", "shift_backward", "NaT", "raise"], timedelta
343+
]
344+
DropKeep = Literal["first", "last", False]
345+
CorrelationMethod = Union[
346+
Literal["pearson", "kendall", "spearman"], Callable[[np.ndarray, np.ndarray], float]
347+
]
348+
AlignJoin = Literal["outer", "inner", "left", "right"]

pandas/core/arrays/arrow/array.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
from pandas._typing import (
1313
Dtype,
1414
PositionalIndexer,
15+
SortKind,
1516
TakeIndexer,
1617
npt,
1718
)
@@ -472,7 +473,7 @@ def isna(self) -> npt.NDArray[np.bool_]:
472473
def argsort(
473474
self,
474475
ascending: bool = True,
475-
kind: str = "quicksort",
476+
kind: SortKind = "quicksort",
476477
na_position: str = "last",
477478
*args,
478479
**kwargs,

pandas/core/arrays/base.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
ScalarIndexer,
3838
SequenceIndexer,
3939
Shape,
40+
SortKind,
4041
TakeIndexer,
4142
npt,
4243
)
@@ -671,7 +672,7 @@ def _values_for_argsort(self) -> np.ndarray:
671672
def argsort(
672673
self,
673674
ascending: bool = True,
674-
kind: str = "quicksort",
675+
kind: SortKind = "quicksort",
675676
na_position: str = "last",
676677
*args,
677678
**kwargs,

pandas/core/arrays/categorical.py

+6-3
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@
4444
NpDtype,
4545
Ordered,
4646
Shape,
47+
SortKind,
4748
npt,
4849
type_t,
4950
)
@@ -1827,7 +1828,7 @@ def check_for_ordered(self, op) -> None:
18271828
# error: Signature of "argsort" incompatible with supertype "ExtensionArray"
18281829
@deprecate_nonkeyword_arguments(version=None, allowed_args=["self"])
18291830
def argsort( # type: ignore[override]
1830-
self, ascending: bool = True, kind="quicksort", **kwargs
1831+
self, ascending: bool = True, kind: SortKind = "quicksort", **kwargs
18311832
):
18321833
"""
18331834
Return the indices that would sort the Categorical.
@@ -2200,7 +2201,9 @@ def _repr_footer(self) -> str:
22002201
info = self._repr_categories_info()
22012202
return f"Length: {len(self)}\n{info}"
22022203

2203-
def _get_repr(self, length: bool = True, na_rep="NaN", footer: bool = True) -> str:
2204+
def _get_repr(
2205+
self, length: bool = True, na_rep: str = "NaN", footer: bool = True
2206+
) -> str:
22042207
from pandas.io.formats import format as fmt
22052208

22062209
formatter = fmt.CategoricalFormatter(
@@ -2716,7 +2719,7 @@ def _str_map(
27162719
result = PandasArray(categories.to_numpy())._str_map(f, na_value, dtype)
27172720
return take_nd(result, codes, fill_value=na_value)
27182721

2719-
def _str_get_dummies(self, sep="|"):
2722+
def _str_get_dummies(self, sep: str = "|"):
27202723
# sep may not be in categories. Just bail on this.
27212724
from pandas.core.arrays import PandasArray
27222725

pandas/core/arrays/datetimelike.py

+22-5
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,8 @@
6363
PositionalIndexerTuple,
6464
ScalarIndexer,
6565
SequenceIndexer,
66+
TimeAmbiguous,
67+
TimeNonexistent,
6668
npt,
6769
)
6870
from pandas.compat.numpy import function as nv
@@ -308,7 +310,7 @@ def asi8(self) -> npt.NDArray[np.int64]:
308310
# Rendering Methods
309311

310312
def _format_native_types(
311-
self, *, na_rep="NaT", date_format=None
313+
self, *, na_rep: str | float = "NaT", date_format=None
312314
) -> npt.NDArray[np.object_]:
313315
"""
314316
Helper method for astype when converting to strings.
@@ -556,7 +558,7 @@ def _concat_same_type(
556558
new_obj._freq = new_freq
557559
return new_obj
558560

559-
def copy(self: DatetimeLikeArrayT, order="C") -> DatetimeLikeArrayT:
561+
def copy(self: DatetimeLikeArrayT, order: str = "C") -> DatetimeLikeArrayT:
560562
# error: Unexpected keyword argument "order" for "copy"
561563
new_obj = super().copy(order=order) # type: ignore[call-arg]
562564
new_obj._freq = self.freq
@@ -2085,15 +2087,30 @@ def _round(self, freq, mode, ambiguous, nonexistent):
20852087
return self._simple_new(result, dtype=self.dtype)
20862088

20872089
@Appender((_round_doc + _round_example).format(op="round"))
2088-
def round(self, freq, ambiguous="raise", nonexistent="raise"):
2090+
def round(
2091+
self,
2092+
freq,
2093+
ambiguous: TimeAmbiguous = "raise",
2094+
nonexistent: TimeNonexistent = "raise",
2095+
):
20892096
return self._round(freq, RoundTo.NEAREST_HALF_EVEN, ambiguous, nonexistent)
20902097

20912098
@Appender((_round_doc + _floor_example).format(op="floor"))
2092-
def floor(self, freq, ambiguous="raise", nonexistent="raise"):
2099+
def floor(
2100+
self,
2101+
freq,
2102+
ambiguous: TimeAmbiguous = "raise",
2103+
nonexistent: TimeNonexistent = "raise",
2104+
):
20932105
return self._round(freq, RoundTo.MINUS_INFTY, ambiguous, nonexistent)
20942106

20952107
@Appender((_round_doc + _ceil_example).format(op="ceil"))
2096-
def ceil(self, freq, ambiguous="raise", nonexistent="raise"):
2108+
def ceil(
2109+
self,
2110+
freq,
2111+
ambiguous: TimeAmbiguous = "raise",
2112+
nonexistent: TimeNonexistent = "raise",
2113+
):
20972114
return self._round(freq, RoundTo.PLUS_INFTY, ambiguous, nonexistent)
20982115

20992116
# --------------------------------------------------------------

0 commit comments

Comments
 (0)