From 11eddd79cb89ff745ee79144d66ef2801ed16c98 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Fri, 9 Sep 2022 18:30:25 -0400 Subject: [PATCH 1/8] TYP: type all arguments with str default values --- .pre-commit-config.yaml | 2 +- pandas/_testing/__init__.py | 21 ++++++++++------ pandas/_testing/_io.py | 7 +++--- pandas/_testing/_random.py | 4 ++- pandas/_testing/asserters.py | 24 ++++++++++-------- pandas/_typing.py | 9 +++++++ pandas/core/arrays/arrow/array.py | 3 ++- pandas/core/arrays/base.py | 3 ++- pandas/core/arrays/categorical.py | 12 ++++++--- pandas/core/arrays/datetimelike.py | 27 ++++++++++++++++---- pandas/core/arrays/datetimes.py | 35 ++++++++++++++++++-------- pandas/core/arrays/interval.py | 5 ++-- pandas/core/arrays/period.py | 2 +- pandas/core/arrays/timedeltas.py | 12 ++++++--- pandas/core/base.py | 2 +- pandas/core/frame.py | 9 ++++--- pandas/core/generic.py | 18 +++++++------ pandas/core/groupby/groupby.py | 10 +++++++- pandas/core/indexes/base.py | 12 +++++---- pandas/core/indexes/category.py | 6 +++-- pandas/core/indexes/datetimelike.py | 4 +-- pandas/core/indexes/datetimes.py | 19 +++++++++----- pandas/core/indexes/interval.py | 6 ++--- pandas/core/indexes/multi.py | 31 +++++++++++++++++------ pandas/core/indexes/numeric.py | 8 +++++- pandas/core/indexes/range.py | 2 +- pandas/core/indexing.py | 2 +- pandas/core/internals/array_manager.py | 3 ++- pandas/core/internals/blocks.py | 30 +++++++++++++++------- pandas/core/internals/managers.py | 3 ++- pandas/core/missing.py | 4 ++- pandas/core/nanops.py | 13 ++++++++-- pandas/core/resample.py | 35 +++++++++++++++++--------- pandas/core/reshape/encoding.py | 8 +++--- pandas/core/reshape/melt.py | 7 ++++-- pandas/core/reshape/pivot.py | 28 ++++++++++++++------- pandas/core/strings/accessor.py | 30 ++++++++++++++-------- pandas/core/strings/base.py | 16 +++++++++--- pandas/core/strings/object_array.py | 16 +++++++++--- pandas/core/tools/datetimes.py | 8 +++--- pandas/core/tools/numeric.py | 13 ++++++++-- pandas/core/tools/timedeltas.py | 8 ++++-- pandas/core/tools/times.py | 5 +++- pandas/core/window/ewm.py | 2 +- pandas/io/formats/format.py | 4 +-- pandas/io/formats/style.py | 10 +++++--- pandas/io/formats/style_render.py | 14 +++++------ pandas/io/parquet.py | 7 ++++-- pandas/io/pytables.py | 2 +- pandas/io/sql.py | 13 +++++----- pandas/plotting/_matplotlib/boxplot.py | 16 ++++++++++-- pandas/plotting/_matplotlib/core.py | 6 ++++- pandas/plotting/_matplotlib/style.py | 6 +---- pyright_reportGeneralTypeIssues.json | 2 ++ 54 files changed, 414 insertions(+), 190 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 31f317636bd69..75c80f515359c 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -258,7 +258,7 @@ repos: |/_testing/ - id: autotyping name: autotyping - entry: python -m libcst.tool codemod autotyping.AutotypeCommand --none-return --scalar-return --annotate-magics --annotate-imprecise-magics + entry: python -m libcst.tool codemod autotyping.AutotypeCommand --none-return --scalar-return --annotate-magics --annotate-imprecise-magics --bytes-param --str-param types_or: [python, pyi] files: ^pandas exclude: ^(pandas/tests|pandas/io/clipboard) diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py index 1035fd08a1a36..54d743523e56f 100644 --- a/pandas/_testing/__init__.py +++ b/pandas/_testing/__init__.py @@ -25,7 +25,10 @@ set_locale, ) -from pandas._typing import Dtype +from pandas._typing import ( + Dtype, + Frequency, +) from pandas.compat import pa_version_under1p01 from pandas.core.dtypes.common import ( @@ -401,13 +404,17 @@ def makeFloatIndex(k=10, name=None) -> Float64Index: return Float64Index(base_idx) -def makeDateIndex(k: int = 10, freq="B", name=None, **kwargs) -> DatetimeIndex: +def makeDateIndex( + k: int = 10, freq: Frequency = "B", name=None, **kwargs +) -> DatetimeIndex: dt = datetime(2000, 1, 1) dr = bdate_range(dt, periods=k, freq=freq, name=name) return DatetimeIndex(dr, name=name, **kwargs) -def makeTimedeltaIndex(k: int = 10, freq="D", name=None, **kwargs) -> TimedeltaIndex: +def makeTimedeltaIndex( + k: int = 10, freq: Frequency = "D", name=None, **kwargs +) -> TimedeltaIndex: return pd.timedelta_range(start="1 day", periods=k, freq=freq, name=name, **kwargs) @@ -483,7 +490,7 @@ def getSeriesData() -> dict[str, Series]: return {c: Series(np.random.randn(_N), index=index) for c in getCols(_K)} -def makeTimeSeries(nper=None, freq="B", name=None) -> Series: +def makeTimeSeries(nper=None, freq: Frequency = "B", name=None) -> Series: if nper is None: nper = _N return Series( @@ -497,7 +504,7 @@ def makePeriodSeries(nper=None, name=None) -> Series: return Series(np.random.randn(nper), index=makePeriodIndex(nper), name=name) -def getTimeSeriesData(nper=None, freq="B") -> dict[str, Series]: +def getTimeSeriesData(nper=None, freq: Frequency = "B") -> dict[str, Series]: return {c: makeTimeSeries(nper, freq) for c in getCols(_K)} @@ -506,7 +513,7 @@ def getPeriodData(nper=None) -> dict[str, Series]: # make frame -def makeTimeDataFrame(nper=None, freq="B") -> DataFrame: +def makeTimeDataFrame(nper=None, freq: Frequency = "B") -> DataFrame: data = getTimeSeriesData(nper, freq) return DataFrame(data) @@ -541,7 +548,7 @@ def makePeriodFrame(nper=None) -> DataFrame: def makeCustomIndex( nentries, nlevels, - prefix="#", + prefix: str = "#", names: bool | str | list[str] | None = False, ndupe_l=None, idx_type=None, diff --git a/pandas/_testing/_io.py b/pandas/_testing/_io.py index d1acdff8d2fd7..f71e20dbdf69d 100644 --- a/pandas/_testing/_io.py +++ b/pandas/_testing/_io.py @@ -113,10 +113,11 @@ def dec(f): return wrapper -@optional_args +# error: Untyped decorator makes function "network" untyped +@optional_args # type: ignore[misc] def network( t, - url="https://www.google.com", + url: str = "https://www.google.com", raise_on_error=False, check_before_test=False, error_classes=None, @@ -368,7 +369,7 @@ def round_trip_localpath(writer, reader, path: str | None = None): return obj -def write_to_compressed(compression, path, data, dest="test"): +def write_to_compressed(compression, path, data, dest: str = "test"): """ Write data to a compressed file. diff --git a/pandas/_testing/_random.py b/pandas/_testing/_random.py index 880fffea21bd1..5942e3266c8bb 100644 --- a/pandas/_testing/_random.py +++ b/pandas/_testing/_random.py @@ -2,6 +2,8 @@ import numpy as np +from pandas._typing import NpDtype + def randbool(size=(), p: float = 0.5): return np.random.rand(*size) <= p @@ -14,7 +16,7 @@ def randbool(size=(), p: float = 0.5): ) -def rands_array(nchars, size, dtype="O", replace=True) -> np.ndarray: +def rands_array(nchars, size, dtype: NpDtype = "O", replace=True) -> np.ndarray: """ Generate an array of byte strings. """ diff --git a/pandas/_testing/asserters.py b/pandas/_testing/asserters.py index 3858670850074..05a7c883d51ad 100644 --- a/pandas/_testing/asserters.py +++ b/pandas/_testing/asserters.py @@ -311,7 +311,7 @@ def assert_index_equal( """ __tracebackhide__ = True - def _check_types(left, right, obj="Index") -> None: + def _check_types(left, right, obj: str = "Index") -> None: if not exact: return @@ -429,7 +429,9 @@ def _get_ilevel_values(index, level): assert_categorical_equal(left._values, right._values, obj=f"{obj} category") -def assert_class_equal(left, right, exact: bool | str = True, obj="Input") -> None: +def assert_class_equal( + left, right, exact: bool | str = True, obj: str = "Input" +) -> None: """ Checks classes are equal. """ @@ -523,7 +525,7 @@ def assert_is_sorted(seq) -> None: def assert_categorical_equal( - left, right, check_dtype=True, check_category_order=True, obj="Categorical" + left, right, check_dtype=True, check_category_order=True, obj: str = "Categorical" ) -> None: """ Test that Categoricals are equivalent. @@ -580,7 +582,7 @@ def assert_categorical_equal( def assert_interval_array_equal( - left, right, exact="equiv", obj="IntervalArray" + left, right, exact: bool | Literal["equiv"] = "equiv", obj: str = "IntervalArray" ) -> None: """ Test that two IntervalArrays are equivalent. @@ -610,7 +612,7 @@ def assert_interval_array_equal( assert_attr_equal("closed", left, right, obj=obj) -def assert_period_array_equal(left, right, obj="PeriodArray") -> None: +def assert_period_array_equal(left, right, obj: str = "PeriodArray") -> None: _check_isinstance(left, right, PeriodArray) assert_numpy_array_equal(left._data, right._data, obj=f"{obj}._data") @@ -618,7 +620,7 @@ def assert_period_array_equal(left, right, obj="PeriodArray") -> None: def assert_datetime_array_equal( - left, right, obj="DatetimeArray", check_freq=True + left, right, obj: str = "DatetimeArray", check_freq=True ) -> None: __tracebackhide__ = True _check_isinstance(left, right, DatetimeArray) @@ -630,7 +632,7 @@ def assert_datetime_array_equal( def assert_timedelta_array_equal( - left, right, obj="TimedeltaArray", check_freq=True + left, right, obj: str = "TimedeltaArray", check_freq=True ) -> None: __tracebackhide__ = True _check_isinstance(left, right, TimedeltaArray) @@ -689,7 +691,7 @@ def assert_numpy_array_equal( check_dtype: bool | Literal["equiv"] = True, err_msg=None, check_same=None, - obj="numpy array", + obj: str = "numpy array", index_values=None, ) -> None: """ @@ -883,7 +885,7 @@ def assert_series_equal( check_flags=True, rtol=1.0e-5, atol=1.0e-8, - obj="Series", + obj: str = "Series", *, check_index=True, check_like=False, @@ -1140,7 +1142,7 @@ def assert_frame_equal( right, check_dtype: bool | Literal["equiv"] = True, check_index_type: bool | Literal["equiv"] = "equiv", - check_column_type="equiv", + check_column_type: bool | Literal["equiv"] = "equiv", check_frame_type=True, check_less_precise=no_default, check_names=True, @@ -1153,7 +1155,7 @@ def assert_frame_equal( check_flags=True, rtol=1.0e-5, atol=1.0e-8, - obj="DataFrame", + obj: str = "DataFrame", ) -> None: """ Check that left and right DataFrame are equal. diff --git a/pandas/_typing.py b/pandas/_typing.py index dc51c04447bef..b35060e57ed07 100644 --- a/pandas/_typing.py +++ b/pandas/_typing.py @@ -331,3 +331,12 @@ def closed(self) -> bool: # dropna AnyAll = Literal["any", "all"] + +MatplotlibColor = Union[str, Sequence[float]] +TimeGrouperOrigin = Union[ + "Timestamp", Literal["epoch", "start", "start_day", "end", "end_day"] +] +TimeAmbiguous = Union[Literal["infer", "NaT", "raise"], "npt.NDArray[np.bool_]"] +TimeNonexistent = Union[ + Literal["shift_forward", "shift_backward", "NaT", "raise"], timedelta +] diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index cfae5b4cae681..16996c97896d6 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -12,6 +12,7 @@ from pandas._typing import ( Dtype, PositionalIndexer, + SortKind, TakeIndexer, npt, ) @@ -472,7 +473,7 @@ def isna(self) -> npt.NDArray[np.bool_]: def argsort( self, ascending: bool = True, - kind: str = "quicksort", + kind: SortKind = "quicksort", na_position: str = "last", *args, **kwargs, diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index ef7c676be6797..0f3b654dda4b1 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -36,6 +36,7 @@ ScalarIndexer, SequenceIndexer, Shape, + SortKind, TakeIndexer, npt, ) @@ -670,7 +671,7 @@ def _values_for_argsort(self) -> np.ndarray: def argsort( self, ascending: bool = True, - kind: str = "quicksort", + kind: SortKind = "quicksort", na_position: str = "last", *args, **kwargs, diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 085c4563599fc..31e7e97422b65 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -43,6 +43,7 @@ NpDtype, Ordered, Shape, + SortKind, npt, type_t, ) @@ -1821,8 +1822,11 @@ def check_for_ordered(self, op) -> None: "Categorical to an ordered one\n" ) + # error: Signature of "argsort" incompatible with supertype "ExtensionArray" @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"]) - def argsort(self, ascending=True, kind="quicksort", **kwargs): + def argsort( # type: ignore[override] + self, ascending: bool = True, kind: SortKind = "quicksort", **kwargs + ) -> npt.NDArray[np.intp]: """ Return the indices that would sort the Categorical. @@ -2194,7 +2198,9 @@ def _repr_footer(self) -> str: info = self._repr_categories_info() return f"Length: {len(self)}\n{info}" - def _get_repr(self, length: bool = True, na_rep="NaN", footer: bool = True) -> str: + def _get_repr( + self, length: bool = True, na_rep: str = "NaN", footer: bool = True + ) -> str: from pandas.io.formats import format as fmt formatter = fmt.CategoricalFormatter( @@ -2710,7 +2716,7 @@ def _str_map( result = PandasArray(categories.to_numpy())._str_map(f, na_value, dtype) return take_nd(result, codes, fill_value=na_value) - def _str_get_dummies(self, sep="|"): + def _str_get_dummies(self, sep: str = "|"): # sep may not be in categories. Just bail on this. from pandas.core.arrays import PandasArray diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 26bcfb7439bca..bd0a118b0a81e 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -60,6 +60,8 @@ PositionalIndexerTuple, ScalarIndexer, SequenceIndexer, + TimeAmbiguous, + TimeNonexistent, npt, ) from pandas.compat.numpy import function as nv @@ -311,7 +313,7 @@ def asi8(self) -> npt.NDArray[np.int64]: # Rendering Methods def _format_native_types( - self, *, na_rep="NaT", date_format=None + self, *, na_rep: object = "NaT", date_format=None ) -> npt.NDArray[np.object_]: """ Helper method for astype when converting to strings. @@ -559,7 +561,7 @@ def _concat_same_type( new_obj._freq = new_freq return new_obj - def copy(self: DatetimeLikeArrayT, order="C") -> DatetimeLikeArrayT: + def copy(self: DatetimeLikeArrayT, order: str = "C") -> DatetimeLikeArrayT: # error: Unexpected keyword argument "order" for "copy" new_obj = super().copy(order=order) # type: ignore[call-arg] new_obj._freq = self.freq @@ -2067,15 +2069,30 @@ def _round(self, freq, mode, ambiguous, nonexistent): return self._simple_new(result, dtype=self.dtype) @Appender((_round_doc + _round_example).format(op="round")) - def round(self, freq, ambiguous="raise", nonexistent="raise"): + def round( + self, + freq, + ambiguous: TimeAmbiguous = "raise", + nonexistent: TimeNonexistent = "raise", + ): return self._round(freq, RoundTo.NEAREST_HALF_EVEN, ambiguous, nonexistent) @Appender((_round_doc + _floor_example).format(op="floor")) - def floor(self, freq, ambiguous="raise", nonexistent="raise"): + def floor( + self, + freq, + ambiguous: TimeAmbiguous = "raise", + nonexistent: TimeNonexistent = "raise", + ): return self._round(freq, RoundTo.MINUS_INFTY, ambiguous, nonexistent) @Appender((_round_doc + _ceil_example).format(op="ceil")) - def ceil(self, freq, ambiguous="raise", nonexistent="raise"): + def ceil( + self, + freq, + ambiguous: TimeAmbiguous = "raise", + nonexistent: TimeNonexistent = "raise", + ): return self._round(freq, RoundTo.PLUS_INFTY, ambiguous, nonexistent) # -------------------------------------------------------------- diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index e96e9b44112d6..691b5ed5993b0 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -41,7 +41,13 @@ tz_convert_from_utc, tzconversion, ) -from pandas._typing import npt +from pandas._typing import ( + DateTimeErrorChoices, + IntervalClosedType, + TimeAmbiguous, + TimeNonexistent, + npt, +) from pandas.errors import ( OutOfBoundsDatetime, PerformanceWarning, @@ -298,7 +304,7 @@ def _from_sequence_not_strict( freq: str | BaseOffset | lib.NoDefault | None = lib.no_default, dayfirst: bool = False, yearfirst: bool = False, - ambiguous="raise", + ambiguous: TimeAmbiguous = "raise", ): explicit_none = freq is None freq = freq if freq is not lib.no_default else None @@ -333,8 +339,10 @@ def _from_sequence_not_strict( return result + # error: Signature of "_generate_range" incompatible with supertype + # "DatetimeLikeArrayMixin" @classmethod - def _generate_range( + def _generate_range( # type: ignore[override] cls, start, end, @@ -342,10 +350,10 @@ def _generate_range( freq, tz=None, normalize=False, - ambiguous="raise", - nonexistent="raise", - inclusive="both", - ): + ambiguous: TimeAmbiguous = "raise", + nonexistent: TimeNonexistent = "raise", + inclusive: IntervalClosedType = "both", + ) -> DatetimeArray: periods = dtl.validate_periods(periods) if freq is None and any(x is None for x in [periods, start, end]): @@ -640,7 +648,7 @@ def astype(self, dtype, copy: bool = True): # Rendering Methods def _format_native_types( - self, *, na_rep="NaT", date_format=None, **kwargs + self, *, na_rep: object = "NaT", date_format=None, **kwargs ) -> npt.NDArray[np.object_]: from pandas.io.formats.format import get_format_datetime64_from_values @@ -812,7 +820,12 @@ def tz_convert(self, tz) -> DatetimeArray: return self._simple_new(self._ndarray, dtype=dtype, freq=self.freq) @dtl.ravel_compat - def tz_localize(self, tz, ambiguous="raise", nonexistent="raise") -> DatetimeArray: + def tz_localize( + self, + tz, + ambiguous: TimeAmbiguous = "raise", + nonexistent: TimeNonexistent = "raise", + ) -> DatetimeArray: """ Localize tz-naive Datetime Array/Index to tz-aware Datetime Array/Index. @@ -1971,7 +1984,7 @@ def _sequence_to_dt64ns( tz=None, dayfirst: bool = False, yearfirst: bool = False, - ambiguous="raise", + ambiguous: TimeAmbiguous = "raise", *, allow_mixed: bool = False, require_iso8601: bool = False, @@ -2118,7 +2131,7 @@ def objects_to_datetime64ns( dayfirst, yearfirst, utc=False, - errors="raise", + errors: DateTimeErrorChoices = "raise", require_iso8601: bool = False, allow_object: bool = False, allow_mixed: bool = False, diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index 446285da719af..a7a5258883fa4 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -37,6 +37,7 @@ PositionalIndexer, ScalarIndexer, SequenceIndexer, + SortKind, npt, ) from pandas.compat.numpy import function as nv @@ -563,7 +564,7 @@ def from_arrays( def from_tuples( cls: type[IntervalArrayT], data, - closed="right", + closed: IntervalClosedType | None = "right", copy: bool = False, dtype: Dtype | None = None, ) -> IntervalArrayT: @@ -798,7 +799,7 @@ def __le__(self, other): def argsort( self, ascending: bool = True, - kind: str = "quicksort", + kind: SortKind = "quicksort", na_position: str = "last", *args, **kwargs, diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 91eeee936cba1..92a7f972941e4 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -642,7 +642,7 @@ def _formatter(self, boxed: bool = False): @dtl.ravel_compat def _format_native_types( - self, *, na_rep="NaT", date_format=None, **kwargs + self, *, na_rep: object = "NaT", date_format=None, **kwargs ) -> npt.NDArray[np.object_]: """ actually format my specific types diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 12e3e9813a816..9fc2ccdd6cb65 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -32,6 +32,7 @@ parse_timedelta_unit, ) from pandas._typing import ( + DateTimeErrorChoices, DtypeObj, NpDtype, npt, @@ -372,7 +373,7 @@ def _formatter(self, boxed: bool = False): return get_format_timedelta64(self, box=True) def _format_native_types( - self, *, na_rep="NaT", date_format=None, **kwargs + self, *, na_rep: object = "NaT", date_format=None, **kwargs ) -> npt.NDArray[np.object_]: from pandas.io.formats.format import get_format_timedelta64 @@ -845,7 +846,10 @@ def f(x): def sequence_to_td64ns( - data, copy: bool = False, unit=None, errors="raise" + data, + copy: bool = False, + unit=None, + errors: DateTimeErrorChoices = "raise", ) -> tuple[np.ndarray, Tick | None]: """ Parameters @@ -931,7 +935,7 @@ def sequence_to_td64ns( return data, inferred_freq -def ints_to_td64ns(data, unit="ns"): +def ints_to_td64ns(data, unit: str = "ns"): """ Convert an ndarray with integer-dtype to timedelta64[ns] dtype, treating the integers as multiples of the given timedelta unit. @@ -971,7 +975,7 @@ def ints_to_td64ns(data, unit="ns"): return data, copy_made -def _objects_to_td64ns(data, unit=None, errors="raise"): +def _objects_to_td64ns(data, unit=None, errors: DateTimeErrorChoices = "raise"): """ Convert a object-dtyped or string-dtyped array into an timedelta64[ns]-dtyped array. diff --git a/pandas/core/base.py b/pandas/core/base.py index 8f3b31caf8986..274506d0983fa 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -1303,7 +1303,7 @@ def searchsorted( sorter=sorter, ) - def drop_duplicates(self, keep="first"): + def drop_duplicates(self, keep: Literal["first", "last", False] = "first"): duplicated = self._duplicated(keep=keep) # error: Value of type "IndexOpsMixin" is not indexable return self[~duplicated] # type: ignore[index] diff --git a/pandas/core/frame.py b/pandas/core/frame.py index c52a7b0daa30e..b0d1d9c9f6c9c 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -8742,11 +8742,11 @@ def pivot_table( values=None, index=None, columns=None, - aggfunc="mean", + aggfunc: AggFuncType = "mean", fill_value=None, margins=False, dropna=True, - margins_name="All", + margins_name: Level = "All", observed=False, sort=True, ) -> DataFrame: @@ -9143,7 +9143,7 @@ def melt( id_vars=None, value_vars=None, var_name=None, - value_name="value", + value_name: Hashable = "value", col_level: Level = None, ignore_index: bool = True, ) -> DataFrame: @@ -10257,7 +10257,8 @@ def _series_round(ser: Series, decimals: int): def corr( self, - method: str | Callable[[np.ndarray, np.ndarray], float] = "pearson", + method: Literal["pearson", "kendall", "spearman"] + | Callable[[np.ndarray, np.ndarray], float] = "pearson", min_periods: int = 1, numeric_only: bool | lib.NoDefault = lib.no_default, ) -> DataFrame: diff --git a/pandas/core/generic.py b/pandas/core/generic.py index aa9845a2abb78..c5ab827469e42 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -69,7 +69,9 @@ StorageOptions, Suffixes, T, + TimeAmbiguous, TimedeltaConvertibleTypes, + TimeNonexistent, TimestampConvertibleTypes, ValueKeyFunc, WriteBuffer, @@ -2820,7 +2822,7 @@ def to_sql( name: str, con, schema: str | None = None, - if_exists: str = "fail", + if_exists: Literal["fail", "replace", "append"] = "fail", index: bool_t = True, index_label: IndexLabel = None, chunksize: int | None = None, @@ -4137,7 +4139,7 @@ def _check_is_chained_assignment_possible(self) -> bool_t: return False @final - def _check_setitem_copy(self, t="setting", force=False): + def _check_setitem_copy(self, t: str = "setting", force=False): """ Parameters @@ -4328,7 +4330,7 @@ def _is_view(self) -> bool_t: def reindex_like( self: NDFrameT, other, - method: str | None = None, + method: Literal["backfill", "bfill", "pad", "ffill", "nearest"] | None = None, copy: bool_t = True, limit=None, tolerance=None, @@ -9493,7 +9495,7 @@ def align( def _align_frame( self, other, - join="outer", + join: Literal["outer", "inner", "left", "right"] = "outer", axis=None, level=None, copy: bool_t = True, @@ -9557,7 +9559,7 @@ def _align_frame( def _align_series( self, other, - join="outer", + join: Literal["outer", "inner", "left", "right"] = "outer", axis=None, level=None, copy: bool_t = True, @@ -10551,8 +10553,8 @@ def tz_localize( axis=0, level=None, copy: bool_t = True, - ambiguous="raise", - nonexistent: str = "raise", + ambiguous: TimeAmbiguous = "raise", + nonexistent: TimeNonexistent = "raise", ) -> NDFrameT: """ Localize tz-naive index of a Series or DataFrame to target time zone. @@ -10977,7 +10979,7 @@ def describe( def pct_change( self: NDFrameT, periods=1, - fill_method="pad", + fill_method: Literal["backfill", "bfill", "pad", "ffill"] = "pad", limit=None, freq=None, **kwargs, diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index b963b85b93a31..164bc222a1ea6 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -46,6 +46,7 @@ class providing the base-class of operations. from pandas._typing import ( ArrayLike, Dtype, + FillnaOptions, IndexLabel, NDFrameT, PositionalIndexer, @@ -3960,7 +3961,14 @@ def diff(self, periods: int = 1, axis: int = 0) -> NDFrameT: @final @Substitution(name="groupby") @Appender(_common_see_also) - def pct_change(self, periods=1, fill_method="ffill", limit=None, freq=None, axis=0): + def pct_change( + self, + periods=1, + fill_method: FillnaOptions = "ffill", + limit=None, + freq=None, + axis=0, + ): """ Calculate pct_change of each value to previous entry in group. diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index ee16857337df9..ce2a52e6e52b7 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -993,7 +993,7 @@ def dtype(self) -> DtypeObj: return self._data.dtype @final - def ravel(self, order="C"): + def ravel(self, order: str_t = "C"): """ Return an ndarray of the flattened values of the underlying data. @@ -1441,7 +1441,7 @@ def format( self, name: bool = False, formatter: Callable | None = None, - na_rep: str_t = "NaN", + na_rep: object = "NaN", ) -> list[str_t]: """ Render a string representation of the Index. @@ -1459,7 +1459,7 @@ def format( return self._format_with_header(header, na_rep=na_rep) - def _format_with_header(self, header: list[str_t], na_rep: str_t) -> list[str_t]: + def _format_with_header(self, header: list[str_t], na_rep: object) -> list[str_t]: from pandas.io.formats.format import format_array values = self._values @@ -1520,7 +1520,7 @@ def to_native_types(self, slicer=None, **kwargs) -> np.ndarray: return values._format_native_types(**kwargs) def _format_native_types( - self, *, na_rep="", quoting=None, **kwargs + self, *, na_rep: object = "", quoting=None, **kwargs ) -> npt.NDArray[np.object_]: """ Actually format specific types of the index. @@ -3028,7 +3028,9 @@ def unique(self: _IndexT, level: Hashable | None = None) -> _IndexT: return self._shallow_copy(result) @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"]) - def drop_duplicates(self: _IndexT, keep: str_t | bool = "first") -> _IndexT: + def drop_duplicates( + self: _IndexT, keep: Literal["first", "last", False] = "first" + ) -> _IndexT: """ Return Index with duplicate values removed. diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index d1bdedee5caa0..ef3e545505bf0 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -358,9 +358,11 @@ def _format_attrs(self): extra = super()._format_attrs() return attrs + extra - def _format_with_header(self, header: list[str], na_rep: str) -> list[str]: + def _format_with_header(self, header: list[str], na_rep: object) -> list[str]: result = [ - pprint_thing(x, escape_chars=("\t", "\r", "\n")) if notna(x) else na_rep + pprint_thing(x, escape_chars=("\t", "\r", "\n")) + if notna(x) + else str(na_rep) for x in self._values ] return header + result diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 6867ef936d45e..70ba2b13154f4 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -155,7 +155,7 @@ def format( self, name: bool = False, formatter: Callable | None = None, - na_rep: str = "NaT", + na_rep: object = "NaT", date_format: str | None = None, ) -> list[str]: """ @@ -175,7 +175,7 @@ def format( return self._format_with_header(header, na_rep=na_rep, date_format=date_format) def _format_with_header( - self, header: list[str], na_rep: str = "NaT", date_format: str | None = None + self, header: list[str], na_rep: object = "NaT", date_format: str | None = None ) -> list[str]: # matches base class except for whitespace padding and date_format return header + list( diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index b91fbb8244cb5..b667a6b06f7ad 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -26,7 +26,6 @@ lib, ) from pandas._libs.tslibs import ( - BaseOffset, Resolution, periods_per_day, timezones, @@ -37,8 +36,11 @@ from pandas._typing import ( Dtype, DtypeObj, + Frequency, IntervalClosedType, IntervalLeftRight, + TimeAmbiguous, + TimeNonexistent, npt, ) from pandas.util._decorators import ( @@ -278,7 +280,12 @@ def tz_convert(self, tz) -> DatetimeIndex: return type(self)._simple_new(arr, name=self.name) @doc(DatetimeArray.tz_localize) - def tz_localize(self, tz, ambiguous="raise", nonexistent="raise") -> DatetimeIndex: + def tz_localize( + self, + tz, + ambiguous: TimeAmbiguous = "raise", + nonexistent: TimeNonexistent = "raise", + ) -> DatetimeIndex: arr = self._data.tz_localize(tz, ambiguous, nonexistent) return type(self)._simple_new(arr, name=self.name) @@ -314,11 +321,11 @@ def isocalendar(self) -> DataFrame: def __new__( cls, data=None, - freq: str | BaseOffset | lib.NoDefault = lib.no_default, + freq: Frequency | lib.NoDefault = lib.no_default, tz=None, normalize: bool = False, closed=None, - ambiguous="raise", + ambiguous: TimeAmbiguous = "raise", dayfirst: bool = False, yearfirst: bool = False, dtype: Dtype | None = None, @@ -590,7 +597,7 @@ def to_series(self, keep_tz=lib.no_default, index=None, name=None): return Series(values, index=index, name=name) - def snap(self, freq="S") -> DatetimeIndex: + def snap(self, freq: Frequency = "S") -> DatetimeIndex: """ Snap time stamps to nearest occurring frequency. @@ -1142,7 +1149,7 @@ def bdate_range( start=None, end=None, periods: int | None = None, - freq="B", + freq: Frequency = "B", tz=None, normalize: bool = True, name: Hashable = None, diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index e686e8453f0d9..6b7133dfeaf8e 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -311,7 +311,7 @@ def from_arrays( def from_tuples( cls, data, - closed: str = "right", + closed: IntervalClosedType = "right", name: Hashable = None, copy: bool = False, dtype: Dtype | None = None, @@ -816,12 +816,12 @@ def length(self) -> Index: # Rendering Methods # __repr__ associated methods are based on MultiIndex - def _format_with_header(self, header: list[str], na_rep: str) -> list[str]: + def _format_with_header(self, header: list[str], na_rep: object) -> list[str]: # matches base class except for whitespace padding return header + list(self._format_native_types(na_rep=na_rep)) def _format_native_types( - self, *, na_rep="NaN", quoting=None, **kwargs + self, *, na_rep: object = "NaN", quoting=None, **kwargs ) -> npt.NDArray[np.object_]: # GH 28210: use base method but with different default na_rep return super()._format_native_types(na_rep=na_rep, quoting=quoting, **kwargs) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 1b35cc03f6fdd..e43f225de12e6 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -33,6 +33,7 @@ AnyArrayLike, DtypeObj, F, + IgnoreRaise, Scalar, Shape, npt, @@ -1320,7 +1321,7 @@ def _formatter_func(self, tup): return tuple(func(val) for func, val in zip(formatter_funcs, tup)) def _format_native_types( - self, *, na_rep="nan", **kwargs + self, *, na_rep: object = "nan", **kwargs ) -> npt.NDArray[np.object_]: new_levels = [] new_codes = [] @@ -1334,7 +1335,9 @@ def _format_native_types( nan_index = len(level_strs) # numpy 1.21 deprecated implicit string casting level_strs = level_strs.astype(str) - level_strs = np.append(level_strs, na_rep) + # error: Argument 2 to "append" has incompatible type "object"; + # expected "Union[...]" + level_strs = np.append(level_strs, na_rep) # type: ignore[arg-type] assert not level_codes.flags.writeable # i.e. copy is needed level_codes = level_codes.copy() # make writeable level_codes[mask] = nan_index @@ -1359,7 +1362,7 @@ def format( self, name: bool | None = None, formatter: Callable | None = None, - na_rep: str | None = None, + na_rep: object = None, names: bool = False, space: int = 2, sparsify=None, @@ -1618,7 +1621,9 @@ def _inferred_type_levels(self) -> list[str]: return [i.inferred_type for i in self.levels] @doc(Index.duplicated) - def duplicated(self, keep="first") -> npt.NDArray[np.bool_]: + def duplicated( + self, keep: Literal["last", "first", False] = "first" + ) -> npt.NDArray[np.bool_]: shape = tuple(len(lev) for lev in self.levels) ids = get_group_index(self.codes, shape, sort=False, xnull=False) @@ -2248,7 +2253,13 @@ def repeat(self, repeats: int, axis=None) -> MultiIndex: verify_integrity=False, ) - def drop(self, codes, level=None, errors="raise"): + # error: Signature of "drop" incompatible with supertype "Index" + def drop( # type: ignore[override] + self, + codes, + level: Index | np.ndarray | Iterable[Hashable] | None = None, + errors: IgnoreRaise = "raise", + ) -> MultiIndex: """ Make new MultiIndex with passed list of codes deleted @@ -2302,7 +2313,9 @@ def drop(self, codes, level=None, errors="raise"): return self.delete(inds) - def _drop_from_level(self, codes, level, errors="raise") -> MultiIndex: + def _drop_from_level( + self, codes, level, errors: IgnoreRaise = "raise" + ) -> MultiIndex: codes = com.index_labels_to_array(codes) i = self._get_level_number(level) index = self.levels[i] @@ -3835,7 +3848,9 @@ def set_names(self, names, level=None, inplace: bool = False) -> MultiIndex | No rename = set_names @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"]) - def drop_duplicates(self, keep: str | bool = "first") -> MultiIndex: + def drop_duplicates( + self, keep: Literal["first", "last", False] = "first" + ) -> MultiIndex: return super().drop_duplicates(keep=keep) # --------------------------------------------------------------- @@ -3875,7 +3890,7 @@ def _lexsort_depth(codes: list[np.ndarray], nlevels: int) -> int: return 0 -def sparsify_labels(label_list, start: int = 0, sentinel=""): +def sparsify_labels(label_list, start: int = 0, sentinel: object = ""): pivoted = list(zip(*label_list)) k = len(label_list) diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index d114fe47fa0f1..c37fb4929015b 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -281,7 +281,13 @@ def _assert_safe_casting(cls, data: np.ndarray, subarr: np.ndarray) -> None: raise TypeError("Unsafe NumPy casting, you must explicitly cast") def _format_native_types( - self, *, na_rep="", float_format=None, decimal=".", quoting=None, **kwargs + self, + *, + na_rep: object = "", + float_format=None, + decimal: str = ".", + quoting=None, + **kwargs, ) -> npt.NDArray[np.object_]: from pandas.io.formats.format import FloatArrayFormatter diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 9f49c7456d9ce..0e7917ccca0b7 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -228,7 +228,7 @@ def _format_data(self, name=None): # we are formatting thru the attributes return None - def _format_with_header(self, header: list[str], na_rep: str) -> list[str]: + def _format_with_header(self, header: list[str], na_rep: object) -> list[str]: # Equivalent to Index implementation, but faster if not len(self._range): return header diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index d415cbd035cd1..15b1b8b9a5c97 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1652,7 +1652,7 @@ def _get_setitem_indexer(self, key): # ------------------------------------------------------------------- - def _setitem_with_indexer(self, indexer, value, name="iloc"): + def _setitem_with_indexer(self, indexer, value, name: str = "iloc"): """ _setitem_with_indexer is for setting values on a Series/DataFrame using positional indexers. diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py index 53f8486074ef9..80781a970d1b5 100644 --- a/pandas/core/internals/array_manager.py +++ b/pandas/core/internals/array_manager.py @@ -22,6 +22,7 @@ from pandas._typing import ( ArrayLike, DtypeObj, + QuantileInterpolation, npt, ) from pandas.util._validators import validate_bool_kwarg @@ -1054,7 +1055,7 @@ def quantile( qs: Float64Index, axis: int = 0, transposed: bool = False, - interpolation="linear", + interpolation: QuantileInterpolation = "linear", ) -> ArrayManager: arrs = [ensure_block_shape(x, 2) for x in self.arrays] diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index d49945b2a67cc..e54ed1e60a595 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -28,7 +28,9 @@ ArrayLike, DtypeObj, F, + FillnaOptions, IgnoreRaise, + QuantileInterpolation, Shape, npt, ) @@ -537,7 +539,7 @@ def astype( return newb @final - def to_native_types(self, na_rep="nan", quoting=None, **kwargs) -> Block: + def to_native_types(self, na_rep: object = "nan", quoting=None, **kwargs) -> Block: """convert to our native types format""" result = to_native_types(self.values, na_rep=na_rep, quoting=quoting, **kwargs) return self.make_block(result) @@ -1048,7 +1050,7 @@ def putmask(self, mask, new) -> list[Block]: res_blocks.extend(rbs) return res_blocks - def where(self, other, cond, _downcast="infer") -> list[Block]: + def where(self, other, cond, _downcast: str | bool = "infer") -> list[Block]: """ evaluate the block; return result block(s) from the result @@ -1207,7 +1209,7 @@ def fillna( def interpolate( self, - method: str = "pad", + method: FillnaOptions = "pad", axis: int = 0, index: Index | None = None, inplace: bool = False, @@ -1309,7 +1311,10 @@ def shift(self, periods: int, axis: int = 0, fill_value: Any = None) -> list[Blo @final def quantile( - self, qs: Float64Index, interpolation="linear", axis: int = 0 + self, + qs: Float64Index, + interpolation: QuantileInterpolation = "linear", + axis: int = 0, ) -> Block: """ compute the quantiles of the @@ -1433,7 +1438,7 @@ def setitem(self, indexer, value): else: return self - def where(self, other, cond, _downcast="infer") -> list[Block]: + def where(self, other, cond, _downcast: str | bool = "infer") -> list[Block]: # _downcast private bc we only specify it when calling from fillna arr = self.values.T @@ -1606,8 +1611,15 @@ def get_values(self, dtype: DtypeObj | None = None) -> np.ndarray: def values_for_json(self) -> np.ndarray: return np.asarray(self.values) - def interpolate( - self, method="pad", axis=0, inplace=False, limit=None, fill_value=None, **kwargs + # error: Signature of "interpolate" incompatible with supertype "Block" + def interpolate( # type: ignore[override] + self, + method: FillnaOptions = "pad", + axis: int = 0, + inplace: bool = False, + limit: int | None = None, + fill_value=None, + **kwargs, ): values = self.values if values.ndim == 2 and axis == 0: @@ -2274,10 +2286,10 @@ def ensure_block_shape(values: ArrayLike, ndim: int = 1) -> ArrayLike: def to_native_types( values: ArrayLike, *, - na_rep="nan", + na_rep: object = "nan", quoting=None, float_format=None, - decimal=".", + decimal: str = ".", **kwargs, ) -> np.ndarray: """convert to our native types format""" diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index cfacfc2b38553..41843701b32a4 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -27,6 +27,7 @@ from pandas._typing import ( ArrayLike, DtypeObj, + QuantileInterpolation, Shape, npt, type_t, @@ -1578,7 +1579,7 @@ def quantile( *, qs: Float64Index, axis: int = 0, - interpolation="linear", + interpolation: QuantileInterpolation = "linear", ) -> T: """ Iterate over blocks applying quantile reduction. diff --git a/pandas/core/missing.py b/pandas/core/missing.py index 997a7dbc9ceb8..9446752e69e9a 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -621,7 +621,9 @@ def _akima_interpolate(xi, yi, x, der=0, axis=0): return P(x, nu=der) -def _cubicspline_interpolate(xi, yi, x, axis=0, bc_type="not-a-knot", extrapolate=None): +def _cubicspline_interpolate( + xi, yi, x, axis=0, bc_type: str | tuple[Any, Any] = "not-a-knot", extrapolate=None +): """ Convenience function for cubic spline data interpolator. diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 6658b25d09e6d..c9d8ce46c7cdf 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -6,6 +6,7 @@ from typing import ( Any, Callable, + Literal, cast, ) import warnings @@ -1531,7 +1532,12 @@ def _zero_out_fperr(arg): @disallow("M8", "m8") def nancorr( - a: np.ndarray, b: np.ndarray, *, method="pearson", min_periods: int | None = None + a: np.ndarray, + b: np.ndarray, + *, + method: Literal["pearson", "kendall", "spearman"] + | Callable[[np.ndarray, np.ndarray], float] = "pearson", + min_periods: int | None = None, ) -> float: """ a, b: ndarrays @@ -1554,7 +1560,10 @@ def nancorr( return f(a, b) -def get_corr_func(method) -> Callable[[np.ndarray, np.ndarray], float]: +def get_corr_func( + method: Literal["pearson", "kendall", "spearman"] + | Callable[[np.ndarray, np.ndarray], float] +) -> Callable[[np.ndarray, np.ndarray], float]: if method == "kendall": from scipy.stats import kendalltau diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 85731bbde6d40..dd7347eaf7ade 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -27,10 +27,13 @@ to_offset, ) from pandas._typing import ( + Frequency, IndexLabel, NDFrameT, + QuantileInterpolation, T, TimedeltaConvertibleTypes, + TimeGrouperOrigin, TimestampConvertibleTypes, npt, ) @@ -893,11 +896,11 @@ def fillna(self, method, limit=None): @doc(NDFrame.interpolate, **_shared_docs_kwargs) def interpolate( self, - method="linear", + method: QuantileInterpolation = "linear", axis=0, limit=None, inplace=False, - limit_direction="forward", + limit_direction: Literal["forward", "backward", "both"] = "forward", limit_area=None, downcast=None, **kwargs, @@ -1557,12 +1560,14 @@ class TimeGrouper(Grouper): "offset", ) + origin: TimeGrouperOrigin + def __init__( self, - freq="Min", + freq: Frequency = "Min", closed: Literal["left", "right"] | None = None, label: Literal["left", "right"] | None = None, - how="mean", + how: str = "mean", axis=0, fill_method=None, limit=None, @@ -1570,7 +1575,8 @@ def __init__( kind: str | None = None, convention: Literal["start", "end", "e", "s"] | None = None, base: int | None = None, - origin: str | TimestampConvertibleTypes = "start_day", + origin: Literal["epoch", "start", "start_day", "end", "end_day"] + | TimestampConvertibleTypes = "start_day", offset: TimedeltaConvertibleTypes | None = None, group_keys: bool | lib.NoDefault = True, **kwargs, @@ -1621,7 +1627,12 @@ def __init__( self.group_keys = group_keys if origin in ("epoch", "start", "start_day", "end", "end_day"): - self.origin = origin + # error: Incompatible types in assignment (expression has type "Union[Union[ + # Timestamp, datetime, datetime64, signedinteger[_64Bit], float, str], + # Literal['epoch', 'start', 'start_day', 'end', 'end_day']]", variable has + # type "Union[Timestamp, Literal['epoch', 'start', 'start_day', 'end', + # 'end_day']]") + self.origin = origin # type: ignore[assignment] else: try: self.origin = Timestamp(origin) @@ -1947,7 +1958,7 @@ def _get_timestamp_range_edges( last: Timestamp, freq: BaseOffset, closed: Literal["right", "left"] = "left", - origin="start_day", + origin: TimeGrouperOrigin = "start_day", offset: Timedelta | None = None, ) -> tuple[Timestamp, Timestamp]: """ @@ -2025,7 +2036,7 @@ def _get_period_range_edges( last: Period, freq: BaseOffset, closed: Literal["right", "left"] = "left", - origin="start_day", + origin: TimeGrouperOrigin = "start_day", offset: Timedelta | None = None, ) -> tuple[Period, Period]: """ @@ -2099,7 +2110,7 @@ def _adjust_dates_anchored( last: Timestamp, freq: Tick, closed: Literal["right", "left"] = "right", - origin="start_day", + origin: TimeGrouperOrigin = "start_day", offset: Timedelta | None = None, ) -> tuple[Timestamp, Timestamp]: # First and last offsets should be calculated from the start day to fix an @@ -2115,11 +2126,11 @@ def _adjust_dates_anchored( elif isinstance(origin, Timestamp): origin_nanos = origin.value elif origin in ["end", "end_day"]: - origin = last if origin == "end" else last.ceil("D") - sub_freq_times = (origin.value - first.value) // freq.nanos + origin_last = last if origin == "end" else last.ceil("D") + sub_freq_times = (origin_last.value - first.value) // freq.nanos if closed == "left": sub_freq_times += 1 - first = origin - sub_freq_times * freq + first = origin_last - sub_freq_times * freq origin_nanos = first.value origin_nanos += offset.value if offset else 0 diff --git a/pandas/core/reshape/encoding.py b/pandas/core/reshape/encoding.py index 9567a1053c9a6..1b20eb2c37d63 100644 --- a/pandas/core/reshape/encoding.py +++ b/pandas/core/reshape/encoding.py @@ -25,7 +25,7 @@ def get_dummies( data, prefix=None, - prefix_sep="_", + prefix_sep: str | list[str] | dict[str, str] = "_", dummy_na: bool = False, columns=None, sparse: bool = False, @@ -175,7 +175,9 @@ def check_len(item, name): # validate separators if isinstance(prefix_sep, str): - prefix_sep = itertools.cycle([prefix_sep]) + # error: Incompatible types in assignment (expression has type + # "cycle[str]", variable has type "str") + prefix_sep = itertools.cycle([prefix_sep]) # type: ignore[assignment] elif isinstance(prefix_sep, dict): prefix_sep = [prefix_sep[col] for col in data_to_encode.columns] @@ -221,7 +223,7 @@ def check_len(item, name): def _get_dummies_1d( data, prefix, - prefix_sep="_", + prefix_sep: str | list[str] | dict[str, str] = "_", dummy_na: bool = False, sparse: bool = False, drop_first: bool = False, diff --git a/pandas/core/reshape/melt.py b/pandas/core/reshape/melt.py index 73f6aff82f330..f35430e37e964 100644 --- a/pandas/core/reshape/melt.py +++ b/pandas/core/reshape/melt.py @@ -2,7 +2,10 @@ import inspect import re -from typing import TYPE_CHECKING +from typing import ( + TYPE_CHECKING, + Hashable, +) import warnings import numpy as np @@ -42,7 +45,7 @@ def melt( id_vars=None, value_vars=None, var_name=None, - value_name="value", + value_name: Hashable = "value", col_level=None, ignore_index: bool = True, ) -> DataFrame: diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index b14c49e735355..e78c9c8fabcbd 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -64,7 +64,7 @@ def pivot_table( fill_value=None, margins: bool = False, dropna: bool = True, - margins_name: str = "All", + margins_name: Hashable = "All", observed: bool = False, sort: bool = True, ) -> DataFrame: @@ -119,7 +119,7 @@ def __internal_pivot_table( fill_value, margins: bool, dropna: bool, - margins_name: str, + margins_name: Hashable, observed: bool, sort: bool, ) -> DataFrame: @@ -262,7 +262,7 @@ def _add_margins( cols, aggfunc, observed=None, - margins_name: str = "All", + margins_name: Hashable = "All", fill_value=None, ): if not isinstance(margins_name, str): @@ -334,7 +334,9 @@ def _add_margins( return result -def _compute_grand_margin(data: DataFrame, values, aggfunc, margins_name: str = "All"): +def _compute_grand_margin( + data: DataFrame, values, aggfunc, margins_name: Hashable = "All" +): if values: grand_margin = {} @@ -357,7 +359,7 @@ def _compute_grand_margin(data: DataFrame, values, aggfunc, margins_name: str = def _generate_marginal_results( - table, data, values, rows, cols, aggfunc, observed, margins_name: str = "All" + table, data, values, rows, cols, aggfunc, observed, margins_name: Hashable = "All" ): if len(cols) > 0: # need to "interleave" the margins @@ -427,7 +429,13 @@ def _all_key(key): def _generate_marginal_results_without_values( - table: DataFrame, data, rows, cols, aggfunc, observed, margins_name: str = "All" + table: DataFrame, + data, + rows, + cols, + aggfunc, + observed, + margins_name: Hashable = "All", ): if len(cols) > 0: # need to "interleave" the margins @@ -555,7 +563,7 @@ def crosstab( colnames=None, aggfunc=None, margins: bool = False, - margins_name: str = "All", + margins_name: Hashable = "All", dropna: bool = True, normalize=False, ) -> DataFrame: @@ -695,6 +703,8 @@ def crosstab( df["__dummy__"] = values kwargs = {"aggfunc": aggfunc} + # error: Argument 7 to "pivot_table" of "DataFrame" has incompatible type + # "**Dict[str, object]"; expected "Union[...]" table = df.pivot_table( "__dummy__", index=unique_rownames, @@ -702,7 +712,7 @@ def crosstab( margins=margins, margins_name=margins_name, dropna=dropna, - **kwargs, + **kwargs, # type: ignore[arg-type] ) # Post-process @@ -718,7 +728,7 @@ def crosstab( def _normalize( - table: DataFrame, normalize, margins: bool, margins_name="All" + table: DataFrame, normalize, margins: bool, margins_name: Hashable = "All" ) -> DataFrame: if not isinstance(normalize, (bool, str)): diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py index f9852005314a4..641e115075c4b 100644 --- a/pandas/core/strings/accessor.py +++ b/pandas/core/strings/accessor.py @@ -9,6 +9,7 @@ Callable, Hashable, Iterator, + Literal, cast, ) import warnings @@ -426,7 +427,11 @@ def _get_series_list(self, others): @forbid_nonstring_types(["bytes", "mixed", "mixed-integer"]) def cat( - self, others=None, sep=None, na_rep=None, join="left" + self, + others=None, + sep=None, + na_rep=None, + join: Literal["outer", "inner", "left", "right"] = "left", ) -> str | Series | Index: """ Concatenate strings in the Series/Index with given separator. @@ -978,7 +983,7 @@ def rsplit(self, pat=None, n=-1, expand=False): } ) @forbid_nonstring_types(["bytes"]) - def partition(self, sep=" ", expand=True): + def partition(self, sep: str = " ", expand=True): result = self._data.array._str_partition(sep, expand) return self._wrap_result(result, expand=expand, returns_string=expand) @@ -992,7 +997,7 @@ def partition(self, sep=" ", expand=True): } ) @forbid_nonstring_types(["bytes"]) - def rpartition(self, sep=" ", expand=True): + def rpartition(self, sep: str = " ", expand=True): result = self._data.array._str_rpartition(sep, expand) return self._wrap_result(result, expand=expand, returns_string=expand) @@ -1552,7 +1557,12 @@ def repeat(self, repeats): return self._wrap_result(result) @forbid_nonstring_types(["bytes"]) - def pad(self, width, side="left", fillchar=" "): + def pad( + self, + width, + side: Literal["left", "right", "both"] = "left", + fillchar: str = " ", + ): """ Pad strings in the Series/Index up to width. @@ -1641,17 +1651,17 @@ def pad(self, width, side="left", fillchar=" "): @Appender(_shared_docs["str_pad"] % {"side": "left and right", "method": "center"}) @forbid_nonstring_types(["bytes"]) - def center(self, width, fillchar=" "): + def center(self, width, fillchar: str = " "): return self.pad(width, side="both", fillchar=fillchar) @Appender(_shared_docs["str_pad"] % {"side": "right", "method": "ljust"}) @forbid_nonstring_types(["bytes"]) - def ljust(self, width, fillchar=" "): + def ljust(self, width, fillchar: str = " "): return self.pad(width, side="right", fillchar=fillchar) @Appender(_shared_docs["str_pad"] % {"side": "left", "method": "rjust"}) @forbid_nonstring_types(["bytes"]) - def rjust(self, width, fillchar=" "): + def rjust(self, width, fillchar: str = " "): return self.pad(width, side="left", fillchar=fillchar) @forbid_nonstring_types(["bytes"]) @@ -1873,7 +1883,7 @@ def slice_replace(self, start=None, stop=None, repl=None): result = self._data.array._str_slice_replace(start, stop, repl) return self._wrap_result(result) - def decode(self, encoding, errors="strict"): + def decode(self, encoding, errors: str = "strict"): """ Decode character string in the Series/Index using indicated encoding. @@ -1902,7 +1912,7 @@ def decode(self, encoding, errors="strict"): return self._wrap_result(result) @forbid_nonstring_types(["bytes"]) - def encode(self, encoding, errors="strict"): + def encode(self, encoding, errors: str = "strict"): """ Encode character string in the Series/Index using indicated encoding. @@ -2151,7 +2161,7 @@ def wrap(self, width, **kwargs): return self._wrap_result(result) @forbid_nonstring_types(["bytes"]) - def get_dummies(self, sep="|"): + def get_dummies(self, sep: str = "|"): """ Return DataFrame of dummy/indicator variables for Series. diff --git a/pandas/core/strings/base.py b/pandas/core/strings/base.py index ef0c3f8c2321d..6e9180e6927ee 100644 --- a/pandas/core/strings/base.py +++ b/pandas/core/strings/base.py @@ -3,7 +3,10 @@ import abc from collections.abc import Callable # noqa: PDF001 import re -from typing import TYPE_CHECKING +from typing import ( + TYPE_CHECKING, + Literal, +) import numpy as np @@ -40,7 +43,12 @@ def _str_count(self, pat, flags=0): pass @abc.abstractmethod - def _str_pad(self, width, side="left", fillchar=" "): + def _str_pad( + self, + width, + side: Literal["left", "right", "both"] = "left", + fillchar: str = " ", + ): pass @abc.abstractmethod @@ -88,7 +96,7 @@ def _str_fullmatch( pass @abc.abstractmethod - def _str_encode(self, encoding, errors="strict"): + def _str_encode(self, encoding, errors: str = "strict"): pass @abc.abstractmethod @@ -148,7 +156,7 @@ def _str_wrap(self, width, **kwargs): pass @abc.abstractmethod - def _str_get_dummies(self, sep="|"): + def _str_get_dummies(self, sep: str = "|"): pass @abc.abstractmethod diff --git a/pandas/core/strings/object_array.py b/pandas/core/strings/object_array.py index 407357d2c79e3..e15b0fd170230 100644 --- a/pandas/core/strings/object_array.py +++ b/pandas/core/strings/object_array.py @@ -3,7 +3,10 @@ from collections.abc import Callable # noqa: PDF001 import re import textwrap -from typing import TYPE_CHECKING +from typing import ( + TYPE_CHECKING, + Literal, +) import unicodedata import numpy as np @@ -103,7 +106,12 @@ def _str_count(self, pat, flags=0): f = lambda x: len(regex.findall(x)) return self._str_map(f, dtype="int64") - def _str_pad(self, width, side="left", fillchar=" "): + def _str_pad( + self, + width, + side: Literal["left", "right", "both"] = "left", + fillchar: str = " ", + ): if side == "left": f = lambda x: x.rjust(width, fillchar) elif side == "right": @@ -218,7 +226,7 @@ def _str_fullmatch( f = lambda x: regex.fullmatch(x) is not None return self._str_map(f, na_value=na, dtype=np.dtype(bool)) - def _str_encode(self, encoding, errors="strict"): + def _str_encode(self, encoding, errors: str = "strict"): f = lambda x: x.encode(encoding, errors=errors) return self._str_map(f, dtype=object) @@ -354,7 +362,7 @@ def _str_wrap(self, width, **kwargs): tw = textwrap.TextWrapper(**kwargs) return self._str_map(lambda s: "\n".join(tw.wrap(s))) - def _str_get_dummies(self, sep="|"): + def _str_get_dummies(self, sep: str = "|"): from pandas import Series arr = Series(self).fillna("") diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 78e12c96ceee8..e55188c782256 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -329,7 +329,7 @@ def _convert_listlike_datetimes( name: Hashable = None, tz: Timezone | None = None, unit: str | None = None, - errors: str = "raise", + errors: DateTimeErrorChoices = "raise", infer_datetime_format: bool = False, dayfirst: bool | None = None, yearfirst: bool | None = None, @@ -713,7 +713,7 @@ def to_datetime( exact: bool = True, unit: str | None = None, infer_datetime_format: bool = False, - origin="unix", + origin: str = "unix", cache: bool = True, ) -> DatetimeIndex | Series | DatetimeScalar | NaTType | None: """ @@ -1306,7 +1306,9 @@ def calc_with_mask(carg, mask): return None -def to_time(arg, format=None, infer_time_format=False, errors="raise"): +def to_time( + arg, format=None, infer_time_format=False, errors: DateTimeErrorChoices = "raise" +): # GH#34145 warnings.warn( "`to_time` has been moved, should be imported from pandas.core.tools.times. " diff --git a/pandas/core/tools/numeric.py b/pandas/core/tools/numeric.py index ef7f4bc92e25b..9b158a6b5aed2 100644 --- a/pandas/core/tools/numeric.py +++ b/pandas/core/tools/numeric.py @@ -1,9 +1,14 @@ from __future__ import annotations +from typing import Literal + import numpy as np from pandas._libs import lib -from pandas._typing import npt +from pandas._typing import ( + DateTimeErrorChoices, + npt, +) from pandas.core.dtypes.cast import maybe_downcast_numeric from pandas.core.dtypes.common import ( @@ -25,7 +30,11 @@ from pandas.core.arrays.numeric import NumericArray -def to_numeric(arg, errors="raise", downcast=None): +def to_numeric( + arg, + errors: DateTimeErrorChoices = "raise", + downcast: Literal["integer", "signed", "unsigned", "float"] | None = None, +): """ Convert argument to a numeric type. diff --git a/pandas/core/tools/timedeltas.py b/pandas/core/tools/timedeltas.py index 5026c97c0b2b0..705c77090e168 100644 --- a/pandas/core/tools/timedeltas.py +++ b/pandas/core/tools/timedeltas.py @@ -211,7 +211,9 @@ def to_timedelta( return _coerce_scalar_to_timedelta_type(arg, unit=unit, errors=errors) -def _coerce_scalar_to_timedelta_type(r, unit="ns", errors="raise"): +def _coerce_scalar_to_timedelta_type( + r, unit: UnitChoices | None = "ns", errors: DateTimeErrorChoices = "raise" +): """Convert string 'r' to a timedelta object.""" result: Timedelta | NaTType @@ -229,7 +231,9 @@ def _coerce_scalar_to_timedelta_type(r, unit="ns", errors="raise"): return result -def _convert_listlike(arg, unit=None, errors="raise", name=None): +def _convert_listlike( + arg, unit=None, errors: DateTimeErrorChoices = "raise", name=None +): """Convert a list of objects to a timedelta index object.""" if isinstance(arg, (list, tuple)) or not hasattr(arg, "dtype"): # This is needed only to ensure that in the case where we end up diff --git a/pandas/core/tools/times.py b/pandas/core/tools/times.py index 87667921bf75a..01254ddf4d13e 100644 --- a/pandas/core/tools/times.py +++ b/pandas/core/tools/times.py @@ -8,6 +8,7 @@ import numpy as np from pandas._libs.lib import is_list_like +from pandas._typing import DateTimeErrorChoices from pandas.core.dtypes.generic import ( ABCIndex, @@ -16,7 +17,9 @@ from pandas.core.dtypes.missing import notna -def to_time(arg, format=None, infer_time_format=False, errors="raise"): +def to_time( + arg, format=None, infer_time_format=False, errors: DateTimeErrorChoices = "raise" +): """ Parse time strings to time objects using fixed strptime formats ("%H:%M", "%H%M", "%I:%M%p", "%I%M%p", "%H:%M:%S", "%H%M%S", "%I:%M:%S%p", diff --git a/pandas/core/window/ewm.py b/pandas/core/window/ewm.py index 32559d0d88bcf..6fae119bffdf1 100644 --- a/pandas/core/window/ewm.py +++ b/pandas/core/window/ewm.py @@ -447,7 +447,7 @@ def _get_window_indexer(self) -> BaseIndexer: return ExponentialMovingWindowIndexer() def online( - self, engine="numba", engine_kwargs=None + self, engine: str = "numba", engine_kwargs=None ) -> OnlineExponentialMovingWindow: """ Return an ``OnlineExponentialMovingWindow`` object to calculate diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 912c088d8b520..fb554fa4b4a5e 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -1857,7 +1857,7 @@ def _format_strings(self) -> list[str]: def get_format_timedelta64( values: np.ndarray | TimedeltaIndex | TimedeltaArray, - nat_rep: str = "NaT", + nat_rep: object = "NaT", box: bool = False, ) -> Callable: """ @@ -1888,7 +1888,7 @@ def get_format_timedelta64( def _formatter(x): if x is None or (is_scalar(x) and isna(x)): - return nat_rep + return str(nat_rep) if not isinstance(x, Timedelta): x = Timedelta(x) diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 4cda523987020..19f377580e599 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -9,6 +9,7 @@ import inspect import operator from typing import ( + TYPE_CHECKING, Any, Callable, Generator, @@ -70,6 +71,9 @@ refactor_levels, ) +if TYPE_CHECKING: + from matplotlib.colors import Colormap + try: import matplotlib as mpl import matplotlib.pyplot as plt @@ -2995,7 +2999,7 @@ def _get_numeric_subset_default(self): @Substitution(subset=subset) def background_gradient( self, - cmap="PuBu", + cmap: str | Colormap = "PuBu", low: float = 0, high: float = 0, axis: Axis | None = 0, @@ -3150,7 +3154,7 @@ def background_gradient( ) def text_gradient( self, - cmap="PuBu", + cmap: str | Colormap = "PuBu", low: float = 0, high: float = 0, axis: Axis | None = 0, @@ -3906,7 +3910,7 @@ def _validate_apply_axis_arg( def _background_gradient( data, - cmap="PuBu", + cmap: str | Colormap = "PuBu", low: float = 0, high: float = 0, text_color_threshold: float = 0.408, diff --git a/pandas/io/formats/style_render.py b/pandas/io/formats/style_render.py index 07a09677caf13..75e872d986556 100644 --- a/pandas/io/formats/style_render.py +++ b/pandas/io/formats/style_render.py @@ -621,12 +621,12 @@ def _translate_body(self, idx_lengths: dict, max_rows: int, max_cols: int): def _check_trim( self, - count, - max, - obj, - element, - css=None, - value="...", + count: int, + max: int, + obj: list, + element: str, + css: str | None = None, + value: str = "...", ) -> bool: """ Indicates whether to break render loops and append a trimming indicator @@ -1503,7 +1503,7 @@ def alias_(x, value): def _element( html_element: str, - html_class: str, + html_class: str | None, value: Any, is_visible: bool, **kwargs, diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py index d0ec419c3b392..f1f58bdaadfa3 100644 --- a/pandas/io/parquet.py +++ b/pandas/io/parquet.py @@ -3,7 +3,10 @@ import io import os -from typing import Any +from typing import ( + Any, + Literal, +) from warnings import catch_warnings from pandas._typing import ( @@ -270,7 +273,7 @@ def write( self, df: DataFrame, path, - compression="snappy", + compression: Literal["snappy", "gzip", "brotli"] | None = "snappy", index=None, partition_cols=None, storage_options: StorageOptions = None, diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 9e940dc6d4110..3a541de48972e 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -1530,7 +1530,7 @@ def get_storer(self, key: str) -> GenericFixed | Table: def copy( self, file, - mode="w", + mode: str = "w", propindexes: bool = True, keys=None, complib=None, diff --git a/pandas/io/sql.py b/pandas/io/sql.py index b4432abd1061a..887df1bfb125c 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -18,6 +18,7 @@ TYPE_CHECKING, Any, Iterator, + Literal, cast, overload, ) @@ -603,7 +604,7 @@ def to_sql( name: str, con, schema: str | None = None, - if_exists: str = "fail", + if_exists: Literal["fail", "replace", "append"] = "fail", index: bool = True, index_label: IndexLabel = None, chunksize: int | None = None, @@ -784,7 +785,7 @@ def __init__( pandas_sql_engine, frame=None, index: bool | str | list[str] | None = True, - if_exists: str = "fail", + if_exists: Literal["fail", "replace", "append"] = "fail", prefix: str = "pandas", index_label=None, schema=None, @@ -1269,7 +1270,7 @@ def to_sql( self, frame, name, - if_exists: str = "fail", + if_exists: Literal["fail", "replace", "append"] = "fail", index: bool = True, index_label=None, schema=None, @@ -1589,7 +1590,7 @@ def prep_table( self, frame, name, - if_exists="fail", + if_exists: Literal["fail", "replace", "append"] = "fail", index=True, index_label=None, schema=None, @@ -1666,14 +1667,14 @@ def to_sql( self, frame, name, - if_exists: str = "fail", + if_exists: Literal["fail", "replace", "append"] = "fail", index: bool = True, index_label=None, schema=None, chunksize=None, dtype: DtypeArg | None = None, method=None, - engine="auto", + engine: str = "auto", **engine_kwargs, ) -> int | None: """ diff --git a/pandas/plotting/_matplotlib/boxplot.py b/pandas/plotting/_matplotlib/boxplot.py index cd34f2264f44f..e15368189df76 100644 --- a/pandas/plotting/_matplotlib/boxplot.py +++ b/pandas/plotting/_matplotlib/boxplot.py @@ -3,6 +3,7 @@ import inspect from typing import ( TYPE_CHECKING, + Collection, Literal, NamedTuple, ) @@ -11,6 +12,7 @@ from matplotlib.artist import setp import numpy as np +from pandas._typing import MatplotlibColor from pandas.util._exceptions import find_stack_level from pandas.core.dtypes.common import is_dict_like @@ -68,8 +70,11 @@ def _args_adjust(self) -> None: else: self.sharey = False + # error: Signature of "_plot" incompatible with supertype "MPLPlot" @classmethod - def _plot(cls, ax, y, column_num=None, return_type="axes", **kwds): + def _plot( # type: ignore[override] + cls, ax, y, column_num=None, return_type: str = "axes", **kwds + ): if y.ndim == 2: y = [remove_na_arraylike(v) for v in y] # Boxplot fails with empty arrays, so need to add a NaN @@ -118,7 +123,14 @@ def _validate_color_args(self): self._medians_c = colors[2] self._caps_c = colors[0] - def _get_colors(self, num_colors=None, color_kwds="color") -> None: + def _get_colors( + self, + num_colors=None, + color_kwds: dict[str, MatplotlibColor] + | MatplotlibColor + | Collection[MatplotlibColor] + | None = "color", + ) -> None: pass def maybe_color_bp(self, bp) -> None: diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index 2b90c6dd66540..aba0662ba361f 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -985,7 +985,11 @@ def _apply_style_colors(self, colors, kwds, col_num, label): kwds["color"] = colors[col_num % len(colors)] return style, kwds - def _get_colors(self, num_colors=None, color_kwds="color"): + def _get_colors( + self, + num_colors: int | None = None, + color_kwds: str = "color", + ): if num_colors is None: num_colors = self.nseries diff --git a/pandas/plotting/_matplotlib/style.py b/pandas/plotting/_matplotlib/style.py index 2f29aafbdf5cf..4d8b63056c7be 100644 --- a/pandas/plotting/_matplotlib/style.py +++ b/pandas/plotting/_matplotlib/style.py @@ -6,8 +6,6 @@ TYPE_CHECKING, Collection, Iterator, - Sequence, - Union, cast, ) import warnings @@ -16,6 +14,7 @@ import matplotlib.colors import numpy as np +from pandas._typing import MatplotlibColor as Color from pandas.util._exceptions import find_stack_level from pandas.core.dtypes.common import is_list_like @@ -26,9 +25,6 @@ from matplotlib.colors import Colormap -Color = Union[str, Sequence[float]] - - def get_standard_colors( num_colors: int, colormap: Colormap | None = None, diff --git a/pyright_reportGeneralTypeIssues.json b/pyright_reportGeneralTypeIssues.json index 4a82c579262e3..9bb53c31d2e55 100644 --- a/pyright_reportGeneralTypeIssues.json +++ b/pyright_reportGeneralTypeIssues.json @@ -17,6 +17,7 @@ # and all files that currently don't pass "pandas/_testing/__init__.py", "pandas/_testing/_hypothesis.py", + "pandas/_testing/_io.py", "pandas/compat/pickle_compat.py", "pandas/core/algorithms.py", "pandas/core/apply.py", @@ -79,6 +80,7 @@ "pandas/core/sorting.py", "pandas/core/strings/accessor.py", "pandas/core/tools/datetimes.py", + "pandas/core/tools/numeric.py", "pandas/core/tools/timedeltas.py", "pandas/core/util/hashing.py", "pandas/core/window/ewm.py", From f826ccfb39e191ffd9a6ef31b7bc67dc44b0b665 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Mon, 12 Sep 2022 16:09:15 -0400 Subject: [PATCH 2/8] na_rep: back to str --- pandas/_typing.py | 1 + pandas/core/arrays/categorical.py | 3 ++- pandas/core/arrays/datetimelike.py | 3 ++- pandas/core/arrays/datetimes.py | 3 ++- pandas/core/arrays/period.py | 3 ++- pandas/core/arrays/timedeltas.py | 3 ++- pandas/core/frame.py | 15 ++++++++------- pandas/core/generic.py | 15 ++++++++------- pandas/core/indexes/base.py | 7 ++++--- pandas/core/indexes/category.py | 7 +++---- pandas/core/indexes/datetimelike.py | 5 +++-- pandas/core/indexes/interval.py | 5 +++-- pandas/core/indexes/multi.py | 9 ++++----- pandas/core/indexes/numeric.py | 3 ++- pandas/core/indexes/range.py | 3 ++- pandas/core/internals/blocks.py | 5 +++-- pandas/core/series.py | 7 ++++--- pandas/io/formats/excel.py | 3 ++- pandas/io/formats/format.py | 27 ++++++++++++++------------- pandas/io/formats/style.py | 7 ++++--- pandas/io/formats/style_render.py | 7 ++++--- pandas/io/formats/xml.py | 3 ++- 22 files changed, 81 insertions(+), 63 deletions(-) diff --git a/pandas/_typing.py b/pandas/_typing.py index b35060e57ed07..360f30904d474 100644 --- a/pandas/_typing.py +++ b/pandas/_typing.py @@ -340,3 +340,4 @@ def closed(self) -> bool: TimeNonexistent = Union[ Literal["shift_forward", "shift_backward", "NaT", "raise"], timedelta ] +NaRep = str diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 31e7e97422b65..698e51cec0a24 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -40,6 +40,7 @@ ArrayLike, AstypeArg, Dtype, + NaRep, NpDtype, Ordered, Shape, @@ -2199,7 +2200,7 @@ def _repr_footer(self) -> str: return f"Length: {len(self)}\n{info}" def _get_repr( - self, length: bool = True, na_rep: str = "NaN", footer: bool = True + self, length: bool = True, na_rep: NaRep = "NaN", footer: bool = True ) -> str: from pandas.io.formats import format as fmt diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index bd0a118b0a81e..be099a5e484e7 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -55,6 +55,7 @@ DatetimeLikeScalar, Dtype, DtypeObj, + NaRep, NpDtype, PositionalIndexer2D, PositionalIndexerTuple, @@ -313,7 +314,7 @@ def asi8(self) -> npt.NDArray[np.int64]: # Rendering Methods def _format_native_types( - self, *, na_rep: object = "NaT", date_format=None + self, *, na_rep: NaRep | float = "NaT", date_format=None ) -> npt.NDArray[np.object_]: """ Helper method for astype when converting to strings. diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 691b5ed5993b0..ebcccef012d43 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -44,6 +44,7 @@ from pandas._typing import ( DateTimeErrorChoices, IntervalClosedType, + NaRep, TimeAmbiguous, TimeNonexistent, npt, @@ -648,7 +649,7 @@ def astype(self, dtype, copy: bool = True): # Rendering Methods def _format_native_types( - self, *, na_rep: object = "NaT", date_format=None, **kwargs + self, *, na_rep: NaRep | float = "NaT", date_format=None, **kwargs ) -> npt.NDArray[np.object_]: from pandas.io.formats.format import get_format_datetime64_from_values diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 92a7f972941e4..7ba7266b71b5e 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -48,6 +48,7 @@ from pandas._typing import ( AnyArrayLike, Dtype, + NaRep, NpDtype, npt, ) @@ -642,7 +643,7 @@ def _formatter(self, boxed: bool = False): @dtl.ravel_compat def _format_native_types( - self, *, na_rep: object = "NaT", date_format=None, **kwargs + self, *, na_rep: NaRep | float = "NaT", date_format=None, **kwargs ) -> npt.NDArray[np.object_]: """ actually format my specific types diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 9fc2ccdd6cb65..2f54c50a5f048 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -34,6 +34,7 @@ from pandas._typing import ( DateTimeErrorChoices, DtypeObj, + NaRep, NpDtype, npt, ) @@ -373,7 +374,7 @@ def _formatter(self, boxed: bool = False): return get_format_timedelta64(self, box=True) def _format_native_types( - self, *, na_rep: object = "NaT", date_format=None, **kwargs + self, *, na_rep: NaRep | float = "NaT", date_format=None, **kwargs ) -> npt.NDArray[np.object_]: from pandas.io.formats.format import get_format_timedelta64 diff --git a/pandas/core/frame.py b/pandas/core/frame.py index b0d1d9c9f6c9c..561d2a4a3ddb1 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -69,6 +69,7 @@ IndexLabel, Level, NaPosition, + NaRep, PythonFuncType, QuantileInterpolation, ReadBuffer, @@ -1115,7 +1116,7 @@ def to_string( col_space: int | list[int] | dict[Hashable, int] | None = ..., header: bool | Sequence[str] = ..., index: bool = ..., - na_rep: str = ..., + na_rep: NaRep = ..., formatters: fmt.FormattersType | None = ..., float_format: fmt.FloatFormatType | None = ..., sparsify: bool | None = ..., @@ -1140,7 +1141,7 @@ def to_string( col_space: int | list[int] | dict[Hashable, int] | None = ..., header: bool | Sequence[str] = ..., index: bool = ..., - na_rep: str = ..., + na_rep: NaRep = ..., formatters: fmt.FormattersType | None = ..., float_format: fmt.FloatFormatType | None = ..., sparsify: bool | None = ..., @@ -1175,7 +1176,7 @@ def to_string( col_space: int | list[int] | dict[Hashable, int] | None = None, header: bool | Sequence[str] = True, index: bool = True, - na_rep: str = "NaN", + na_rep: NaRep = "NaN", formatters: fmt.FormattersType | None = None, float_format: fmt.FloatFormatType | None = None, sparsify: bool | None = None, @@ -3079,7 +3080,7 @@ def to_html( col_space: ColspaceArgType | None = ..., header: bool | Sequence[str] = ..., index: bool = ..., - na_rep: str = ..., + na_rep: NaRep = ..., formatters: FormattersType | None = ..., float_format: FloatFormatType | None = ..., sparsify: bool | None = ..., @@ -3108,7 +3109,7 @@ def to_html( col_space: ColspaceArgType | None = ..., header: bool | Sequence[str] = ..., index: bool = ..., - na_rep: str = ..., + na_rep: NaRep = ..., formatters: FormattersType | None = ..., float_format: FloatFormatType | None = ..., sparsify: bool | None = ..., @@ -3146,7 +3147,7 @@ def to_html( col_space: ColspaceArgType | None = None, header: bool | Sequence[str] = True, index: bool = True, - na_rep: str = "NaN", + na_rep: NaRep = "NaN", formatters: FormattersType | None = None, float_format: FloatFormatType | None = None, sparsify: bool | None = None, @@ -3235,7 +3236,7 @@ def to_xml( index: bool = True, root_name: str | None = "data", row_name: str | None = "row", - na_rep: str | None = None, + na_rep: NaRep | None = None, attr_cols: list[str] | None = None, elem_cols: list[str] | None = None, namespaces: dict[str | None, str] | None = None, diff --git a/pandas/core/generic.py b/pandas/core/generic.py index c5ab827469e42..1b901b5b3bfc3 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -62,6 +62,7 @@ Level, Manager, NaPosition, + NaRep, NDFrameT, RandomState, Renamer, @@ -2213,7 +2214,7 @@ def to_excel( self, excel_writer, sheet_name: str = "Sheet1", - na_rep: str = "", + na_rep: NaRep = "", float_format: str | None = None, columns: Sequence[Hashable] | None = None, header: Sequence[Hashable] | bool_t = True, @@ -3236,7 +3237,7 @@ def to_latex( col_space: ColspaceArgType | None = ..., header: bool_t | Sequence[str] = ..., index: bool_t = ..., - na_rep: str = ..., + na_rep: NaRep = ..., formatters: FormattersType | None = ..., float_format: FloatFormatType | None = ..., sparsify: bool_t | None = ..., @@ -3264,7 +3265,7 @@ def to_latex( col_space: ColspaceArgType | None = ..., header: bool_t | Sequence[str] = ..., index: bool_t = ..., - na_rep: str = ..., + na_rep: NaRep = ..., formatters: FormattersType | None = ..., float_format: FloatFormatType | None = ..., sparsify: bool_t | None = ..., @@ -3293,7 +3294,7 @@ def to_latex( col_space: ColspaceArgType | None = None, header: bool_t | Sequence[str] = True, index: bool_t = True, - na_rep: str = "NaN", + na_rep: NaRep = "NaN", formatters: FormattersType | None = None, float_format: FloatFormatType | None = None, sparsify: bool_t | None = None, @@ -3488,7 +3489,7 @@ def to_csv( self, path_or_buf: None = ..., sep: str = ..., - na_rep: str = ..., + na_rep: NaRep = ..., float_format: str | Callable | None = ..., columns: Sequence[Hashable] | None = ..., header: bool_t | list[str] = ..., @@ -3515,7 +3516,7 @@ def to_csv( self, path_or_buf: FilePath | WriteBuffer[bytes] | WriteBuffer[str], sep: str = ..., - na_rep: str = ..., + na_rep: NaRep = ..., float_format: str | Callable | None = ..., columns: Sequence[Hashable] | None = ..., header: bool_t | list[str] = ..., @@ -3547,7 +3548,7 @@ def to_csv( self, path_or_buf: FilePath | WriteBuffer[bytes] | WriteBuffer[str] | None = None, sep: str = ",", - na_rep: str = "", + na_rep: NaRep = "", float_format: str | Callable | None = None, columns: Sequence[Hashable] | None = None, header: bool_t | list[str] = True, diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index ce2a52e6e52b7..682a4e5cdd85f 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -53,6 +53,7 @@ F, IgnoreRaise, Level, + NaRep, Shape, npt, ) @@ -1441,7 +1442,7 @@ def format( self, name: bool = False, formatter: Callable | None = None, - na_rep: object = "NaN", + na_rep: NaRep = "NaN", ) -> list[str_t]: """ Render a string representation of the Index. @@ -1459,7 +1460,7 @@ def format( return self._format_with_header(header, na_rep=na_rep) - def _format_with_header(self, header: list[str_t], na_rep: object) -> list[str_t]: + def _format_with_header(self, header: list[str_t], na_rep: NaRep) -> list[str_t]: from pandas.io.formats.format import format_array values = self._values @@ -1520,7 +1521,7 @@ def to_native_types(self, slicer=None, **kwargs) -> np.ndarray: return values._format_native_types(**kwargs) def _format_native_types( - self, *, na_rep: object = "", quoting=None, **kwargs + self, *, na_rep: NaRep = "", quoting=None, **kwargs ) -> npt.NDArray[np.object_]: """ Actually format specific types of the index. diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index ef3e545505bf0..58a74ccb5dfcf 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -13,6 +13,7 @@ from pandas._typing import ( Dtype, DtypeObj, + NaRep, npt, ) from pandas.util._decorators import ( @@ -358,11 +359,9 @@ def _format_attrs(self): extra = super()._format_attrs() return attrs + extra - def _format_with_header(self, header: list[str], na_rep: object) -> list[str]: + def _format_with_header(self, header: list[str], na_rep: NaRep) -> list[str]: result = [ - pprint_thing(x, escape_chars=("\t", "\r", "\n")) - if notna(x) - else str(na_rep) + pprint_thing(x, escape_chars=("\t", "\r", "\n")) if notna(x) else na_rep for x in self._values ] return header + result diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 70ba2b13154f4..2c706bea14b1d 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -30,6 +30,7 @@ parsing, to_offset, ) +from pandas._typing import NaRep from pandas.compat.numpy import function as nv from pandas.util._decorators import ( Appender, @@ -155,7 +156,7 @@ def format( self, name: bool = False, formatter: Callable | None = None, - na_rep: object = "NaT", + na_rep: NaRep = "NaT", date_format: str | None = None, ) -> list[str]: """ @@ -175,7 +176,7 @@ def format( return self._format_with_header(header, na_rep=na_rep, date_format=date_format) def _format_with_header( - self, header: list[str], na_rep: object = "NaT", date_format: str | None = None + self, header: list[str], na_rep: NaRep = "NaT", date_format: str | None = None ) -> list[str]: # matches base class except for whitespace padding and date_format return header + list( diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 6b7133dfeaf8e..3244047713b5a 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -30,6 +30,7 @@ Dtype, DtypeObj, IntervalClosedType, + NaRep, npt, ) from pandas.errors import InvalidIndexError @@ -816,12 +817,12 @@ def length(self) -> Index: # Rendering Methods # __repr__ associated methods are based on MultiIndex - def _format_with_header(self, header: list[str], na_rep: object) -> list[str]: + def _format_with_header(self, header: list[str], na_rep: NaRep) -> list[str]: # matches base class except for whitespace padding return header + list(self._format_native_types(na_rep=na_rep)) def _format_native_types( - self, *, na_rep: object = "NaN", quoting=None, **kwargs + self, *, na_rep: NaRep = "NaN", quoting=None, **kwargs ) -> npt.NDArray[np.object_]: # GH 28210: use base method but with different default na_rep return super()._format_native_types(na_rep=na_rep, quoting=quoting, **kwargs) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index e43f225de12e6..67cb7b918344f 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -34,6 +34,7 @@ DtypeObj, F, IgnoreRaise, + NaRep, Scalar, Shape, npt, @@ -1321,7 +1322,7 @@ def _formatter_func(self, tup): return tuple(func(val) for func, val in zip(formatter_funcs, tup)) def _format_native_types( - self, *, na_rep: object = "nan", **kwargs + self, *, na_rep: NaRep = "nan", **kwargs ) -> npt.NDArray[np.object_]: new_levels = [] new_codes = [] @@ -1335,9 +1336,7 @@ def _format_native_types( nan_index = len(level_strs) # numpy 1.21 deprecated implicit string casting level_strs = level_strs.astype(str) - # error: Argument 2 to "append" has incompatible type "object"; - # expected "Union[...]" - level_strs = np.append(level_strs, na_rep) # type: ignore[arg-type] + level_strs = np.append(level_strs, na_rep) assert not level_codes.flags.writeable # i.e. copy is needed level_codes = level_codes.copy() # make writeable level_codes[mask] = nan_index @@ -1362,7 +1361,7 @@ def format( self, name: bool | None = None, formatter: Callable | None = None, - na_rep: object = None, + na_rep: NaRep | None = None, names: bool = False, space: int = 2, sparsify=None, diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index c37fb4929015b..a459b1dc610a2 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -15,6 +15,7 @@ ) from pandas._typing import ( Dtype, + NaRep, npt, ) from pandas.util._decorators import ( @@ -283,7 +284,7 @@ def _assert_safe_casting(cls, data: np.ndarray, subarr: np.ndarray) -> None: def _format_native_types( self, *, - na_rep: object = "", + na_rep: NaRep = "", float_format=None, decimal: str = ".", quoting=None, diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 0e7917ccca0b7..d7c953c370925 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -25,6 +25,7 @@ from pandas._libs.lib import no_default from pandas._typing import ( Dtype, + NaRep, npt, ) from pandas.compat.numpy import function as nv @@ -228,7 +229,7 @@ def _format_data(self, name=None): # we are formatting thru the attributes return None - def _format_with_header(self, header: list[str], na_rep: object) -> list[str]: + def _format_with_header(self, header: list[str], na_rep: NaRep) -> list[str]: # Equivalent to Index implementation, but faster if not len(self._range): return header diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index e54ed1e60a595..e329599920fed 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -30,6 +30,7 @@ F, FillnaOptions, IgnoreRaise, + NaRep, QuantileInterpolation, Shape, npt, @@ -539,7 +540,7 @@ def astype( return newb @final - def to_native_types(self, na_rep: object = "nan", quoting=None, **kwargs) -> Block: + def to_native_types(self, na_rep: NaRep = "nan", quoting=None, **kwargs) -> Block: """convert to our native types format""" result = to_native_types(self.values, na_rep=na_rep, quoting=quoting, **kwargs) return self.make_block(result) @@ -2286,7 +2287,7 @@ def ensure_block_shape(values: ArrayLike, ndim: int = 1) -> ArrayLike: def to_native_types( values: ArrayLike, *, - na_rep: object = "nan", + na_rep: NaRep = "nan", quoting=None, float_format=None, decimal: str = ".", diff --git a/pandas/core/series.py b/pandas/core/series.py index fc97a8f04e0cc..f6388f1636662 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -49,6 +49,7 @@ IndexLabel, Level, NaPosition, + NaRep, QuantileInterpolation, Renamer, SingleManager, @@ -1601,7 +1602,7 @@ def __repr__(self) -> str: def to_string( self, buf: None = ..., - na_rep: str = ..., + na_rep: NaRep = ..., float_format: str | None = ..., header: bool = ..., index: bool = ..., @@ -1617,7 +1618,7 @@ def to_string( def to_string( self, buf: FilePath | WriteBuffer[str], - na_rep: str = ..., + na_rep: NaRep = ..., float_format: str | None = ..., header: bool = ..., index: bool = ..., @@ -1632,7 +1633,7 @@ def to_string( def to_string( self, buf: FilePath | WriteBuffer[str] | None = None, - na_rep: str = "NaN", + na_rep: NaRep = "NaN", float_format: str | None = None, header: bool = True, index: bool = True, diff --git a/pandas/io/formats/excel.py b/pandas/io/formats/excel.py index c4ddac088d901..021061f2b69ef 100644 --- a/pandas/io/formats/excel.py +++ b/pandas/io/formats/excel.py @@ -26,6 +26,7 @@ from pandas._libs.lib import is_list_like from pandas._typing import ( IndexLabel, + NaRep, StorageOptions, ) from pandas.util._decorators import doc @@ -499,7 +500,7 @@ class ExcelFormatter: def __init__( self, df, - na_rep: str = "", + na_rep: NaRep = "", float_format: str | None = None, cols: Sequence[Hashable] | None = None, header: Sequence[Hashable] | bool = True, diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index fb554fa4b4a5e..87d6cb406fa94 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -59,6 +59,7 @@ FloatFormatType, FormattersType, IndexLabel, + NaRep, StorageOptions, WriteBuffer, ) @@ -207,7 +208,7 @@ def __init__( categorical: Categorical, buf: IO[str] | None = None, length: bool = True, - na_rep: str = "NaN", + na_rep: NaRep = "NaN", footer: bool = True, ) -> None: self.categorical = categorical @@ -273,7 +274,7 @@ def __init__( length: bool | str = True, header: bool = True, index: bool = True, - na_rep: str = "NaN", + na_rep: NaRep = "NaN", name: bool = False, float_format: str | None = None, dtype: bool = True, @@ -570,7 +571,7 @@ def __init__( col_space: ColspaceArgType | None = None, header: bool | Sequence[str] = True, index: bool = True, - na_rep: str = "NaN", + na_rep: NaRep = "NaN", formatters: FormattersType | None = None, justify: str | None = None, float_format: FloatFormatType | None = None, @@ -1253,7 +1254,7 @@ def format_array( values: Any, formatter: Callable | None, float_format: FloatFormatType | None = None, - na_rep: str = "NaN", + na_rep: NaRep = "NaN", digits: int | None = None, space: str | int | None = None, justify: str = "right", @@ -1334,7 +1335,7 @@ def __init__( values: Any, digits: int = 7, formatter: Callable | None = None, - na_rep: str = "NaN", + na_rep: NaRep = "NaN", space: str | int = 12, float_format: FloatFormatType | None = None, justify: str = "right", @@ -1509,7 +1510,7 @@ def get_result_as_array(self) -> np.ndarray: the parameters given at initialisation, as a numpy array """ - def format_with_na_rep(values: ArrayLike, formatter: Callable, na_rep: str): + def format_with_na_rep(values: ArrayLike, formatter: Callable, na_rep: NaRep): mask = isna(values) formatted = np.array( [ @@ -1621,7 +1622,7 @@ class Datetime64Formatter(GenericArrayFormatter): def __init__( self, values: np.ndarray | Series | DatetimeIndex | DatetimeArray, - nat_rep: str = "NaT", + nat_rep: NaRep = "NaT", date_format: None = None, **kwargs, ) -> None: @@ -1769,7 +1770,7 @@ def is_dates_only(values: np.ndarray | DatetimeArray | Index | DatetimeIndex) -> return False -def _format_datetime64(x: NaTType | Timestamp, nat_rep: str = "NaT") -> str: +def _format_datetime64(x: NaTType | Timestamp, nat_rep: NaRep = "NaT") -> str: if x is NaT: return nat_rep @@ -1780,7 +1781,7 @@ def _format_datetime64(x: NaTType | Timestamp, nat_rep: str = "NaT") -> str: def _format_datetime64_dateonly( x: NaTType | Timestamp, - nat_rep: str = "NaT", + nat_rep: NaRep = "NaT", date_format: str | None = None, ) -> str: if isinstance(x, NaTType): @@ -1794,7 +1795,7 @@ def _format_datetime64_dateonly( def get_format_datetime64( - is_dates_only: bool, nat_rep: str = "NaT", date_format: str | None = None + is_dates_only: bool, nat_rep: NaRep = "NaT", date_format: str | None = None ) -> Callable: """Return a formatter callable taking a datetime64 as input and providing a string as output""" @@ -1840,7 +1841,7 @@ class Timedelta64Formatter(GenericArrayFormatter): def __init__( self, values: np.ndarray | TimedeltaIndex, - nat_rep: str = "NaT", + nat_rep: NaRep = "NaT", box: bool = False, **kwargs, ) -> None: @@ -1857,7 +1858,7 @@ def _format_strings(self) -> list[str]: def get_format_timedelta64( values: np.ndarray | TimedeltaIndex | TimedeltaArray, - nat_rep: object = "NaT", + nat_rep: NaRep | float = "NaT", box: bool = False, ) -> Callable: """ @@ -1888,7 +1889,7 @@ def get_format_timedelta64( def _formatter(x): if x is None or (is_scalar(x) and isna(x)): - return str(nat_rep) + return nat_rep if not isinstance(x, Timedelta): x = Timedelta(x) diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 19f377580e599..1c3fbcb09f087 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -29,6 +29,7 @@ FilePath, IndexLabel, Level, + NaRep, QuantileInterpolation, Scalar, StorageOptions, @@ -244,7 +245,7 @@ def __init__( caption: str | tuple | None = None, table_attributes: str | None = None, cell_ids: bool = True, - na_rep: str | None = None, + na_rep: NaRep | None = None, uuid_len: int = 5, decimal: str | None = None, thousands: str | None = None, @@ -562,7 +563,7 @@ def to_excel( self, excel_writer, sheet_name: str = "Sheet1", - na_rep: str = "", + na_rep: NaRep = "", float_format: str | None = None, columns: Sequence[Hashable] | None = None, header: Sequence[Hashable] | bool = True, @@ -2649,7 +2650,7 @@ def set_table_styles( self.table_styles = table_styles return self - def set_na_rep(self, na_rep: str) -> StylerRenderer: + def set_na_rep(self, na_rep: NaRep) -> StylerRenderer: """ Set the missing data representation on a ``Styler``. diff --git a/pandas/io/formats/style_render.py b/pandas/io/formats/style_render.py index 75e872d986556..1c09e0151e7de 100644 --- a/pandas/io/formats/style_render.py +++ b/pandas/io/formats/style_render.py @@ -25,6 +25,7 @@ from pandas._typing import ( Axis, Level, + NaRep, ) from pandas.compat._optional import import_optional_dependency @@ -928,7 +929,7 @@ def format( self, formatter: ExtFormatter | None = None, subset: Subset | None = None, - na_rep: str | None = None, + na_rep: NaRep | None = None, precision: int | None = None, decimal: str = ".", thousands: str | None = None, @@ -1159,7 +1160,7 @@ def format_index( formatter: ExtFormatter | None = None, axis: int | str = 0, level: Level | list[Level] | None = None, - na_rep: str | None = None, + na_rep: NaRep | None = None, precision: int | None = None, decimal: str = ".", thousands: str | None = None, @@ -1751,7 +1752,7 @@ def _render_href(x, format): def _maybe_wrap_formatter( formatter: BaseFormatter | None = None, - na_rep: str | None = None, + na_rep: NaRep | None = None, precision: int | None = None, decimal: str = ".", thousands: str | None = None, diff --git a/pandas/io/formats/xml.py b/pandas/io/formats/xml.py index eb1835f0392b0..8939438fd48b4 100644 --- a/pandas/io/formats/xml.py +++ b/pandas/io/formats/xml.py @@ -13,6 +13,7 @@ from pandas._typing import ( CompressionOptions, FilePath, + NaRep, ReadBuffer, StorageOptions, WriteBuffer, @@ -106,7 +107,7 @@ def __init__( index: bool = True, root_name: str | None = "data", row_name: str | None = "row", - na_rep: str | None = None, + na_rep: NaRep | None = None, attr_cols: list[str] | None = None, elem_cols: list[str] | None = None, namespaces: dict[str | None, str] | None = None, From 3ea02492c7e2c669626b84c8f78728e4f1985e30 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Tue, 13 Sep 2022 15:42:52 -0400 Subject: [PATCH 3/8] na(t)_rep is always a string --- pandas/_typing.py | 1 - pandas/core/arrays/categorical.py | 3 +-- pandas/core/arrays/datetimelike.py | 5 ++--- pandas/core/arrays/datetimes.py | 3 +-- pandas/core/arrays/period.py | 3 +-- pandas/core/arrays/timedeltas.py | 3 +-- pandas/core/frame.py | 15 +++++++-------- pandas/core/generic.py | 15 +++++++-------- pandas/core/indexes/base.py | 7 +++---- pandas/core/indexes/category.py | 3 +-- pandas/core/indexes/datetimelike.py | 5 ++--- pandas/core/indexes/interval.py | 5 ++--- pandas/core/indexes/multi.py | 5 ++--- pandas/core/indexes/numeric.py | 3 +-- pandas/core/indexes/range.py | 3 +-- pandas/core/internals/blocks.py | 5 ++--- pandas/core/series.py | 7 +++---- pandas/io/formats/excel.py | 3 +-- pandas/io/formats/format.py | 25 ++++++++++++------------- pandas/io/formats/style.py | 7 +++---- pandas/io/formats/style_render.py | 7 +++---- pandas/io/formats/xml.py | 3 +-- 22 files changed, 57 insertions(+), 79 deletions(-) diff --git a/pandas/_typing.py b/pandas/_typing.py index 360f30904d474..b35060e57ed07 100644 --- a/pandas/_typing.py +++ b/pandas/_typing.py @@ -340,4 +340,3 @@ def closed(self) -> bool: TimeNonexistent = Union[ Literal["shift_forward", "shift_backward", "NaT", "raise"], timedelta ] -NaRep = str diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 698e51cec0a24..31e7e97422b65 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -40,7 +40,6 @@ ArrayLike, AstypeArg, Dtype, - NaRep, NpDtype, Ordered, Shape, @@ -2200,7 +2199,7 @@ def _repr_footer(self) -> str: return f"Length: {len(self)}\n{info}" def _get_repr( - self, length: bool = True, na_rep: NaRep = "NaN", footer: bool = True + self, length: bool = True, na_rep: str = "NaN", footer: bool = True ) -> str: from pandas.io.formats import format as fmt diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index be099a5e484e7..edcc0cd97bd08 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -55,7 +55,6 @@ DatetimeLikeScalar, Dtype, DtypeObj, - NaRep, NpDtype, PositionalIndexer2D, PositionalIndexerTuple, @@ -314,7 +313,7 @@ def asi8(self) -> npt.NDArray[np.int64]: # Rendering Methods def _format_native_types( - self, *, na_rep: NaRep | float = "NaT", date_format=None + self, *, na_rep: str = "NaT", date_format=None ) -> npt.NDArray[np.object_]: """ Helper method for astype when converting to strings. @@ -1791,7 +1790,7 @@ def strftime(self, date_format: str) -> npt.NDArray[np.object_]: 'March 10, 2018, 09:00:02 AM'], dtype='object') """ - result = self._format_native_types(date_format=date_format, na_rep=np.nan) + result = self._format_native_types(date_format=date_format, na_rep=str(np.nan)) return result.astype(object, copy=False) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index ebcccef012d43..a51b3b3e20683 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -44,7 +44,6 @@ from pandas._typing import ( DateTimeErrorChoices, IntervalClosedType, - NaRep, TimeAmbiguous, TimeNonexistent, npt, @@ -649,7 +648,7 @@ def astype(self, dtype, copy: bool = True): # Rendering Methods def _format_native_types( - self, *, na_rep: NaRep | float = "NaT", date_format=None, **kwargs + self, *, na_rep: str = "NaT", date_format=None, **kwargs ) -> npt.NDArray[np.object_]: from pandas.io.formats.format import get_format_datetime64_from_values diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 7ba7266b71b5e..f92e48bbdc655 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -48,7 +48,6 @@ from pandas._typing import ( AnyArrayLike, Dtype, - NaRep, NpDtype, npt, ) @@ -643,7 +642,7 @@ def _formatter(self, boxed: bool = False): @dtl.ravel_compat def _format_native_types( - self, *, na_rep: NaRep | float = "NaT", date_format=None, **kwargs + self, *, na_rep: str = "NaT", date_format=None, **kwargs ) -> npt.NDArray[np.object_]: """ actually format my specific types diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 2f54c50a5f048..a26c8fde9fcca 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -34,7 +34,6 @@ from pandas._typing import ( DateTimeErrorChoices, DtypeObj, - NaRep, NpDtype, npt, ) @@ -374,7 +373,7 @@ def _formatter(self, boxed: bool = False): return get_format_timedelta64(self, box=True) def _format_native_types( - self, *, na_rep: NaRep | float = "NaT", date_format=None, **kwargs + self, *, na_rep: str = "NaT", date_format=None, **kwargs ) -> npt.NDArray[np.object_]: from pandas.io.formats.format import get_format_timedelta64 diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 561d2a4a3ddb1..b0d1d9c9f6c9c 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -69,7 +69,6 @@ IndexLabel, Level, NaPosition, - NaRep, PythonFuncType, QuantileInterpolation, ReadBuffer, @@ -1116,7 +1115,7 @@ def to_string( col_space: int | list[int] | dict[Hashable, int] | None = ..., header: bool | Sequence[str] = ..., index: bool = ..., - na_rep: NaRep = ..., + na_rep: str = ..., formatters: fmt.FormattersType | None = ..., float_format: fmt.FloatFormatType | None = ..., sparsify: bool | None = ..., @@ -1141,7 +1140,7 @@ def to_string( col_space: int | list[int] | dict[Hashable, int] | None = ..., header: bool | Sequence[str] = ..., index: bool = ..., - na_rep: NaRep = ..., + na_rep: str = ..., formatters: fmt.FormattersType | None = ..., float_format: fmt.FloatFormatType | None = ..., sparsify: bool | None = ..., @@ -1176,7 +1175,7 @@ def to_string( col_space: int | list[int] | dict[Hashable, int] | None = None, header: bool | Sequence[str] = True, index: bool = True, - na_rep: NaRep = "NaN", + na_rep: str = "NaN", formatters: fmt.FormattersType | None = None, float_format: fmt.FloatFormatType | None = None, sparsify: bool | None = None, @@ -3080,7 +3079,7 @@ def to_html( col_space: ColspaceArgType | None = ..., header: bool | Sequence[str] = ..., index: bool = ..., - na_rep: NaRep = ..., + na_rep: str = ..., formatters: FormattersType | None = ..., float_format: FloatFormatType | None = ..., sparsify: bool | None = ..., @@ -3109,7 +3108,7 @@ def to_html( col_space: ColspaceArgType | None = ..., header: bool | Sequence[str] = ..., index: bool = ..., - na_rep: NaRep = ..., + na_rep: str = ..., formatters: FormattersType | None = ..., float_format: FloatFormatType | None = ..., sparsify: bool | None = ..., @@ -3147,7 +3146,7 @@ def to_html( col_space: ColspaceArgType | None = None, header: bool | Sequence[str] = True, index: bool = True, - na_rep: NaRep = "NaN", + na_rep: str = "NaN", formatters: FormattersType | None = None, float_format: FloatFormatType | None = None, sparsify: bool | None = None, @@ -3236,7 +3235,7 @@ def to_xml( index: bool = True, root_name: str | None = "data", row_name: str | None = "row", - na_rep: NaRep | None = None, + na_rep: str | None = None, attr_cols: list[str] | None = None, elem_cols: list[str] | None = None, namespaces: dict[str | None, str] | None = None, diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 1b901b5b3bfc3..c5ab827469e42 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -62,7 +62,6 @@ Level, Manager, NaPosition, - NaRep, NDFrameT, RandomState, Renamer, @@ -2214,7 +2213,7 @@ def to_excel( self, excel_writer, sheet_name: str = "Sheet1", - na_rep: NaRep = "", + na_rep: str = "", float_format: str | None = None, columns: Sequence[Hashable] | None = None, header: Sequence[Hashable] | bool_t = True, @@ -3237,7 +3236,7 @@ def to_latex( col_space: ColspaceArgType | None = ..., header: bool_t | Sequence[str] = ..., index: bool_t = ..., - na_rep: NaRep = ..., + na_rep: str = ..., formatters: FormattersType | None = ..., float_format: FloatFormatType | None = ..., sparsify: bool_t | None = ..., @@ -3265,7 +3264,7 @@ def to_latex( col_space: ColspaceArgType | None = ..., header: bool_t | Sequence[str] = ..., index: bool_t = ..., - na_rep: NaRep = ..., + na_rep: str = ..., formatters: FormattersType | None = ..., float_format: FloatFormatType | None = ..., sparsify: bool_t | None = ..., @@ -3294,7 +3293,7 @@ def to_latex( col_space: ColspaceArgType | None = None, header: bool_t | Sequence[str] = True, index: bool_t = True, - na_rep: NaRep = "NaN", + na_rep: str = "NaN", formatters: FormattersType | None = None, float_format: FloatFormatType | None = None, sparsify: bool_t | None = None, @@ -3489,7 +3488,7 @@ def to_csv( self, path_or_buf: None = ..., sep: str = ..., - na_rep: NaRep = ..., + na_rep: str = ..., float_format: str | Callable | None = ..., columns: Sequence[Hashable] | None = ..., header: bool_t | list[str] = ..., @@ -3516,7 +3515,7 @@ def to_csv( self, path_or_buf: FilePath | WriteBuffer[bytes] | WriteBuffer[str], sep: str = ..., - na_rep: NaRep = ..., + na_rep: str = ..., float_format: str | Callable | None = ..., columns: Sequence[Hashable] | None = ..., header: bool_t | list[str] = ..., @@ -3548,7 +3547,7 @@ def to_csv( self, path_or_buf: FilePath | WriteBuffer[bytes] | WriteBuffer[str] | None = None, sep: str = ",", - na_rep: NaRep = "", + na_rep: str = "", float_format: str | Callable | None = None, columns: Sequence[Hashable] | None = None, header: bool_t | list[str] = True, diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 682a4e5cdd85f..578c14aea5d80 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -53,7 +53,6 @@ F, IgnoreRaise, Level, - NaRep, Shape, npt, ) @@ -1442,7 +1441,7 @@ def format( self, name: bool = False, formatter: Callable | None = None, - na_rep: NaRep = "NaN", + na_rep: str_t = "NaN", ) -> list[str_t]: """ Render a string representation of the Index. @@ -1460,7 +1459,7 @@ def format( return self._format_with_header(header, na_rep=na_rep) - def _format_with_header(self, header: list[str_t], na_rep: NaRep) -> list[str_t]: + def _format_with_header(self, header: list[str_t], na_rep: str_t) -> list[str_t]: from pandas.io.formats.format import format_array values = self._values @@ -1521,7 +1520,7 @@ def to_native_types(self, slicer=None, **kwargs) -> np.ndarray: return values._format_native_types(**kwargs) def _format_native_types( - self, *, na_rep: NaRep = "", quoting=None, **kwargs + self, *, na_rep: str_t = "", quoting=None, **kwargs ) -> npt.NDArray[np.object_]: """ Actually format specific types of the index. diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 58a74ccb5dfcf..d1bdedee5caa0 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -13,7 +13,6 @@ from pandas._typing import ( Dtype, DtypeObj, - NaRep, npt, ) from pandas.util._decorators import ( @@ -359,7 +358,7 @@ def _format_attrs(self): extra = super()._format_attrs() return attrs + extra - def _format_with_header(self, header: list[str], na_rep: NaRep) -> list[str]: + def _format_with_header(self, header: list[str], na_rep: str) -> list[str]: result = [ pprint_thing(x, escape_chars=("\t", "\r", "\n")) if notna(x) else na_rep for x in self._values diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 2c706bea14b1d..6867ef936d45e 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -30,7 +30,6 @@ parsing, to_offset, ) -from pandas._typing import NaRep from pandas.compat.numpy import function as nv from pandas.util._decorators import ( Appender, @@ -156,7 +155,7 @@ def format( self, name: bool = False, formatter: Callable | None = None, - na_rep: NaRep = "NaT", + na_rep: str = "NaT", date_format: str | None = None, ) -> list[str]: """ @@ -176,7 +175,7 @@ def format( return self._format_with_header(header, na_rep=na_rep, date_format=date_format) def _format_with_header( - self, header: list[str], na_rep: NaRep = "NaT", date_format: str | None = None + self, header: list[str], na_rep: str = "NaT", date_format: str | None = None ) -> list[str]: # matches base class except for whitespace padding and date_format return header + list( diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 3244047713b5a..c26c392b2ce49 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -30,7 +30,6 @@ Dtype, DtypeObj, IntervalClosedType, - NaRep, npt, ) from pandas.errors import InvalidIndexError @@ -817,12 +816,12 @@ def length(self) -> Index: # Rendering Methods # __repr__ associated methods are based on MultiIndex - def _format_with_header(self, header: list[str], na_rep: NaRep) -> list[str]: + def _format_with_header(self, header: list[str], na_rep: str) -> list[str]: # matches base class except for whitespace padding return header + list(self._format_native_types(na_rep=na_rep)) def _format_native_types( - self, *, na_rep: NaRep = "NaN", quoting=None, **kwargs + self, *, na_rep: str = "NaN", quoting=None, **kwargs ) -> npt.NDArray[np.object_]: # GH 28210: use base method but with different default na_rep return super()._format_native_types(na_rep=na_rep, quoting=quoting, **kwargs) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 67cb7b918344f..28a376ff16622 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -34,7 +34,6 @@ DtypeObj, F, IgnoreRaise, - NaRep, Scalar, Shape, npt, @@ -1322,7 +1321,7 @@ def _formatter_func(self, tup): return tuple(func(val) for func, val in zip(formatter_funcs, tup)) def _format_native_types( - self, *, na_rep: NaRep = "nan", **kwargs + self, *, na_rep: str = "nan", **kwargs ) -> npt.NDArray[np.object_]: new_levels = [] new_codes = [] @@ -1361,7 +1360,7 @@ def format( self, name: bool | None = None, formatter: Callable | None = None, - na_rep: NaRep | None = None, + na_rep: str | None = None, names: bool = False, space: int = 2, sparsify=None, diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index a459b1dc610a2..8b583dbfac30c 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -15,7 +15,6 @@ ) from pandas._typing import ( Dtype, - NaRep, npt, ) from pandas.util._decorators import ( @@ -284,7 +283,7 @@ def _assert_safe_casting(cls, data: np.ndarray, subarr: np.ndarray) -> None: def _format_native_types( self, *, - na_rep: NaRep = "", + na_rep: str = "", float_format=None, decimal: str = ".", quoting=None, diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index d7c953c370925..9f49c7456d9ce 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -25,7 +25,6 @@ from pandas._libs.lib import no_default from pandas._typing import ( Dtype, - NaRep, npt, ) from pandas.compat.numpy import function as nv @@ -229,7 +228,7 @@ def _format_data(self, name=None): # we are formatting thru the attributes return None - def _format_with_header(self, header: list[str], na_rep: NaRep) -> list[str]: + def _format_with_header(self, header: list[str], na_rep: str) -> list[str]: # Equivalent to Index implementation, but faster if not len(self._range): return header diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index e329599920fed..baa45b1119716 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -30,7 +30,6 @@ F, FillnaOptions, IgnoreRaise, - NaRep, QuantileInterpolation, Shape, npt, @@ -540,7 +539,7 @@ def astype( return newb @final - def to_native_types(self, na_rep: NaRep = "nan", quoting=None, **kwargs) -> Block: + def to_native_types(self, na_rep: str = "nan", quoting=None, **kwargs) -> Block: """convert to our native types format""" result = to_native_types(self.values, na_rep=na_rep, quoting=quoting, **kwargs) return self.make_block(result) @@ -2287,7 +2286,7 @@ def ensure_block_shape(values: ArrayLike, ndim: int = 1) -> ArrayLike: def to_native_types( values: ArrayLike, *, - na_rep: NaRep = "nan", + na_rep: str = "nan", quoting=None, float_format=None, decimal: str = ".", diff --git a/pandas/core/series.py b/pandas/core/series.py index f6388f1636662..fc97a8f04e0cc 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -49,7 +49,6 @@ IndexLabel, Level, NaPosition, - NaRep, QuantileInterpolation, Renamer, SingleManager, @@ -1602,7 +1601,7 @@ def __repr__(self) -> str: def to_string( self, buf: None = ..., - na_rep: NaRep = ..., + na_rep: str = ..., float_format: str | None = ..., header: bool = ..., index: bool = ..., @@ -1618,7 +1617,7 @@ def to_string( def to_string( self, buf: FilePath | WriteBuffer[str], - na_rep: NaRep = ..., + na_rep: str = ..., float_format: str | None = ..., header: bool = ..., index: bool = ..., @@ -1633,7 +1632,7 @@ def to_string( def to_string( self, buf: FilePath | WriteBuffer[str] | None = None, - na_rep: NaRep = "NaN", + na_rep: str = "NaN", float_format: str | None = None, header: bool = True, index: bool = True, diff --git a/pandas/io/formats/excel.py b/pandas/io/formats/excel.py index 021061f2b69ef..c4ddac088d901 100644 --- a/pandas/io/formats/excel.py +++ b/pandas/io/formats/excel.py @@ -26,7 +26,6 @@ from pandas._libs.lib import is_list_like from pandas._typing import ( IndexLabel, - NaRep, StorageOptions, ) from pandas.util._decorators import doc @@ -500,7 +499,7 @@ class ExcelFormatter: def __init__( self, df, - na_rep: NaRep = "", + na_rep: str = "", float_format: str | None = None, cols: Sequence[Hashable] | None = None, header: Sequence[Hashable] | bool = True, diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 87d6cb406fa94..912c088d8b520 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -59,7 +59,6 @@ FloatFormatType, FormattersType, IndexLabel, - NaRep, StorageOptions, WriteBuffer, ) @@ -208,7 +207,7 @@ def __init__( categorical: Categorical, buf: IO[str] | None = None, length: bool = True, - na_rep: NaRep = "NaN", + na_rep: str = "NaN", footer: bool = True, ) -> None: self.categorical = categorical @@ -274,7 +273,7 @@ def __init__( length: bool | str = True, header: bool = True, index: bool = True, - na_rep: NaRep = "NaN", + na_rep: str = "NaN", name: bool = False, float_format: str | None = None, dtype: bool = True, @@ -571,7 +570,7 @@ def __init__( col_space: ColspaceArgType | None = None, header: bool | Sequence[str] = True, index: bool = True, - na_rep: NaRep = "NaN", + na_rep: str = "NaN", formatters: FormattersType | None = None, justify: str | None = None, float_format: FloatFormatType | None = None, @@ -1254,7 +1253,7 @@ def format_array( values: Any, formatter: Callable | None, float_format: FloatFormatType | None = None, - na_rep: NaRep = "NaN", + na_rep: str = "NaN", digits: int | None = None, space: str | int | None = None, justify: str = "right", @@ -1335,7 +1334,7 @@ def __init__( values: Any, digits: int = 7, formatter: Callable | None = None, - na_rep: NaRep = "NaN", + na_rep: str = "NaN", space: str | int = 12, float_format: FloatFormatType | None = None, justify: str = "right", @@ -1510,7 +1509,7 @@ def get_result_as_array(self) -> np.ndarray: the parameters given at initialisation, as a numpy array """ - def format_with_na_rep(values: ArrayLike, formatter: Callable, na_rep: NaRep): + def format_with_na_rep(values: ArrayLike, formatter: Callable, na_rep: str): mask = isna(values) formatted = np.array( [ @@ -1622,7 +1621,7 @@ class Datetime64Formatter(GenericArrayFormatter): def __init__( self, values: np.ndarray | Series | DatetimeIndex | DatetimeArray, - nat_rep: NaRep = "NaT", + nat_rep: str = "NaT", date_format: None = None, **kwargs, ) -> None: @@ -1770,7 +1769,7 @@ def is_dates_only(values: np.ndarray | DatetimeArray | Index | DatetimeIndex) -> return False -def _format_datetime64(x: NaTType | Timestamp, nat_rep: NaRep = "NaT") -> str: +def _format_datetime64(x: NaTType | Timestamp, nat_rep: str = "NaT") -> str: if x is NaT: return nat_rep @@ -1781,7 +1780,7 @@ def _format_datetime64(x: NaTType | Timestamp, nat_rep: NaRep = "NaT") -> str: def _format_datetime64_dateonly( x: NaTType | Timestamp, - nat_rep: NaRep = "NaT", + nat_rep: str = "NaT", date_format: str | None = None, ) -> str: if isinstance(x, NaTType): @@ -1795,7 +1794,7 @@ def _format_datetime64_dateonly( def get_format_datetime64( - is_dates_only: bool, nat_rep: NaRep = "NaT", date_format: str | None = None + is_dates_only: bool, nat_rep: str = "NaT", date_format: str | None = None ) -> Callable: """Return a formatter callable taking a datetime64 as input and providing a string as output""" @@ -1841,7 +1840,7 @@ class Timedelta64Formatter(GenericArrayFormatter): def __init__( self, values: np.ndarray | TimedeltaIndex, - nat_rep: NaRep = "NaT", + nat_rep: str = "NaT", box: bool = False, **kwargs, ) -> None: @@ -1858,7 +1857,7 @@ def _format_strings(self) -> list[str]: def get_format_timedelta64( values: np.ndarray | TimedeltaIndex | TimedeltaArray, - nat_rep: NaRep | float = "NaT", + nat_rep: str = "NaT", box: bool = False, ) -> Callable: """ diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 1c3fbcb09f087..19f377580e599 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -29,7 +29,6 @@ FilePath, IndexLabel, Level, - NaRep, QuantileInterpolation, Scalar, StorageOptions, @@ -245,7 +244,7 @@ def __init__( caption: str | tuple | None = None, table_attributes: str | None = None, cell_ids: bool = True, - na_rep: NaRep | None = None, + na_rep: str | None = None, uuid_len: int = 5, decimal: str | None = None, thousands: str | None = None, @@ -563,7 +562,7 @@ def to_excel( self, excel_writer, sheet_name: str = "Sheet1", - na_rep: NaRep = "", + na_rep: str = "", float_format: str | None = None, columns: Sequence[Hashable] | None = None, header: Sequence[Hashable] | bool = True, @@ -2650,7 +2649,7 @@ def set_table_styles( self.table_styles = table_styles return self - def set_na_rep(self, na_rep: NaRep) -> StylerRenderer: + def set_na_rep(self, na_rep: str) -> StylerRenderer: """ Set the missing data representation on a ``Styler``. diff --git a/pandas/io/formats/style_render.py b/pandas/io/formats/style_render.py index 1c09e0151e7de..75e872d986556 100644 --- a/pandas/io/formats/style_render.py +++ b/pandas/io/formats/style_render.py @@ -25,7 +25,6 @@ from pandas._typing import ( Axis, Level, - NaRep, ) from pandas.compat._optional import import_optional_dependency @@ -929,7 +928,7 @@ def format( self, formatter: ExtFormatter | None = None, subset: Subset | None = None, - na_rep: NaRep | None = None, + na_rep: str | None = None, precision: int | None = None, decimal: str = ".", thousands: str | None = None, @@ -1160,7 +1159,7 @@ def format_index( formatter: ExtFormatter | None = None, axis: int | str = 0, level: Level | list[Level] | None = None, - na_rep: NaRep | None = None, + na_rep: str | None = None, precision: int | None = None, decimal: str = ".", thousands: str | None = None, @@ -1752,7 +1751,7 @@ def _render_href(x, format): def _maybe_wrap_formatter( formatter: BaseFormatter | None = None, - na_rep: NaRep | None = None, + na_rep: str | None = None, precision: int | None = None, decimal: str = ".", thousands: str | None = None, diff --git a/pandas/io/formats/xml.py b/pandas/io/formats/xml.py index 8939438fd48b4..eb1835f0392b0 100644 --- a/pandas/io/formats/xml.py +++ b/pandas/io/formats/xml.py @@ -13,7 +13,6 @@ from pandas._typing import ( CompressionOptions, FilePath, - NaRep, ReadBuffer, StorageOptions, WriteBuffer, @@ -107,7 +106,7 @@ def __init__( index: bool = True, root_name: str | None = "data", row_name: str | None = "row", - na_rep: NaRep | None = None, + na_rep: str | None = None, attr_cols: list[str] | None = None, elem_cols: list[str] | None = None, namespaces: dict[str | None, str] | None = None, From b7ef0d816e3ec3252fddac8d0688d603edfdb9c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Tue, 13 Sep 2022 18:44:02 -0400 Subject: [PATCH 4/8] add float for some functions --- pandas/core/arrays/datetimelike.py | 4 ++-- pandas/core/arrays/datetimes.py | 2 +- pandas/core/arrays/period.py | 2 +- pandas/core/arrays/timedeltas.py | 2 +- pandas/io/formats/format.py | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index edcc0cd97bd08..92385675bff83 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -313,7 +313,7 @@ def asi8(self) -> npt.NDArray[np.int64]: # Rendering Methods def _format_native_types( - self, *, na_rep: str = "NaT", date_format=None + self, *, na_rep: str | float = "NaT", date_format=None ) -> npt.NDArray[np.object_]: """ Helper method for astype when converting to strings. @@ -1790,7 +1790,7 @@ def strftime(self, date_format: str) -> npt.NDArray[np.object_]: 'March 10, 2018, 09:00:02 AM'], dtype='object') """ - result = self._format_native_types(date_format=date_format, na_rep=str(np.nan)) + result = self._format_native_types(date_format=date_format, na_rep=np.nan) return result.astype(object, copy=False) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index a51b3b3e20683..142fda46d2dc1 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -648,7 +648,7 @@ def astype(self, dtype, copy: bool = True): # Rendering Methods def _format_native_types( - self, *, na_rep: str = "NaT", date_format=None, **kwargs + self, *, na_rep: str | float = "NaT", date_format=None, **kwargs ) -> npt.NDArray[np.object_]: from pandas.io.formats.format import get_format_datetime64_from_values diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index f92e48bbdc655..c0d476c2452b8 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -642,7 +642,7 @@ def _formatter(self, boxed: bool = False): @dtl.ravel_compat def _format_native_types( - self, *, na_rep: str = "NaT", date_format=None, **kwargs + self, *, na_rep: str | float = "NaT", date_format=None, **kwargs ) -> npt.NDArray[np.object_]: """ actually format my specific types diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index a26c8fde9fcca..78930f8d200b7 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -373,7 +373,7 @@ def _formatter(self, boxed: bool = False): return get_format_timedelta64(self, box=True) def _format_native_types( - self, *, na_rep: str = "NaT", date_format=None, **kwargs + self, *, na_rep: str | float = "NaT", date_format=None, **kwargs ) -> npt.NDArray[np.object_]: from pandas.io.formats.format import get_format_timedelta64 diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index f4838d26962aa..854a06b6150fc 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -1857,7 +1857,7 @@ def _format_strings(self) -> list[str]: def get_format_timedelta64( values: np.ndarray | TimedeltaIndex | TimedeltaArray, - nat_rep: str = "NaT", + nat_rep: str | float = "NaT", box: bool = False, ) -> Callable: """ From e8378cc6f605a958ed800583fbbdc1da1c97ac82 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Tue, 13 Sep 2022 20:57:00 -0400 Subject: [PATCH 5/8] and the same for the few float default arguments --- .pre-commit-config.yaml | 2 +- pandas/_testing/__init__.py | 4 ++-- pandas/_testing/asserters.py | 8 ++++---- pandas/core/groupby/groupby.py | 9 +++++++-- pandas/core/resample.py | 3 ++- pandas/io/formats/style_render.py | 2 +- 6 files changed, 17 insertions(+), 11 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 75c80f515359c..3e845d28eeae7 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -258,7 +258,7 @@ repos: |/_testing/ - id: autotyping name: autotyping - entry: python -m libcst.tool codemod autotyping.AutotypeCommand --none-return --scalar-return --annotate-magics --annotate-imprecise-magics --bytes-param --str-param + entry: python -m libcst.tool codemod autotyping.AutotypeCommand --none-return --scalar-return --annotate-magics --annotate-imprecise-magics --bytes-param --str-param --float-param types_or: [python, pyi] files: ^pandas exclude: ^(pandas/tests|pandas/io/clipboard) diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py index 54d743523e56f..7deb1b7cc7270 100644 --- a/pandas/_testing/__init__.py +++ b/pandas/_testing/__init__.py @@ -766,7 +766,7 @@ def makeCustomDataframe( return DataFrame(data, index, columns, dtype=dtype) -def _create_missing_idx(nrows, ncols, density, random_state=None): +def _create_missing_idx(nrows, ncols, density: float, random_state=None): if random_state is None: random_state = np.random else: @@ -793,7 +793,7 @@ def _gen_unique_rand(rng, _extra_size): return i.tolist(), j.tolist() -def makeMissingDataframe(density=0.9, random_state=None) -> DataFrame: +def makeMissingDataframe(density: float = 0.9, random_state=None) -> DataFrame: df = makeDataFrame() i, j = _create_missing_idx(*df.shape, density=density, random_state=random_state) df.values[i, j] = np.nan diff --git a/pandas/_testing/asserters.py b/pandas/_testing/asserters.py index 05a7c883d51ad..89e94b766bc41 100644 --- a/pandas/_testing/asserters.py +++ b/pandas/_testing/asserters.py @@ -883,8 +883,8 @@ def assert_series_equal( check_category_order=True, check_freq=True, check_flags=True, - rtol=1.0e-5, - atol=1.0e-8, + rtol: float = 1.0e-5, + atol: float = 1.0e-8, obj: str = "Series", *, check_index=True, @@ -1153,8 +1153,8 @@ def assert_frame_equal( check_like=False, check_freq=True, check_flags=True, - rtol=1.0e-5, - atol=1.0e-8, + rtol: float = 1.0e-5, + atol: float = 1.0e-8, obj: str = "DataFrame", ) -> None: """ diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 164bc222a1ea6..733aae8851a04 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -44,6 +44,7 @@ class providing the base-class of operations. ) import pandas._libs.groupby as libgroupby from pandas._typing import ( + AnyArrayLike, ArrayLike, Dtype, FillnaOptions, @@ -3185,7 +3186,7 @@ def nth( @final def quantile( self, - q=0.5, + q: float | AnyArrayLike = 0.5, interpolation: str = "linear", numeric_only: bool | lib.NoDefault = lib.no_default, ): @@ -3306,7 +3307,11 @@ def post_processor( orig_scalar = is_scalar(q) if orig_scalar: - q = [q] + # error: Incompatible types in assignment (expression has type "List[ + # Union[float, ExtensionArray, ndarray[Any, Any], Index, Series]]", + # variable has type "Union[float, Union[Union[ExtensionArray, ndarray[ + # Any, Any]], Index, Series]]") + q = [q] # type: ignore[assignment] qs = np.array(q, dtype=np.float64) ids, _, ngroups = self.grouper.group_info diff --git a/pandas/core/resample.py b/pandas/core/resample.py index dd7347eaf7ade..84d46a14afbe9 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -27,6 +27,7 @@ to_offset, ) from pandas._typing import ( + AnyArrayLike, Frequency, IndexLabel, NDFrameT, @@ -1027,7 +1028,7 @@ def count(self): return result - def quantile(self, q=0.5, **kwargs): + def quantile(self, q: float | AnyArrayLike = 0.5, **kwargs): """ Return value at the given quantile. diff --git a/pandas/io/formats/style_render.py b/pandas/io/formats/style_render.py index 75e872d986556..1f5be4e36b11d 100644 --- a/pandas/io/formats/style_render.py +++ b/pandas/io/formats/style_render.py @@ -1528,7 +1528,7 @@ def _get_trimming_maximums( max_elements, max_rows=None, max_cols=None, - scaling_factor=0.8, + scaling_factor: float = 0.8, ) -> tuple[int, int]: """ Recursively reduce the number of rows and columns to satisfy max elements. From f84ba4ede15987bb2abbcb64d84359f669c4dcf8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Wed, 14 Sep 2022 13:10:04 -0400 Subject: [PATCH 6/8] define a few more literal constants --- pandas/_typing.py | 5 +++++ pandas/core/base.py | 7 +++---- pandas/core/frame.py | 15 ++++++++------- pandas/core/generic.py | 7 ++++--- pandas/core/groupby/generic.py | 7 +++---- pandas/core/indexes/base.py | 9 +++------ pandas/core/indexes/multi.py | 9 +++------ pandas/core/nanops.py | 8 +++----- pandas/core/series.py | 25 ++++++++++--------------- pandas/core/strings/accessor.py | 3 ++- 10 files changed, 44 insertions(+), 51 deletions(-) diff --git a/pandas/_typing.py b/pandas/_typing.py index b35060e57ed07..d603459ae0452 100644 --- a/pandas/_typing.py +++ b/pandas/_typing.py @@ -340,3 +340,8 @@ def closed(self) -> bool: TimeNonexistent = Union[ Literal["shift_forward", "shift_backward", "NaT", "raise"], timedelta ] +DropKeep = Literal["first", "last", False] +CorrelationMethod = Union[ + Literal["pearson", "kendall", "spearman"], Callable[[np.ndarray, np.ndarray], float] +] +AlignJoin = Literal["outer", "inner", "left", "right"] diff --git a/pandas/core/base.py b/pandas/core/base.py index 274506d0983fa..ab8e4a22367f4 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -79,6 +79,7 @@ if TYPE_CHECKING: from pandas._typing import ( + DropKeep, NumpySorter, NumpyValueArrayLike, ) @@ -1303,15 +1304,13 @@ def searchsorted( sorter=sorter, ) - def drop_duplicates(self, keep: Literal["first", "last", False] = "first"): + def drop_duplicates(self, keep: DropKeep = "first"): duplicated = self._duplicated(keep=keep) # error: Value of type "IndexOpsMixin" is not indexable return self[~duplicated] # type: ignore[index] @final - def _duplicated( - self, keep: Literal["first", "last", False] = "first" - ) -> npt.NDArray[np.bool_]: + def _duplicated(self, keep: DropKeep = "first") -> npt.NDArray[np.bool_]: return duplicated(self._values, keep=keep) def _arith_method(self, other, op): diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 6f85c981f2c72..dc01ce7eca79f 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -50,6 +50,7 @@ ) from pandas._typing import ( AggFuncType, + AlignJoin, AnyAll, AnyArrayLike, ArrayLike, @@ -57,6 +58,8 @@ Axis, ColspaceArgType, CompressionOptions, + CorrelationMethod, + DropKeep, Dtype, DtypeObj, FilePath, @@ -5083,7 +5086,7 @@ def _reindex_multi( def align( self, other: DataFrame, - join: Literal["outer", "inner", "left", "right"] = "outer", + join: AlignJoin = "outer", axis: Axis | None = None, level: Level = None, copy: bool = True, @@ -6622,7 +6625,7 @@ def dropna( def drop_duplicates( self, subset: Hashable | Sequence[Hashable] | None = None, - keep: Literal["first", "last", False] = "first", + keep: DropKeep = "first", inplace: bool = False, ignore_index: bool = False, ) -> DataFrame | None: @@ -6721,7 +6724,7 @@ def drop_duplicates( def duplicated( self, subset: Hashable | Sequence[Hashable] | None = None, - keep: Literal["first", "last", False] = "first", + keep: DropKeep = "first", ) -> Series: """ Return boolean Series denoting duplicate rows. @@ -10270,8 +10273,7 @@ def _series_round(ser: Series, decimals: int): def corr( self, - method: Literal["pearson", "kendall", "spearman"] - | Callable[[np.ndarray, np.ndarray], float] = "pearson", + method: CorrelationMethod = "pearson", min_periods: int = 1, numeric_only: bool | lib.NoDefault = lib.no_default, ) -> DataFrame: @@ -10527,8 +10529,7 @@ def corrwith( other: DataFrame | Series, axis: Axis = 0, drop: bool = False, - method: Literal["pearson", "kendall", "spearman"] - | Callable[[np.ndarray, np.ndarray], float] = "pearson", + method: CorrelationMethod = "pearson", numeric_only: bool | lib.NoDefault = lib.no_default, ) -> Series: """ diff --git a/pandas/core/generic.py b/pandas/core/generic.py index c5ab827469e42..780c804efdf6e 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -41,6 +41,7 @@ to_offset, ) from pandas._typing import ( + AlignJoin, AnyArrayLike, ArrayLike, Axis, @@ -9302,7 +9303,7 @@ def compare( def align( self: NDFrameT, other: NDFrameT, - join: Literal["outer", "inner", "left", "right"] = "outer", + join: AlignJoin = "outer", axis: Axis | None = None, level: Level = None, copy: bool_t = True, @@ -9495,7 +9496,7 @@ def align( def _align_frame( self, other, - join: Literal["outer", "inner", "left", "right"] = "outer", + join: AlignJoin = "outer", axis=None, level=None, copy: bool_t = True, @@ -9559,7 +9560,7 @@ def _align_frame( def _align_series( self, other, - join: Literal["outer", "inner", "left", "right"] = "outer", + join: AlignJoin = "outer", axis=None, level=None, copy: bool_t = True, diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index c06042915cbc2..b4fb1a612f621 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -37,6 +37,7 @@ from pandas._typing import ( ArrayLike, Axis, + CorrelationMethod, FillnaOptions, IndexLabel, Level, @@ -828,8 +829,7 @@ def idxmax(self, axis: Axis = 0, skipna: bool = True) -> Series: def corr( self, other: Series, - method: Literal["pearson", "kendall", "spearman"] - | Callable[[np.ndarray, np.ndarray], float] = "pearson", + method: CorrelationMethod = "pearson", min_periods: int | None = None, ) -> Series: result = self._op_via_apply( @@ -2190,8 +2190,7 @@ def corrwith( other: DataFrame | Series, axis: Axis = 0, drop: bool = False, - method: Literal["pearson", "kendall", "spearman"] - | Callable[[np.ndarray, np.ndarray], float] = "pearson", + method: CorrelationMethod = "pearson", numeric_only: bool | lib.NoDefault = lib.no_default, ) -> DataFrame: result = self._op_via_apply( diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 578c14aea5d80..182eb5a21f8b3 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -48,6 +48,7 @@ AnyAll, ArrayLike, Axes, + DropKeep, Dtype, DtypeObj, F, @@ -3028,9 +3029,7 @@ def unique(self: _IndexT, level: Hashable | None = None) -> _IndexT: return self._shallow_copy(result) @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"]) - def drop_duplicates( - self: _IndexT, keep: Literal["first", "last", False] = "first" - ) -> _IndexT: + def drop_duplicates(self: _IndexT, keep: DropKeep = "first") -> _IndexT: """ Return Index with duplicate values removed. @@ -3081,9 +3080,7 @@ def drop_duplicates( return super().drop_duplicates(keep=keep) - def duplicated( - self, keep: Literal["first", "last", False] = "first" - ) -> npt.NDArray[np.bool_]: + def duplicated(self, keep: DropKeep = "first") -> npt.NDArray[np.bool_]: """ Indicate duplicate index values. diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index d372227ac8f2b..2aa7eafe00dca 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -31,6 +31,7 @@ from pandas._typing import ( AnyAll, AnyArrayLike, + DropKeep, DtypeObj, F, IgnoreRaise, @@ -1619,9 +1620,7 @@ def _inferred_type_levels(self) -> list[str]: return [i.inferred_type for i in self.levels] @doc(Index.duplicated) - def duplicated( - self, keep: Literal["last", "first", False] = "first" - ) -> npt.NDArray[np.bool_]: + def duplicated(self, keep: DropKeep = "first") -> npt.NDArray[np.bool_]: shape = tuple(len(lev) for lev in self.levels) ids = get_group_index(self.codes, shape, sort=False, xnull=False) @@ -3863,9 +3862,7 @@ def set_names(self, names, level=None, inplace: bool = False) -> MultiIndex | No rename = set_names @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"]) - def drop_duplicates( - self, keep: Literal["first", "last", False] = "first" - ) -> MultiIndex: + def drop_duplicates(self, keep: DropKeep = "first") -> MultiIndex: return super().drop_duplicates(keep=keep) # --------------------------------------------------------------- diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index c9d8ce46c7cdf..799cd9dbfc2e6 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -6,7 +6,6 @@ from typing import ( Any, Callable, - Literal, cast, ) import warnings @@ -23,6 +22,7 @@ ) from pandas._typing import ( ArrayLike, + CorrelationMethod, Dtype, DtypeObj, F, @@ -1535,8 +1535,7 @@ def nancorr( a: np.ndarray, b: np.ndarray, *, - method: Literal["pearson", "kendall", "spearman"] - | Callable[[np.ndarray, np.ndarray], float] = "pearson", + method: CorrelationMethod = "pearson", min_periods: int | None = None, ) -> float: """ @@ -1561,8 +1560,7 @@ def nancorr( def get_corr_func( - method: Literal["pearson", "kendall", "spearman"] - | Callable[[np.ndarray, np.ndarray], float] + method: CorrelationMethod, ) -> Callable[[np.ndarray, np.ndarray], float]: if method == "kendall": from scipy.stats import kendalltau diff --git a/pandas/core/series.py b/pandas/core/series.py index fc97a8f04e0cc..a4a7a2c800a54 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -35,10 +35,13 @@ from pandas._libs.lib import no_default from pandas._typing import ( AggFuncType, + AlignJoin, AnyAll, AnyArrayLike, ArrayLike, Axis, + CorrelationMethod, + DropKeep, Dtype, DtypeObj, FilePath, @@ -2247,29 +2250,22 @@ def unique(self) -> ArrayLike: @overload def drop_duplicates( - self, - keep: Literal["first", "last", False] = ..., - *, - inplace: Literal[False] = ..., + self, keep: DropKeep = ..., *, inplace: Literal[False] = ... ) -> Series: ... @overload - def drop_duplicates( - self, keep: Literal["first", "last", False] = ..., *, inplace: Literal[True] - ) -> None: + def drop_duplicates(self, keep: DropKeep = ..., *, inplace: Literal[True]) -> None: ... @overload def drop_duplicates( - self, keep: Literal["first", "last", False] = ..., *, inplace: bool = ... + self, keep: DropKeep = ..., *, inplace: bool = ... ) -> Series | None: ... @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"]) - def drop_duplicates( - self, keep: Literal["first", "last", False] = "first", inplace=False - ) -> Series | None: + def drop_duplicates(self, keep: DropKeep = "first", inplace=False) -> Series | None: """ Return Series with duplicate values removed. @@ -2353,7 +2349,7 @@ def drop_duplicates( else: return result - def duplicated(self, keep: Literal["first", "last", False] = "first") -> Series: + def duplicated(self, keep: DropKeep = "first") -> Series: """ Indicate duplicate Series values. @@ -2698,8 +2694,7 @@ def quantile( def corr( self, other: Series, - method: Literal["pearson", "kendall", "spearman"] - | Callable[[np.ndarray, np.ndarray], float] = "pearson", + method: CorrelationMethod = "pearson", min_periods: int | None = None, ) -> float: """ @@ -4852,7 +4847,7 @@ def _needs_reindex_multi(self, axes, method, level) -> bool: def align( self, other: Series, - join: Literal["outer", "inner", "left", "right"] = "outer", + join: AlignJoin = "outer", axis: Axis | None = None, level: Level = None, copy: bool = True, diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py index 641e115075c4b..f6a0cd48be09b 100644 --- a/pandas/core/strings/accessor.py +++ b/pandas/core/strings/accessor.py @@ -18,6 +18,7 @@ import pandas._libs.lib as lib from pandas._typing import ( + AlignJoin, DtypeObj, F, ) @@ -431,7 +432,7 @@ def cat( others=None, sep=None, na_rep=None, - join: Literal["outer", "inner", "left", "right"] = "left", + join: AlignJoin = "left", ) -> str | Series | Index: """ Concatenate strings in the Series/Index with given separator. From f19a70b0635363c33279ab014e1d4e568db581fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Wed, 14 Sep 2022 13:34:11 -0400 Subject: [PATCH 7/8] avoid itertools.cycle mypy error --- pandas/core/reshape/encoding.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/pandas/core/reshape/encoding.py b/pandas/core/reshape/encoding.py index 1b20eb2c37d63..ce4e07bb3205b 100644 --- a/pandas/core/reshape/encoding.py +++ b/pandas/core/reshape/encoding.py @@ -2,7 +2,10 @@ from collections import defaultdict import itertools -from typing import Hashable +from typing import ( + Hashable, + Iterable, +) import numpy as np @@ -25,7 +28,7 @@ def get_dummies( data, prefix=None, - prefix_sep: str | list[str] | dict[str, str] = "_", + prefix_sep: str | Iterable[str] | dict[str, str] = "_", dummy_na: bool = False, columns=None, sparse: bool = False, @@ -175,9 +178,7 @@ def check_len(item, name): # validate separators if isinstance(prefix_sep, str): - # error: Incompatible types in assignment (expression has type - # "cycle[str]", variable has type "str") - prefix_sep = itertools.cycle([prefix_sep]) # type: ignore[assignment] + prefix_sep = [prefix_sep] elif isinstance(prefix_sep, dict): prefix_sep = [prefix_sep[col] for col in data_to_encode.columns] @@ -223,7 +224,7 @@ def check_len(item, name): def _get_dummies_1d( data, prefix, - prefix_sep: str | list[str] | dict[str, str] = "_", + prefix_sep: str | Iterable[str] | dict[str, str] = "_", dummy_na: bool = False, sparse: bool = False, drop_first: bool = False, From 0c5a4c133934f664025790703750449e3b0b0ef3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Wed, 14 Sep 2022 16:11:53 -0400 Subject: [PATCH 8/8] revert mistake --- pandas/core/reshape/encoding.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/reshape/encoding.py b/pandas/core/reshape/encoding.py index ce4e07bb3205b..6670633fcc587 100644 --- a/pandas/core/reshape/encoding.py +++ b/pandas/core/reshape/encoding.py @@ -178,7 +178,7 @@ def check_len(item, name): # validate separators if isinstance(prefix_sep, str): - prefix_sep = [prefix_sep] + prefix_sep = itertools.cycle([prefix_sep]) elif isinstance(prefix_sep, dict): prefix_sep = [prefix_sep[col] for col in data_to_encode.columns]