diff --git a/pandas/_typing.py b/pandas/_typing.py index a9852dd4b13cf..21ea5ce5e4a83 100644 --- a/pandas/_typing.py +++ b/pandas/_typing.py @@ -21,6 +21,7 @@ Dict, Hashable, List, + Literal, Mapping, Optional, Sequence, @@ -37,7 +38,6 @@ # https://mypy.readthedocs.io/en/latest/common_issues.html#import-cycles if TYPE_CHECKING: from typing import ( - Literal, TypedDict, final, ) @@ -123,6 +123,8 @@ Frequency = Union[str, "DateOffset"] Axes = Collection[Any] RandomState = Union[int, ArrayLike, np.random.Generator, np.random.RandomState] +MergeTypes = Literal["inner", "outer", "left", "right", "cross"] +ConcatTypes = Literal["inner", "outer"] # dtypes NpDtype = Union[str, np.dtype] diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 69e2650a15f16..7f7f59615e8df 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -9,6 +9,7 @@ Hashable, Iterator, List, + Literal, cast, ) import warnings @@ -518,7 +519,10 @@ def apply_multiple(self) -> FrameOrSeriesUnion: return self.obj.aggregate(self.f, self.axis, *self.args, **self.kwargs) def normalize_dictlike_arg( - self, how: str, obj: FrameOrSeriesUnion, func: AggFuncTypeDict + self, + how: Literal["apply", "agg", "transform"], + obj: FrameOrSeriesUnion, + func: AggFuncTypeDict, ) -> AggFuncTypeDict: """ Handler for dict-like argument. @@ -527,7 +531,10 @@ def normalize_dictlike_arg( that a nested renamer is not passed. Also normalizes to all lists when values consists of a mix of list and non-lists. 
""" - assert how in ("apply", "agg", "transform") + if how not in ("apply", "agg", "transform"): + raise ValueError( + "Value for how argument must be one of : apply, agg, transform" + ) # Can't use func.values(); wouldn't work for a Series if ( diff --git a/pandas/core/arrays/_ranges.py b/pandas/core/arrays/_ranges.py index 3909875e5660a..3018219219e58 100644 --- a/pandas/core/arrays/_ranges.py +++ b/pandas/core/arrays/_ranges.py @@ -4,6 +4,8 @@ """ from __future__ import annotations +from typing import Literal + import numpy as np from pandas._libs.lib import i8max @@ -75,7 +77,7 @@ def generate_regular_range( def _generate_range_overflow_safe( - endpoint: int, periods: int, stride: int, side: str = "start" + endpoint: int, periods: int, stride: int, side: Literal["start", "end"] = "start" ) -> int: """ Calculate the second endpoint for passing to np.arange, checking @@ -142,13 +144,14 @@ def _generate_range_overflow_safe( def _generate_range_overflow_safe_signed( - endpoint: int, periods: int, stride: int, side: str + endpoint: int, periods: int, stride: int, side: Literal["start", "end"] ) -> int: """ A special case for _generate_range_overflow_safe where `periods * stride` can be calculated without overflowing int64 bounds. 
""" assert side in ["start", "end"] + if side == "end": stride *= -1 diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 92a906e9fd8b0..62554a067d1a0 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -37,6 +37,7 @@ to_offset, tzconversion, ) +from pandas._typing import Dtype from pandas.errors import PerformanceWarning from pandas.core.dtypes.cast import astype_dt64_to_dt64tz @@ -1967,12 +1968,12 @@ def sequence_to_datetimes( def sequence_to_dt64ns( data, - dtype=None, - copy=False, - tz=None, - dayfirst=False, - yearfirst=False, - ambiguous="raise", + dtype: Dtype | None = None, + copy: bool = False, + tz: tzinfo | str = None, + dayfirst: bool = False, + yearfirst: bool = False, + ambiguous: str | bool = "raise", *, allow_object: bool = False, allow_mixed: bool = False, @@ -2126,10 +2127,10 @@ def sequence_to_dt64ns( def objects_to_datetime64ns( data: np.ndarray, - dayfirst, - yearfirst, - utc=False, - errors="raise", + dayfirst: bool, + yearfirst: bool, + utc: bool = False, + errors: Literal["raise", "coerce", "ignore"] = "raise", require_iso8601: bool = False, allow_object: bool = False, allow_mixed: bool = False, @@ -2164,7 +2165,10 @@ def objects_to_datetime64ns( ------ ValueError : if data cannot be converted to datetimes """ - assert errors in ["raise", "ignore", "coerce"] + if errors not in ["raise", "ignore", "coerce"]: + raise ValueError( + "Value for errors argument must be one of: raise, coerce, ignore" + ) # if str-dtype, convert data = np.array(data, copy=False, dtype=np.object_) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 0acbb0c34266f..d2d15d3cbfe8b 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -9191,7 +9191,7 @@ def merge( sort: bool = False, suffixes: Suffixes = ("_x", "_y"), copy: bool = True, - indicator: bool = False, + indicator: bool | str = False, validate: str | None = None, ) -> DataFrame: from pandas.core.reshape.merge import merge 
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index f3063d9d71b2c..fe01335fac1bc 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -5691,7 +5691,9 @@ def _validate_indexer(self, form: str_t, key, kind: str_t): if key is not None and not is_integer(key): raise self._invalid_indexer(form, key) - def _maybe_cast_slice_bound(self, label, side: str_t, kind=no_default): + def _maybe_cast_slice_bound( + self, label, side: str_t, kind: Literal["loc", "getitem"] = no_default + ): """ This function should be overloaded in subclasses that allow non-trivial casting on label-slice bounds, e.g. datetime-like indices allowing @@ -5755,7 +5757,10 @@ def get_slice_bound(self, label, side: str_t, kind=None) -> int: int Index of label. """ - assert kind in ["loc", "getitem", None] + if kind not in ["loc", "getitem", None]: + raise ValueError( + "Value for kind argument must be one of: loc, getitem or None" + ) if side not in ("left", "right"): raise ValueError( diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 6275fe39558a3..35d85ebd0d974 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1200,7 +1200,8 @@ def where(self, other, cond, errors="raise") -> list[Block]: assert cond.ndim == self.ndim assert not isinstance(other, (ABCIndex, ABCSeries, ABCDataFrame)) - assert errors in ["raise", "ignore"] + if errors not in ["raise", "ignore"]: + raise ValueError("Value for errors argument must be one of: raise, ignore") transpose = self.ndim == 2 values = self.values diff --git a/pandas/core/missing.py b/pandas/core/missing.py index f144821220e4b..0fd84494a4681 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -10,6 +10,7 @@ from typing import ( TYPE_CHECKING, Any, + Literal, cast, ) @@ -164,7 +165,7 @@ def clean_interp_method(method: str, index: Index, **kwargs) -> str: return method -def find_valid_index(values, *, how: str) -> int | None: +def 
find_valid_index(values, *, how: Literal["first", "last"]) -> int | None: """ Retrieves the index of the first valid value. @@ -178,7 +179,8 @@ def find_valid_index(values, *, how: str) -> int | None: ------- int or None """ - assert how in ["first", "last"] + if how not in ["first", "last"]: + raise ValueError("Value for how argument must be one of : first, last") if len(values) == 0: # early stop return None diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 143999a4677b3..8a3c6985b1f08 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -28,7 +28,9 @@ DtypeObj, FrameOrSeries, IndexLabel, + MergeTypes, Suffixes, + TimedeltaConvertibleTypes, ) from pandas.errors import MergeError from pandas.util._decorators import ( @@ -92,7 +94,7 @@ def merge( left: DataFrame | Series, right: DataFrame | Series, - how: str = "inner", + how: MergeTypes = "inner", on: IndexLabel | None = None, left_on: IndexLabel | None = None, right_on: IndexLabel | None = None, @@ -101,7 +103,7 @@ def merge( sort: bool = False, suffixes: Suffixes = ("_x", "_y"), copy: bool = True, - indicator: bool = False, + indicator: bool | str = False, validate: str | None = None, ) -> DataFrame: op = _MergeOperation( @@ -331,11 +333,11 @@ def merge_asof( right_on: IndexLabel | None = None, left_index: bool = False, right_index: bool = False, - by=None, - left_by=None, - right_by=None, + by: IndexLabel | None = None, + left_by: Hashable | None = None, + right_by: Hashable | None = None, suffixes: Suffixes = ("_x", "_y"), - tolerance=None, + tolerance: None | TimedeltaConvertibleTypes = None, allow_exact_matches: bool = True, direction: str = "backward", ) -> DataFrame: @@ -622,7 +624,7 @@ def __init__( sort: bool = True, suffixes: Suffixes = ("_x", "_y"), copy: bool = True, - indicator: bool = False, + indicator: bool | str = False, validate: str | None = None, ): _left = _validate_operand(left) diff --git a/pandas/io/excel/_util.py 
b/pandas/io/excel/_util.py index 7d8028de23257..d8b2a58218f58 100644 --- a/pandas/io/excel/_util.py +++ b/pandas/io/excel/_util.py @@ -59,7 +59,8 @@ def get_default_engine(ext, mode="reader"): "xls": "xlwt", "ods": "odf", } - assert mode in ["reader", "writer"] + if mode not in ["reader", "writer"]: + raise ValueError('File mode must be either "reader" or "writer".') if mode == "writer": # Prefer xlsxwriter over openpyxl if installed xlsxwriter = import_optional_dependency("xlsxwriter", errors="warn") diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index c55ac6ce228bf..7b84dbb635133 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -445,7 +445,7 @@ def _maybe_add_count(base: str, count: float) -> str: # Frequency comparison -def is_subperiod(source, target) -> bool: +def is_subperiod(source: str | DateOffset, target: str | DateOffset) -> bool: """ Returns True if downsampling is possible between source and target frequencies @@ -501,7 +501,7 @@ def is_subperiod(source, target) -> bool: return False -def is_superperiod(source, target) -> bool: +def is_superperiod(source: str | DateOffset, target: str | DateOffset) -> bool: """ Returns True if upsampling is possible between source and target frequencies @@ -559,7 +559,7 @@ def is_superperiod(source, target) -> bool: return False -def _maybe_coerce_freq(code) -> str: +def _maybe_coerce_freq(code: str | DateOffset) -> str: """we might need to coerce a code to a rule_code and uppercase it diff --git a/pandas/tseries/holiday.py b/pandas/tseries/holiday.py index 54ac116afe3cf..165f3fcb62bd5 100644 --- a/pandas/tseries/holiday.py +++ b/pandas/tseries/holiday.py @@ -4,6 +4,11 @@ datetime, timedelta, ) +from typing import ( + Any, + Callable, + Tuple, +) import warnings from dateutil.relativedelta import ( # noqa @@ -33,6 +38,8 @@ Easter, ) +from pandas._typing import TimestampConvertibleTypes + def next_monday(dt: datetime) -> datetime: """ @@ -151,27 +158,39 @@ 
class Holiday: def __init__( self, - name, - year=None, - month=None, - day=None, - offset=None, - observance=None, - start_date=None, - end_date=None, - days_of_week=None, + name: str, + year: int | None = None, + month: int | None = None, + day: int | None = None, + offset: list[Any] | Any = None, + observance: Callable[..., DatetimeIndex] = None, + start_date: TimestampConvertibleTypes = None, + end_date: TimestampConvertibleTypes = None, + days_of_week: tuple[int, ...] = None, ): """ Parameters ---------- name : str Name of the holiday , defaults to class name + year : int, optional + the year in which the holiday occurs + month : int, optional + the month in which the holiday occurs + day : int, optional + the day on which the holiday occurs offset : array of pandas.tseries.offsets or class from pandas.tseries.offsets computes offset from date observance: function computes when holiday is given a pandas Timestamp - days_of_week: + start_date : datetime-like + optionally constrain the period in which the holiday occurs + can be any valid value for a pandas Timestamp + end_date : datetime-like + optionally constrain the period in which the holiday occurs + can be any valid value for a pandas Timestamp + days_of_week: tuple(int, ...) 
provide a tuple of days e.g (0,1,2,3,) for Monday Through Thursday Monday=0,..,Sunday=6 @@ -239,7 +258,12 @@ def __repr__(self) -> str: repr = f"Holiday: {self.name} ({info})" return repr - def dates(self, start_date, end_date, return_name=False): + def dates( + self, + start_date: TimestampConvertibleTypes, + end_date: TimestampConvertibleTypes, + return_name: bool = False, + ): """ Calculate holidays observed between start date and end date @@ -286,7 +310,9 @@ def dates(self, start_date, end_date, return_name=False): return Series(self.name, index=holiday_dates) return holiday_dates - def _reference_dates(self, start_date, end_date): + def _reference_dates( + self, start_date: TimestampConvertibleTypes, end_date: TimestampConvertibleTypes + ): """ Get reference dates for the holiday. @@ -319,7 +345,7 @@ def _reference_dates(self, start_date, end_date): return dates - def _apply_rule(self, dates): + def _apply_rule(self, dates: DatetimeIndex): """ Apply the given offset/observance to a DatetimeIndex of dates. @@ -390,7 +416,7 @@ class AbstractHolidayCalendar(metaclass=HolidayCalendarMetaClass): end_date = Timestamp(datetime(2200, 12, 31)) _cache = None - def __init__(self, name=None, rules=None): + def __init__(self, name: str = None, rules: list[Holiday] = None): """ Initializes holiday object with a given set a rules. Normally classes just have the rules defined within them. @@ -417,7 +443,12 @@ def rule_from_name(self, name): return None - def holidays(self, start=None, end=None, return_name=False): + def holidays( + self, + start: TimestampConvertibleTypes = None, + end: TimestampConvertibleTypes = None, + return_name: bool = False + ) -> DatetimeIndex: """ Returns a curve with holidays between start_date and end_date