diff --git a/pandas/_libs/interval.pyi b/pandas/_libs/interval.pyi new file mode 100644 index 0000000000000..0321bd026d83c --- /dev/null +++ b/pandas/_libs/interval.pyi @@ -0,0 +1,89 @@ +import numpy as np + +from pandas import ( + Timedelta, + Timestamp, +) + +VALID_CLOSED: frozenset[str] + + +class IntervalMixin: + closed: str + + @property + def closed_left(self) -> bool: ... + + @property + def closed_right(self) -> bool: ... + + @property + def open_left(self) -> bool: ... + + @property + def open_right(self) -> bool: ... + + @property + def mid(self): ... + + @property + def length(self): ... + + @property + def is_empty(self): ... + + def _check_closed_matches(self, other, name: str = ...) -> None: ... + + +class Interval(IntervalMixin): + left: int | float | Timestamp | Timedelta + right: int | float | Timestamp | Timedelta + + def __init__(self, left, right, closed: str = ...): ... + + def __contains__(self, key) -> bool: ... + def __str__(self) -> str: ... + def __add__(self, y): ... + def __sub__(self, y): ... + def __mul__(self, y): ... + def __truediv__(self, y): ... + def __floordiv__(self, y): ... + + def overlaps(self, other: Interval) -> bool: ... + + +def intervals_to_interval_bounds( + intervals: np.ndarray, + validate_closed: bool = ..., +) -> tuple[np.ndarray, np.ndarray, str]: ... + + +class IntervalTree(IntervalMixin): + def __init__(self, left, right, closed=..., leaf_size=...): ... + + @property + def left_sorter(self) -> np.ndarray: ... # np.ndarray[np.intp] + + @property + def right_sorter(self) -> np.ndarray: ... # np.ndarray[np.intp] + + @property + def is_overlapping(self) -> bool: ... + + @property + def is_monotonic_increasing(self) -> bool: ... + + def get_indexer( + self, + target: np.ndarray, # scalar_t[:] + ) -> np.ndarray: ... # np.ndarray[np.intp] + + def get_indexer_non_unique( + self, + target: np.ndarray, # scalar_t[:] + ) -> tuple[ + np.ndarray, # np.ndarray[np.intp] + np.ndarray, # np.ndarray[np.intp] + ]: ... 
+ + def clear_mapping(self) -> None: ... diff --git a/pandas/_typing.py b/pandas/_typing.py index 12d23786c3387..98618dd8712c9 100644 --- a/pandas/_typing.py +++ b/pandas/_typing.py @@ -1,5 +1,7 @@ from datetime import ( + date, datetime, + time, timedelta, tzinfo, ) @@ -86,10 +88,10 @@ # scalars -PythonScalar = Union[str, int, float, bool] +PythonScalar = Optional[Union[str, int, float, complex, bool, date, time, timedelta]] DatetimeLikeScalar = Union["Period", "Timestamp", "Timedelta"] PandasScalar = Union["Period", "Timestamp", "Timedelta", "Interval"] -Scalar = Union[PythonScalar, PandasScalar] +Scalar = Union[PythonScalar, PandasScalar, np.number, np.datetime64, np.timedelta64] # timestamp and timedelta convertible types diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index ecc45357db8c1..26fcafe348a42 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -243,7 +243,10 @@ def contains(cat, key, container): return any(loc_ in container for loc_ in loc) -class Categorical(NDArrayBackedExtensionArray, PandasObject, ObjectStringArrayMixin): +# error: Cannot determine type of 'repeat' in base class 'ExtensionArray' +class Categorical( # type: ignore[misc] + NDArrayBackedExtensionArray, PandasObject, ObjectStringArrayMixin +): """ Represent a categorical variable in classic R / S-plus fashion. 
diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 08cb12a1373bb..520ce590fbdaf 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -141,7 +141,10 @@ class InvalidComparison(Exception): pass -class DatetimeLikeArrayMixin(OpsMixin, NDArrayBackedExtensionArray): +# error: Cannot determine type of 'repeat' in base class 'ExtensionArray' +class DatetimeLikeArrayMixin( # type: ignore[misc] + OpsMixin, NDArrayBackedExtensionArray +): """ Shared Base/Mixin class for DatetimeArray, TimedeltaArray, PeriodArray diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 92a906e9fd8b0..d1e7b9e1b720e 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -148,7 +148,8 @@ def f(self): return property(f) -class DatetimeArray(dtl.TimelikeOps, dtl.DatelikeOps): +# error: Cannot determine type of 'repeat' in base class 'ExtensionArray' +class DatetimeArray(dtl.TimelikeOps, dtl.DatelikeOps): # type: ignore[misc] """ Pandas ExtensionArray for tz-naive or tz-aware datetime data. 
diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index 2318cae004c5a..aea6a6ac57716 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -7,6 +7,8 @@ ) import textwrap from typing import ( + TYPE_CHECKING, + Generic, Sequence, TypeVar, cast, @@ -84,7 +86,14 @@ unpack_zerodim_and_defer, ) +if TYPE_CHECKING: + from pandas.core.arrays import ( + DatetimeArray, + TimedeltaArray, + ) + IntervalArrayT = TypeVar("IntervalArrayT", bound="IntervalArray") +S = TypeVar("S", np.ndarray, "DatetimeArray", "TimedeltaArray") _interval_shared_docs: dict[str, str] = {} @@ -186,11 +195,15 @@ ), } ) -class IntervalArray(IntervalMixin, ExtensionArray): +class IntervalArray(IntervalMixin, ExtensionArray, Generic[S]): ndim = 1 can_hold_na = True _na_value = _fill_value = np.nan + _dtype: IntervalDtype + _left: S + _right: S + # --------------------------------------------------------------------- # Constructors @@ -586,7 +599,12 @@ def _validate(self): "location both left and right sides" ) raise ValueError(msg) - if not (self._left[left_mask] <= self._right[left_mask]).all(): + # error: Item "bool" of "Union[Any, bool]" has no attribute "all" + if not ( # type: ignore[union-attr] + # error: Unsupported operand types for <= ("Timestamp" and "Timedelta") + self._left[left_mask] # type: ignore[operator] + <= self._right[left_mask] + ).all(): msg = "left side of interval must be <= right side" raise ValueError(msg) @@ -930,9 +948,9 @@ def shift( from pandas import Index fill_value = Index(self._left, copy=False)._na_value - empty = IntervalArray.from_breaks([fill_value] * (empty_len + 1)) + empty = type(self).from_breaks([fill_value] * (empty_len + 1)) else: - empty = self._from_sequence([fill_value] * empty_len) + empty = type(self)._from_sequence([fill_value] * empty_len) if periods > 0: a = empty @@ -1355,15 +1373,31 @@ def is_non_overlapping_monotonic(self) -> bool: # at a point when both sides of intervals are included if 
self.closed == "both": return bool( - (self._right[:-1] < self._left[1:]).all() - or (self._left[:-1] > self._right[1:]).all() + # error: Item "bool" of "Union[Any, bool]" has no attribute "all" + # error: Unsupported operand types for > ("Timedelta" and "Timestamp") + ( # type: ignore[union-attr] + self._right[:-1] < self._left[1:] # type: ignore[operator] + ).all() + # error: Item "bool" of "Union[Any, bool]" has no attribute "all" + # error: Unsupported operand types for > ("Timedelta" and "Timestamp") + or ( # type: ignore[union-attr] + self._left[:-1] > self._right[1:] # type: ignore[operator] + ).all() ) # non-strict inequality when closed != 'both'; at least one side is # not included in the intervals, so equality does not imply overlapping return bool( - (self._right[:-1] <= self._left[1:]).all() - or (self._left[:-1] >= self._right[1:]).all() + # error: Item "bool" of "Union[Any, bool]" has no attribute "all" + # error: Unsupported operand types for <= ("Timestamp" and "Timedelta") + ( # type: ignore[union-attr] + self._right[:-1] <= self._left[1:] # type: ignore[operator] + ).all() + # error: Item "bool" of "Union[Any, bool]" has no attribute "all" + # error: Unsupported operand types for >= ("Timedelta" and "Timestamp") + or ( # type: ignore[union-attr] + self._left[:-1] >= self._right[1:] # type: ignore[operator] + ).all() ) # --------------------------------------------------------------------- diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index d274501143916..b4f9099ea4203 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -215,7 +215,7 @@ def to_numpy( # type: ignore[override] self, dtype: NpDtype | None = None, copy: bool = False, - na_value: Scalar = lib.no_default, + na_value: Scalar | lib.NoDefault = lib.no_default, ) -> np.ndarray: """ Convert to a NumPy Array. 
diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py index ec7bd132832d1..31b1f83dd2312 100644 --- a/pandas/core/arrays/numpy_.py +++ b/pandas/core/arrays/numpy_.py @@ -27,7 +27,8 @@ from pandas.core.strings.object_array import ObjectStringArrayMixin -class PandasArray( +# error: Cannot determine type of 'repeat' in base class 'ExtensionArray' +class PandasArray( # type: ignore[misc] OpsMixin, NDArrayBackedExtensionArray, NDArrayOperatorsMixin, diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index 8d150c8f6ad3d..3eb8df6241685 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -228,7 +228,8 @@ class BaseStringArray(ExtensionArray): pass -class StringArray(BaseStringArray, PandasArray): +# error: Cannot determine type of 'repeat' in base class 'ExtensionArray' +class StringArray(BaseStringArray, PandasArray): # type: ignore[misc] """ Extension array for string data. @@ -311,8 +312,6 @@ def __init__(self, values, copy=False): values = extract_array(values) super().__init__(values, copy=copy) - # error: Incompatible types in assignment (expression has type "StringDtype", - # variable has type "PandasDtype") NDArrayBacked.__init__(self, self._ndarray, StringDtype(storage="python")) if not isinstance(values, type(self)): self._validate() diff --git a/pandas/core/base.py b/pandas/core/base.py index ae7e1a1062cfb..c0ea622a01ceb 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -1101,7 +1101,8 @@ def _memory_usage(self, deep: bool = False) -> int: return v @doc( - algorithms.factorize, + # error: Cannot determine type of 'factorize' + algorithms.factorize, # type: ignore[has-type] values="", order="", size_hint="", diff --git a/pandas/core/common.py b/pandas/core/common.py index 183607ebb489d..074af672c3bde 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -30,7 +30,6 @@ from pandas._typing import ( AnyArrayLike, NpDtype, - Scalar, T, ) from pandas.compat import 
np_version_under1p18 @@ -487,7 +486,7 @@ def f(x): def convert_to_list_like( - values: Scalar | Iterable | AnyArrayLike, + values: Any, ) -> list | AnyArrayLike: """ Convert list-like or scalar input to list-like. List, numpy and pandas array-like diff --git a/pandas/core/computation/pytables.py b/pandas/core/computation/pytables.py index f733a5c43dfb3..901d4ccd7cb0d 100644 --- a/pandas/core/computation/pytables.py +++ b/pandas/core/computation/pytables.py @@ -605,6 +605,12 @@ def __repr__(self) -> str: def evaluate(self): """create and return the numexpr condition and filter""" + if self.terms is None: + raise ValueError( + f"cannot process expression [{self.expr}], [{self}] " + "is not a valid condition" + ) + try: self.condition = self.terms.prune(ConditionBinOp) except AttributeError as err: diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 433d45d94167d..77a9c1c88d2f5 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -209,7 +209,11 @@ def maybe_unbox_datetimelike(value: Scalar, dtype: DtypeObj) -> Scalar: # GH#36541: can't fill array directly with pd.NaT # > np.empty(10, dtype="datetime64[64]").fill(pd.NaT) # ValueError: cannot convert float NaN to integer - value = dtype.type("NaT", "ns") + # error: Incompatible types in assignment (expression has type + # "Union[generic, Any]", variable has type "Union[Union[str, int, float, + # complex, bool, date, time, timedelta, None], Union[Period, Timestamp, + # Timedelta, Interval], number[Any], datetime64, timedelta64]") + value = dtype.type("NaT", "ns") # type: ignore[assignment] elif isinstance(value, Timestamp): if value.tz is None: value = value.to_datetime64() diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index fbfee9a1f524c..da771f3b88913 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -36,6 +36,7 @@ ) from pandas.errors import InvalidIndexError from pandas.util._decorators import ( + 
Appender, cache_readonly, doc, ) @@ -263,7 +264,8 @@ class DatetimeIndex(DatetimeTimedeltaMixin): # -------------------------------------------------------------------- # methods that dispatch to DatetimeArray and wrap result - @doc(DatetimeArray.strftime) + # error: Cannot determine type of 'strftime' + @Appender(DatetimeArray.strftime.__doc__) # type: ignore[has-type] def strftime(self, date_format) -> Index: arr = self._data.strftime(date_format) return Index(arr, name=self.name) @@ -273,12 +275,14 @@ def tz_convert(self, tz) -> DatetimeIndex: arr = self._data.tz_convert(tz) return type(self)._simple_new(arr, name=self.name) - @doc(DatetimeArray.tz_localize) + # error: Cannot determine type of 'tz_localize' + @doc(DatetimeArray.tz_localize) # type: ignore[has-type] def tz_localize(self, tz, ambiguous="raise", nonexistent="raise") -> DatetimeIndex: arr = self._data.tz_localize(tz, ambiguous, nonexistent) return type(self)._simple_new(arr, name=self.name) - @doc(DatetimeArray.to_period) + # error: Cannot determine type of 'to_period' + @doc(DatetimeArray.to_period) # type: ignore[has-type] def to_period(self, freq=None) -> PeriodIndex: from pandas.core.indexes.api import PeriodIndex diff --git a/pandas/core/indexes/extension.py b/pandas/core/indexes/extension.py index 6ff20f7d009bc..8cdd5729d85a4 100644 --- a/pandas/core/indexes/extension.py +++ b/pandas/core/indexes/extension.py @@ -282,7 +282,9 @@ def __getitem__(self, key): return type(self)(result, name=self._name) # Unpack to ndarray for MPL compat - result = result._ndarray + # error: Item "IntervalArray" of "Union[Any, IntervalArray, + # NDArrayBackedExtensionArray]" has no attribute "_ndarray" + result = result._ndarray # type: ignore[union-attr] # Includes cases where we get a 2D ndarray back for MPL compat deprecate_ndim_indexing(result) @@ -438,8 +440,11 @@ class NDArrayBackedExtensionIndex(ExtensionIndex): _data: NDArrayBackedExtensionArray + # Argument 1 of "_simple_new" is incompatible with supertype 
"ExtensionIndex"; + # supertype defines the argument type as + # "Union[IntervalArray, NDArrayBackedExtensionArray]" @classmethod - def _simple_new( + def _simple_new( # type: ignore[override] cls, values: NDArrayBackedExtensionArray, name: Hashable = None, diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 072ab7dff8e5b..fa9aa042c2b75 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -200,6 +200,8 @@ class IntervalIndex(ExtensionIndex): is_non_overlapping_monotonic: bool closed_left: bool closed_right: bool + open_left: bool + open_right: bool _data: IntervalArray _values: IntervalArray @@ -368,7 +370,8 @@ def inferred_type(self) -> str: """Return a string of the type inferred from the values""" return "interval" - @Appender(Index.memory_usage.__doc__) + # error: Cannot determine type of 'memory_usage' + @Appender(Index.memory_usage.__doc__) # type: ignore[has-type] def memory_usage(self, deep: bool = False) -> int: # we don't use an explicit engine # so return the bytes here @@ -511,8 +514,9 @@ def _maybe_convert_i8(self, key): # convert left/right and reconstruct left = self._maybe_convert_i8(key.left) right = self._maybe_convert_i8(key.right) - constructor = Interval if scalar else IntervalIndex.from_arrays - return constructor(left, right, closed=self.closed) + if scalar: + return Interval(left, right, closed=self.closed) + return IntervalIndex.from_arrays(left, right, closed=self.closed) if scalar: # Timestamp/Timedelta diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 821d696200175..54ab39819aaf0 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1233,7 +1233,8 @@ def f(level): return any(f(level) for level in self._inferred_type_levels) - @doc(Index.memory_usage) + # error: Cannot determine type of 'memory_usage' + @doc(Index.memory_usage) # type: ignore[has-type] def memory_usage(self, deep: bool = False) -> int: # we are 
overwriting our base class to avoid # computing .values here which could materialize diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index c1104b80a0a7a..53486fc0c191e 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -166,8 +166,9 @@ class PeriodIndex(DatetimeIndexOpsMixin): # methods that dispatch to array and wrap result in Index # These are defined here instead of via inherit_names for mypy + # error: Cannot determine type of 'asfreq' @doc( - PeriodArray.asfreq, + PeriodArray.asfreq, # type: ignore[has-type] other="pandas.arrays.PeriodArray", other_name="PeriodArray", **_shared_doc_kwargs, @@ -184,21 +185,24 @@ def to_timestamp(self, freq=None, how="start") -> DatetimeIndex: # https://github.com/python/mypy/issues/1362 # error: Decorated property not supported @property # type:ignore[misc] - @doc(PeriodArray.hour.fget) + # error: Cannot determine type of 'hour' + @doc(PeriodArray.hour.fget) # type: ignore[has-type] def hour(self) -> Int64Index: return Int64Index(self._data.hour, name=self.name) # https://github.com/python/mypy/issues/1362 # error: Decorated property not supported @property # type:ignore[misc] - @doc(PeriodArray.minute.fget) + # error: Cannot determine type of 'minute' + @doc(PeriodArray.minute.fget) # type: ignore[has-type] def minute(self) -> Int64Index: return Int64Index(self._data.minute, name=self.name) # https://github.com/python/mypy/issues/1362 # error: Decorated property not supported @property # type:ignore[misc] - @doc(PeriodArray.second.fget) + # error: Cannot determine type of 'second' + @doc(PeriodArray.second.fget) # type: ignore[has-type] def second(self) -> Int64Index: return Int64Index(self._data.second, name=self.name) diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 746246172b967..912be518b64b2 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -450,7 +450,8 @@ def tolist(self) -> list[int]: def 
__iter__(self): yield from self._range - @doc(Int64Index._shallow_copy) + # error: Cannot determine type of '_shallow_copy' + @doc(Int64Index._shallow_copy) # type: ignore[has-type] def _shallow_copy(self, values, name: Hashable = no_default): name = self.name if name is no_default else name diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index c60ab06dd08f3..89b55e779ed8d 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -118,7 +118,8 @@ def __new__( unit=None, freq=lib.no_default, closed=None, - dtype=TD64NS_DTYPE, + # error: Cannot determine type of 'TD64NS_DTYPE' + dtype=TD64NS_DTYPE, # type: ignore[has-type] copy=False, name=None, ): diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 143999a4677b3..9420d02aea449 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -1685,9 +1685,12 @@ def _asof_by_function(direction: str): _type_casters = { - "int64_t": ensure_int64, - "double": ensure_float64, - "object": ensure_object, + # error: Cannot determine type of 'ensure_int64' + "int64_t": ensure_int64, # type: ignore[has-type] + # error: Cannot determine type of 'ensure_float64' + "double": ensure_float64, # type: ignore[has-type] + # error: Cannot determine type of 'ensure_object' + "object": ensure_object, # type: ignore[has-type] } @@ -1951,12 +1954,11 @@ def flip(xs) -> np.ndarray: right_by_values = flip(right_by_values) # upcast 'by' parameter because HashTable is limited + # TODO: HashTable not so limited anymore? 
by_type = _get_cython_type_upcast(left_by_values.dtype) by_type_caster = _type_casters[by_type] - # error: Cannot call function of unknown type - left_by_values = by_type_caster(left_by_values) # type: ignore[operator] - # error: Cannot call function of unknown type - right_by_values = by_type_caster(right_by_values) # type: ignore[operator] + left_by_values = by_type_caster(left_by_values) + right_by_values = by_type_caster(right_by_values) # choose appropriate function by type func = _asof_by_function(self.direction) diff --git a/pandas/core/series.py b/pandas/core/series.py index 59ea6710ea6cd..505e415202bb0 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2787,7 +2787,8 @@ def __rmatmul__(self, other): """ return self.dot(np.transpose(other)) - @doc(base.IndexOpsMixin.searchsorted, klass="Series") + # error: Cannot determine type of 'searchsorted' + @doc(base.IndexOpsMixin.searchsorted, klass="Series") # type: ignore[has-type] def searchsorted(self, value, side="left", sorter=None) -> np.ndarray: return algorithms.searchsorted(self._values, value, side=side, sorter=sorter) diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py index 2a86ff13a2edc..7e9219b636a62 100644 --- a/pandas/io/parsers/base_parser.py +++ b/pandas/io/parsers/base_parser.py @@ -9,6 +9,7 @@ Any, Callable, DefaultDict, + Hashable, Iterable, Sequence, cast, @@ -121,6 +122,8 @@ class BadLineHandleMethod(Enum): _implicit_index: bool = False _first_chunk: bool + index_col: int | Sequence[int] | None + index_names: list[Hashable] | None def __init__(self, kwds): @@ -130,7 +133,7 @@ def __init__(self, kwds): self.index_col = kwds.get("index_col", None) self.unnamed_cols: set = set() - self.index_names: list | None = None + self.index_names: list[Hashable] | None = None self.col_names = None self.parse_dates = _validate_parse_dates_arg(kwds.pop("parse_dates", False)) @@ -178,8 +181,14 @@ def __init__(self, kwds): if self.index_col is not None: is_sequence 
= isinstance(self.index_col, (list, tuple, np.ndarray)) if not ( - is_sequence - and all(map(is_integer, self.index_col)) + # error: Argument 2 to "map" has incompatible type + # "Union[int, Sequence[int]]"; expected "Iterable[int]" + ( + is_sequence + and all( + map(is_integer, self.index_col) # type: ignore[arg-type] + ) + ) or is_integer(self.index_col) ): raise ValueError( @@ -298,8 +307,12 @@ def _should_parse_dates(self, i: int) -> bool: name = self.index_names[i] else: name = None - j = i if self.index_col is None else self.index_col[i] - + # error: Value of type "Union[int, Sequence[int]]" is not indexable + j = ( + i + if self.index_col is None + else self.index_col[i] # type: ignore[index] + ) if is_scalar(self.parse_dates): return (j == self.parse_dates) or ( name is not None and name == self.parse_dates @@ -328,7 +341,9 @@ def _extract_multi_indexer_columns( ic = [] if not isinstance(ic, (list, tuple, np.ndarray)): - ic = [ic] + # error: List item 0 has incompatible type + # "Union[int, Sequence[int]]"; expected "int" + ic = [ic] # type: ignore[list-item] sic = set(ic) # clean the index_names @@ -344,7 +359,9 @@ def extract(r): return tuple(r[i] for i in range(field_count) if i not in sic) columns = list(zip(*(extract(r) for r in header))) - names = ic + columns + # error: No overload variant of "__add__" of "tuple" matches argument + # type "List[Any]" + names = ic + columns # type: ignore[operator] # If we find unnamed columns all in a single # level, then our header was too long. 
@@ -379,7 +396,12 @@ def _maybe_dedup_names(self, names): if self.mangle_dupe_cols: names = list(names) # so we can index counts: DefaultDict[int | str | tuple, int] = defaultdict(int) - is_potential_mi = _is_potential_multi_index(names, self.index_col) + # error: Argument 2 to "_is_potential_multi_index" has incompatible + # type "Union[int, Sequence[int], None]"; expected + # "Union[bool, Sequence[int], None]" + is_potential_mi = _is_potential_multi_index( + names, self.index_col # type: ignore[arg-type] + ) for i, col in enumerate(names): cur_count = counts[col] @@ -442,7 +464,11 @@ def ix(col): to_remove = [] index = [] - for idx in self.index_col: + # error: Item "int" of "Union[int, Sequence[int], None]" has no + # attribute "__iter__" (not iterable) + # error: Item "None" of "Union[int, Sequence[int], None]" has no + # attribute "__iter__" (not iterable) + for idx in self.index_col: # type: ignore[union-attr] i = ix(idx) to_remove.append(i) index.append(data[i]) @@ -471,7 +497,11 @@ def _get_name(icol): to_remove = [] index = [] - for idx in self.index_col: + # error: Item "int" of "Union[int, Sequence[int], None]" has no + # attribute "__iter__" (not iterable) + # error: Item "None" of "Union[int, Sequence[int], None]" has no + # attribute "__iter__" (not iterable) + for idx in self.index_col: # type: ignore[union-attr] name = _get_name(idx) to_remove.append(name) index.append(data[name]) diff --git a/pandas/io/parsers/c_parser_wrapper.py b/pandas/io/parsers/c_parser_wrapper.py index 5c1f8f94a72da..27deb0ae6c7a5 100644 --- a/pandas/io/parsers/c_parser_wrapper.py +++ b/pandas/io/parsers/c_parser_wrapper.py @@ -39,10 +39,7 @@ def __init__(self, src: FilePathOrBuffer, **kwds): self.low_memory = kwds.pop("low_memory", False) # #2442 - # error: Cannot determine type of 'index_col' - kwds["allow_leading_cols"] = ( - self.index_col is not False # type: ignore[has-type] - ) + kwds["allow_leading_cols"] = self.index_col is not False # GH20529, validate usecol arg 
before TextReader kwds["usecols"] = self.usecols @@ -82,7 +79,6 @@ def __init__(self, src: FilePathOrBuffer, **kwds): if len(self._reader.header) > 1: # we have a multi index in the columns # error: Cannot determine type of 'names' - # error: Cannot determine type of 'index_names' # error: Cannot determine type of 'col_names' ( self.names, # type: ignore[has-type] @@ -91,7 +87,7 @@ def __init__(self, src: FilePathOrBuffer, **kwds): passed_names, ) = self._extract_multi_indexer_columns( self._reader.header, - self.index_names, # type: ignore[has-type] + self.index_names, self.col_names, # type: ignore[has-type] passed_names, ) @@ -160,10 +156,7 @@ def __init__(self, src: FilePathOrBuffer, **kwds): self.orig_names = self.names # type: ignore[has-type] if not self._has_complex_date_col: - # error: Cannot determine type of 'index_col' - if self._reader.leading_cols == 0 and is_index_col( - self.index_col # type: ignore[has-type] - ): + if self._reader.leading_cols == 0 and is_index_col(self.index_col): self._name_processed = True ( @@ -174,8 +167,7 @@ def __init__(self, src: FilePathOrBuffer, **kwds): ) = self._clean_index_names( # error: Cannot determine type of 'names' self.names, # type: ignore[has-type] - # error: Cannot determine type of 'index_col' - self.index_col, # type: ignore[has-type] + self.index_col, self.unnamed_cols, ) @@ -266,7 +258,8 @@ def read(self, nrows=None): if self.index_col is None: values = data.pop(i) else: - values = data.pop(self.index_col[i]) + # error: Value of type "Union[int, Sequence[int]]" is not indexable + values = data.pop(self.index_col[i]) # type: ignore[index] values = self._maybe_parse_dates(values, i, try_parse_dates=True) arrays.append(values) diff --git a/pandas/io/parsers/python_parser.py b/pandas/io/parsers/python_parser.py index 670868c6f4261..0ad4abcb1b91e 100644 --- a/pandas/io/parsers/python_parser.py +++ b/pandas/io/parsers/python_parser.py @@ -117,7 +117,6 @@ def __init__(self, f: FilePathOrBuffer | list, **kwds): # 
The original set is stored in self.original_columns. if len(self.columns) > 1: # we are processing a multi index column - # error: Cannot determine type of 'index_names' # error: Cannot determine type of 'col_names' ( self.columns, @@ -126,7 +125,7 @@ def __init__(self, f: FilePathOrBuffer | list, **kwds): _, ) = self._extract_multi_indexer_columns( self.columns, - self.index_names, # type: ignore[has-type] + self.index_names, self.col_names, # type: ignore[has-type] ) # Update list of original names to include all indices. @@ -256,10 +255,9 @@ def read(self, rows=None): if not len(content): # pragma: no cover # DataFrame with the right metadata, even though it's length 0 names = self._maybe_dedup_names(self.orig_names) - # error: Cannot determine type of 'index_col' index, columns, col_dict = self._get_empty_meta( names, - self.index_col, # type: ignore[has-type] + self.index_col, self.index_names, self.dtype, ) @@ -288,8 +286,9 @@ def _exclude_implicit_index(self, alldata): offset = 0 if self._implicit_index: - # error: Cannot determine type of 'index_col' - offset = len(self.index_col) # type: ignore[has-type] + # error: Argument 1 to "len" has incompatible type + # "Union[int, Sequence[int], None]"; expected "Sized" + offset = len(self.index_col) # type: ignore[arg-type] len_alldata = len(alldata) return { @@ -438,9 +437,12 @@ def _infer_columns(self): # line for the rest of the parsing code if hr == header[-1]: lc = len(this_columns) - # error: Cannot determine type of 'index_col' - sic = self.index_col # type: ignore[has-type] - ic = len(sic) if sic is not None else 0 + sic = self.index_col + # error: Argument 1 to "len" has incompatible type + # "Union[int, Sequence[int]]"; expected "Sized" + ic = ( + len(sic) if sic is not None else 0 # type: ignore[arg-type] + ) unnamed_count = len(this_unnamed_cols) # if wrong number of blanks or no index, not our format @@ -879,8 +881,7 @@ def _get_index_name(self, columns): if line is not None: # leave it 0, #2442 # 
Case 1 - # error: Cannot determine type of 'index_col' - index_col = self.index_col # type: ignore[has-type] + index_col = self.index_col if index_col is not False: implicit_first_cols = len(line) - self.num_original_columns @@ -919,20 +920,16 @@ def _rows_to_cols(self, content): col_len = self.num_original_columns if self._implicit_index: - col_len += len(self.index_col) + # error: Argument 1 to "len" has incompatible type + # "Union[int, Sequence[int]]"; expected "Sized" + col_len += len(self.index_col) # type: ignore[arg-type] max_len = max(len(row) for row in content) # Check that there are no rows with too many # elements in their row (rows with too few # elements are padded with NaN). - # error: Non-overlapping identity check (left operand type: "List[int]", - # right operand type: "Literal[False]") - if ( - max_len > col_len - and self.index_col is not False # type: ignore[comparison-overlap] - and self.usecols is None - ): + if max_len > col_len and self.index_col is not False and self.usecols is None: footers = self.skipfooter if self.skipfooter else 0 bad_lines = [] @@ -984,13 +981,13 @@ def _rows_to_cols(self, content): col_indices = self._col_indices if self._implicit_index: + # error: Argument 1 to "len" has incompatible type + # "Union[int, Sequence[int]]"; expected "Sized" + lic = len(self.index_col) # type: ignore[arg-type] zipped_content = [ a for i, a in enumerate(zipped_content) - if ( - i < len(self.index_col) - or i - len(self.index_col) in col_indices - ) + if (i < lic or i - lic in col_indices) ] else: zipped_content = [