diff --git a/pandas/_libs/lib.pyi b/pandas/_libs/lib.pyi
index 4c647056641f5..8af7c1a026fc6 100644
--- a/pandas/_libs/lib.pyi
+++ b/pandas/_libs/lib.pyi
@@ -5,6 +5,8 @@ from typing import (
     Any,
     Callable,
     Generator,
+    Literal,
+    overload,
 )
 
 import numpy as np
@@ -51,7 +53,7 @@ def is_float_array(values: np.ndarray, skipna: bool = False): ...
 def is_integer_array(values: np.ndarray, skipna: bool = False): ...
 def is_bool_array(values: np.ndarray, skipna: bool = False): ...
 
-def fast_multiget(mapping: dict, keys: np.ndarray, default=np.nan) -> ArrayLike: ...
+def fast_multiget(mapping: dict, keys: np.ndarray, default=np.nan) -> np.ndarray: ...
 
 def fast_unique_multiple_list_gen(gen: Generator, sort: bool = True) -> list: ...
 def fast_unique_multiple_list(lists: list, sort: bool = True) -> list: ...
@@ -59,15 +61,57 @@ def fast_unique_multiple(arrays: list, sort: bool = True) -> list: ...
 def map_infer(
     arr: np.ndarray, f: Callable[[Any], Any], convert: bool = True, ignore_na: bool = False
+) -> np.ndarray: ...
+
+
+@overload  # both convert_datetime and convert_to_nullable_integer False -> np.ndarray
+def maybe_convert_objects(
+    objects: np.ndarray,  # np.ndarray[object]
+    try_float: bool = ...,
+    safe: bool = ...,
+    convert_datetime: Literal[False] = ...,
+    convert_timedelta: bool = ...,
+    convert_to_nullable_integer: Literal[False] = ...,
+) -> np.ndarray: ...
+
+@overload
+def maybe_convert_objects(
+    objects: np.ndarray,  # np.ndarray[object]
+    try_float: bool = ...,
+    safe: bool = ...,
+    convert_datetime: Literal[False] = ...,
+    convert_timedelta: bool = ...,
+    convert_to_nullable_integer: Literal[True] = ...,
 ) -> ArrayLike: ...
 
+@overload
 def maybe_convert_objects(
     objects: np.ndarray,  # np.ndarray[object]
-    try_float: bool = False,
-    safe: bool = False,
-    convert_datetime: bool = False,
-    convert_timedelta: bool = False,
-    convert_to_nullable_integer: bool = False,
+    try_float: bool = ...,
+    safe: bool = ...,
+    convert_datetime: Literal[True] = ...,
+    convert_timedelta: bool = ...,
+    convert_to_nullable_integer: Literal[False] = ...,
+) -> ArrayLike: ...
+
+@overload
+def maybe_convert_objects(
+    objects: np.ndarray,  # np.ndarray[object]
+    try_float: bool = ...,
+    safe: bool = ...,
+    convert_datetime: Literal[True] = ...,
+    convert_timedelta: bool = ...,
+    convert_to_nullable_integer: Literal[True] = ...,
+) -> ArrayLike: ...
+
+@overload
+def maybe_convert_objects(
+    objects: np.ndarray,  # np.ndarray[object]
+    try_float: bool = ...,
+    safe: bool = ...,
+    convert_datetime: bool = ...,
+    convert_timedelta: bool = ...,
+    convert_to_nullable_integer: bool = ...,
 ) -> ArrayLike: ...
 
 def maybe_convert_numeric(
@@ -140,7 +184,7 @@ def map_infer_mask(
     convert: bool = ...,
     na_value: Any = ...,
     dtype: np.dtype = ...,
-) -> ArrayLike: ...
+) -> np.ndarray: ...
 
 def indices_fast(
     index: np.ndarray,  # ndarray[intp_t]
diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
index 77375cac39921..c18cd56be6cc4 100644
--- a/pandas/_libs/lib.pyx
+++ b/pandas/_libs/lib.pyx
@@ -2488,7 +2488,7 @@ no_default = NoDefault.no_default  # Sentinel indicating the default value.
 @cython.wraparound(False)
 def map_infer_mask(ndarray arr, object f, const uint8_t[:] mask, bint convert=True,
                    object na_value=no_default, cnp.dtype dtype=np.dtype(object)
-                   ) -> "ArrayLike":
+                   ) -> np.ndarray:
     """
     Substitute for np.vectorize with pandas-friendly dtype inference.
 
@@ -2508,7 +2508,7 @@ def map_infer_mask(ndarray arr, object f, const uint8_t[:] mask, bint convert=Tr
 
     Returns
     -------
-    np.ndarray or ExtensionArray
+    np.ndarray
     """
     cdef:
         Py_ssize_t i, n
@@ -2545,7 +2545,7 @@ def map_infer_mask(ndarray arr, object f, const uint8_t[:] mask, bint convert=Tr
 @cython.wraparound(False)
 def map_infer(
     ndarray arr, object f, bint convert=True, bint ignore_na=False
-) -> "ArrayLike":
+) -> np.ndarray:
     """
     Substitute for np.vectorize with pandas-friendly dtype inference.
 
@@ -2559,7 +2559,7 @@ def map_infer(
 
     Returns
    -------
-    np.ndarray or ExtensionArray
+    np.ndarray
     """
     cdef:
         Py_ssize_t i, n
@@ -2697,7 +2697,7 @@ def to_object_array_tuples(rows: object) -> np.ndarray:
 
 @cython.wraparound(False)
 @cython.boundscheck(False)
-def fast_multiget(dict mapping, ndarray keys, default=np.nan) -> "ArrayLike":
+def fast_multiget(dict mapping, ndarray keys, default=np.nan) -> np.ndarray:
     cdef:
         Py_ssize_t i, n = len(keys)
         object val
diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py
index 4a5dca348a8c0..93df88aba2cba 100644
--- a/pandas/core/arrays/datetimelike.py
+++ b/pandas/core/arrays/datetimelike.py
@@ -262,9 +262,7 @@ def _box_values(self, values) -> np.ndarray:
         """
         apply box func to passed values
         """
-        # error: Incompatible return value type (got
-        # "Union[ExtensionArray, ndarray]", expected "ndarray")
-        return lib.map_infer(values, self._box_func)  # type: ignore[return-value]
+        return lib.map_infer(values, self._box_func)
 
     def __iter__(self):
         if self.ndim > 1:
diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py
index 6954b512c7ad0..307517eedb2cd 100644
--- a/pandas/core/arrays/string_.py
+++ b/pandas/core/arrays/string_.py
@@ -450,9 +450,7 @@ def _str_map(self, f, na_value=None, dtype: Dtype | None = None):
             if not na_value_is_na:
                 mask[:] = False
 
-            # error: Argument 1 to "maybe_convert_objects" has incompatible
-            # type "Union[ExtensionArray, ndarray]"; expected "ndarray"
-            return constructor(result, mask)  # type: ignore[arg-type]
+            return constructor(result, mask)
 
         elif is_string_dtype(dtype) and not is_object_dtype(dtype):
             # i.e. StringDtype
diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py
index b7a0e70180ae4..180ed51e7fd2b 100644
--- a/pandas/core/arrays/string_arrow.py
+++ b/pandas/core/arrays/string_arrow.py
@@ -420,10 +420,8 @@ def fillna(self, value=None, method=None, limit=None):
         if mask.any():
             if method is not None:
                 func = missing.get_fill_func(method)
-                # error: Argument 1 to "to_numpy" of "ArrowStringArray" has incompatible
-                # type "Type[object]"; expected "Union[str, dtype[Any], None]"
                 new_values, _ = func(
-                    self.to_numpy(object),  # type: ignore[arg-type]
+                    self.to_numpy("object"),
                     limit=limit,
                     mask=mask,
                 )
@@ -740,11 +738,7 @@ def _str_map(self, f, na_value=None, dtype: Dtype | None = None):
             if not na_value_is_na:
                 mask[:] = False
 
-            # error: Argument 1 to "IntegerArray" has incompatible type
-            # "Union[ExtensionArray, ndarray]"; expected "ndarray"
-            # error: Argument 1 to "BooleanArray" has incompatible type
-            # "Union[ExtensionArray, ndarray]"; expected "ndarray"
-            return constructor(result, mask)  # type: ignore[arg-type]
+            return constructor(result, mask)
 
         elif is_string_dtype(dtype) and not is_object_dtype(dtype):
             # i.e. StringDtype
diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
index 8f8c2ba2981c8..1dcb2b97476a3 100644
--- a/pandas/core/groupby/ops.py
+++ b/pandas/core/groupby/ops.py
@@ -996,8 +996,8 @@ def _aggregate_series_pure_python(self, obj: Series, func: F):
             counts[label] = group.shape[0]
             result[label] = res
 
-        out = lib.maybe_convert_objects(result, try_float=False)
-        out = maybe_cast_pointwise_result(out, obj.dtype, numeric_only=True)
+        npvalues = lib.maybe_convert_objects(result, try_float=False)
+        out = maybe_cast_pointwise_result(npvalues, obj.dtype, numeric_only=True)
         return out, counts
diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
index 487047f1a1dbb..2e16730289ff0 100644
--- a/pandas/core/internals/managers.py
+++ b/pandas/core/internals/managers.py
@@ -35,7 +35,6 @@
     ensure_platform_int,
     is_1d_only_ea_dtype,
     is_dtype_equal,
-    is_extension_array_dtype,
     is_list_like,
 )
 from pandas.core.dtypes.dtypes import ExtensionDtype
@@ -701,7 +700,7 @@ def _interleave(
         # Give EAs some input on what happens here. Sparse needs this.
         if isinstance(dtype, SparseDtype):
             dtype = dtype.subtype
-        elif is_extension_array_dtype(dtype):
+        elif isinstance(dtype, ExtensionDtype):
             dtype = "object"
         elif is_dtype_equal(dtype, str):
             dtype = "object"
@@ -709,8 +708,8 @@ def _interleave(
         # error: Argument "dtype" to "empty" has incompatible type
         # "Union[ExtensionDtype, str, dtype[Any], Type[object], None]"; expected
         # "Union[dtype[Any], None, type, _SupportsDType, str, Union[Tuple[Any, int],
-        # Tuple[Any, Union[int, Sequence[int]]], List[Any], _DTypeDict, Tuple[Any,
-        # Any]]]"
+        # Tuple[Any, Union[int, Sequence[int]]], List[Any], _DTypeDict,
+        # Tuple[Any, Any]]]"
         result = np.empty(self.shape, dtype=dtype)  # type: ignore[arg-type]
 
         itemmask = np.zeros(self.shape[0])
@@ -1108,16 +1107,12 @@ def fast_xs(self, loc: int) -> ArrayLike:
         dtype = interleaved_dtype([blk.dtype for blk in self.blocks])
 
         n = len(self)
-        if is_extension_array_dtype(dtype):
+        if isinstance(dtype, ExtensionDtype):
            # we'll eventually construct an ExtensionArray.
             result = np.empty(n, dtype=object)
+            # TODO: let's just use dtype.empty?
         else:
-            # error: Argument "dtype" to "empty" has incompatible type
-            # "Union[dtype, ExtensionDtype, None]"; expected "Union[dtype,
-            # None, type, _SupportsDtype, str, Tuple[Any, int], Tuple[Any,
-            # Union[int, Sequence[int]]], List[Any], _DtypeDict, Tuple[Any,
-            # Any]]"
-            result = np.empty(n, dtype=dtype)  # type: ignore[arg-type]
+            result = np.empty(n, dtype=dtype)
 
         result = ensure_wrapped_if_datetimelike(result)
diff --git a/pandas/core/series.py b/pandas/core/series.py
index da25ca8e3f653..6a3e997a58754 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -3087,10 +3087,8 @@ def combine(self, other, func, fill_value=None) -> Series:
             new_name = self.name
 
         # try_float=False is to match _aggregate_series_pure_python
-        res_values = lib.maybe_convert_objects(new_values, try_float=False)
-        res_values = maybe_cast_pointwise_result(
-            res_values, self.dtype, same_dtype=False
-        )
+        npvalues = lib.maybe_convert_objects(new_values, try_float=False)
+        res_values = maybe_cast_pointwise_result(npvalues, self.dtype, same_dtype=False)
         return self._constructor(res_values, index=new_index, name=new_name)
 
     def combine_first(self, other) -> Series:
diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py
index 8577bb5dc311b..4eb469f52fb19 100644
--- a/pandas/core/tools/datetimes.py
+++ b/pandas/core/tools/datetimes.py
@@ -252,9 +252,7 @@ def _convert_and_box_cache(
     from pandas import Series
 
     result = Series(arg).map(cache_array)
-    # error: Argument 1 to "_box_as_indexlike" has incompatible type "Series"; expected
-    # "Union[ExtensionArray, ndarray]"
-    return _box_as_indexlike(result, utc=None, name=name)  # type: ignore[arg-type]
+    return _box_as_indexlike(result._values, utc=None, name=name)
 
 
 def _return_parsed_timezone_results(result: np.ndarray, timezones, tz, name) -> Index:
@@ -368,13 +366,11 @@ def _convert_listlike_datetimes(
         arg, _ = maybe_convert_dtype(arg, copy=False)
     except TypeError:
         if errors == "coerce":
-            result = np.array(["NaT"], dtype="datetime64[ns]").repeat(len(arg))
-            return DatetimeIndex(result, name=name)
+            npvalues = np.array(["NaT"], dtype="datetime64[ns]").repeat(len(arg))
+            return DatetimeIndex(npvalues, name=name)
         elif errors == "ignore":
-            # error: Incompatible types in assignment (expression has type
-            # "Index", variable has type "ExtensionArray")
-            result = Index(arg, name=name)  # type: ignore[assignment]
-            return result
+            idx = Index(arg, name=name)
+            return idx
         raise
 
     arg = ensure_object(arg)
@@ -393,37 +389,30 @@ def _convert_listlike_datetimes(
             require_iso8601 = not infer_datetime_format
             format = None
 
-    # error: Incompatible types in assignment (expression has type "None", variable has
-    # type "ExtensionArray")
-    result = None  # type: ignore[assignment]
-
     if format is not None:
-        # error: Incompatible types in assignment (expression has type
-        # "Optional[Index]", variable has type "ndarray")
-        result = _to_datetime_with_format(  # type: ignore[assignment]
+        res = _to_datetime_with_format(
             arg, orig_arg, name, tz, format, exact, errors, infer_datetime_format
         )
-        if result is not None:
-            return result
-
-    if result is None:
-        assert format is None or infer_datetime_format
-        utc = tz == "utc"
-        result, tz_parsed = objects_to_datetime64ns(
-            arg,
-            dayfirst=dayfirst,
-            yearfirst=yearfirst,
-            utc=utc,
-            errors=errors,
-            require_iso8601=require_iso8601,
-            allow_object=True,
-        )
+        if res is not None:
+            return res
 
-        if tz_parsed is not None:
-            # We can take a shortcut since the datetime64 numpy array
-            # is in UTC
-            dta = DatetimeArray(result, dtype=tz_to_dtype(tz_parsed))
-            return DatetimeIndex._simple_new(dta, name=name)
+    assert format is None or infer_datetime_format
+    utc = tz == "utc"
+    result, tz_parsed = objects_to_datetime64ns(
+        arg,
+        dayfirst=dayfirst,
+        yearfirst=yearfirst,
+        utc=utc,
+        errors=errors,
+        require_iso8601=require_iso8601,
+        allow_object=True,
+    )
+
+    if tz_parsed is not None:
+        # We can take a shortcut since the datetime64 numpy array
+        # is in UTC
+        dta = DatetimeArray(result, dtype=tz_to_dtype(tz_parsed))
+        return DatetimeIndex._simple_new(dta, name=name)
 
     utc = tz == "utc"
     return _box_as_indexlike(result, utc=utc, name=name)
@@ -509,13 +498,11 @@ def _to_datetime_with_format(
 
         # fallback
         if result is None:
-            # error: Incompatible types in assignment (expression has type
-            # "Optional[Index]", variable has type "Optional[ndarray]")
-            result = _array_strptime_with_fallback(  # type: ignore[assignment]
+            res = _array_strptime_with_fallback(
                 arg, name, tz, fmt, exact, errors, infer_datetime_format
             )
-            if result is not None:
-                return result
+            if res is not None:
+                return res
 
     except ValueError as e:
         # Fallback to try to convert datetime objects if timezone-aware
diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py
index ba406a1ef117c..9d653c9a5f97c 100644
--- a/pandas/io/formats/format.py
+++ b/pandas/io/formats/format.py
@@ -1318,7 +1318,6 @@ def _format(x):
                 "ExtensionArray formatting should use ExtensionArrayFormatter"
             )
         inferred = lib.map_infer(vals, is_float)
-        inferred = cast(np.ndarray, inferred)
         is_float_type = (
             inferred
             # vals may have 2 or more dimensions
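
Note (not part of the patch): a minimal sketch of what the new Literal overloads in pandas/_libs/lib.pyi buy for callers such as _aggregate_series_pure_python and Series.combine; the input array below is purely illustrative.

    import numpy as np
    from pandas._libs import lib

    # object-dtype input; the values are illustrative
    arr = np.array([1, 2, 3], dtype=object)

    # With the defaults convert_datetime=False and convert_to_nullable_integer=False,
    # the first overload applies, so mypy infers np.ndarray rather than
    # Union[np.ndarray, ExtensionArray], and downstream calls like
    # maybe_cast_pointwise_result(npvalues, ...) no longer need a type: ignore.
    npvalues = lib.maybe_convert_objects(arr, try_float=False)
    assert isinstance(npvalues, np.ndarray)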