Skip to content

TYP: overload lib.maybe_convert_objects #41166

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Apr 28, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 51 additions & 7 deletions pandas/_libs/lib.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ from typing import (
Any,
Callable,
Generator,
Literal,
overload,
)

import numpy as np
Expand Down Expand Up @@ -51,23 +53,65 @@ def is_float_array(values: np.ndarray, skipna: bool = False): ...
def is_integer_array(values: np.ndarray, skipna: bool = False): ...
def is_bool_array(values: np.ndarray, skipna: bool = False): ...

def fast_multiget(mapping: dict, keys: np.ndarray, default=np.nan) -> ArrayLike: ...
def fast_multiget(mapping: dict, keys: np.ndarray, default=np.nan) -> np.ndarray: ...

def fast_unique_multiple_list_gen(gen: Generator, sort: bool = True) -> list: ...
def fast_unique_multiple_list(lists: list, sort: bool = True) -> list: ...
def fast_unique_multiple(arrays: list, sort: bool = True) -> list: ...

def map_infer(
arr: np.ndarray, f: Callable[[Any], Any], convert: bool = True, ignore_na: bool = False
) -> np.ndarray: ...


@overload # both convert_datetime and convert_to_nullable_integer False -> np.ndarray
def maybe_convert_objects(
objects: np.ndarray, # np.ndarray[object]
try_float: bool = ...,
safe: bool = ...,
convert_datetime: Literal[False] = ...,
convert_timedelta: bool = ...,
convert_to_nullable_integer: Literal[False] = ...,
) -> np.ndarray: ...

@overload
def maybe_convert_objects(
objects: np.ndarray, # np.ndarray[object]
try_float: bool = ...,
safe: bool = ...,
convert_datetime: Literal[False] = ...,
convert_timedelta: bool = ...,
convert_to_nullable_integer: Literal[True] = ...,
) -> ArrayLike: ...

@overload
def maybe_convert_objects(
objects: np.ndarray, # np.ndarray[object]
try_float: bool = ...,
safe: bool = ...,
convert_datetime: Literal[True] = ...,
convert_timedelta: bool = ...,
convert_to_nullable_integer: Literal[False] = ...,
) -> ArrayLike: ...

@overload
def maybe_convert_objects(
objects: np.ndarray, # np.ndarray[object]
try_float: bool = ...,
safe: bool = ...,
convert_datetime: Literal[True] = ...,
convert_timedelta: bool = ...,
convert_to_nullable_integer: Literal[True] = ...,
) -> ArrayLike: ...

@overload
def maybe_convert_objects(
objects: np.ndarray, # np.ndarray[object]
try_float: bool = ...,
safe: bool = ...,
convert_datetime: bool = ...,
convert_timedelta: bool = ...,
convert_to_nullable_integer: bool = ...,
) -> ArrayLike: ...

def maybe_convert_numeric(
Expand Down Expand Up @@ -140,7 +184,7 @@ def map_infer_mask(
convert: bool = ...,
na_value: Any = ...,
dtype: np.dtype = ...,
) -> ArrayLike: ...
) -> np.ndarray: ...

def indices_fast(
index: np.ndarray, # ndarray[intp_t]
Expand Down
10 changes: 5 additions & 5 deletions pandas/_libs/lib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -2488,7 +2488,7 @@ no_default = NoDefault.no_default # Sentinel indicating the default value.
@cython.wraparound(False)
def map_infer_mask(ndarray arr, object f, const uint8_t[:] mask, bint convert=True,
object na_value=no_default, cnp.dtype dtype=np.dtype(object)
) -> "ArrayLike":
) -> np.ndarray:
"""
Substitute for np.vectorize with pandas-friendly dtype inference.

Expand All @@ -2508,7 +2508,7 @@ def map_infer_mask(ndarray arr, object f, const uint8_t[:] mask, bint convert=Tr

Returns
-------
np.ndarray or ExtensionArray
np.ndarray
"""
cdef:
Py_ssize_t i, n
Expand Down Expand Up @@ -2545,7 +2545,7 @@ def map_infer_mask(ndarray arr, object f, const uint8_t[:] mask, bint convert=Tr
@cython.wraparound(False)
def map_infer(
ndarray arr, object f, bint convert=True, bint ignore_na=False
) -> "ArrayLike":
) -> np.ndarray:
"""
Substitute for np.vectorize with pandas-friendly dtype inference.

Expand All @@ -2559,7 +2559,7 @@ def map_infer(

Returns
-------
np.ndarray or ExtensionArray
np.ndarray
"""
cdef:
Py_ssize_t i, n
Expand Down Expand Up @@ -2697,7 +2697,7 @@ def to_object_array_tuples(rows: object) -> np.ndarray:

@cython.wraparound(False)
@cython.boundscheck(False)
def fast_multiget(dict mapping, ndarray keys, default=np.nan) -> "ArrayLike":
def fast_multiget(dict mapping, ndarray keys, default=np.nan) -> np.ndarray:
cdef:
Py_ssize_t i, n = len(keys)
object val
Expand Down
4 changes: 1 addition & 3 deletions pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -262,9 +262,7 @@ def _box_values(self, values) -> np.ndarray:
"""
apply box func to passed values
"""
# error: Incompatible return value type (got
# "Union[ExtensionArray, ndarray]", expected "ndarray")
return lib.map_infer(values, self._box_func) # type: ignore[return-value]
return lib.map_infer(values, self._box_func)

def __iter__(self):
if self.ndim > 1:
Expand Down
4 changes: 1 addition & 3 deletions pandas/core/arrays/string_.py
Original file line number Diff line number Diff line change
Expand Up @@ -450,9 +450,7 @@ def _str_map(self, f, na_value=None, dtype: Dtype | None = None):
if not na_value_is_na:
mask[:] = False

# error: Argument 1 to "maybe_convert_objects" has incompatible
# type "Union[ExtensionArray, ndarray]"; expected "ndarray"
return constructor(result, mask) # type: ignore[arg-type]
return constructor(result, mask)

elif is_string_dtype(dtype) and not is_object_dtype(dtype):
# i.e. StringDtype
Expand Down
10 changes: 2 additions & 8 deletions pandas/core/arrays/string_arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -420,10 +420,8 @@ def fillna(self, value=None, method=None, limit=None):
if mask.any():
if method is not None:
func = missing.get_fill_func(method)
# error: Argument 1 to "to_numpy" of "ArrowStringArray" has incompatible
# type "Type[object]"; expected "Union[str, dtype[Any], None]"
new_values, _ = func(
self.to_numpy(object), # type: ignore[arg-type]
self.to_numpy("object"),
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is an unrelated change to the scope (from the PR title) of this PR. The mypy error is a false positive, no need to change code. will be fixed in #41185

but I guess nbd (other than merge conflicts)

limit=limit,
mask=mask,
)
Expand Down Expand Up @@ -740,11 +738,7 @@ def _str_map(self, f, na_value=None, dtype: Dtype | None = None):
if not na_value_is_na:
mask[:] = False

# error: Argument 1 to "IntegerArray" has incompatible type
# "Union[ExtensionArray, ndarray]"; expected "ndarray"
# error: Argument 1 to "BooleanArray" has incompatible type
# "Union[ExtensionArray, ndarray]"; expected "ndarray"
return constructor(result, mask) # type: ignore[arg-type]
return constructor(result, mask)

elif is_string_dtype(dtype) and not is_object_dtype(dtype):
# i.e. StringDtype
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/groupby/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -996,8 +996,8 @@ def _aggregate_series_pure_python(self, obj: Series, func: F):
counts[label] = group.shape[0]
result[label] = res

out = lib.maybe_convert_objects(result, try_float=False)
out = maybe_cast_pointwise_result(out, obj.dtype, numeric_only=True)
npvalues = lib.maybe_convert_objects(result, try_float=False)
out = maybe_cast_pointwise_result(npvalues, obj.dtype, numeric_only=True)

return out, counts

Expand Down
17 changes: 6 additions & 11 deletions pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@
ensure_platform_int,
is_1d_only_ea_dtype,
is_dtype_equal,
is_extension_array_dtype,
is_list_like,
)
from pandas.core.dtypes.dtypes import ExtensionDtype
Expand Down Expand Up @@ -701,16 +700,16 @@ def _interleave(
# Give EAs some input on what happens here. Sparse needs this.
if isinstance(dtype, SparseDtype):
dtype = dtype.subtype
elif is_extension_array_dtype(dtype):
elif isinstance(dtype, ExtensionDtype):
dtype = "object"
elif is_dtype_equal(dtype, str):
dtype = "object"

# error: Argument "dtype" to "empty" has incompatible type
# "Union[ExtensionDtype, str, dtype[Any], Type[object], None]"; expected
# "Union[dtype[Any], None, type, _SupportsDType, str, Union[Tuple[Any, int],
# Tuple[Any, Union[int, Sequence[int]]], List[Any], _DTypeDict, Tuple[Any,
# Any]]]"
# Tuple[Any, Union[int, Sequence[int]]], List[Any], _DTypeDict,
# Tuple[Any, Any]]]"
result = np.empty(self.shape, dtype=dtype) # type: ignore[arg-type]

itemmask = np.zeros(self.shape[0])
Expand Down Expand Up @@ -1108,16 +1107,12 @@ def fast_xs(self, loc: int) -> ArrayLike:
dtype = interleaved_dtype([blk.dtype for blk in self.blocks])

n = len(self)
if is_extension_array_dtype(dtype):
if isinstance(dtype, ExtensionDtype):
# we'll eventually construct an ExtensionArray.
result = np.empty(n, dtype=object)
# TODO: let's just use dtype.empty?
else:
# error: Argument "dtype" to "empty" has incompatible type
# "Union[dtype, ExtensionDtype, None]"; expected "Union[dtype,
# None, type, _SupportsDtype, str, Tuple[Any, int], Tuple[Any,
# Union[int, Sequence[int]]], List[Any], _DtypeDict, Tuple[Any,
# Any]]"
result = np.empty(n, dtype=dtype) # type: ignore[arg-type]
result = np.empty(n, dtype=dtype)

result = ensure_wrapped_if_datetimelike(result)

Expand Down
6 changes: 2 additions & 4 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -3087,10 +3087,8 @@ def combine(self, other, func, fill_value=None) -> Series:
new_name = self.name

# try_float=False is to match _aggregate_series_pure_python
res_values = lib.maybe_convert_objects(new_values, try_float=False)
res_values = maybe_cast_pointwise_result(
res_values, self.dtype, same_dtype=False
)
npvalues = lib.maybe_convert_objects(new_values, try_float=False)
res_values = maybe_cast_pointwise_result(npvalues, self.dtype, same_dtype=False)
return self._constructor(res_values, index=new_index, name=new_name)

def combine_first(self, other) -> Series:
Expand Down
69 changes: 28 additions & 41 deletions pandas/core/tools/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -252,9 +252,7 @@ def _convert_and_box_cache(
from pandas import Series

result = Series(arg).map(cache_array)
# error: Argument 1 to "_box_as_indexlike" has incompatible type "Series"; expected
# "Union[ExtensionArray, ndarray]"
return _box_as_indexlike(result, utc=None, name=name) # type: ignore[arg-type]
return _box_as_indexlike(result._values, utc=None, name=name)


def _return_parsed_timezone_results(result: np.ndarray, timezones, tz, name) -> Index:
Expand Down Expand Up @@ -368,13 +366,11 @@ def _convert_listlike_datetimes(
arg, _ = maybe_convert_dtype(arg, copy=False)
except TypeError:
if errors == "coerce":
result = np.array(["NaT"], dtype="datetime64[ns]").repeat(len(arg))
return DatetimeIndex(result, name=name)
npvalues = np.array(["NaT"], dtype="datetime64[ns]").repeat(len(arg))
return DatetimeIndex(npvalues, name=name)
elif errors == "ignore":
# error: Incompatible types in assignment (expression has type
# "Index", variable has type "ExtensionArray")
result = Index(arg, name=name) # type: ignore[assignment]
return result
idx = Index(arg, name=name)
return idx
raise

arg = ensure_object(arg)
Expand All @@ -393,37 +389,30 @@ def _convert_listlike_datetimes(
require_iso8601 = not infer_datetime_format
format = None

# error: Incompatible types in assignment (expression has type "None", variable has
# type "ExtensionArray")
result = None # type: ignore[assignment]

if format is not None:
# error: Incompatible types in assignment (expression has type
# "Optional[Index]", variable has type "ndarray")
result = _to_datetime_with_format( # type: ignore[assignment]
res = _to_datetime_with_format(
arg, orig_arg, name, tz, format, exact, errors, infer_datetime_format
)
if result is not None:
return result

if result is None:
assert format is None or infer_datetime_format
utc = tz == "utc"
result, tz_parsed = objects_to_datetime64ns(
arg,
dayfirst=dayfirst,
yearfirst=yearfirst,
utc=utc,
errors=errors,
require_iso8601=require_iso8601,
allow_object=True,
)
if res is not None:
return res

if tz_parsed is not None:
# We can take a shortcut since the datetime64 numpy array
# is in UTC
dta = DatetimeArray(result, dtype=tz_to_dtype(tz_parsed))
return DatetimeIndex._simple_new(dta, name=name)
assert format is None or infer_datetime_format
utc = tz == "utc"
result, tz_parsed = objects_to_datetime64ns(
arg,
dayfirst=dayfirst,
yearfirst=yearfirst,
utc=utc,
errors=errors,
require_iso8601=require_iso8601,
allow_object=True,
)

if tz_parsed is not None:
# We can take a shortcut since the datetime64 numpy array
# is in UTC
dta = DatetimeArray(result, dtype=tz_to_dtype(tz_parsed))
return DatetimeIndex._simple_new(dta, name=name)

utc = tz == "utc"
return _box_as_indexlike(result, utc=utc, name=name)
Expand Down Expand Up @@ -509,13 +498,11 @@ def _to_datetime_with_format(

# fallback
if result is None:
# error: Incompatible types in assignment (expression has type
# "Optional[Index]", variable has type "Optional[ndarray]")
result = _array_strptime_with_fallback( # type: ignore[assignment]
res = _array_strptime_with_fallback(
arg, name, tz, fmt, exact, errors, infer_datetime_format
)
if result is not None:
return result
if res is not None:
return res

except ValueError as e:
# Fallback to try to convert datetime objects if timezone-aware
Expand Down
1 change: 0 additions & 1 deletion pandas/io/formats/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -1318,7 +1318,6 @@ def _format(x):
"ExtensionArray formatting should use ExtensionArrayFormatter"
)
inferred = lib.map_infer(vals, is_float)
inferred = cast(np.ndarray, inferred)
is_float_type = (
inferred
# vals may have 2 or more dimensions
Expand Down