Skip to content

TYP: overload lib.maybe_convert_objects #41166

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Apr 28, 2021
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 51 additions & 7 deletions pandas/_libs/lib.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ from typing import (
Any,
Callable,
Generator,
Literal,
overload,
)

import numpy as np
Expand Down Expand Up @@ -51,23 +53,65 @@ def is_float_array(values: np.ndarray, skipna: bool = False): ...
def is_integer_array(values: np.ndarray, skipna: bool = False): ...
def is_bool_array(values: np.ndarray, skipna: bool = False): ...

def fast_multiget(mapping: dict, keys: np.ndarray, default=np.nan) -> ArrayLike: ...
def fast_multiget(mapping: dict, keys: np.ndarray, default=np.nan) -> np.ndarray: ...

def fast_unique_multiple_list_gen(gen: Generator, sort: bool = True) -> list: ...
def fast_unique_multiple_list(lists: list, sort: bool = True) -> list: ...
def fast_unique_multiple(arrays: list, sort: bool = True) -> list: ...

def map_infer(
arr: np.ndarray, f: Callable[[Any], Any], convert: bool = True, ignore_na: bool = False
) -> np.ndarray: ...


@overload # both convert_datetime and convert_to_nullable_integer False -> np.ndarray
def maybe_convert_objects(
objects: np.ndarray, # np.ndarray[object]
try_float: bool = ...,
safe: bool = ...,
convert_datetime: Literal[False] = ...,
convert_timedelta: bool = ...,
convert_to_nullable_integer: Literal[False] = ...,
) -> np.ndarray: ...

@overload
def maybe_convert_objects(
objects: np.ndarray, # np.ndarray[object]
try_float: bool = ...,
safe: bool = ...,
    convert_datetime: Literal[False] = ...,
convert_timedelta: bool = ...,
convert_to_nullable_integer: Literal[True] = ...,
) -> ArrayLike: ...

@overload
def maybe_convert_objects(
    objects: np.ndarray,  # np.ndarray[object]
    try_float: bool = ...,
    safe: bool = ...,
    convert_datetime: Literal[True] = ...,
    convert_timedelta: bool = ...,
    convert_to_nullable_integer: Literal[False] = ...,
) -> ArrayLike: ...

@overload
def maybe_convert_objects(
objects: np.ndarray, # np.ndarray[object]
try_float: bool = ...,
safe: bool = ...,
convert_datetime: Literal[True] = ...,
convert_timedelta: bool = ...,
convert_to_nullable_integer: Literal[True] = ...,
) -> ArrayLike: ...

@overload
def maybe_convert_objects(
objects: np.ndarray, # np.ndarray[object]
try_float: bool = ...,
safe: bool = ...,
convert_datetime: bool = ...,
convert_timedelta: bool = ...,
convert_to_nullable_integer: bool = ...,
) -> ArrayLike: ...

def maybe_convert_numeric(
Expand Down Expand Up @@ -140,7 +184,7 @@ def map_infer_mask(
convert: bool = ...,
na_value: Any = ...,
dtype: np.dtype = ...,
) -> ArrayLike: ...
) -> np.ndarray: ...

def indices_fast(
index: np.ndarray, # ndarray[intp_t]
Expand Down
10 changes: 5 additions & 5 deletions pandas/_libs/lib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -2488,7 +2488,7 @@ no_default = NoDefault.no_default # Sentinel indicating the default value.
@cython.wraparound(False)
def map_infer_mask(ndarray arr, object f, const uint8_t[:] mask, bint convert=True,
object na_value=no_default, cnp.dtype dtype=np.dtype(object)
) -> "ArrayLike":
) -> np.ndarray:
"""
Substitute for np.vectorize with pandas-friendly dtype inference.

Expand All @@ -2508,7 +2508,7 @@ def map_infer_mask(ndarray arr, object f, const uint8_t[:] mask, bint convert=Tr

Returns
-------
np.ndarray or ExtensionArray
np.ndarray
"""
cdef:
Py_ssize_t i, n
Expand Down Expand Up @@ -2545,7 +2545,7 @@ def map_infer_mask(ndarray arr, object f, const uint8_t[:] mask, bint convert=Tr
@cython.wraparound(False)
def map_infer(
ndarray arr, object f, bint convert=True, bint ignore_na=False
) -> "ArrayLike":
) -> np.ndarray:
"""
Substitute for np.vectorize with pandas-friendly dtype inference.

Expand All @@ -2559,7 +2559,7 @@ def map_infer(

Returns
-------
np.ndarray or ExtensionArray
np.ndarray
"""
cdef:
Py_ssize_t i, n
Expand Down Expand Up @@ -2697,7 +2697,7 @@ def to_object_array_tuples(rows: object) -> np.ndarray:

@cython.wraparound(False)
@cython.boundscheck(False)
def fast_multiget(dict mapping, ndarray keys, default=np.nan) -> "ArrayLike":
def fast_multiget(dict mapping, ndarray keys, default=np.nan) -> np.ndarray:
cdef:
Py_ssize_t i, n = len(keys)
object val
Expand Down
4 changes: 1 addition & 3 deletions pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -262,9 +262,7 @@ def _box_values(self, values) -> np.ndarray:
"""
apply box func to passed values
"""
# error: Incompatible return value type (got
# "Union[ExtensionArray, ndarray]", expected "ndarray")
return lib.map_infer(values, self._box_func) # type: ignore[return-value]
return lib.map_infer(values, self._box_func)

def __iter__(self):
if self.ndim > 1:
Expand Down
4 changes: 1 addition & 3 deletions pandas/core/arrays/string_.py
Original file line number Diff line number Diff line change
Expand Up @@ -450,9 +450,7 @@ def _str_map(self, f, na_value=None, dtype: Dtype | None = None):
if not na_value_is_na:
mask[:] = False

# error: Argument 1 to "maybe_convert_objects" has incompatible
# type "Union[ExtensionArray, ndarray]"; expected "ndarray"
return constructor(result, mask) # type: ignore[arg-type]
return constructor(result, mask)

elif is_string_dtype(dtype) and not is_object_dtype(dtype):
# i.e. StringDtype
Expand Down
10 changes: 2 additions & 8 deletions pandas/core/arrays/string_arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -420,10 +420,8 @@ def fillna(self, value=None, method=None, limit=None):
if mask.any():
if method is not None:
func = missing.get_fill_func(method)
# error: Argument 1 to "to_numpy" of "ArrowStringArray" has incompatible
# type "Type[object]"; expected "Union[str, dtype[Any], None]"
new_values, _ = func(
self.to_numpy(object), # type: ignore[arg-type]
self.to_numpy("object"),
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is an unrelated change to the scope (from the PR title) of this PR. The mypy error is a false positive, no need to change code. will be fixed in #41185

but I guess nbd (other than merge conflicts)

limit=limit,
mask=mask,
)
Expand Down Expand Up @@ -740,11 +738,7 @@ def _str_map(self, f, na_value=None, dtype: Dtype | None = None):
if not na_value_is_na:
mask[:] = False

# error: Argument 1 to "IntegerArray" has incompatible type
# "Union[ExtensionArray, ndarray]"; expected "ndarray"
# error: Argument 1 to "BooleanArray" has incompatible type
# "Union[ExtensionArray, ndarray]"; expected "ndarray"
return constructor(result, mask) # type: ignore[arg-type]
return constructor(result, mask)

elif is_string_dtype(dtype) and not is_object_dtype(dtype):
# i.e. StringDtype
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/groupby/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -919,8 +919,8 @@ def _aggregate_series_pure_python(self, obj: Series, func: F):
counts[label] = group.shape[0]
result[label] = res

out = lib.maybe_convert_objects(result, try_float=False)
out = maybe_cast_pointwise_result(out, obj.dtype, numeric_only=True)
npvalues = lib.maybe_convert_objects(result, try_float=False)
out = maybe_cast_pointwise_result(npvalues, obj.dtype, numeric_only=True)

return out, counts

Expand Down
17 changes: 6 additions & 11 deletions pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@
ensure_platform_int,
is_1d_only_ea_dtype,
is_dtype_equal,
is_extension_array_dtype,
is_list_like,
)
from pandas.core.dtypes.dtypes import ExtensionDtype
Expand Down Expand Up @@ -701,16 +700,16 @@ def _interleave(
# Give EAs some input on what happens here. Sparse needs this.
if isinstance(dtype, SparseDtype):
dtype = dtype.subtype
elif is_extension_array_dtype(dtype):
elif isinstance(dtype, ExtensionDtype):
dtype = "object"
elif is_dtype_equal(dtype, str):
dtype = "object"

# error: Argument "dtype" to "empty" has incompatible type
# "Union[ExtensionDtype, str, dtype[Any], Type[object], None]"; expected
# "Union[dtype[Any], None, type, _SupportsDType, str, Union[Tuple[Any, int],
# Tuple[Any, Union[int, Sequence[int]]], List[Any], _DTypeDict, Tuple[Any,
# Any]]]"
# Tuple[Any, Union[int, Sequence[int]]], List[Any], _DTypeDict,
# Tuple[Any, Any]]]"
result = np.empty(self.shape, dtype=dtype) # type: ignore[arg-type]

itemmask = np.zeros(self.shape[0])
Expand Down Expand Up @@ -1108,16 +1107,12 @@ def fast_xs(self, loc: int) -> ArrayLike:
dtype = interleaved_dtype([blk.dtype for blk in self.blocks])

n = len(self)
if is_extension_array_dtype(dtype):
if isinstance(dtype, ExtensionDtype):
# we'll eventually construct an ExtensionArray.
result = np.empty(n, dtype=object)
# TODO: let's just use dtype.empty?
else:
# error: Argument "dtype" to "empty" has incompatible type
# "Union[dtype, ExtensionDtype, None]"; expected "Union[dtype,
# None, type, _SupportsDtype, str, Tuple[Any, int], Tuple[Any,
# Union[int, Sequence[int]]], List[Any], _DtypeDict, Tuple[Any,
# Any]]"
result = np.empty(n, dtype=dtype) # type: ignore[arg-type]
result = np.empty(n, dtype=dtype)

result = ensure_wrapped_if_datetimelike(result)

Expand Down
6 changes: 2 additions & 4 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -3087,10 +3087,8 @@ def combine(self, other, func, fill_value=None) -> Series:
new_name = self.name

# try_float=False is to match _aggregate_series_pure_python
res_values = lib.maybe_convert_objects(new_values, try_float=False)
res_values = maybe_cast_pointwise_result(
res_values, self.dtype, same_dtype=False
)
npvalues = lib.maybe_convert_objects(new_values, try_float=False)
res_values = maybe_cast_pointwise_result(npvalues, self.dtype, same_dtype=False)
return self._constructor(res_values, index=new_index, name=new_name)

def combine_first(self, other) -> Series:
Expand Down
9 changes: 6 additions & 3 deletions pandas/core/strings/object_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,9 +61,12 @@ def _str_map(self, f, na_value=None, dtype: Optional[Dtype] = None):
na_value = self._str_na_value

if not len(self):
# error: Argument 1 to "ndarray" has incompatible type "int";
# expected "Sequence[int]"
return np.ndarray(0, dtype=dtype) # type: ignore[arg-type]
# error: Argument "dtype" to "array" has incompatible type
# "Union[ExtensionDtype, str, dtype[Any], Type[object]]"; expected
# "Union[dtype[Any], None, type, _SupportsDType, str,
# Union[Tuple[Any, int], Tuple[Any, Union[int, Sequence[int]]], List[Any],
# _DTypeDict, Tuple[Any, Any]]]"
return np.array([], dtype=dtype) # type: ignore[arg-type]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm confused about why mypy reports this, if I try making a file t.py with

import numpy as np
from pandas._typing import Dtype, Optional

def foo(dtype: Optional[Dtype] = None):
    if dtype is None:
        dtype = np.dtype("object")
    return (np.array([], dtype=type))

then mypy type checks it just fine. Do you know what I might be missing?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

no idea. i think this edit is unrelated to most of the rest; using np.ndarray here instead of np.array weirds me out

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The Dtype alias includes ExtensionDtype which cannot be passed on to numpy

>>> np.array([], dtype=pd.Int64Dtype())
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
TypeError: Cannot interpret 'Int64Dtype()' as a data type
>>> 

Do you know what I might be missing?

return (np.array([], dtype=type)) -> return (np.array([], dtype=dtype))

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm embarrassed, thanks!

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

so this looks like there maybe an actual bug here. As part of the ArrowStringArray work, I have been parameterising existing tests on object dtype arrays with StringArray and ArrowStringArray and this has maybe uncovered some latent bugs with StringArray.

I think OK to leave this ignore as a 'fix later' and out of scope here.


arr = np.asarray(self, dtype=object)
mask = isna(arr)
Expand Down
34 changes: 12 additions & 22 deletions pandas/core/tools/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -252,9 +252,7 @@ def _convert_and_box_cache(
from pandas import Series

result = Series(arg).map(cache_array)
# error: Argument 1 to "_box_as_indexlike" has incompatible type "Series"; expected
# "Union[ExtensionArray, ndarray]"
return _box_as_indexlike(result, utc=None, name=name) # type: ignore[arg-type]
return _box_as_indexlike(result._values, utc=None, name=name)


def _return_parsed_timezone_results(result: np.ndarray, timezones, tz, name) -> Index:
Expand Down Expand Up @@ -368,13 +366,11 @@ def _convert_listlike_datetimes(
arg, _ = maybe_convert_dtype(arg, copy=False)
except TypeError:
if errors == "coerce":
result = np.array(["NaT"], dtype="datetime64[ns]").repeat(len(arg))
return DatetimeIndex(result, name=name)
npvalues = np.array(["NaT"], dtype="datetime64[ns]").repeat(len(arg))
return DatetimeIndex(npvalues, name=name)
elif errors == "ignore":
# error: Incompatible types in assignment (expression has type
# "Index", variable has type "ExtensionArray")
result = Index(arg, name=name) # type: ignore[assignment]
return result
idx = Index(arg, name=name)
return idx
raise

arg = ensure_object(arg)
Expand All @@ -393,18 +389,14 @@ def _convert_listlike_datetimes(
require_iso8601 = not infer_datetime_format
format = None

# error: Incompatible types in assignment (expression has type "None", variable has
# type "ExtensionArray")
result = None # type: ignore[assignment]
result = None
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is this needed? The if result is None will always be True?


if format is not None:
# error: Incompatible types in assignment (expression has type
# "Optional[Index]", variable has type "ndarray")
result = _to_datetime_with_format( # type: ignore[assignment]
res = _to_datetime_with_format(
arg, orig_arg, name, tz, format, exact, errors, infer_datetime_format
)
if result is not None:
return result
if res is not None:
return res

if result is None:
assert format is None or infer_datetime_format
Expand Down Expand Up @@ -509,13 +501,11 @@ def _to_datetime_with_format(

# fallback
if result is None:
# error: Incompatible types in assignment (expression has type
# "Optional[Index]", variable has type "Optional[ndarray]")
result = _array_strptime_with_fallback( # type: ignore[assignment]
res = _array_strptime_with_fallback(
arg, name, tz, fmt, exact, errors, infer_datetime_format
)
if result is not None:
return result
if res is not None:
return res

except ValueError as e:
# Fallback to try to convert datetime objects if timezone-aware
Expand Down
1 change: 0 additions & 1 deletion pandas/io/formats/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -1318,7 +1318,6 @@ def _format(x):
"ExtensionArray formatting should use ExtensionArrayFormatter"
)
inferred = lib.map_infer(vals, is_float)
inferred = cast(np.ndarray, inferred)
is_float_type = (
inferred
# vals may have 2 or more dimensions
Expand Down