-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
TYP: annotate core.algorithms #33944
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -11,7 +11,7 @@ | |
|
||
from pandas._libs import Timestamp, algos, hashtable as htable, lib | ||
from pandas._libs.tslib import iNaT | ||
from pandas._typing import AnyArrayLike, DtypeObj | ||
from pandas._typing import AnyArrayLike, ArrayLike, DtypeObj | ||
from pandas.util._decorators import doc | ||
|
||
from pandas.core.dtypes.cast import ( | ||
|
@@ -44,6 +44,7 @@ | |
is_timedelta64_dtype, | ||
is_unsigned_integer_dtype, | ||
needs_i8_conversion, | ||
pandas_dtype, | ||
) | ||
from pandas.core.dtypes.generic import ( | ||
ABCExtensionArray, | ||
|
@@ -66,7 +67,9 @@ | |
# --------------- # | ||
# dtype access # | ||
# --------------- # | ||
def _ensure_data(values, dtype=None): | ||
def _ensure_data( | ||
values, dtype: Optional[DtypeObj] = None | ||
) -> Tuple[np.ndarray, DtypeObj]: | ||
""" | ||
routine to ensure that our data is of the correct | ||
input dtype for lower-level routines | ||
|
@@ -88,42 +91,43 @@ def _ensure_data(values, dtype=None): | |
Returns | ||
------- | ||
values : ndarray | ||
pandas_dtype : str or dtype | ||
pandas_dtype : np.dtype or ExtensionDtype | ||
""" | ||
|
||
if not isinstance(values, ABCMultiIndex): | ||
# extract_array would raise | ||
values = extract_array(values, extract_numpy=True) | ||
|
||
# we check some simple dtypes first | ||
if is_object_dtype(dtype): | ||
return ensure_object(np.asarray(values)), "object" | ||
return ensure_object(np.asarray(values)), np.dtype("object") | ||
elif is_object_dtype(values) and dtype is None: | ||
return ensure_object(np.asarray(values)), "object" | ||
return ensure_object(np.asarray(values)), np.dtype("object") | ||
|
||
try: | ||
if is_bool_dtype(values) or is_bool_dtype(dtype): | ||
# we are actually coercing to uint64 | ||
# until our algos support uint8 directly (see TODO) | ||
return np.asarray(values).astype("uint64"), "bool" | ||
return np.asarray(values).astype("uint64"), np.dtype("bool") | ||
elif is_signed_integer_dtype(values) or is_signed_integer_dtype(dtype): | ||
return ensure_int64(values), "int64" | ||
return ensure_int64(values), np.dtype("int64") | ||
elif is_unsigned_integer_dtype(values) or is_unsigned_integer_dtype(dtype): | ||
return ensure_uint64(values), "uint64" | ||
return ensure_uint64(values), np.dtype("uint64") | ||
elif is_float_dtype(values) or is_float_dtype(dtype): | ||
return ensure_float64(values), "float64" | ||
return ensure_float64(values), np.dtype("float64") | ||
elif is_complex_dtype(values) or is_complex_dtype(dtype): | ||
|
||
# ignore the fact that we are casting to float | ||
# which discards complex parts | ||
with catch_warnings(): | ||
simplefilter("ignore", np.ComplexWarning) | ||
values = ensure_float64(values) | ||
return values, "float64" | ||
return values, np.dtype("float64") | ||
|
||
except (TypeError, ValueError, OverflowError): | ||
# if we are trying to coerce to a dtype | ||
# and it is incompat this will fall through to here | ||
return ensure_object(values), "object" | ||
return ensure_object(values), np.dtype("object") | ||
|
||
# datetimelike | ||
vals_dtype = getattr(values, "dtype", None) | ||
|
@@ -159,7 +163,7 @@ def _ensure_data(values, dtype=None): | |
is_categorical_dtype(dtype) or dtype is None | ||
): | ||
values = values.codes | ||
dtype = "category" | ||
dtype = pandas_dtype("category") | ||
|
||
# we are actually coercing to int64 | ||
# until our algos support int* directly (not all do) | ||
|
@@ -169,22 +173,24 @@ def _ensure_data(values, dtype=None): | |
|
||
# we have failed, return object | ||
values = np.asarray(values, dtype=np.object) | ||
return ensure_object(values), "object" | ||
return ensure_object(values), np.dtype("object") | ||
|
||
|
||
def _reconstruct_data(values, dtype, original): | ||
def _reconstruct_data( | ||
values: ArrayLike, dtype: DtypeObj, original: AnyArrayLike | ||
) -> ArrayLike: | ||
""" | ||
reverse of _ensure_data | ||
|
||
Parameters | ||
---------- | ||
values : ndarray | ||
dtype : pandas_dtype | ||
original : ndarray-like | ||
values : np.ndarray or ExtensionArray | ||
dtype : np.dtype or ExtensionDtype | ||
original : AnyArrayLike | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Nitpick: values and dtype are 'expanded' aliases; do the same for AnyArrayLike? Again, maybe more prose: is Index only allowed with bool_dtype? What about Series? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. AFAICT it can be Index or Series regardless of dtype, whatever was passed to the top-level function. |
||
|
||
Returns | ||
------- | ||
Index for extension types, otherwise ndarray casted to dtype | ||
ExtensionArray or np.ndarray | ||
""" | ||
if is_extension_array_dtype(dtype): | ||
values = dtype.construct_array_type()._from_sequence(values) | ||
|
@@ -416,6 +422,7 @@ def isin(comps: AnyArrayLike, values: AnyArrayLike) -> np.ndarray: | |
|
||
if not isinstance(values, (ABCIndex, ABCSeries, ABCExtensionArray, np.ndarray)): | ||
values = construct_1d_object_array_from_listlike(list(values)) | ||
# TODO: could use ensure_arraylike here | ||
|
||
comps = extract_array(comps, extract_numpy=True) | ||
if is_categorical_dtype(comps): | ||
|
@@ -729,6 +736,7 @@ def value_counts( | |
return result | ||
|
||
|
||
# Called once from SparseArray | ||
def _value_counts_arraylike(values, dropna: bool): | ||
""" | ||
Parameters | ||
|
@@ -823,6 +831,7 @@ def mode(values, dropna: bool = True) -> "Series": | |
# categorical is a fast-path | ||
if is_categorical_dtype(values): | ||
if isinstance(values, Series): | ||
# TODO: should we be passing `name` below? | ||
return Series(values._values.mode(dropna=dropna), name=values.name) | ||
return values.mode(dropna=dropna) | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ArrayLike is a TypeVar. Is the return type here always the same as the type of values, or is it dependent on dtype?
Maybe just expand on the prose in the docstring until this bites us.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yikes, I think I've been using it incorrectly in a lot of places then. I've been using it as a synonym for
Union[np.ndarray, ExtensionArray]
. I guess I'll do a dedicated pass through the code to weed out those misuses.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
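This is OK if the TypeVar appears only once in the function signature/return.
Once it appears twice, the occurrences are bound to the same type: e.g. if both `values` and the return are annotated `ArrayLike`, the return type must match the type of `values`.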