Skip to content

TYP: mostly Hashtable and ArrowExtensionArray #56689

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits on Jan 2, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 22 additions & 9 deletions pandas/_libs/hashtable.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ from typing import (
Any,
Hashable,
Literal,
overload,
)

import numpy as np
Expand Down Expand Up @@ -180,18 +181,30 @@ class HashTable:
na_value: object = ...,
mask=...,
) -> npt.NDArray[np.intp]: ...
@overload
def unique(
self,
values: np.ndarray, # np.ndarray[subclass-specific]
return_inverse: bool = ...,
mask=...,
) -> (
tuple[
np.ndarray, # np.ndarray[subclass-specific]
npt.NDArray[np.intp],
]
| np.ndarray
): ... # np.ndarray[subclass-specific]
*,
return_inverse: Literal[False] = ...,
mask: None = ...,
) -> np.ndarray: ... # np.ndarray[subclass-specific]
@overload
def unique(
self,
values: np.ndarray, # np.ndarray[subclass-specific]
*,
return_inverse: Literal[True],
mask: None = ...,
) -> tuple[np.ndarray, npt.NDArray[np.intp],]: ... # np.ndarray[subclass-specific]
@overload
def unique(
self,
values: np.ndarray, # np.ndarray[subclass-specific]
*,
return_inverse: Literal[False] = ...,
mask: npt.NDArray[np.bool_],
) -> tuple[np.ndarray, npt.NDArray[np.bool_],]: ... # np.ndarray[subclass-specific]
def factorize(
self,
values: np.ndarray, # np.ndarray[subclass-specific]
Expand Down
6 changes: 3 additions & 3 deletions pandas/_libs/hashtable_class_helper.pxi.in
Original file line number Diff line number Diff line change
Expand Up @@ -755,7 +755,7 @@ cdef class {{name}}HashTable(HashTable):
return uniques.to_array(), result_mask.to_array()
return uniques.to_array()

def unique(self, const {{dtype}}_t[:] values, bint return_inverse=False, object mask=None):
def unique(self, const {{dtype}}_t[:] values, *, bint return_inverse=False, object mask=None):
"""
Calculate unique values and labels (no sorting!)

Expand Down Expand Up @@ -1180,7 +1180,7 @@ cdef class StringHashTable(HashTable):
return uniques.to_array(), labels.base # .base -> underlying ndarray
return uniques.to_array()

def unique(self, ndarray[object] values, bint return_inverse=False, object mask=None):
def unique(self, ndarray[object] values, *, bint return_inverse=False, object mask=None):
"""
Calculate unique values and labels (no sorting!)

Expand Down Expand Up @@ -1438,7 +1438,7 @@ cdef class PyObjectHashTable(HashTable):
return uniques.to_array(), labels.base # .base -> underlying ndarray
return uniques.to_array()

def unique(self, ndarray[object] values, bint return_inverse=False, object mask=None):
def unique(self, ndarray[object] values, *, bint return_inverse=False, object mask=None):
"""
Calculate unique values and labels (no sorting!)

Expand Down
2 changes: 1 addition & 1 deletion pandas/_typing.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ def __len__(self) -> int:
def __iter__(self) -> Iterator[_T_co]:
...

def index(self, value: Any, /, start: int = 0, stop: int = ...) -> int:
def index(self, value: Any, start: int = ..., stop: int = ..., /) -> int:
...

def count(self, value: Any, /) -> int:
Expand Down
9 changes: 6 additions & 3 deletions pandas/compat/pickle_compat.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,10 @@
import copy
import io
import pickle as pkl
from typing import TYPE_CHECKING
from typing import (
TYPE_CHECKING,
Any,
)

import numpy as np

Expand Down Expand Up @@ -209,7 +212,7 @@ def load_newobj_ex(self) -> None:
pass


def load(fh, encoding: str | None = None, is_verbose: bool = False):
def load(fh, encoding: str | None = None, is_verbose: bool = False) -> Any:
"""
Load a pickle, with a provided encoding,

Expand Down Expand Up @@ -239,7 +242,7 @@ def loads(
fix_imports: bool = True,
encoding: str = "ASCII",
errors: str = "strict",
):
) -> Any:
"""
Analogous to pickle._loads.
"""
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ class PandasDelegate:
def _delegate_property_get(self, name: str, *args, **kwargs):
raise TypeError(f"You cannot access the property {name}")

def _delegate_property_set(self, name: str, value, *args, **kwargs):
def _delegate_property_set(self, name: str, value, *args, **kwargs) -> None:
raise TypeError(f"The property {name} cannot be set")

def _delegate_method(self, name: str, *args, **kwargs):
Expand Down
23 changes: 21 additions & 2 deletions pandas/core/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
TYPE_CHECKING,
Literal,
cast,
overload,
)
import warnings

Expand Down Expand Up @@ -181,6 +182,20 @@ def _ensure_data(values: ArrayLike) -> np.ndarray:
return ensure_object(values)


@overload
def _reconstruct_data(
values: ExtensionArray, dtype: DtypeObj, original: AnyArrayLike
) -> ExtensionArray:
...


@overload
def _reconstruct_data(
values: np.ndarray, dtype: DtypeObj, original: AnyArrayLike
) -> np.ndarray:
...


def _reconstruct_data(
values: ArrayLike, dtype: DtypeObj, original: AnyArrayLike
) -> ArrayLike:
Expand Down Expand Up @@ -259,7 +274,9 @@ def _ensure_arraylike(values, func_name: str) -> ArrayLike:
}


def _get_hashtable_algo(values: np.ndarray):
def _get_hashtable_algo(
values: np.ndarray,
) -> tuple[type[htable.HashTable], np.ndarray]:
"""
Parameters
----------
Expand Down Expand Up @@ -1550,7 +1567,9 @@ def safe_sort(
hash_klass, values = _get_hashtable_algo(values) # type: ignore[arg-type]
t = hash_klass(len(values))
t.map_locations(values)
sorter = ensure_platform_int(t.lookup(ordered))
# error: Argument 1 to "lookup" of "HashTable" has incompatible type
# "ExtensionArray | ndarray[Any, Any] | Index | Series"; expected "ndarray"
sorter = ensure_platform_int(t.lookup(ordered)) # type: ignore[arg-type]

if use_na_sentinel:
# take_nd is faster, but only works for na_sentinels of -1
Expand Down
Loading