Skip to content

CLN/TYP: _libs #40765

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Apr 5, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 25 additions & 26 deletions pandas/_libs/groupby.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ def group_median_float64(ndarray[float64_t, ndim=2] out,
ndarray[int64_t] counts,
ndarray[float64_t, ndim=2] values,
ndarray[intp_t] labels,
Py_ssize_t min_count=-1):
Py_ssize_t min_count=-1) -> None:
"""
Only aggregates on axis=0
"""
Expand Down Expand Up @@ -148,7 +148,7 @@ def group_cumprod_float64(float64_t[:, ::1] out,
const intp_t[:] labels,
int ngroups,
bint is_datetimelike,
bint skipna=True):
bint skipna=True) -> None:
"""
Cumulative product of columns of `values`, in row groups `labels`.

Expand Down Expand Up @@ -205,7 +205,7 @@ def group_cumsum(numeric[:, ::1] out,
const intp_t[:] labels,
int ngroups,
is_datetimelike,
bint skipna=True):
bint skipna=True) -> None:
"""
Cumulative sum of columns of `values`, in row groups `labels`.

Expand Down Expand Up @@ -270,7 +270,7 @@ def group_cumsum(numeric[:, ::1] out,
@cython.boundscheck(False)
@cython.wraparound(False)
def group_shift_indexer(int64_t[::1] out, const intp_t[:] labels,
int ngroups, int periods):
int ngroups, int periods) -> None:
cdef:
Py_ssize_t N, i, j, ii, lab
int offset = 0, sign
Expand Down Expand Up @@ -322,14 +322,14 @@ def group_shift_indexer(int64_t[::1] out, const intp_t[:] labels,
@cython.wraparound(False)
@cython.boundscheck(False)
def group_fillna_indexer(ndarray[int64_t] out, ndarray[intp_t] labels,
ndarray[uint8_t] mask, object direction,
int64_t limit, bint dropna):
ndarray[uint8_t] mask, str direction,
int64_t limit, bint dropna) -> None:
"""
Indexes how to fill values forwards or backwards within a group.

Parameters
----------
out : np.ndarray[np.uint8]
out : np.ndarray[np.int64]
Values into which this method will write its results.
labels : np.ndarray[np.intp]
Array containing unique label for each group, with its ordering
Expand Down Expand Up @@ -392,8 +392,8 @@ def group_any_all(uint8_t[::1] out,
const uint8_t[::1] values,
const intp_t[:] labels,
const uint8_t[::1] mask,
object val_test,
bint skipna):
str val_test,
bint skipna) -> None:
"""
Aggregate boolean values to show truthfulness of group elements.

Expand Down Expand Up @@ -465,7 +465,7 @@ def group_add(complexfloating_t[:, ::1] out,
int64_t[::1] counts,
ndarray[complexfloating_t, ndim=2] values,
const intp_t[:] labels,
Py_ssize_t min_count=0):
Py_ssize_t min_count=0) -> None:
"""
Only aggregates on axis=0 using Kahan summation
"""
Expand Down Expand Up @@ -518,7 +518,7 @@ def group_prod(floating[:, ::1] out,
int64_t[::1] counts,
ndarray[floating, ndim=2] values,
const intp_t[:] labels,
Py_ssize_t min_count=0):
Py_ssize_t min_count=0) -> None:
"""
Only aggregates on axis=0
"""
Expand Down Expand Up @@ -568,7 +568,7 @@ def group_var(floating[:, ::1] out,
ndarray[floating, ndim=2] values,
const intp_t[:] labels,
Py_ssize_t min_count=-1,
int64_t ddof=1):
int64_t ddof=1) -> None:
cdef:
Py_ssize_t i, j, N, K, lab, ncounts = len(counts)
floating val, ct, oldmean
Expand Down Expand Up @@ -621,7 +621,7 @@ def group_mean(floating[:, ::1] out,
int64_t[::1] counts,
ndarray[floating, ndim=2] values,
const intp_t[::1] labels,
Py_ssize_t min_count=-1):
Py_ssize_t min_count=-1) -> None:
cdef:
Py_ssize_t i, j, N, K, lab, ncounts = len(counts)
floating val, count, y, t
Expand Down Expand Up @@ -673,7 +673,7 @@ def group_ohlc(floating[:, ::1] out,
int64_t[::1] counts,
ndarray[floating, ndim=2] values,
const intp_t[:] labels,
Py_ssize_t min_count=-1):
Py_ssize_t min_count=-1) -> None:
"""
Only aggregates on axis=0
"""
Expand Down Expand Up @@ -721,7 +721,7 @@ def group_quantile(ndarray[float64_t] out,
ndarray[intp_t] labels,
ndarray[uint8_t] mask,
float64_t q,
object interpolation):
str interpolation) -> None:
"""
Calculate the quantile per group.

Expand All @@ -733,8 +733,6 @@ def group_quantile(ndarray[float64_t] out,
Array containing the values to apply the function against.
labels : ndarray[np.intp]
Array containing the unique group labels.
values : ndarray
Array containing the values to apply the function against.
q : float
The quantile value to search for.
interpolation : {'linear', 'lower', 'higher', 'nearest', 'midpoint'}
Expand Down Expand Up @@ -865,7 +863,7 @@ def group_last(rank_t[:, ::1] out,
int64_t[::1] counts,
ndarray[rank_t, ndim=2] values,
const intp_t[:] labels,
Py_ssize_t min_count=-1):
Py_ssize_t min_count=-1) -> None:
"""
Only aggregates on axis=0
"""
Expand Down Expand Up @@ -957,8 +955,9 @@ def group_nth(rank_t[:, ::1] out,
int64_t[::1] counts,
ndarray[rank_t, ndim=2] values,
const intp_t[:] labels,
int64_t min_count=-1, int64_t rank=1
):
int64_t min_count=-1,
int64_t rank=1,
) -> None:
"""
Only aggregates on axis=0
"""
Expand Down Expand Up @@ -1050,8 +1049,8 @@ def group_rank(float64_t[:, ::1] out,
ndarray[rank_t, ndim=2] values,
const intp_t[:] labels,
int ngroups,
bint is_datetimelike, object ties_method="average",
bint ascending=True, bint pct=False, object na_option="keep"):
bint is_datetimelike, str ties_method="average",
bint ascending=True, bint pct=False, str na_option="keep") -> None:
"""
Provides the rank of values within each group.

Expand Down Expand Up @@ -1221,7 +1220,7 @@ def group_max(groupby_t[:, ::1] out,
int64_t[::1] counts,
ndarray[groupby_t, ndim=2] values,
const intp_t[:] labels,
Py_ssize_t min_count=-1):
Py_ssize_t min_count=-1) -> None:
"""See group_min_max.__doc__"""
group_min_max(out, counts, values, labels, min_count=min_count, compute_max=True)

Expand All @@ -1232,7 +1231,7 @@ def group_min(groupby_t[:, ::1] out,
int64_t[::1] counts,
ndarray[groupby_t, ndim=2] values,
const intp_t[:] labels,
Py_ssize_t min_count=-1):
Py_ssize_t min_count=-1) -> None:
"""See group_min_max.__doc__"""
group_min_max(out, counts, values, labels, min_count=min_count, compute_max=False)

Expand Down Expand Up @@ -1311,7 +1310,7 @@ def group_cummin(groupby_t[:, ::1] out,
ndarray[groupby_t, ndim=2] values,
const intp_t[:] labels,
int ngroups,
bint is_datetimelike):
bint is_datetimelike) -> None:
"""See group_cummin_max.__doc__"""
group_cummin_max(out, values, labels, ngroups, is_datetimelike, compute_max=False)

Expand All @@ -1322,6 +1321,6 @@ def group_cummax(groupby_t[:, ::1] out,
ndarray[groupby_t, ndim=2] values,
const intp_t[:] labels,
int ngroups,
bint is_datetimelike):
bint is_datetimelike) -> None:
"""See group_cummin_max.__doc__"""
group_cummin_max(out, values, labels, ngroups, is_datetimelike, compute_max=True)
2 changes: 1 addition & 1 deletion pandas/_libs/hashtable.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,6 @@ cdef class Int64Vector:
cdef bint external_view_exists

cdef resize(self)
cpdef to_array(self)
cpdef ndarray to_array(self)
cdef inline void append(self, int64_t x)
cdef extend(self, int64_t[:] x)
6 changes: 3 additions & 3 deletions pandas/_libs/hashtable.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ cdef class Factorizer:
ObjectVector uniques
Py_ssize_t count

def __init__(self, size_hint):
def __init__(self, size_hint: int):
self.table = PyObjectHashTable(size_hint)
self.uniques = ObjectVector()
self.count = 0
Expand Down Expand Up @@ -116,12 +116,12 @@ cdef class Int64Factorizer:
Int64Vector uniques
Py_ssize_t count

def __init__(self, size_hint):
def __init__(self, size_hint: int):
self.table = Int64HashTable(size_hint)
self.uniques = Int64Vector()
self.count = 0

def get_count(self):
def get_count(self) -> int:
return self.count

def factorize(self, const int64_t[:] values, sort=False,
Expand Down
27 changes: 15 additions & 12 deletions pandas/_libs/hashtable_class_helper.pxi.in
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,7 @@ cdef class {{name}}Vector:
def __len__(self) -> int:
return self.data.n

cpdef to_array(self):
cpdef ndarray to_array(self):
if self.data.m != self.data.n:
if self.external_view_exists:
# should never happen
Expand Down Expand Up @@ -288,7 +288,7 @@ cdef class StringVector:
def __len__(self) -> int:
return self.data.n

def to_array(self):
cpdef ndarray[object, ndim=1] to_array(self):
cdef:
ndarray ao
Py_ssize_t n
Expand Down Expand Up @@ -345,7 +345,7 @@ cdef class ObjectVector:
self.data[self.n] = <PyObject*>obj
self.n += 1

def to_array(self):
cpdef ndarray[object, ndim=1] to_array(self):
if self.m != self.n:
if self.external_view_exists:
raise ValueError("should have raised on append()")
Expand Down Expand Up @@ -403,7 +403,7 @@ cdef class {{name}}HashTable(HashTable):
kh_destroy_{{dtype}}(self.table)
self.table = NULL

def __contains__(self, object key):
def __contains__(self, object key) -> bool:
cdef:
khiter_t k
{{c_type}} ckey
Expand Down Expand Up @@ -452,7 +452,7 @@ cdef class {{name}}HashTable(HashTable):
raise KeyError(key)

@cython.boundscheck(False)
def map(self, const {{dtype}}_t[:] keys, const int64_t[:] values):
def map(self, const {{dtype}}_t[:] keys, const int64_t[:] values) -> None:
cdef:
Py_ssize_t i, n = len(values)
int ret = 0
Expand All @@ -466,7 +466,7 @@ cdef class {{name}}HashTable(HashTable):
self.table.vals[k] = <Py_ssize_t>values[i]

@cython.boundscheck(False)
def map_locations(self, const {{dtype}}_t[:] values):
def map_locations(self, const {{dtype}}_t[:] values) -> None:
cdef:
Py_ssize_t i, n = len(values)
int ret = 0
Expand All @@ -480,7 +480,8 @@ cdef class {{name}}HashTable(HashTable):
self.table.vals[k] = i

@cython.boundscheck(False)
def lookup(self, const {{dtype}}_t[:] values):
def lookup(self, const {{dtype}}_t[:] values) -> ndarray:
# -> np.ndarray[np.intp]
cdef:
Py_ssize_t i, n = len(values)
int ret = 0
Expand Down Expand Up @@ -818,7 +819,8 @@ cdef class StringHashTable(HashTable):
return labels

@cython.boundscheck(False)
def lookup(self, ndarray[object] values):
def lookup(self, ndarray[object] values) -> ndarray:
# -> np.ndarray[np.intp]
cdef:
Py_ssize_t i, n = len(values)
int ret = 0
Expand Down Expand Up @@ -853,7 +855,7 @@ cdef class StringHashTable(HashTable):
return np.asarray(locs)

@cython.boundscheck(False)
def map_locations(self, ndarray[object] values):
def map_locations(self, ndarray[object] values) -> None:
cdef:
Py_ssize_t i, n = len(values)
int ret = 0
Expand Down Expand Up @@ -1071,7 +1073,7 @@ cdef class PyObjectHashTable(HashTable):
def __len__(self) -> int:
return self.table.size

def __contains__(self, object key):
def __contains__(self, object key) -> bool:
cdef:
khiter_t k
hash(key)
Expand Down Expand Up @@ -1123,7 +1125,7 @@ cdef class PyObjectHashTable(HashTable):
else:
raise KeyError(key)

def map_locations(self, ndarray[object] values):
def map_locations(self, ndarray[object] values) -> None:
cdef:
Py_ssize_t i, n = len(values)
int ret = 0
Expand All @@ -1137,7 +1139,8 @@ cdef class PyObjectHashTable(HashTable):
k = kh_put_pymap(self.table, <PyObject*>val, &ret)
self.table.vals[k] = i

def lookup(self, ndarray[object] values):
def lookup(self, ndarray[object] values) -> ndarray:
# -> np.ndarray[np.intp]
cdef:
Py_ssize_t i, n = len(values)
int ret = 0
Expand Down
3 changes: 2 additions & 1 deletion pandas/_libs/index.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@ cdef class IndexEngine:
return self._maybe_get_bool_indexer(val)

cdef _maybe_get_bool_indexer(self, object val):
# Returns ndarray[bool] or int
cdef:
ndarray[uint8_t, ndim=1, cast=True] indexer

Expand Down Expand Up @@ -247,7 +248,7 @@ cdef class IndexEngine:

self.need_unique_check = 0

cdef void _call_map_locations(self, values):
cdef void _call_map_locations(self, ndarray values):
self.mapping.map_locations(values)

def clear_mapping(self):
Expand Down
3 changes: 2 additions & 1 deletion pandas/_libs/index_class_helper.pxi.in
Original file line number Diff line number Diff line change
Expand Up @@ -44,10 +44,11 @@ cdef class {{name}}Engine(IndexEngine):
raise KeyError(val)
{{endif}}

cdef void _call_map_locations(self, values):
cdef void _call_map_locations(self, ndarray values):
self.mapping.map_locations(algos.ensure_{{name.lower()}}(values))

cdef _maybe_get_bool_indexer(self, object val):
# Returns ndarray[bool] or int
cdef:
ndarray[uint8_t, ndim=1, cast=True] indexer
ndarray[intp_t, ndim=1] found
Expand Down