Skip to content

CLN/TYP: assorted #43810

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Sep 30, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion asv_bench/benchmarks/indexing_engines.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""
Benchmarks in this fiel depend exclusively on code in _libs/
Benchmarks in this file depend exclusively on code in _libs/

If a PR does not edit anything in _libs, it is very unlikely that benchmarks
in this file will be affected.
Expand Down
6 changes: 3 additions & 3 deletions pandas/_libs/groupby.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@ from pandas._typing import npt

def group_median_float64(
out: np.ndarray, # ndarray[float64_t, ndim=2]
counts: np.ndarray, # ndarray[int64_t]
counts: npt.NDArray[np.int64],
values: np.ndarray, # ndarray[float64_t, ndim=2]
labels: np.ndarray, # ndarray[int64_t]
labels: npt.NDArray[np.int64],
min_count: int = ..., # Py_ssize_t
) -> None: ...
def group_cumprod_float64(
Expand Down Expand Up @@ -37,7 +37,7 @@ def group_fillna_indexer(
out: np.ndarray, # ndarray[intp_t]
labels: np.ndarray, # ndarray[int64_t]
sorted_labels: npt.NDArray[np.intp],
mask: np.ndarray, # ndarray[uint8_t]
mask: npt.NDArray[np.uint8],
direction: Literal["ffill", "bfill"],
limit: int, # int64_t
dropna: bool,
Expand Down
66 changes: 31 additions & 35 deletions pandas/_libs/hashtable.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ from typing import (

import numpy as np

from pandas._typing import npt

def unique_label_indices(
labels: np.ndarray, # const int64_t[:]
) -> np.ndarray: ...
Expand All @@ -19,11 +21,11 @@ class ObjectFactorizer(Factorizer):
uniques: ObjectVector
def factorize(
self,
values: np.ndarray, # ndarray[object]
values: npt.NDArray[np.object_],
sort: bool = ...,
na_sentinel=...,
na_value=...,
) -> np.ndarray: ... # np.ndarray[intp]
) -> npt.NDArray[np.intp]: ...

class Int64Factorizer(Factorizer):
table: Int64HashTable
Expand All @@ -34,77 +36,77 @@ class Int64Factorizer(Factorizer):
sort: bool = ...,
na_sentinel=...,
na_value=...,
) -> np.ndarray: ... # np.ndarray[intp]
) -> npt.NDArray[np.intp]: ...

class Int64Vector:
def __init__(self): ...
def __len__(self) -> int: ...
def to_array(self) -> np.ndarray: ... # np.ndarray[np.int64]
def to_array(self) -> npt.NDArray[np.int64]: ...

class Int32Vector:
def __init__(self): ...
def __len__(self) -> int: ...
def to_array(self) -> np.ndarray: ... # np.ndarray[np.int32]
def to_array(self) -> npt.NDArray[np.int32]: ...

class Int16Vector:
def __init__(self): ...
def __len__(self) -> int: ...
def to_array(self) -> np.ndarray: ... # np.ndarray[np.int16]
def to_array(self) -> npt.NDArray[np.int16]: ...

class Int8Vector:
def __init__(self): ...
def __len__(self) -> int: ...
def to_array(self) -> np.ndarray: ... # np.ndarray[np.int8]
def to_array(self) -> npt.NDArray[np.int8]: ...

class UInt64Vector:
def __init__(self): ...
def __len__(self) -> int: ...
def to_array(self) -> np.ndarray: ... # np.ndarray[np.uint64]
def to_array(self) -> npt.NDArray[np.uint64]: ...

class UInt32Vector:
def __init__(self): ...
def __len__(self) -> int: ...
def to_array(self) -> np.ndarray: ... # np.ndarray[np.uint32]
def to_array(self) -> npt.NDArray[np.uint32]: ...

class UInt16Vector:
def __init__(self): ...
def __len__(self) -> int: ...
def to_array(self) -> np.ndarray: ... # np.ndarray[np.uint16]
def to_array(self) -> npt.NDArray[np.uint16]: ...

class UInt8Vector:
def __init__(self): ...
def __len__(self) -> int: ...
def to_array(self) -> np.ndarray: ... # np.ndarray[np.uint8]
def to_array(self) -> npt.NDArray[np.uint8]: ...

class Float64Vector:
def __init__(self): ...
def __len__(self) -> int: ...
def to_array(self) -> np.ndarray: ... # np.ndarray[np.float64]
def to_array(self) -> npt.NDArray[np.float64]: ...

class Float32Vector:
def __init__(self): ...
def __len__(self) -> int: ...
def to_array(self) -> np.ndarray: ... # np.ndarray[np.float32]
def to_array(self) -> npt.NDArray[np.float32]: ...

class Complex128Vector:
def __init__(self): ...
def __len__(self) -> int: ...
def to_array(self) -> np.ndarray: ... # np.ndarray[np.complex128]
def to_array(self) -> npt.NDArray[np.complex128]: ...

class Complex64Vector:
def __init__(self): ...
def __len__(self) -> int: ...
def to_array(self) -> np.ndarray: ... # np.ndarray[np.complex64]
def to_array(self) -> npt.NDArray[np.complex64]: ...

class StringVector:
def __init__(self): ...
def __len__(self) -> int: ...
def to_array(self) -> np.ndarray: ... # np.ndarray[object]
def to_array(self) -> npt.NDArray[np.object_]: ...

class ObjectVector:
def __init__(self): ...
def __len__(self) -> int: ...
def to_array(self) -> np.ndarray: ... # np.ndarray[object]
def to_array(self) -> npt.NDArray[np.object_]: ...

class HashTable:
# NB: The base HashTable class does _not_ actually have these methods;
Expand All @@ -131,22 +133,22 @@ class HashTable:
def lookup(
self,
values: np.ndarray, # np.ndarray[subclass-specific]
) -> np.ndarray: ... # np.ndarray[np.intp]
) -> npt.NDArray[np.intp]: ...
def get_labels(
self,
values: np.ndarray, # np.ndarray[subclass-specific]
uniques, # SubclassTypeVector
count_prior: int = ...,
na_sentinel: int = ...,
na_value: object = ...,
) -> np.ndarray: ... # np.ndarray[intp_t]
) -> npt.NDArray[np.intp]: ...
def unique(
self,
values: np.ndarray, # np.ndarray[subclass-specific]
return_inverse: bool = ...,
) -> tuple[
np.ndarray, # np.ndarray[subclass-specific]
np.ndarray, # np.ndarray[np.intp],
npt.NDArray[np.intp],
] | np.ndarray: ... # np.ndarray[subclass-specific]
def _unique(
self,
Expand All @@ -159,18 +161,15 @@ class HashTable:
return_inverse: bool = ...,
) -> tuple[
np.ndarray, # np.ndarray[subclass-specific]
np.ndarray, # np.ndarray[np.intp],
npt.NDArray[np.intp],
] | np.ndarray: ... # np.ndarray[subclass-specific]
def factorize(
self,
values: np.ndarray, # np.ndarray[subclass-specific]
na_sentinel: int = ...,
na_value: object = ...,
mask=...,
) -> tuple[
np.ndarray, # np.ndarray[subclass-specific]
np.ndarray, # np.ndarray[np.intp],
]: ...
) -> tuple[np.ndarray, npt.NDArray[np.intp],]: ... # np.ndarray[subclass-specific]

class Complex128HashTable(HashTable): ...
class Complex64HashTable(HashTable): ...
Expand All @@ -182,10 +181,7 @@ class Int64HashTable(HashTable):
def get_labels_groupby(
self,
values: np.ndarray, # const int64_t[:]
) -> tuple[
np.ndarray, # np.ndarray[np.intp]
np.ndarray, # np.ndarray[np.int64]
]: ...
) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.int64],]: ...

class Int32HashTable(HashTable): ...
class Int16HashTable(HashTable): ...
Expand All @@ -200,32 +196,32 @@ class PyObjectHashTable(HashTable): ...
def duplicated_int64(
values: np.ndarray, # const int64_t[:] values
keep: Literal["last", "first", False] = ...,
) -> np.ndarray: ... # np.ndarray[bool]
) -> npt.NDArray[np.bool_]: ...

# TODO: Is it actually bool or is it uint8?

def mode_int64(
values: np.ndarray, # const int64_t[:] values
dropna: bool,
) -> np.ndarray: ... # np.ndarray[np.int64]
) -> npt.NDArray[np.int64]: ...
def value_count_int64(
values: np.ndarray, # const int64_t[:]
dropna: bool,
) -> tuple[np.ndarray, np.ndarray,]: ... # np.ndarray[np.int64] # np.ndarray[np.int64]
) -> tuple[npt.NDArray[np.int64], npt.NDArray[np.int64]]: ...
def duplicated(
values: np.ndarray,
keep: Literal["last", "first", False] = ...,
) -> np.ndarray: ... # np.ndarray[bool]
) -> npt.NDArray[np.bool_]: ...
def mode(values: np.ndarray, dropna: bool) -> np.ndarray: ...
def value_count(
values: np.ndarray,
dropna: bool,
) -> tuple[np.ndarray, np.ndarray,]: ... # np.ndarray[np.int64]
) -> tuple[np.ndarray, npt.NDArray[np.int64],]: ... # np.ndarray[same-as-values]

# arr and values should have same dtype
def ismember(
arr: np.ndarray,
values: np.ndarray,
) -> np.ndarray: ... # np.ndarray[bool]
) -> npt.NDArray[np.bool_]: ...
def object_hash(obj) -> int: ...
def objects_are_equal(a, b) -> bool: ...
2 changes: 0 additions & 2 deletions pandas/_libs/index.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -298,7 +298,6 @@ cdef class IndexEngine:
Py_ssize_t i, j, n, n_t, n_alloc
bint d_has_nan = False, stargets_has_nan = False, need_nan_check = True

self._ensure_mapping_populated()
values = self.values
stargets = set(targets)

Expand Down Expand Up @@ -740,7 +739,6 @@ cdef class BaseMultiIndexCodesEngine:
return self._base.get_loc(self, lab_int)

def get_indexer_non_unique(self, target: np.ndarray) -> np.ndarray:
# target: MultiIndex
indexer = self._base.get_indexer_non_unique(self, target)

return indexer
Expand Down
17 changes: 15 additions & 2 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
DtypeObj,
F,
Shape,
npt,
)
from pandas.util._decorators import cache_readonly
from pandas.util._validators import validate_bool_kwarg
Expand Down Expand Up @@ -1278,7 +1279,13 @@ def where(self, other, cond, errors="raise") -> list[Block]:

return result_blocks

def _unstack(self, unstacker, fill_value, new_placement, allow_fill: bool):
def _unstack(
self,
unstacker,
fill_value,
new_placement: npt.NDArray[np.intp],
allow_fill: bool,
):
"""
Return a list of unstacked blocks of self

Expand Down Expand Up @@ -1668,7 +1675,13 @@ def where(self, other, cond, errors="raise") -> list[Block]:

return [self.make_block_same_class(result)]

def _unstack(self, unstacker, fill_value, new_placement, allow_fill: bool):
def _unstack(
self,
unstacker,
fill_value,
new_placement: npt.NDArray[np.intp],
allow_fill: bool,
):
# ExtensionArray-safe unstack.
# We override ObjectBlock._unstack, which unstacks directly on the
# values of the array. For EA-backed blocks, this would require
Expand Down
14 changes: 10 additions & 4 deletions pandas/core/reshape/merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -2172,10 +2172,16 @@ def _factorize_keys(

rizer = klass(max(len(lk), len(rk)))

llab = rizer.factorize(lk)
rlab = rizer.factorize(rk)
assert llab.dtype == np.intp, llab.dtype
assert rlab.dtype == np.intp, rlab.dtype
# Argument 1 to "factorize" of "ObjectFactorizer" has incompatible type
# "Union[ndarray[Any, dtype[signedinteger[_64Bit]]],
# ndarray[Any, dtype[object_]]]"; expected "ndarray[Any, dtype[object_]]"
llab = rizer.factorize(lk) # type: ignore[arg-type]
# Argument 1 to "factorize" of "ObjectFactorizer" has incompatible type
# "Union[ndarray[Any, dtype[signedinteger[_64Bit]]],
# ndarray[Any, dtype[object_]]]"; expected "ndarray[Any, dtype[object_]]"
rlab = rizer.factorize(rk) # type: ignore[arg-type]
assert llab.dtype == np.dtype(np.intp), llab.dtype
assert rlab.dtype == np.dtype(np.intp), rlab.dtype

count = rizer.get_count()

Expand Down
10 changes: 7 additions & 3 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -449,7 +449,9 @@ def __init__(
self.name = name
self._set_axis(0, index, fastpath=True)

def _init_dict(self, data, index=None, dtype: Dtype | None = None):
def _init_dict(
self, data, index: Index | None = None, dtype: DtypeObj | None = None
):
"""
Derive the "_mgr" and "index" attributes of a new Series from a
dictionary input.
Expand All @@ -458,16 +460,18 @@ def _init_dict(self, data, index=None, dtype: Dtype | None = None):
----------
data : dict or dict-like
Data used to populate the new Series.
index : Index or index-like, default None
index : Index or None, default None
Index for the new Series: if None, use dict keys.
dtype : dtype, default None
dtype : np.dtype, ExtensionDtype, or None, default None
The dtype for the new Series: if None, infer from data.

Returns
-------
_data : BlockManager for the new Series
index : index for the new Series
"""
keys: Index | tuple

# Looking for NaN in dict doesn't work ({np.nan : 1}[float('nan')]
# raises KeyError), so we iterate the entire dict, and align
if data:
Expand Down
2 changes: 2 additions & 0 deletions pandas/tests/test_downstream.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ def df():
# TODO(ArrayManager) dask is still accessing the blocks
# https://github.com/dask/dask/pull/7318
@td.skip_array_manager_not_yet_implemented
@pytest.mark.filterwarnings("ignore:.*64Index is deprecated:FutureWarning")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you file a dask issue about handling this?

def test_dask(df):

toolz = import_module("toolz") # noqa
Expand Down Expand Up @@ -92,6 +93,7 @@ def test_oo_optimized_datetime_index_unpickle():
# Cython import warning
@pytest.mark.filterwarnings("ignore:pandas.util.testing is deprecated")
@pytest.mark.filterwarnings("ignore:can't:ImportWarning")
@pytest.mark.filterwarnings("ignore:.*64Index is deprecated:FutureWarning")
@pytest.mark.filterwarnings(
# patsy needs to update their imports
"ignore:Using or importing the ABCs from 'collections:DeprecationWarning"
Expand Down