Skip to content

Commit 81860e3

Browse files
committed
Merge remote-tracking branch 'upstream/master' into boolean-array
2 parents 1a08e61 + ae75f35 commit 81860e3

File tree

10 files changed

+57
-48
lines changed

10 files changed

+57
-48
lines changed

doc/source/whatsnew/v1.1.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -256,6 +256,7 @@ Deprecations
256256
- :meth:`DataFrame.to_dict` has deprecated accepting short names for ``orient`` and will raise in future versions (:issue:`32515`)
257257
- :meth:`Categorical.to_dense` is deprecated and will be removed in a future version, use ``np.asarray(cat)`` instead (:issue:`32639`)
258258
- The ``fastpath`` keyword in the ``SingleBlockManager`` constructor is deprecated and will be removed in a future version (:issue:`33092`)
259+
- :meth:`Index.is_mixed` is deprecated and will be removed in a future version; check ``index.inferred_type`` directly instead (:issue:`32922`)
259260

260261
.. ---------------------------------------------------------------------------
261262

pandas/_libs/algos.pyx

+15-24
Original file line numberDiff line numberDiff line change
@@ -50,18 +50,17 @@ from pandas._libs.khash cimport (
5050

5151
import pandas._libs.missing as missing
5252

53-
cdef float64_t FP_ERR = 1e-13
54-
55-
cdef float64_t NaN = <float64_t>np.NaN
56-
57-
cdef int64_t NPY_NAT = get_nat()
53+
cdef:
54+
float64_t FP_ERR = 1e-13
55+
float64_t NaN = <float64_t>np.NaN
56+
int64_t NPY_NAT = get_nat()
5857

5958
tiebreakers = {
60-
'average': TIEBREAK_AVERAGE,
61-
'min': TIEBREAK_MIN,
62-
'max': TIEBREAK_MAX,
63-
'first': TIEBREAK_FIRST,
64-
'dense': TIEBREAK_DENSE,
59+
"average": TIEBREAK_AVERAGE,
60+
"min": TIEBREAK_MIN,
61+
"max": TIEBREAK_MAX,
62+
"first": TIEBREAK_FIRST,
63+
"dense": TIEBREAK_DENSE,
6564
}
6665

6766

@@ -120,6 +119,7 @@ cpdef ndarray[int64_t, ndim=1] unique_deltas(const int64_t[:] arr):
120119
kh_int64_t *table
121120
int ret = 0
122121
list uniques = []
122+
ndarray[int64_t, ndim=1] result
123123

124124
table = kh_init_int64()
125125
kh_resize_int64(table, 10)
@@ -261,7 +261,7 @@ def kth_smallest(numeric[:] a, Py_ssize_t k) -> numeric:
261261

262262
@cython.boundscheck(False)
263263
@cython.wraparound(False)
264-
def nancorr(const float64_t[:, :] mat, bint cov=0, minp=None):
264+
def nancorr(const float64_t[:, :] mat, bint cov=False, minp=None):
265265
cdef:
266266
Py_ssize_t i, j, xi, yi, N, K
267267
bint minpv
@@ -325,7 +325,7 @@ def nancorr(const float64_t[:, :] mat, bint cov=0, minp=None):
325325

326326
@cython.boundscheck(False)
327327
@cython.wraparound(False)
328-
def nancorr_spearman(const float64_t[:, :] mat, Py_ssize_t minp=1):
328+
def nancorr_spearman(const float64_t[:, :] mat, Py_ssize_t minp=1) -> ndarray:
329329
cdef:
330330
Py_ssize_t i, j, xi, yi, N, K
331331
ndarray[float64_t, ndim=2] result
@@ -581,7 +581,7 @@ D
581581

582582
@cython.boundscheck(False)
583583
@cython.wraparound(False)
584-
def backfill(ndarray[algos_t] old, ndarray[algos_t] new, limit=None):
584+
def backfill(ndarray[algos_t] old, ndarray[algos_t] new, limit=None) -> ndarray:
585585
cdef:
586586
Py_ssize_t i, j, nleft, nright
587587
ndarray[int64_t, ndim=1] indexer
@@ -810,18 +810,14 @@ def rank_1d(
810810
"""
811811
cdef:
812812
Py_ssize_t i, j, n, dups = 0, total_tie_count = 0, non_na_idx = 0
813-
814813
ndarray[rank_t] sorted_data, values
815-
816814
ndarray[float64_t] ranks
817815
ndarray[int64_t] argsorted
818816
ndarray[uint8_t, cast=True] sorted_mask
819-
820817
rank_t val, nan_value
821-
822818
float64_t sum_ranks = 0
823819
int tiebreak = 0
824-
bint keep_na = 0
820+
bint keep_na = False
825821
bint isnan, condition
826822
float64_t count = 0.0
827823

@@ -1034,19 +1030,14 @@ def rank_2d(
10341030
"""
10351031
cdef:
10361032
Py_ssize_t i, j, z, k, n, dups = 0, total_tie_count = 0
1037-
10381033
Py_ssize_t infs
1039-
10401034
ndarray[float64_t, ndim=2] ranks
10411035
ndarray[rank_t, ndim=2] values
1042-
10431036
ndarray[int64_t, ndim=2] argsorted
1044-
10451037
rank_t val, nan_value
1046-
10471038
float64_t sum_ranks = 0
10481039
int tiebreak = 0
1049-
bint keep_na = 0
1040+
bint keep_na = False
10501041
float64_t count = 0.0
10511042
bint condition, skip_condition
10521043

pandas/core/dtypes/cast.py

+13-7
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
"""
44

55
from datetime import date, datetime, timedelta
6+
from typing import TYPE_CHECKING, Type
67

78
import numpy as np
89

@@ -63,13 +64,18 @@
6364
ABCDataFrame,
6465
ABCDatetimeArray,
6566
ABCDatetimeIndex,
67+
ABCExtensionArray,
6668
ABCPeriodArray,
6769
ABCPeriodIndex,
6870
ABCSeries,
6971
)
7072
from pandas.core.dtypes.inference import is_list_like
7173
from pandas.core.dtypes.missing import isna, notna
7274

75+
if TYPE_CHECKING:
76+
from pandas import Series
77+
from pandas.core.arrays import ExtensionArray # noqa: F401
78+
7379
_int8_max = np.iinfo(np.int8).max
7480
_int16_max = np.iinfo(np.int16).max
7581
_int32_max = np.iinfo(np.int32).max
@@ -246,18 +252,16 @@ def trans(x):
246252
return result
247253

248254

249-
def maybe_cast_result(
250-
result, obj: ABCSeries, numeric_only: bool = False, how: str = ""
251-
):
255+
def maybe_cast_result(result, obj: "Series", numeric_only: bool = False, how: str = ""):
252256
"""
253257
Try casting result to a different type if appropriate
254258
255259
Parameters
256260
----------
257261
result : array-like
258262
Result to cast.
259-
obj : ABCSeries
260-
Input series from which result was calculated.
263+
obj : Series
264+
Input Series from which result was calculated.
261265
numeric_only : bool, default False
262266
Whether to cast only numerics or datetimes as well.
263267
how : str, default ""
@@ -313,13 +317,13 @@ def maybe_cast_result_dtype(dtype: DtypeObj, how: str) -> DtypeObj:
313317
return d.get((dtype, how), dtype)
314318

315319

316-
def maybe_cast_to_extension_array(cls, obj, dtype=None):
320+
def maybe_cast_to_extension_array(cls: Type["ExtensionArray"], obj, dtype=None):
317321
"""
318322
Call to `_from_sequence` that returns the object unchanged on Exception.
319323
320324
Parameters
321325
----------
322-
cls : ExtensionArray subclass
326+
cls : class, subclass of ExtensionArray
323327
obj : arraylike
324328
Values to pass to cls._from_sequence
325329
dtype : ExtensionDtype, optional
@@ -329,6 +333,8 @@ def maybe_cast_to_extension_array(cls, obj, dtype=None):
329333
ExtensionArray or obj
330334
"""
331335
assert isinstance(cls, type), f"must pass a type: {cls}"
336+
assertion_msg = f"must pass a subclass of ExtensionArray: {cls}"
337+
assert issubclass(cls, ABCExtensionArray), assertion_msg
332338
try:
333339
result = cls._from_sequence(obj, dtype=dtype)
334340
except Exception:

pandas/core/groupby/generic.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,7 @@ def pinner(cls):
151151

152152

153153
@pin_whitelisted_properties(Series, base.series_apply_whitelist)
154-
class SeriesGroupBy(GroupBy):
154+
class SeriesGroupBy(GroupBy[Series]):
155155
_apply_whitelist = base.series_apply_whitelist
156156

157157
def _iterate_slices(self) -> Iterable[Series]:
@@ -815,7 +815,7 @@ def pct_change(self, periods=1, fill_method="pad", limit=None, freq=None):
815815

816816

817817
@pin_whitelisted_properties(DataFrame, base.dataframe_apply_whitelist)
818-
class DataFrameGroupBy(GroupBy):
818+
class DataFrameGroupBy(GroupBy[DataFrame]):
819819

820820
_apply_whitelist = base.dataframe_apply_whitelist
821821

@@ -1462,7 +1462,7 @@ def _transform_fast(self, result: DataFrame, func_nm: str) -> DataFrame:
14621462
for i, _ in enumerate(result.columns):
14631463
res = algorithms.take_1d(result.iloc[:, i].values, ids)
14641464
# TODO: we have no test cases that get here with EA dtypes;
1465-
# try_cast may not be needed if EAs never get here
1465+
# maybe_cast_result may not be needed if EAs never get here
14661466
if cast:
14671467
res = maybe_cast_result(res, obj.iloc[:, i], how=func_nm)
14681468
output.append(res)

pandas/core/groupby/groupby.py

+11-5
Original file line numberDiff line numberDiff line change
@@ -17,13 +17,15 @@ class providing the base-class of operations.
1717
Callable,
1818
Dict,
1919
FrozenSet,
20+
Generic,
2021
Hashable,
2122
Iterable,
2223
List,
2324
Mapping,
2425
Optional,
2526
Tuple,
2627
Type,
28+
TypeVar,
2729
Union,
2830
)
2931

@@ -353,13 +355,13 @@ def _group_selection_context(groupby):
353355
]
354356

355357

356-
class _GroupBy(PandasObject, SelectionMixin):
358+
class _GroupBy(PandasObject, SelectionMixin, Generic[FrameOrSeries]):
357359
_group_selection = None
358360
_apply_whitelist: FrozenSet[str] = frozenset()
359361

360362
def __init__(
361363
self,
362-
obj: NDFrame,
364+
obj: FrameOrSeries,
363365
keys: Optional[_KeysArgType] = None,
364366
axis: int = 0,
365367
level=None,
@@ -995,7 +997,11 @@ def _apply_filter(self, indices, dropna):
995997
return filtered
996998

997999

998-
class GroupBy(_GroupBy):
1000+
# To track operations that expand dimensions, like ohlc
1001+
OutputFrameOrSeries = TypeVar("OutputFrameOrSeries", bound=NDFrame)
1002+
1003+
1004+
class GroupBy(_GroupBy[FrameOrSeries]):
9991005
"""
10001006
Class for grouping and aggregating relational data.
10011007
@@ -2420,8 +2426,8 @@ def tail(self, n=5):
24202426
return self._selected_obj[mask]
24212427

24222428
def _reindex_output(
2423-
self, output: FrameOrSeries, fill_value: Scalar = np.NaN
2424-
) -> FrameOrSeries:
2429+
self, output: OutputFrameOrSeries, fill_value: Scalar = np.NaN
2430+
) -> OutputFrameOrSeries:
24252431
"""
24262432
If we have categorical groupers, then we might want to make sure that
24272433
we have a fully re-indexed output to the levels. This means expanding

pandas/core/groupby/ops.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -682,7 +682,7 @@ def _aggregate_series_pure_python(self, obj: Series, func):
682682

683683
assert result is not None
684684
result = lib.maybe_convert_objects(result, try_float=0)
685-
# TODO: try_cast back to EA?
685+
# TODO: maybe_cast_to_extension_array?
686686

687687
return result, counts
688688

pandas/core/indexes/base.py

+7-1
Original file line numberDiff line numberDiff line change
@@ -1955,6 +1955,12 @@ def is_mixed(self) -> bool:
19551955
>>> idx.is_mixed()
19561956
False
19571957
"""
1958+
warnings.warn(
1959+
"Index.is_mixed is deprecated and will be removed in a future version. "
1960+
"Check index.inferred_type directly instead.",
1961+
FutureWarning,
1962+
stacklevel=2,
1963+
)
19581964
return self.inferred_type in ["mixed"]
19591965

19601966
def holds_integer(self) -> bool:
@@ -3131,7 +3137,7 @@ def is_int(v):
31313137
# convert the slice to an indexer here
31323138

31333139
# if the slice is positional (the mixed-index check was dropped along with the is_mixed deprecation)
3134-
if is_positional and self.is_mixed():
3140+
if is_positional:
31353141
try:
31363142
# Validate start & stop
31373143
if start is not None:

pandas/core/internals/managers.py

-3
Original file line numberDiff line numberDiff line change
@@ -791,9 +791,6 @@ def get_slice(self, slobj: slice, axis: int = 0) -> "BlockManager":
791791
bm = type(self)(new_blocks, new_axes, do_integrity_check=False)
792792
return bm
793793

794-
def __contains__(self, item) -> bool:
795-
return item in self.items
796-
797794
@property
798795
def nblocks(self) -> int:
799796
return len(self.blocks)

pandas/tests/indexes/test_base.py

+6
Original file line numberDiff line numberDiff line change
@@ -1160,6 +1160,12 @@ def test_intersection_difference(self, indices, sort):
11601160
diff = indices.difference(indices, sort=sort)
11611161
tm.assert_index_equal(inter, diff)
11621162

1163+
def test_is_mixed_deprecated(self):
1164+
# GH#32922
1165+
index = self.create_index()
1166+
with tm.assert_produces_warning(FutureWarning):
1167+
index.is_mixed()
1168+
11631169
@pytest.mark.parametrize(
11641170
"indices, expected",
11651171
[

pandas/tests/internals/test_internals.py

-4
Original file line numberDiff line numberDiff line change
@@ -301,10 +301,6 @@ def test_duplicate_ref_loc_failure(self):
301301
mgr = BlockManager(blocks, axes)
302302
mgr.iget(1)
303303

304-
def test_contains(self, mgr):
305-
assert "a" in mgr
306-
assert "baz" not in mgr
307-
308304
def test_pickle(self, mgr):
309305

310306
mgr2 = tm.round_trip_pickle(mgr)

0 commit comments

Comments
 (0)