Skip to content

Commit 353a8a3

Browse files
committed
Merge remote-tracking branch 'upstream/main' into BUG-Pyarrow-implementation-of-str.fullmatch-matches-partial-string.-Issue-pandas-dev#56652
2 parents c084844 + 0c23e18 commit 353a8a3

File tree

171 files changed

+2066
-2571
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

171 files changed

+2066
-2571
lines changed

doc/source/whatsnew/v2.2.0.rst

+8
Original file line numberDiff line numberDiff line change
@@ -251,6 +251,14 @@ DataFrame. (:issue:`54938`)
251251
)
252252
series.struct.explode()
253253
254+
Use :meth:`Series.struct.field` to index into a (possibly nested)
255+
struct field.
256+
257+
258+
.. ipython:: python
259+
260+
series.struct.field("project")
261+
254262
.. _whatsnew_220.enhancements.list_accessor:
255263

256264
Series.list accessor for PyArrow list data

pandas/_libs/hashtable.pyi

+22-9
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ from typing import (
22
Any,
33
Hashable,
44
Literal,
5+
overload,
56
)
67

78
import numpy as np
@@ -180,18 +181,30 @@ class HashTable:
180181
na_value: object = ...,
181182
mask=...,
182183
) -> npt.NDArray[np.intp]: ...
184+
@overload
183185
def unique(
184186
self,
185187
values: np.ndarray, # np.ndarray[subclass-specific]
186-
return_inverse: bool = ...,
187-
mask=...,
188-
) -> (
189-
tuple[
190-
np.ndarray, # np.ndarray[subclass-specific]
191-
npt.NDArray[np.intp],
192-
]
193-
| np.ndarray
194-
): ... # np.ndarray[subclass-specific]
188+
*,
189+
return_inverse: Literal[False] = ...,
190+
mask: None = ...,
191+
) -> np.ndarray: ... # np.ndarray[subclass-specific]
192+
@overload
193+
def unique(
194+
self,
195+
values: np.ndarray, # np.ndarray[subclass-specific]
196+
*,
197+
return_inverse: Literal[True],
198+
mask: None = ...,
199+
) -> tuple[np.ndarray, npt.NDArray[np.intp],]: ... # np.ndarray[subclass-specific]
200+
@overload
201+
def unique(
202+
self,
203+
values: np.ndarray, # np.ndarray[subclass-specific]
204+
*,
205+
return_inverse: Literal[False] = ...,
206+
mask: npt.NDArray[np.bool_],
207+
) -> tuple[np.ndarray, npt.NDArray[np.bool_],]: ... # np.ndarray[subclass-specific]
195208
def factorize(
196209
self,
197210
values: np.ndarray, # np.ndarray[subclass-specific]

pandas/_libs/hashtable_class_helper.pxi.in

+3-3
Original file line numberDiff line numberDiff line change
@@ -755,7 +755,7 @@ cdef class {{name}}HashTable(HashTable):
755755
return uniques.to_array(), result_mask.to_array()
756756
return uniques.to_array()
757757

758-
def unique(self, const {{dtype}}_t[:] values, bint return_inverse=False, object mask=None):
758+
def unique(self, const {{dtype}}_t[:] values, *, bint return_inverse=False, object mask=None):
759759
"""
760760
Calculate unique values and labels (no sorting!)
761761

@@ -1180,7 +1180,7 @@ cdef class StringHashTable(HashTable):
11801180
return uniques.to_array(), labels.base # .base -> underlying ndarray
11811181
return uniques.to_array()
11821182

1183-
def unique(self, ndarray[object] values, bint return_inverse=False, object mask=None):
1183+
def unique(self, ndarray[object] values, *, bint return_inverse=False, object mask=None):
11841184
"""
11851185
Calculate unique values and labels (no sorting!)
11861186

@@ -1438,7 +1438,7 @@ cdef class PyObjectHashTable(HashTable):
14381438
return uniques.to_array(), labels.base # .base -> underlying ndarray
14391439
return uniques.to_array()
14401440

1441-
def unique(self, ndarray[object] values, bint return_inverse=False, object mask=None):
1441+
def unique(self, ndarray[object] values, *, bint return_inverse=False, object mask=None):
14421442
"""
14431443
Calculate unique values and labels (no sorting!)
14441444

pandas/_typing.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,7 @@
109109
# array-like
110110

111111
ArrayLike = Union["ExtensionArray", np.ndarray]
112+
ArrayLikeT = TypeVar("ArrayLikeT", "ExtensionArray", np.ndarray)
112113
AnyArrayLike = Union[ArrayLike, "Index", "Series"]
113114
TimeArrayLike = Union["DatetimeArray", "TimedeltaArray"]
114115

@@ -137,7 +138,7 @@ def __len__(self) -> int:
137138
def __iter__(self) -> Iterator[_T_co]:
138139
...
139140

140-
def index(self, value: Any, /, start: int = 0, stop: int = ...) -> int:
141+
def index(self, value: Any, start: int = ..., stop: int = ..., /) -> int:
141142
...
142143

143144
def count(self, value: Any, /) -> int:

pandas/compat/pickle_compat.py

+6-3
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,10 @@
77
import copy
88
import io
99
import pickle as pkl
10-
from typing import TYPE_CHECKING
10+
from typing import (
11+
TYPE_CHECKING,
12+
Any,
13+
)
1114

1215
import numpy as np
1316

@@ -209,7 +212,7 @@ def load_newobj_ex(self) -> None:
209212
pass
210213

211214

212-
def load(fh, encoding: str | None = None, is_verbose: bool = False):
215+
def load(fh, encoding: str | None = None, is_verbose: bool = False) -> Any:
213216
"""
214217
Load a pickle, with a provided encoding,
215218
@@ -239,7 +242,7 @@ def loads(
239242
fix_imports: bool = True,
240243
encoding: str = "ASCII",
241244
errors: str = "strict",
242-
):
245+
) -> Any:
243246
"""
244247
Analogous to pickle._loads.
245248
"""

pandas/conftest.py

+84-33
Original file line numberDiff line numberDiff line change
@@ -281,17 +281,6 @@ def axis(request):
281281
return request.param
282282

283283

284-
axis_frame = axis
285-
286-
287-
@pytest.fixture(params=[1, "columns"], ids=lambda x: f"axis={repr(x)}")
288-
def axis_1(request):
289-
"""
290-
Fixture for returning aliases of axis 1 of a DataFrame.
291-
"""
292-
return request.param
293-
294-
295284
@pytest.fixture(params=[True, False, None])
296285
def observed(request):
297286
"""
@@ -313,6 +302,22 @@ def ordered(request):
313302
return request.param
314303

315304

305+
@pytest.fixture(params=[True, False])
306+
def dropna(request):
307+
"""
308+
Boolean 'dropna' parameter.
309+
"""
310+
return request.param
311+
312+
313+
@pytest.fixture(params=[True, False])
314+
def sort(request):
315+
"""
316+
Boolean 'sort' parameter.
317+
"""
318+
return request.param
319+
320+
316321
@pytest.fixture(params=[True, False])
317322
def skipna(request):
318323
"""
@@ -414,6 +419,74 @@ def nselect_method(request):
414419
return request.param
415420

416421

422+
@pytest.fixture(params=[None, "ignore"])
423+
def na_action(request):
424+
"""
425+
Fixture for 'na_action' argument in map.
426+
"""
427+
return request.param
428+
429+
430+
@pytest.fixture(params=[True, False])
431+
def ascending(request):
432+
"""
433+
Fixture for 'ascending' argument in sort_values/sort_index/rank.
434+
"""
435+
return request.param
436+
437+
438+
@pytest.fixture(params=["average", "min", "max", "first", "dense"])
439+
def rank_method(request):
440+
"""
441+
Fixture for 'method' argument in rank.
442+
"""
443+
return request.param
444+
445+
446+
@pytest.fixture(params=[True, False])
447+
def as_index(request):
448+
"""
449+
Fixture for 'as_index' argument in groupby.
450+
"""
451+
return request.param
452+
453+
454+
@pytest.fixture(params=[True, False])
455+
def cache(request):
456+
"""
457+
Fixture for 'cache' argument in to_datetime.
458+
"""
459+
return request.param
460+
461+
462+
@pytest.fixture(params=[True, False])
463+
def parallel(request):
464+
"""
465+
Fixture for parallel keyword argument for numba.jit.
466+
"""
467+
return request.param
468+
469+
470+
# Can parameterize nogil & nopython over True | False, but limiting per
471+
# https://github.com/pandas-dev/pandas/pull/41971#issuecomment-860607472
472+
473+
474+
@pytest.fixture(params=[False])
475+
def nogil(request):
476+
"""
477+
Fixture for nogil keyword argument for numba.jit.
478+
"""
479+
return request.param
480+
481+
482+
@pytest.fixture(params=[True])
483+
def nopython(request):
484+
"""
485+
Fixture for nopython keyword argument for numba.jit.
486+
"""
487+
return request.param
488+
489+
417490
# ----------------------------------------------------------------
418491
# Missing values & co.
419492
# ----------------------------------------------------------------
@@ -478,10 +551,6 @@ def index_or_series(request):
478551
return request.param
479552

480553

481-
# Generate cartesian product of index_or_series fixture:
482-
index_or_series2 = index_or_series
483-
484-
485554
@pytest.fixture(params=[Index, Series, pd.array], ids=["index", "series", "array"])
486555
def index_or_series_or_array(request):
487556
"""
@@ -674,10 +743,6 @@ def index(request):
674743
return indices_dict[request.param].copy()
675744

676745

677-
# Needed to generate cartesian product of indices
678-
index_fixture2 = index
679-
680-
681746
@pytest.fixture(
682747
params=[
683748
key for key, value in indices_dict.items() if not isinstance(value, MultiIndex)
@@ -691,10 +756,6 @@ def index_flat(request):
691756
return indices_dict[key].copy()
692757

693758

694-
# Alias so we can test with cartesian product of index_flat
695-
index_flat2 = index_flat
696-
697-
698759
@pytest.fixture(
699760
params=[
700761
key
@@ -1777,16 +1838,6 @@ def ip():
17771838
return InteractiveShell(config=c)
17781839

17791840

1780-
@pytest.fixture(params=["bsr", "coo", "csc", "csr", "dia", "dok", "lil"])
1781-
def spmatrix(request):
1782-
"""
1783-
Yields scipy sparse matrix classes.
1784-
"""
1785-
sparse = pytest.importorskip("scipy.sparse")
1786-
1787-
return getattr(sparse, request.param + "_matrix")
1788-
1789-
17901841
@pytest.fixture(
17911842
params=[
17921843
getattr(pd.offsets, o)

pandas/core/accessor.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ class PandasDelegate:
5454
def _delegate_property_get(self, name: str, *args, **kwargs):
5555
raise TypeError(f"You cannot access the property {name}")
5656

57-
def _delegate_property_set(self, name: str, value, *args, **kwargs):
57+
def _delegate_property_set(self, name: str, value, *args, **kwargs) -> None:
5858
raise TypeError(f"The property {name} cannot be set")
5959

6060
def _delegate_method(self, name: str, *args, **kwargs):

pandas/core/algorithms.py

+12-5
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
from pandas._typing import (
2626
AnyArrayLike,
2727
ArrayLike,
28+
ArrayLikeT,
2829
AxisInt,
2930
DtypeObj,
3031
TakeIndexer,
@@ -182,8 +183,8 @@ def _ensure_data(values: ArrayLike) -> np.ndarray:
182183

183184

184185
def _reconstruct_data(
185-
values: ArrayLike, dtype: DtypeObj, original: AnyArrayLike
186-
) -> ArrayLike:
186+
values: ArrayLikeT, dtype: DtypeObj, original: AnyArrayLike
187+
) -> ArrayLikeT:
187188
"""
188189
reverse of _ensure_data
189190
@@ -206,7 +207,9 @@ def _reconstruct_data(
206207
# that values.dtype == dtype
207208
cls = dtype.construct_array_type()
208209

209-
values = cls._from_sequence(values, dtype=dtype)
210+
# error: Incompatible types in assignment (expression has type
211+
# "ExtensionArray", variable has type "ndarray[Any, Any]")
212+
values = cls._from_sequence(values, dtype=dtype) # type: ignore[assignment]
210213

211214
else:
212215
values = values.astype(dtype, copy=False)
@@ -259,7 +262,9 @@ def _ensure_arraylike(values, func_name: str) -> ArrayLike:
259262
}
260263

261264

262-
def _get_hashtable_algo(values: np.ndarray):
265+
def _get_hashtable_algo(
266+
values: np.ndarray,
267+
) -> tuple[type[htable.HashTable], np.ndarray]:
263268
"""
264269
Parameters
265270
----------
@@ -1550,7 +1555,9 @@ def safe_sort(
15501555
hash_klass, values = _get_hashtable_algo(values) # type: ignore[arg-type]
15511556
t = hash_klass(len(values))
15521557
t.map_locations(values)
1553-
sorter = ensure_platform_int(t.lookup(ordered))
1558+
# error: Argument 1 to "lookup" of "HashTable" has incompatible type
1559+
# "ExtensionArray | ndarray[Any, Any] | Index | Series"; expected "ndarray"
1560+
sorter = ensure_platform_int(t.lookup(ordered)) # type: ignore[arg-type]
15541561

15551562
if use_na_sentinel:
15561563
# take_nd is faster, but only works for na_sentinels of -1

0 commit comments

Comments
 (0)