Skip to content

Commit 4bc929e

Browse files
committed
Merge branch 'main' of github.com:Wong2333/pandas
2 parents 6ff05a5 + 2b9ca07 commit 4bc929e

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

56 files changed

+401
-249
lines changed

.github/workflows/wheels.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -156,7 +156,7 @@ jobs:
156156
run: echo "sdist_name=$(cd ./dist && ls -d */)" >> "$GITHUB_ENV"
157157

158158
- name: Build wheels
159-
uses: pypa/cibuildwheel@v2.20.0
159+
uses: pypa/cibuildwheel@v2.21.0
160160
with:
161161
package-dir: ./dist/${{ startsWith(matrix.buildplat[1], 'macosx') && env.sdist_name || needs.build_sdist.outputs.sdist_file }}
162162
env:

.pre-commit-config.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ repos:
7474
hooks:
7575
- id: isort
7676
- repo: https://github.com/asottile/pyupgrade
77-
rev: v3.16.0
77+
rev: v3.17.0
7878
hooks:
7979
- id: pyupgrade
8080
args: [--py310-plus]
@@ -112,7 +112,7 @@ repos:
112112
types: [python]
113113
stages: [manual]
114114
additional_dependencies: &pyright_dependencies
115-
115+
116116
- id: pyright
117117
# note: assumes python env is setup and activated
118118
name: pyright reportGeneralTypeIssues

environment.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -76,10 +76,10 @@ dependencies:
7676
- cxx-compiler
7777

7878
# code checks
79-
- flake8=6.1.0 # run in subprocess over docstring examples
80-
- mypy=1.9.0 # pre-commit uses locally installed mypy
79+
- flake8=7.1.0 # run in subprocess over docstring examples
80+
- mypy=1.11.2 # pre-commit uses locally installed mypy
8181
- tokenize-rt # scripts/check_for_inconsistent_pandas_namespace.py
82-
- pre-commit>=3.6.0
82+
- pre-commit>=4.0.1
8383

8484
# documentation
8585
- gitpython # obtain contributors from git for whatsnew

pandas/_config/config.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -411,7 +411,7 @@ def __dir__(self) -> list[str]:
411411

412412

413413
@contextmanager
414-
def option_context(*args) -> Generator[None, None, None]:
414+
def option_context(*args) -> Generator[None]:
415415
"""
416416
Context manager to temporarily set options in a ``with`` statement.
417417
@@ -718,7 +718,7 @@ def _build_option_description(k: str) -> str:
718718

719719

720720
@contextmanager
721-
def config_prefix(prefix: str) -> Generator[None, None, None]:
721+
def config_prefix(prefix: str) -> Generator[None]:
722722
"""
723723
contextmanager for multiple invocations of API with a common prefix
724724

pandas/_config/localization.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
@contextmanager
2626
def set_locale(
2727
new_locale: str | tuple[str, str], lc_var: int = locale.LC_ALL
28-
) -> Generator[str | tuple[str, str], None, None]:
28+
) -> Generator[str | tuple[str, str]]:
2929
"""
3030
Context manager for temporarily setting a locale.
3131

pandas/_testing/_warnings.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ def assert_produces_warning(
3535
raise_on_extra_warnings: bool = True,
3636
match: str | tuple[str | None, ...] | None = None,
3737
must_find_all_warnings: bool = True,
38-
) -> Generator[list[warnings.WarningMessage], None, None]:
38+
) -> Generator[list[warnings.WarningMessage]]:
3939
"""
4040
Context manager for running code expected to either raise a specific warning,
4141
multiple specific warnings, or not raise any warnings. Verifies that the code

pandas/_testing/contexts.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929
@contextmanager
3030
def decompress_file(
3131
path: FilePath | BaseBuffer, compression: CompressionOptions
32-
) -> Generator[IO[bytes], None, None]:
32+
) -> Generator[IO[bytes]]:
3333
"""
3434
Open a compressed file and return a file object.
3535
@@ -50,7 +50,7 @@ def decompress_file(
5050

5151

5252
@contextmanager
53-
def set_timezone(tz: str) -> Generator[None, None, None]:
53+
def set_timezone(tz: str) -> Generator[None]:
5454
"""
5555
Context manager for temporarily setting a timezone.
5656
@@ -92,7 +92,7 @@ def setTZ(tz) -> None:
9292

9393

9494
@contextmanager
95-
def ensure_clean(filename=None) -> Generator[Any, None, None]:
95+
def ensure_clean(filename=None) -> Generator[Any]:
9696
"""
9797
Gets a temporary path and agrees to remove on close.
9898
@@ -124,7 +124,7 @@ def ensure_clean(filename=None) -> Generator[Any, None, None]:
124124

125125

126126
@contextmanager
127-
def with_csv_dialect(name: str, **kwargs) -> Generator[None, None, None]:
127+
def with_csv_dialect(name: str, **kwargs) -> Generator[None]:
128128
"""
129129
Context manager to temporarily register a CSV dialect for parsing CSV.
130130

pandas/compat/pickle_compat.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,7 @@ def loads(
131131

132132

133133
@contextlib.contextmanager
134-
def patch_pickle() -> Generator[None, None, None]:
134+
def patch_pickle() -> Generator[None]:
135135
"""
136136
Temporarily patch pickle to use our unpickler.
137137
"""

pandas/core/apply.py

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -246,12 +246,8 @@ def transform(self) -> DataFrame | Series:
246246
and not obj.empty
247247
):
248248
raise ValueError("Transform function failed")
249-
# error: Argument 1 to "__get__" of "AxisProperty" has incompatible type
250-
# "Union[Series, DataFrame, GroupBy[Any], SeriesGroupBy,
251-
# DataFrameGroupBy, BaseWindow, Resampler]"; expected "Union[DataFrame,
252-
# Series]"
253249
if not isinstance(result, (ABCSeries, ABCDataFrame)) or not result.index.equals(
254-
obj.index # type: ignore[arg-type]
250+
obj.index
255251
):
256252
raise ValueError("Function did not transform")
257253

@@ -803,7 +799,7 @@ def result_columns(self) -> Index:
803799

804800
@property
805801
@abc.abstractmethod
806-
def series_generator(self) -> Generator[Series, None, None]:
802+
def series_generator(self) -> Generator[Series]:
807803
pass
808804

809805
@staticmethod
@@ -1128,7 +1124,7 @@ class FrameRowApply(FrameApply):
11281124
axis: AxisInt = 0
11291125

11301126
@property
1131-
def series_generator(self) -> Generator[Series, None, None]:
1127+
def series_generator(self) -> Generator[Series]:
11321128
return (self.obj._ixs(i, axis=1) for i in range(len(self.columns)))
11331129

11341130
@staticmethod
@@ -1235,7 +1231,7 @@ def apply_broadcast(self, target: DataFrame) -> DataFrame:
12351231
return result.T
12361232

12371233
@property
1238-
def series_generator(self) -> Generator[Series, None, None]:
1234+
def series_generator(self) -> Generator[Series]:
12391235
values = self.values
12401236
values = ensure_wrapped_if_datetimelike(values)
12411237
assert len(values) > 0

pandas/core/arraylike.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -403,12 +403,12 @@ def _reconstruct(result):
403403
# for np.<ufunc>(..) calls
404404
# kwargs cannot necessarily be handled block-by-block, so only
405405
# take this path if there are no kwargs
406-
mgr = inputs[0]._mgr
406+
mgr = inputs[0]._mgr # pyright: ignore[reportGeneralTypeIssues]
407407
result = mgr.apply(getattr(ufunc, method))
408408
else:
409409
# otherwise specific ufunc methods (eg np.<ufunc>.accumulate(..))
410410
# Those can have an axis keyword and thus can't be called block-by-block
411-
result = default_array_ufunc(inputs[0], ufunc, method, *inputs, **kwargs)
411+
result = default_array_ufunc(inputs[0], ufunc, method, *inputs, **kwargs) # pyright: ignore[reportGeneralTypeIssues]
412412
# e.g. np.negative (only one reached), with "where" and "out" in kwargs
413413

414414
result = reconstruct(result)

pandas/core/arrays/_arrow_string_mixins.py

Lines changed: 27 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -10,15 +10,14 @@
1010

1111
import numpy as np
1212

13+
from pandas._libs import lib
1314
from pandas.compat import (
1415
pa_version_under10p1,
1516
pa_version_under11p0,
1617
pa_version_under13p0,
1718
pa_version_under17p0,
1819
)
1920

20-
from pandas.core.dtypes.missing import isna
21-
2221
if not pa_version_under10p1:
2322
import pyarrow as pa
2423
import pyarrow.compute as pc
@@ -38,7 +37,7 @@ class ArrowStringArrayMixin:
3837
def __init__(self, *args, **kwargs) -> None:
3938
raise NotImplementedError
4039

41-
def _convert_bool_result(self, result):
40+
def _convert_bool_result(self, result, na=lib.no_default, method_name=None):
4241
# Convert a bool-dtype result to the appropriate result type
4342
raise NotImplementedError
4443

@@ -212,7 +211,9 @@ def _str_removesuffix(self, suffix: str):
212211
result = pc.if_else(ends_with, removed, self._pa_array)
213212
return type(self)(result)
214213

215-
def _str_startswith(self, pat: str | tuple[str, ...], na: Scalar | None = None):
214+
def _str_startswith(
215+
self, pat: str | tuple[str, ...], na: Scalar | lib.NoDefault = lib.no_default
216+
):
216217
if isinstance(pat, str):
217218
result = pc.starts_with(self._pa_array, pattern=pat)
218219
else:
@@ -225,11 +226,11 @@ def _str_startswith(self, pat: str | tuple[str, ...], na: Scalar | None = None):
225226

226227
for p in pat[1:]:
227228
result = pc.or_(result, pc.starts_with(self._pa_array, pattern=p))
228-
if not isna(na): # pyright: ignore [reportGeneralTypeIssues]
229-
result = result.fill_null(na)
230-
return self._convert_bool_result(result)
229+
return self._convert_bool_result(result, na=na, method_name="startswith")
231230

232-
def _str_endswith(self, pat: str | tuple[str, ...], na: Scalar | None = None):
231+
def _str_endswith(
232+
self, pat: str | tuple[str, ...], na: Scalar | lib.NoDefault = lib.no_default
233+
):
233234
if isinstance(pat, str):
234235
result = pc.ends_with(self._pa_array, pattern=pat)
235236
else:
@@ -242,9 +243,7 @@ def _str_endswith(self, pat: str | tuple[str, ...], na: Scalar | None = None):
242243

243244
for p in pat[1:]:
244245
result = pc.or_(result, pc.ends_with(self._pa_array, pattern=p))
245-
if not isna(na): # pyright: ignore [reportGeneralTypeIssues]
246-
result = result.fill_null(na)
247-
return self._convert_bool_result(result)
246+
return self._convert_bool_result(result, na=na, method_name="endswith")
248247

249248
def _str_isalnum(self):
250249
result = pc.utf8_is_alnum(self._pa_array)
@@ -283,7 +282,12 @@ def _str_isupper(self):
283282
return self._convert_bool_result(result)
284283

285284
def _str_contains(
286-
self, pat, case: bool = True, flags: int = 0, na=None, regex: bool = True
285+
self,
286+
pat,
287+
case: bool = True,
288+
flags: int = 0,
289+
na: Scalar | lib.NoDefault = lib.no_default,
290+
regex: bool = True,
287291
):
288292
if flags:
289293
raise NotImplementedError(f"contains not implemented with {flags=}")
@@ -293,19 +297,25 @@ def _str_contains(
293297
else:
294298
pa_contains = pc.match_substring
295299
result = pa_contains(self._pa_array, pat, ignore_case=not case)
296-
if not isna(na): # pyright: ignore [reportGeneralTypeIssues]
297-
result = result.fill_null(na)
298-
return self._convert_bool_result(result)
300+
return self._convert_bool_result(result, na=na, method_name="contains")
299301

300302
def _str_match(
301-
self, pat: str, case: bool = True, flags: int = 0, na: Scalar | None = None
303+
self,
304+
pat: str,
305+
case: bool = True,
306+
flags: int = 0,
307+
na: Scalar | lib.NoDefault = lib.no_default,
302308
):
303309
if not pat.startswith("^"):
304310
pat = f"^{pat}"
305311
return self._str_contains(pat, case, flags, na, regex=True)
306312

307313
def _str_fullmatch(
308-
self, pat, case: bool = True, flags: int = 0, na: Scalar | None = None
314+
self,
315+
pat,
316+
case: bool = True,
317+
flags: int = 0,
318+
na: Scalar | lib.NoDefault = lib.no_default,
309319
):
310320
if not pat.endswith("$") or pat.endswith("\\$"):
311321
pat = f"{pat}$"

pandas/core/arrays/arrow/array.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2318,7 +2318,9 @@ def _apply_elementwise(self, func: Callable) -> list[list[Any]]:
23182318
for chunk in self._pa_array.iterchunks()
23192319
]
23202320

2321-
def _convert_bool_result(self, result):
2321+
def _convert_bool_result(self, result, na=lib.no_default, method_name=None):
2322+
if na is not lib.no_default and not isna(na): # pyright: ignore [reportGeneralTypeIssues]
2323+
result = result.fill_null(na)
23222324
return type(self)(result)
23232325

23242326
def _convert_int_result(self, result):
@@ -2426,7 +2428,7 @@ def _str_rindex(self, sub: str, start: int = 0, end: int | None = None) -> Self:
24262428
result = self._apply_elementwise(predicate)
24272429
return type(self)(pa.chunked_array(result))
24282430

2429-
def _str_normalize(self, form: str) -> Self:
2431+
def _str_normalize(self, form: Literal["NFC", "NFD", "NFKC", "NFKD"]) -> Self:
24302432
predicate = lambda val: unicodedata.normalize(form, val)
24312433
result = self._apply_elementwise(predicate)
24322434
return type(self)(pa.chunked_array(result))

pandas/core/arrays/boolean.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -369,7 +369,7 @@ def _coerce_to_array(
369369
assert dtype == "boolean"
370370
return coerce_to_array(value, copy=copy)
371371

372-
def _logical_method(self, other, op):
372+
def _logical_method(self, other, op): # type: ignore[override]
373373
assert op.__name__ in {"or_", "ror_", "and_", "rand_", "xor", "rxor"}
374374
other_is_scalar = lib.is_scalar(other)
375375
mask = None

pandas/core/arrays/categorical.py

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2679,16 +2679,28 @@ def isin(self, values: ArrayLike) -> npt.NDArray[np.bool_]:
26792679
# ------------------------------------------------------------------------
26802680
# String methods interface
26812681
def _str_map(
2682-
self, f, na_value=np.nan, dtype=np.dtype("object"), convert: bool = True
2682+
self, f, na_value=lib.no_default, dtype=np.dtype("object"), convert: bool = True
26832683
):
26842684
# Optimization to apply the callable `f` to the categories once
26852685
# and rebuild the result by `take`ing from the result with the codes.
26862686
# Returns the same type as the object-dtype implementation though.
2687-
from pandas.core.arrays import NumpyExtensionArray
2688-
26892687
categories = self.categories
26902688
codes = self.codes
2691-
result = NumpyExtensionArray(categories.to_numpy())._str_map(f, na_value, dtype)
2689+
if categories.dtype == "string":
2690+
result = categories.array._str_map(f, na_value, dtype) # type: ignore[attr-defined]
2691+
if (
2692+
categories.dtype.na_value is np.nan # type: ignore[union-attr]
2693+
and is_bool_dtype(dtype)
2694+
and (na_value is lib.no_default or isna(na_value))
2695+
):
2696+
# NaN propagates as False for functions with boolean return type
2697+
na_value = False
2698+
else:
2699+
from pandas.core.arrays import NumpyExtensionArray
2700+
2701+
result = NumpyExtensionArray(categories.to_numpy())._str_map(
2702+
f, na_value, dtype
2703+
)
26922704
return take_nd(result, codes, fill_value=na_value)
26932705

26942706
def _str_get_dummies(self, sep: str = "|", dtype: NpDtype | None = None):

pandas/core/arrays/datetimes.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2918,7 +2918,7 @@ def _generate_range(
29182918
offset: BaseOffset,
29192919
*,
29202920
unit: str,
2921-
) -> Generator[Timestamp, None, None]:
2921+
) -> Generator[Timestamp]:
29222922
"""
29232923
Generates a sequence of dates corresponding to the specified time
29242924
offset. Similar to dateutil.rrule except uses pandas DateOffset

0 commit comments

Comments (0)