Skip to content

Commit a474ec0

Browse files
committed
merge
2 parents a50aa44 + 08431f1 commit a474ec0

File tree

10 files changed

+56
-35
lines changed

10 files changed

+56
-35
lines changed

ci/code_checks.sh

-1
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
9292
-i "pandas.Series.dt.day_name PR01,PR02" \
9393
-i "pandas.Series.dt.floor PR01,PR02" \
9494
-i "pandas.Series.dt.freq GL08" \
95-
-i "pandas.Series.dt.microseconds SA01" \
9695
-i "pandas.Series.dt.month_name PR01,PR02" \
9796
-i "pandas.Series.dt.nanoseconds SA01" \
9897
-i "pandas.Series.dt.normalize PR01" \

doc/source/whatsnew/v3.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -528,6 +528,7 @@ Performance improvements
528528
- Performance improvement in :meth:`RangeIndex.reindex` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57647`, :issue:`57752`)
529529
- Performance improvement in :meth:`RangeIndex.take` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57445`, :issue:`57752`)
530530
- Performance improvement in :func:`merge` if hash-join can be used (:issue:`57970`)
531+
- Performance improvement in :meth:`CategoricalDtype.update_dtype` when ``dtype`` is a :class:`CategoricalDtype` with non-``None`` ``categories`` and ``ordered`` (:issue:`59647`)
531532
- Performance improvement in :meth:`to_hdf` by avoiding unnecessary reopenings of the HDF5 file, which speeds up adding data to files with a very large number of groups. (:issue:`58248`)
532533
- Performance improvement in ``DataFrameGroupBy.__len__`` and ``SeriesGroupBy.__len__`` (:issue:`57595`)
533534
- Performance improvement in indexing operations for string dtypes (:issue:`56997`)

pandas/_libs/tslibs/timedeltas.pyx

+10-3
Original file line numberDiff line numberDiff line change
@@ -1434,9 +1434,16 @@ cdef class _Timedelta(timedelta):
14341434
"""
14351435
Convert the Timedelta to a NumPy timedelta64.
14361436

1437-
This is an alias method for `Timedelta.to_timedelta64()`. The dtype and
1438-
copy parameters are available here only for compatibility. Their values
1439-
will not affect the return value.
1437+
This is an alias method for `Timedelta.to_timedelta64()`.
1438+
1439+
Parameters
1440+
----------
1441+
dtype : NoneType
1442+
It is available here only for compatibility. Its value will not
1443+
affect the return value.
1444+
copy : bool, default False
1445+
It is available here only for compatibility. Its value will not
1446+
affect the return value.
14401447

14411448
Returns
14421449
-------

pandas/core/arrays/_arrow_string_mixins.py

+15
Original file line numberDiff line numberDiff line change
@@ -190,3 +190,18 @@ def _str_istitle(self):
190190
def _str_isupper(self):
191191
result = pc.utf8_is_upper(self._pa_array)
192192
return self._convert_bool_result(result)
193+
194+
def _str_contains(
195+
self, pat, case: bool = True, flags: int = 0, na=None, regex: bool = True
196+
):
197+
if flags:
198+
raise NotImplementedError(f"contains not implemented with {flags=}")
199+
200+
if regex:
201+
pa_contains = pc.match_substring_regex
202+
else:
203+
pa_contains = pc.match_substring
204+
result = pa_contains(self._pa_array, pat, ignore_case=not case)
205+
if not isna(na): # pyright: ignore [reportGeneralTypeIssues]
206+
result = result.fill_null(na)
207+
return self._convert_bool_result(result)

pandas/core/arrays/arrow/array.py

-15
Original file line numberDiff line numberDiff line change
@@ -2322,21 +2322,6 @@ def _str_count(self, pat: str, flags: int = 0) -> Self:
23222322
raise NotImplementedError(f"count not implemented with {flags=}")
23232323
return type(self)(pc.count_substring_regex(self._pa_array, pat))
23242324

2325-
def _str_contains(
2326-
self, pat, case: bool = True, flags: int = 0, na=None, regex: bool = True
2327-
) -> Self:
2328-
if flags:
2329-
raise NotImplementedError(f"contains not implemented with {flags=}")
2330-
2331-
if regex:
2332-
pa_contains = pc.match_substring_regex
2333-
else:
2334-
pa_contains = pc.match_substring
2335-
result = pa_contains(self._pa_array, pat, ignore_case=not case)
2336-
if not isna(na):
2337-
result = result.fill_null(na)
2338-
return type(self)(result)
2339-
23402325
def _result_converter(self, result):
23412326
return type(self)(result)
23422327

pandas/core/arrays/string_arrow.py

+4-10
Original file line numberDiff line numberDiff line change
@@ -223,10 +223,8 @@ def insert(self, loc: int, item) -> ArrowStringArray:
223223
raise TypeError("Scalar must be NA or str")
224224
return super().insert(loc, item)
225225

226-
def _convert_bool_result(self, values, na=None):
226+
def _convert_bool_result(self, values):
227227
if self.dtype.na_value is np.nan:
228-
if not isna(na):
229-
values = values.fill_null(bool(na))
230228
return ArrowExtensionArray(values).to_numpy(na_value=np.nan)
231229
return BooleanDtype().__from_arrow__(values)
232230

@@ -304,11 +302,6 @@ def _str_contains(
304302
fallback_performancewarning()
305303
return super()._str_contains(pat, case, flags, na, regex)
306304

307-
if regex:
308-
result = pc.match_substring_regex(self._pa_array, pat, ignore_case=not case)
309-
else:
310-
result = pc.match_substring(self._pa_array, pat, ignore_case=not case)
311-
result = self._convert_bool_result(result, na=na)
312305
if not isna(na):
313306
if not isinstance(na, bool):
314307
# GH#59561
@@ -318,8 +311,9 @@ def _str_contains(
318311
FutureWarning,
319312
stacklevel=find_stack_level(),
320313
)
321-
result[isna(result)] = bool(na)
322-
return result
314+
na = bool(na)
315+
316+
return ArrowStringArrayMixin._str_contains(self, pat, case, flags, na, regex)
323317

324318
def _str_replace(
325319
self,

pandas/core/arrays/timedeltas.py

+12
Original file line numberDiff line numberDiff line change
@@ -876,6 +876,12 @@ def to_pytimedelta(self) -> npt.NDArray[np.object_]:
876876
microseconds_docstring = textwrap.dedent(
877877
"""Number of microseconds (>= 0 and less than 1 second) for each element.
878878
879+
See Also
880+
--------
881+
pd.Timedelta.microseconds : Number of microseconds (>= 0 and less than 1 second).
882+
pd.Timedelta.to_pytimedelta.microseconds : Number of microseconds (>= 0 and less
883+
than 1 second) of a datetime.timedelta.
884+
879885
Examples
880886
--------
881887
For Series:
@@ -955,6 +961,12 @@ def components(self) -> DataFrame:
955961
-------
956962
DataFrame
957963
964+
See Also
965+
--------
966+
TimedeltaIndex.total_seconds : Return total duration expressed in seconds.
967+
Timedelta.components : Return a components namedtuple-like of a single
968+
timedelta.
969+
958970
Examples
959971
--------
960972
>>> tdelta_idx = pd.to_timedelta(["1 day 3 min 2 us 42 ns"])

pandas/core/dtypes/dtypes.py

+7
Original file line numberDiff line numberDiff line change
@@ -611,6 +611,13 @@ def update_dtype(self, dtype: str_type | CategoricalDtype) -> CategoricalDtype:
611611
dtype = cast(CategoricalDtype, dtype)
612612

613613
# update categories/ordered unless they've been explicitly passed as None
614+
if (
615+
isinstance(dtype, CategoricalDtype)
616+
and dtype.categories is not None
617+
and dtype.ordered is not None
618+
):
619+
# Avoid re-validation in CategoricalDtype constructor
620+
return dtype
614621
new_categories = (
615622
dtype.categories if dtype.categories is not None else self.categories
616623
)

pandas/core/generic.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -2812,8 +2812,8 @@ def to_sql(
28122812
`index` is True, then the index names are used.
28132813
A sequence should be given if the DataFrame uses MultiIndex.
28142814
chunksize : int, optional
2815-
Specify the number of rows in each batch to be written at a time.
2816-
By default, all rows will be written at once.
2815+
Specify the number of rows in each batch to be written to the database connection at a time.
2816+
By default, all rows will be written at once. See also the ``method`` parameter.
28172817
dtype : dict or scalar, optional
28182818
Specifying the datatype for columns. If a dictionary is used, the
28192819
keys should be the column names and the values should be the

pandas/tests/arrays/boolean/test_logical.py

+5-4
Original file line numberDiff line numberDiff line change
@@ -60,19 +60,20 @@ def test_eq_mismatched_type(self, other):
6060
expected = pd.array([True, True])
6161
tm.assert_extension_array_equal(result, expected)
6262

63-
def test_logical_length_mismatch_raises(self, all_logical_operators):
63+
@pytest.mark.parametrize("other", [[True, False], [True, False, True, False]])
64+
def test_logical_length_mismatch_raises(self, other, all_logical_operators):
6465
op_name = all_logical_operators
6566
a = pd.array([True, False, None], dtype="boolean")
6667
msg = "Lengths must match"
6768

6869
with pytest.raises(ValueError, match=msg):
69-
getattr(a, op_name)([True, False])
70+
getattr(a, op_name)(other)
7071

7172
with pytest.raises(ValueError, match=msg):
72-
getattr(a, op_name)(np.array([True, False]))
73+
getattr(a, op_name)(np.array(other))
7374

7475
with pytest.raises(ValueError, match=msg):
75-
getattr(a, op_name)(pd.array([True, False], dtype="boolean"))
76+
getattr(a, op_name)(pd.array(other, dtype="boolean"))
7677

7778
def test_logical_nan_raises(self, all_logical_operators):
7879
op_name = all_logical_operators

0 commit comments

Comments
 (0)