Skip to content

Commit 7b131c0

Browse files
Merge branch 'pandas-dev:main' into main
2 parents d586f72 + 3a45265 commit 7b131c0

File tree

22 files changed

+336
-151
lines changed

22 files changed

+336
-151
lines changed

ci/code_checks.sh

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
9292
-i "pandas.Series.dt.day_name PR01,PR02" \
9393
-i "pandas.Series.dt.floor PR01,PR02" \
9494
-i "pandas.Series.dt.freq GL08" \
95-
-i "pandas.Series.dt.microseconds SA01" \
9695
-i "pandas.Series.dt.month_name PR01,PR02" \
9796
-i "pandas.Series.dt.nanoseconds SA01" \
9897
-i "pandas.Series.dt.normalize PR01" \
@@ -109,27 +108,19 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
109108
-i "pandas.Series.sparse.from_coo PR07,SA01" \
110109
-i "pandas.Series.sparse.npoints SA01" \
111110
-i "pandas.Series.sparse.sp_values SA01" \
112-
-i "pandas.Timedelta.asm8 SA01" \
113-
-i "pandas.Timedelta.ceil SA01" \
114111
-i "pandas.Timedelta.components SA01" \
115-
-i "pandas.Timedelta.floor SA01" \
116112
-i "pandas.Timedelta.max PR02" \
117113
-i "pandas.Timedelta.min PR02" \
118114
-i "pandas.Timedelta.resolution PR02" \
119-
-i "pandas.Timedelta.round SA01" \
120-
-i "pandas.Timedelta.to_numpy PR01" \
121115
-i "pandas.Timedelta.to_timedelta64 SA01" \
122116
-i "pandas.Timedelta.total_seconds SA01" \
123117
-i "pandas.Timedelta.view SA01" \
124-
-i "pandas.TimedeltaIndex.components SA01" \
125-
-i "pandas.TimedeltaIndex.microseconds SA01" \
126118
-i "pandas.TimedeltaIndex.nanoseconds SA01" \
127119
-i "pandas.TimedeltaIndex.seconds SA01" \
128120
-i "pandas.TimedeltaIndex.to_pytimedelta RT03,SA01" \
129121
-i "pandas.Timestamp.nanosecond GL08" \
130122
-i "pandas.Timestamp.resolution PR02" \
131123
-i "pandas.Timestamp.tzinfo GL08" \
132-
-i "pandas.Timestamp.value GL08" \
133124
-i "pandas.Timestamp.year GL08" \
134125
-i "pandas.api.extensions.ExtensionArray.interpolate PR01,SA01" \
135126
-i "pandas.api.types.is_bool PR01,SA01" \

doc/source/whatsnew/v3.0.0.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ Other enhancements
5353
- :meth:`DataFrame.pivot_table` and :func:`pivot_table` now allow the passing of keyword arguments to ``aggfunc`` through ``**kwargs`` (:issue:`57884`)
5454
- :meth:`Series.cummin` and :meth:`Series.cummax` now support :class:`CategoricalDtype` (:issue:`52335`)
5555
- :meth:`Series.plot` now correctly handles the ``ylabel`` parameter for pie charts, allowing for explicit control over the y-axis label (:issue:`58239`)
56+
- :meth:`DataFrame.plot.scatter` argument ``c`` now accepts a column of strings, where rows with the same string are colored identically (:issue:`16827` and :issue:`16485`)
5657
- :meth:`pandas.concat` will raise a ``ValueError`` when ``ignore_index=True`` and ``keys`` is not ``None`` (:issue:`59274`)
5758
- Multiplying two :class:`DateOffset` objects will now raise a ``TypeError`` instead of a ``RecursionError`` (:issue:`59442`)
5859
- Restore support for reading Stata 104-format and enable reading 103-format dta files (:issue:`58554`)
@@ -527,6 +528,7 @@ Performance improvements
527528
- Performance improvement in :meth:`RangeIndex.reindex` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57647`, :issue:`57752`)
528529
- Performance improvement in :meth:`RangeIndex.take` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57445`, :issue:`57752`)
529530
- Performance improvement in :func:`merge` if hash-join can be used (:issue:`57970`)
531+
- Performance improvement in :meth:`CategoricalDtype.update_dtype` when ``dtype`` is a :class:`CategoricalDtype` with non-``None`` categories and ordered (:issue:`59647`)
530532
- Performance improvement in :meth:`to_hdf` by avoiding unnecessary reopenings of the HDF5 file, to speed up data addition to files with a very large number of groups. (:issue:`58248`)
531533
- Performance improvement in ``DataFrameGroupBy.__len__`` and ``SeriesGroupBy.__len__`` (:issue:`57595`)
532534
- Performance improvement in indexing operations for string dtypes (:issue:`56997`)

pandas/_libs/tslibs/timedeltas.pyx

Lines changed: 42 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1421,9 +1421,16 @@ cdef class _Timedelta(timedelta):
14211421
"""
14221422
Convert the Timedelta to a NumPy timedelta64.
14231423

1424-
This is an alias method for `Timedelta.to_timedelta64()`. The dtype and
1425-
copy parameters are available here only for compatibility. Their values
1426-
will not affect the return value.
1424+
This is an alias method for `Timedelta.to_timedelta64()`.
1425+
1426+
Parameters
1427+
----------
1428+
dtype : NoneType
1429+
It is available here only for compatibility. Its value will not
1430+
affect the return value.
1431+
copy : bool, default False
1432+
It is available here only for compatibility. Its value will not
1433+
affect the return value.
14271434

14281435
Returns
14291436
-------
@@ -1498,6 +1505,12 @@ cdef class _Timedelta(timedelta):
14981505
numpy timedelta64 array scalar view
14991506
Array scalar view of the timedelta in nanoseconds.
15001507

1508+
See Also
1509+
--------
1510+
Timedelta.total_seconds : Return the total seconds in the duration.
1511+
Timedelta.components : Return a namedtuple of the Timedelta's components.
1512+
Timedelta.to_timedelta64 : Convert the Timedelta to a numpy.timedelta64.
1513+
15011514
Examples
15021515
--------
15031516
>>> td = pd.Timedelta('1 days 2 min 3 us 42 ns')
@@ -2061,6 +2074,12 @@ class Timedelta(_Timedelta):
20612074
------
20622075
ValueError if the freq cannot be converted
20632076
2077+
See Also
2078+
--------
2079+
Timedelta.floor : Floor the Timedelta to the specified resolution.
2080+
Timedelta.round : Round the Timedelta to the nearest specified resolution.
2081+
Timestamp.ceil : Similar method for Timestamp objects.
2082+
20642083
Examples
20652084
--------
20662085
>>> td = pd.Timedelta('1001ms')
@@ -2081,6 +2100,16 @@ class Timedelta(_Timedelta):
20812100
Frequency string indicating the flooring resolution.
20822101
It uses the same units as class constructor :class:`~pandas.Timedelta`.
20832102
2103+
Returns
2104+
-------
2105+
Timedelta
2106+
A new Timedelta object floored to the specified resolution.
2107+
2108+
See Also
2109+
--------
2110+
Timestamp.ceil : Round the Timestamp up to the nearest specified resolution.
2111+
Timestamp.round : Round the Timestamp to the nearest specified resolution.
2112+
20842113
Examples
20852114
--------
20862115
>>> td = pd.Timedelta('1001ms')
@@ -2101,6 +2130,16 @@ class Timedelta(_Timedelta):
21012130
Frequency string indicating the ceiling resolution.
21022131
It uses the same units as class constructor :class:`~pandas.Timedelta`.
21032132
2133+
Returns
2134+
-------
2135+
Timedelta
2136+
A new Timedelta object ceiled to the specified resolution.
2137+
2138+
See Also
2139+
--------
2140+
Timedelta.floor : Floor the Timedelta to the specified resolution.
2141+
Timedelta.round : Round the Timedelta to the nearest specified resolution.
2142+
21042143
Examples
21052144
--------
21062145
>>> td = pd.Timedelta('1001ms')

pandas/_libs/tslibs/timestamps.pyx

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -240,6 +240,27 @@ cdef class _Timestamp(ABCTimestamp):
240240

241241
@property
242242
def value(self) -> int:
243+
"""
244+
Return the value of the Timestamp.
245+
246+
Returns
247+
-------
248+
int
249+
The integer representation of the Timestamp object in nanoseconds
250+
since the Unix epoch (1970-01-01 00:00:00 UTC).
251+
252+
See Also
253+
--------
254+
Timestamp.second : Return the second of the Timestamp.
255+
Timestamp.minute : Return the minute of the Timestamp.
256+
257+
Examples
258+
--------
259+
>>> ts = pd.Timestamp("2024-08-31 16:16:30")
260+
>>> ts.value
261+
1725120990000000000
262+
"""
263+
243264
try:
244265
return convert_reso(self._value, self._creso, NPY_FR_ns, False)
245266
except OverflowError:
@@ -1066,8 +1087,8 @@ cdef class _Timestamp(ABCTimestamp):
10661087

10671088
See Also
10681089
--------
1069-
Timestamp.day : Return the day of the year.
1070-
Timestamp.year : Return the year of the week.
1090+
Timestamp.day : Return the day of the Timestamp.
1091+
Timestamp.year : Return the year of the Timestamp.
10711092

10721093
Examples
10731094
--------

pandas/_testing/asserters.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -188,7 +188,7 @@ def assert_index_equal(
188188
check_order: bool = True,
189189
rtol: float = 1.0e-5,
190190
atol: float = 1.0e-8,
191-
obj: str = "Index",
191+
obj: str | None = None,
192192
) -> None:
193193
"""
194194
Check that left and right Index are equal.
@@ -217,7 +217,7 @@ def assert_index_equal(
217217
Relative tolerance. Only used when check_exact is False.
218218
atol : float, default 1e-8
219219
Absolute tolerance. Only used when check_exact is False.
220-
obj : str, default 'Index'
220+
obj : str, default 'Index' or 'MultiIndex'
221221
Specify object name being compared, internally used to show appropriate
222222
assertion message.
223223
@@ -235,6 +235,9 @@ def assert_index_equal(
235235
"""
236236
__tracebackhide__ = True
237237

238+
if obj is None:
239+
obj = "MultiIndex" if isinstance(left, MultiIndex) else "Index"
240+
238241
def _check_types(left, right, obj: str = "Index") -> None:
239242
if not exact:
240243
return
@@ -283,7 +286,7 @@ def _check_types(left, right, obj: str = "Index") -> None:
283286
right = cast(MultiIndex, right)
284287

285288
for level in range(left.nlevels):
286-
lobj = f"MultiIndex level [{level}]"
289+
lobj = f"{obj} level [{level}]"
287290
try:
288291
# try comparison on levels/codes to avoid densifying MultiIndex
289292
assert_index_equal(
@@ -314,7 +317,7 @@ def _check_types(left, right, obj: str = "Index") -> None:
314317
obj=lobj,
315318
)
316319
# get_level_values may change dtype
317-
_check_types(left.levels[level], right.levels[level], obj=obj)
320+
_check_types(left.levels[level], right.levels[level], obj=lobj)
318321

319322
# skip exact index checking when `check_categorical` is False
320323
elif check_exact and check_categorical:
@@ -527,7 +530,7 @@ def assert_interval_array_equal(
527530
kwargs["check_freq"] = False
528531

529532
assert_equal(left._left, right._left, obj=f"{obj}.left", **kwargs)
530-
assert_equal(left._right, right._right, obj=f"{obj}.left", **kwargs)
533+
assert_equal(left._right, right._right, obj=f"{obj}.right", **kwargs)
531534

532535
assert_attr_equal("closed", left, right, obj=obj)
533536

pandas/core/arrays/_arrow_string_mixins.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,3 +154,54 @@ def _str_endswith(self, pat: str | tuple[str, ...], na: Scalar | None = None):
154154
if not isna(na): # pyright: ignore [reportGeneralTypeIssues]
155155
result = result.fill_null(na)
156156
return self._convert_bool_result(result)
157+
158+
def _str_isalnum(self):
159+
result = pc.utf8_is_alnum(self._pa_array)
160+
return self._convert_bool_result(result)
161+
162+
def _str_isalpha(self):
163+
result = pc.utf8_is_alpha(self._pa_array)
164+
return self._convert_bool_result(result)
165+
166+
def _str_isdecimal(self):
167+
result = pc.utf8_is_decimal(self._pa_array)
168+
return self._convert_bool_result(result)
169+
170+
def _str_isdigit(self):
171+
result = pc.utf8_is_digit(self._pa_array)
172+
return self._convert_bool_result(result)
173+
174+
def _str_islower(self):
175+
result = pc.utf8_is_lower(self._pa_array)
176+
return self._convert_bool_result(result)
177+
178+
def _str_isnumeric(self):
179+
result = pc.utf8_is_numeric(self._pa_array)
180+
return self._convert_bool_result(result)
181+
182+
def _str_isspace(self):
183+
result = pc.utf8_is_space(self._pa_array)
184+
return self._convert_bool_result(result)
185+
186+
def _str_istitle(self):
187+
result = pc.utf8_is_title(self._pa_array)
188+
return self._convert_bool_result(result)
189+
190+
def _str_isupper(self):
191+
result = pc.utf8_is_upper(self._pa_array)
192+
return self._convert_bool_result(result)
193+
194+
def _str_contains(
195+
self, pat, case: bool = True, flags: int = 0, na=None, regex: bool = True
196+
):
197+
if flags:
198+
raise NotImplementedError(f"contains not implemented with {flags=}")
199+
200+
if regex:
201+
pa_contains = pc.match_substring_regex
202+
else:
203+
pa_contains = pc.match_substring
204+
result = pa_contains(self._pa_array, pat, ignore_case=not case)
205+
if not isna(na): # pyright: ignore [reportGeneralTypeIssues]
206+
result = result.fill_null(na)
207+
return self._convert_bool_result(result)

pandas/core/arrays/arrow/array.py

Lines changed: 0 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -2322,21 +2322,6 @@ def _str_count(self, pat: str, flags: int = 0) -> Self:
23222322
raise NotImplementedError(f"count not implemented with {flags=}")
23232323
return type(self)(pc.count_substring_regex(self._pa_array, pat))
23242324

2325-
def _str_contains(
2326-
self, pat, case: bool = True, flags: int = 0, na=None, regex: bool = True
2327-
) -> Self:
2328-
if flags:
2329-
raise NotImplementedError(f"contains not implemented with {flags=}")
2330-
2331-
if regex:
2332-
pa_contains = pc.match_substring_regex
2333-
else:
2334-
pa_contains = pc.match_substring
2335-
result = pa_contains(self._pa_array, pat, ignore_case=not case)
2336-
if not isna(na):
2337-
result = result.fill_null(na)
2338-
return type(self)(result)
2339-
23402325
def _result_converter(self, result):
23412326
return type(self)(result)
23422327

@@ -2442,33 +2427,6 @@ def _str_slice(
24422427
pc.utf8_slice_codeunits(self._pa_array, start=start, stop=stop, step=step)
24432428
)
24442429

2445-
def _str_isalnum(self) -> Self:
2446-
return type(self)(pc.utf8_is_alnum(self._pa_array))
2447-
2448-
def _str_isalpha(self) -> Self:
2449-
return type(self)(pc.utf8_is_alpha(self._pa_array))
2450-
2451-
def _str_isdecimal(self) -> Self:
2452-
return type(self)(pc.utf8_is_decimal(self._pa_array))
2453-
2454-
def _str_isdigit(self) -> Self:
2455-
return type(self)(pc.utf8_is_digit(self._pa_array))
2456-
2457-
def _str_islower(self) -> Self:
2458-
return type(self)(pc.utf8_is_lower(self._pa_array))
2459-
2460-
def _str_isnumeric(self) -> Self:
2461-
return type(self)(pc.utf8_is_numeric(self._pa_array))
2462-
2463-
def _str_isspace(self) -> Self:
2464-
return type(self)(pc.utf8_is_space(self._pa_array))
2465-
2466-
def _str_istitle(self) -> Self:
2467-
return type(self)(pc.utf8_is_title(self._pa_array))
2468-
2469-
def _str_isupper(self) -> Self:
2470-
return type(self)(pc.utf8_is_upper(self._pa_array))
2471-
24722430
def _str_len(self) -> Self:
24732431
return type(self)(pc.utf8_length(self._pa_array))
24742432

0 commit comments

Comments
 (0)