Skip to content

Commit 7cb0b19

Browse files
committed
Merge remote-tracking branch 'upstream/main' into perf/ri/reindex_return
2 parents ea3c613 + 8fde168 commit 7cb0b19

36 files changed

+373
-116
lines changed

ci/code_checks.sh

-5
Original file line numberDiff line numberDiff line change
@@ -162,9 +162,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
162162
pandas.Series.dt.qyear\
163163
pandas.Series.dt.unit\
164164
pandas.Series.empty\
165-
pandas.Timedelta.microseconds\
166-
pandas.Timedelta.unit\
167-
pandas.Timedelta.value\
168165
pandas.Timestamp.day\
169166
pandas.Timestamp.fold\
170167
pandas.Timestamp.hour\
@@ -876,11 +873,9 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
876873
pandas.plotting.parallel_coordinates\
877874
pandas.plotting.radviz\
878875
pandas.plotting.table\
879-
pandas.read_feather\
880876
pandas.read_orc\
881877
pandas.read_sas\
882878
pandas.read_spss\
883-
pandas.read_sql_query\
884879
pandas.read_stata\
885880
pandas.set_eng_float_format\
886881
pandas.timedelta_range\

doc/source/development/maintaining.rst

-28
Original file line numberDiff line numberDiff line change
@@ -326,34 +326,6 @@ a milestone before tagging, you can request the bot to backport it with:
326326
@Meeseeksdev backport <branch>
327327
328328
329-
.. _maintaining.asv-machine:
330-
331-
Benchmark machine
332-
-----------------
333-
334-
The team currently owns dedicated hardware for hosting a website for pandas' ASV performance benchmark. The results
335-
are published to https://asv-runner.github.io/asv-collection/pandas/
336-
337-
Configuration
338-
`````````````
339-
340-
The machine can be configured with the `Ansible <http://docs.ansible.com/ansible/latest/index.html>`_ playbook in https://github.com/tomaugspurger/asv-runner.
341-
342-
Publishing
343-
``````````
344-
345-
The results are published to another GitHub repository, https://github.com/tomaugspurger/asv-collection.
346-
Finally, we have a cron job on our docs server to pull from https://github.com/tomaugspurger/asv-collection, to serve them from ``/speed``.
347-
Ask Tom or Joris for access to the webserver.
348-
349-
Debugging
350-
`````````
351-
352-
The benchmarks are scheduled by Airflow. It has a dashboard for viewing and debugging the results. You'll need to set up an SSH tunnel to view them:
353-
354-
ssh -L 8080:localhost:8080 [email protected]
355-
356-
357329
.. _maintaining.release:
358330

359331
Release process

doc/source/whatsnew/v3.0.0.rst

+5-2
Original file line numberDiff line numberDiff line change
@@ -244,6 +244,7 @@ Removal of prior version deprecations/changes
244244

245245
Performance improvements
246246
~~~~~~~~~~~~~~~~~~~~~~~~
247+
- :meth:`Series.str.extract` returns :class:`RangeIndex` columns instead of :class:`Index` columns when possible (:issue:`57542`)
247248
- Performance improvement in :class:`DataFrame` when ``data`` is a ``dict`` and ``columns`` is specified (:issue:`24368`)
248249
- Performance improvement in :meth:`DataFrame.join` for sorted but non-unique indexes (:issue:`56941`)
249250
- Performance improvement in :meth:`DataFrame.join` when left and/or right are non-unique and ``how`` is ``"left"``, ``"right"``, or ``"inner"`` (:issue:`56817`)
@@ -252,12 +253,12 @@ Performance improvements
252253
- Performance improvement in :meth:`Index.join` by propagating cached attributes in cases where the result matches one of the inputs (:issue:`57023`)
253254
- Performance improvement in :meth:`Index.take` when ``indices`` is a full range indexer from zero to length of index (:issue:`56806`)
254255
- Performance improvement in :meth:`MultiIndex.equals` for equal length indexes (:issue:`56990`)
256+
- Performance improvement in :meth:`RangeIndex.__getitem__` with a boolean mask returning a :class:`RangeIndex` instead of an :class:`Index` when possible. (:issue:`57588`)
255257
- Performance improvement in :meth:`RangeIndex.append` when appending the same index (:issue:`57252`)
256258
- Performance improvement in :meth:`RangeIndex.reindex` returning a :class:`RangeIndex` instead of an :class:`Index` when possible. (:issue:`57647`)
257259
- Performance improvement in :meth:`RangeIndex.take` returning a :class:`RangeIndex` instead of an :class:`Index` when possible. (:issue:`57445`)
258-
- Performance improvement in indexing operations for string dtypes (:issue:`56997`)
259-
- :meth:`Series.str.extract` returns a :class:`RangeIndex` columns instead of an :class:`Index` column when possible (:issue:`?``)
260260
- Performance improvement in ``DataFrameGroupBy.__len__`` and ``SeriesGroupBy.__len__`` (:issue:`57595`)
261+
- Performance improvement in indexing operations for string dtypes (:issue:`56997`)
261262

262263
.. ---------------------------------------------------------------------------
263264
.. _whatsnew_300.bug_fixes:
@@ -266,6 +267,7 @@ Bug fixes
266267
~~~~~~~~~
267268
- Fixed bug in :meth:`DataFrame.join` inconsistently setting result index name (:issue:`55815`)
268269
- Fixed bug in :meth:`DataFrame.to_string` that raised ``StopIteration`` with nested DataFrames. (:issue:`16098`)
270+
- Fixed bug in :meth:`DataFrame.update` bool dtype being converted to object (:issue:`55509`)
269271
- Fixed bug in :meth:`Series.diff` allowing non-integer values for the ``periods`` argument. (:issue:`56607`)
270272

271273
Categorical
@@ -325,6 +327,7 @@ MultiIndex
325327

326328
I/O
327329
^^^
330+
- Bug in :meth:`DataFrame.to_excel` when writing empty :class:`DataFrame` with :class:`MultiIndex` on both axes (:issue:`57696`)
328331
-
329332
-
330333

pandas/_libs/tslib.pyx

+3-2
Original file line numberDiff line numberDiff line change
@@ -598,7 +598,8 @@ cpdef array_to_datetime(
598598
is_same_offsets = len(out_tzoffset_vals) == 1
599599
if not is_same_offsets:
600600
raise ValueError(
601-
"cannot parse datetimes with mixed time zones unless `utc=True`"
601+
"Mixed timezones detected. Pass utc=True in to_datetime "
602+
"or tz='UTC' in DatetimeIndex to convert to a common timezone."
602603
)
603604
elif state.found_naive or state.found_other:
604605
# e.g. test_to_datetime_mixed_awareness_mixed_types
@@ -610,7 +611,7 @@ cpdef array_to_datetime(
610611
if not tz_compare(tz_out, tz_out2):
611612
# e.g. test_to_datetime_mixed_tzs_mixed_types
612613
raise ValueError(
613-
"Mixed timezones detected. pass utc=True in to_datetime "
614+
"Mixed timezones detected. Pass utc=True in to_datetime "
614615
"or tz='UTC' in DatetimeIndex to convert to a common timezone."
615616
)
616617
# e.g. test_to_datetime_mixed_types_matching_tzs

pandas/_libs/tslibs/dtypes.pxd

-1
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@ cpdef int64_t periods_per_second(NPY_DATETIMEUNIT reso) except? -1
1111
cdef NPY_DATETIMEUNIT get_supported_reso(NPY_DATETIMEUNIT reso)
1212
cdef bint is_supported_unit(NPY_DATETIMEUNIT reso)
1313

14-
cpdef freq_to_period_freqstr(freq_n, freq_name)
1514
cdef dict c_OFFSET_TO_PERIOD_FREQSTR
1615
cdef dict c_OFFSET_DEPR_FREQSTR
1716
cdef dict c_REVERSE_OFFSET_DEPR_FREQSTR

pandas/_libs/tslibs/dtypes.pyi

-1
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@ OFFSET_TO_PERIOD_FREQSTR: dict[str, str]
77
def periods_per_day(reso: int = ...) -> int: ...
88
def periods_per_second(reso: int) -> int: ...
99
def abbrev_to_npy_unit(abbrev: str | None) -> int: ...
10-
def freq_to_period_freqstr(freq_n: int, freq_name: str) -> str: ...
1110

1211
class PeriodDtypeBase:
1312
_dtype_code: int # PeriodDtypeCode

pandas/_libs/tslibs/dtypes.pyx

-9
Original file line numberDiff line numberDiff line change
@@ -334,15 +334,6 @@ cdef dict c_REVERSE_OFFSET_DEPR_FREQSTR = {
334334
v: k for k, v in c_OFFSET_DEPR_FREQSTR.items()
335335
}
336336

337-
cpdef freq_to_period_freqstr(freq_n, freq_name):
338-
if freq_n == 1:
339-
freqstr = f"""{c_OFFSET_TO_PERIOD_FREQSTR.get(
340-
freq_name, freq_name)}"""
341-
else:
342-
freqstr = f"""{freq_n}{c_OFFSET_TO_PERIOD_FREQSTR.get(
343-
freq_name, freq_name)}"""
344-
return freqstr
345-
346337
# Map deprecated resolution abbreviations to correct resolution abbreviations
347338
cdef dict c_DEPR_ABBREVS = {
348339
"A": "Y",

pandas/_libs/tslibs/offsets.pyx

+1-1
Original file line numberDiff line numberDiff line change
@@ -4698,7 +4698,7 @@ _lite_rule_alias = {
46984698
"ns": "ns",
46994699
}
47004700

4701-
_dont_uppercase = _dont_uppercase = {"h", "bh", "cbh", "MS", "ms", "s"}
4701+
_dont_uppercase = {"h", "bh", "cbh", "MS", "ms", "s"}
47024702

47034703

47044704
INVALID_FREQ_ERR_MSG = "Invalid frequency: {0}"

pandas/_libs/tslibs/period.pyx

+12-11
Original file line numberDiff line numberDiff line change
@@ -38,10 +38,7 @@ from libc.time cimport (
3838
tm,
3939
)
4040

41-
from pandas._libs.tslibs.dtypes cimport (
42-
c_OFFSET_TO_PERIOD_FREQSTR,
43-
freq_to_period_freqstr,
44-
)
41+
from pandas._libs.tslibs.dtypes cimport c_OFFSET_TO_PERIOD_FREQSTR
4542

4643
from pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime
4744

@@ -97,9 +94,6 @@ from pandas._libs.tslibs.dtypes cimport (
9794
attrname_to_abbrevs,
9895
freq_group_code_to_npy_unit,
9996
)
100-
101-
from pandas._libs.tslibs.dtypes import freq_to_period_freqstr
102-
10397
from pandas._libs.tslibs.parsing cimport quarter_to_myear
10498

10599
from pandas._libs.tslibs.parsing import parse_datetime_string_with_reso
@@ -1554,7 +1548,7 @@ def extract_ordinals(ndarray values, freq) -> np.ndarray:
15541548
# if we don't raise here, we'll segfault later!
15551549
raise TypeError("extract_ordinals values must be object-dtype")
15561550

1557-
freqstr = freq_to_period_freqstr(freq.n, freq.name)
1551+
freqstr = PeriodDtypeBase(freq._period_dtype_code, freq.n)._freqstr
15581552

15591553
for i in range(n):
15601554
# Analogous to: p = values[i]
@@ -1722,8 +1716,15 @@ cdef class PeriodMixin:
17221716
condition = self.freq != other
17231717

17241718
if condition:
1725-
freqstr = freq_to_period_freqstr(self.freq.n, self.freq.name)
1726-
other_freqstr = freq_to_period_freqstr(other.n, other.name)
1719+
freqstr = PeriodDtypeBase(
1720+
self.freq._period_dtype_code, self.freq.n
1721+
)._freqstr
1722+
if hasattr(other, "_period_dtype_code"):
1723+
other_freqstr = PeriodDtypeBase(
1724+
other._period_dtype_code, other.n
1725+
)._freqstr
1726+
else:
1727+
other_freqstr = other.freqstr
17271728
msg = DIFFERENT_FREQ.format(
17281729
cls=type(self).__name__,
17291730
own_freq=freqstr,
@@ -2479,7 +2480,7 @@ cdef class _Period(PeriodMixin):
24792480
>>> pd.Period('2020-01', 'D').freqstr
24802481
'D'
24812482
"""
2482-
freqstr = freq_to_period_freqstr(self.freq.n, self.freq.name)
2483+
freqstr = PeriodDtypeBase(self.freq._period_dtype_code, self.freq.n)._freqstr
24832484
return freqstr
24842485

24852486
def __repr__(self) -> str:

pandas/_libs/tslibs/strptime.pyx

+6-3
Original file line numberDiff line numberDiff line change
@@ -503,7 +503,8 @@ def array_strptime(
503503
is_same_offsets = len(out_tzoffset_vals) == 1
504504
if not is_same_offsets or (state.found_naive or state.found_other):
505505
raise ValueError(
506-
"cannot parse datetimes with mixed time zones unless `utc=True`"
506+
"Mixed timezones detected. Pass utc=True in to_datetime "
507+
"or tz='UTC' in DatetimeIndex to convert to a common timezone."
507508
)
508509
elif tz_out is not None:
509510
# GH#55693
@@ -512,7 +513,8 @@ def array_strptime(
512513
if not tz_compare(tz_out, tz_out2):
513514
# e.g. test_to_datetime_mixed_offsets_with_utc_false_removed
514515
raise ValueError(
515-
"cannot parse datetimes with mixed time zones unless `utc=True`"
516+
"Mixed timezones detected. Pass utc=True in to_datetime "
517+
"or tz='UTC' in DatetimeIndex to convert to a common timezone."
516518
)
517519
# e.g. test_guess_datetime_format_with_parseable_formats
518520
else:
@@ -523,7 +525,8 @@ def array_strptime(
523525
if tz_out and (state.found_other or state.found_naive_str):
524526
# found_other indicates a tz-naive int, float, dt64, or date
525527
raise ValueError(
526-
"cannot parse datetimes with mixed time zones unless `utc=True`"
528+
"Mixed timezones detected. Pass utc=True in to_datetime "
529+
"or tz='UTC' in DatetimeIndex to convert to a common timezone."
527530
)
528531

529532
if infer_reso:

pandas/_libs/tslibs/timedeltas.pyx

+70
Original file line numberDiff line numberDiff line change
@@ -1038,6 +1038,25 @@ cdef class _Timedelta(timedelta):
10381038

10391039
@property
10401040
def value(self):
1041+
"""
1042+
Return the value of Timedelta object in nanoseconds.
1043+
1044+
Return the total seconds, milliseconds and microseconds
1045+
of the timedelta as nanoseconds.
1046+
1047+
Returns
1048+
-------
1049+
int
1050+
1051+
See Also
1052+
--------
1053+
Timedelta.unit : Return the unit of Timedelta object.
1054+
1055+
Examples
1056+
--------
1057+
>>> pd.Timedelta(1, "us").value
1058+
1000
1059+
"""
10411060
try:
10421061
return convert_reso(self._value, self._creso, NPY_FR_ns, False)
10431062
except OverflowError:
@@ -1120,6 +1139,37 @@ cdef class _Timedelta(timedelta):
11201139
def microseconds(self) -> int: # TODO(cython3): make cdef property
11211140
# NB: using the python C-API PyDateTime_DELTA_GET_MICROSECONDS will fail
11221141
# (or be incorrect)
1142+
"""
1143+
Return the number of microseconds (n), where 0 <= n < 1 second.
1144+
1145+
Timedelta.microseconds = milliseconds * 1000 + microseconds.
1146+
1147+
Returns
1148+
-------
1149+
int
1150+
Number of microseconds.
1151+
1152+
See Also
1153+
--------
1154+
Timedelta.components : Return all attributes with assigned values
1155+
(i.e. days, hours, minutes, seconds, milliseconds, microseconds,
1156+
nanoseconds).
1157+
1158+
Examples
1159+
--------
1160+
**Using string input**
1161+
1162+
>>> td = pd.Timedelta('1 days 2 min 3 us')
1163+
1164+
>>> td.microseconds
1165+
3
1166+
1167+
**Using integer input**
1168+
1169+
>>> td = pd.Timedelta(42, unit='us')
1170+
>>> td.microseconds
1171+
42
1172+
"""
11231173
self._ensure_components()
11241174
return self._ms * 1000 + self._us
11251175

@@ -1141,6 +1191,26 @@ cdef class _Timedelta(timedelta):
11411191

11421192
@property
11431193
def unit(self) -> str:
1194+
"""
1195+
Return the unit of Timedelta object.
1196+
1197+
The unit of Timedelta object is nanosecond, i.e., 'ns' by default.
1198+
1199+
Returns
1200+
-------
1201+
str
1202+
1203+
See Also
1204+
--------
1205+
Timedelta.value : Return the value of Timedelta object in nanoseconds.
1206+
Timedelta.as_unit : Convert the underlying int64 representation to
1207+
the given unit.
1208+
1209+
Examples
1210+
--------
1211+
>>> td = pd.Timedelta(42, unit='us')
>>> td.unit
1212+
'ns'
1213+
"""
11441214
return npy_unit_to_abbrev(self._creso)
11451215

11461216
def __hash__(_Timedelta self):

pandas/core/arrays/period.py

+8-5
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,6 @@
3737
from pandas._libs.tslibs.dtypes import (
3838
FreqGroup,
3939
PeriodDtypeBase,
40-
freq_to_period_freqstr,
4140
)
4241
from pandas._libs.tslibs.fields import isleapyear_arr
4342
from pandas._libs.tslibs.offsets import (
@@ -325,7 +324,7 @@ def _from_datetime64(cls, data, freq, tz=None) -> Self:
325324
PeriodArray[freq]
326325
"""
327326
if isinstance(freq, BaseOffset):
328-
freq = freq_to_period_freqstr(freq.n, freq.name)
327+
freq = PeriodDtype(freq)._freqstr
329328
data, freq = dt64arr_to_periodarr(data, freq, tz)
330329
dtype = PeriodDtype(freq)
331330
return cls(data, dtype=dtype)
@@ -399,7 +398,7 @@ def freq(self) -> BaseOffset:
399398

400399
@property
401400
def freqstr(self) -> str:
402-
return freq_to_period_freqstr(self.freq.n, self.freq.name)
401+
return PeriodDtype(self.freq)._freqstr
403402

404403
def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
405404
if dtype == "i8":
@@ -1002,13 +1001,17 @@ def raise_on_incompatible(left, right) -> IncompatibleFrequency:
10021001
if isinstance(right, (np.ndarray, ABCTimedeltaArray)) or right is None:
10031002
other_freq = None
10041003
elif isinstance(right, BaseOffset):
1005-
other_freq = freq_to_period_freqstr(right.n, right.name)
1004+
with warnings.catch_warnings():
1005+
warnings.filterwarnings(
1006+
"ignore", r"PeriodDtype\[B\] is deprecated", category=FutureWarning
1007+
)
1008+
other_freq = PeriodDtype(right)._freqstr
10061009
elif isinstance(right, (ABCPeriodIndex, PeriodArray, Period)):
10071010
other_freq = right.freqstr
10081011
else:
10091012
other_freq = delta_to_tick(Timedelta(right)).freqstr
10101013

1011-
own_freq = freq_to_period_freqstr(left.freq.n, left.freq.name)
1014+
own_freq = PeriodDtype(left.freq)._freqstr
10121015
msg = DIFFERENT_FREQ.format(
10131016
cls=type(left).__name__, own_freq=own_freq, other_freq=other_freq
10141017
)

0 commit comments

Comments
 (0)