Skip to content

Commit d2d1797

Browse files
authored
PERF: Speed up Period construction (#50149)
* PERF: Speed up Period construction * Try to fix CI? * Avoid hackiness * debug CI * Modify condition * revert whitespace * fix tests
1 parent 3ec3ac2 commit d2d1797

File tree

3 files changed

+11
-13
lines changed

3 files changed

+11
-13
lines changed

doc/source/whatsnew/v2.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -868,6 +868,7 @@ Performance improvements
868868
- Performance improvement in :func:`merge` when not merging on the index - the new index will now be :class:`RangeIndex` instead of :class:`Int64Index` (:issue:`49478`)
869869
- Performance improvement in :meth:`DataFrame.to_dict` and :meth:`Series.to_dict` when using any non-object dtypes (:issue:`46470`)
870870
- Performance improvement in :func:`read_html` when there are multiple tables (:issue:`49929`)
871+
- Performance improvement in :class:`Period` constructor when constructing from a string or integer (:issue:`38312`)
871872
- Performance improvement in :func:`to_datetime` when using ``'%Y%m%d'`` format (:issue:`17410`)
872873
- Performance improvement in :func:`to_datetime` when format is given or can be inferred (:issue:`50465`)
873874
- Performance improvement in :func:`read_csv` when passing :func:`to_datetime` lambda-function to ``date_parser`` and inputs have mixed timezone offsets (:issue:`35296`)

pandas/_libs/tslibs/parsing.pyx

+6-4
Original file line numberDiff line numberDiff line change
@@ -377,7 +377,11 @@ def parse_datetime_string_with_reso(
377377
&out_tzoffset, False
378378
)
379379
if not string_to_dts_failed:
380-
if out_bestunit == NPY_DATETIMEUNIT.NPY_FR_ns or out_local:
380+
timestamp_units = {NPY_DATETIMEUNIT.NPY_FR_ns,
381+
NPY_DATETIMEUNIT.NPY_FR_ps,
382+
NPY_DATETIMEUNIT.NPY_FR_fs,
383+
NPY_DATETIMEUNIT.NPY_FR_as}
384+
if out_bestunit in timestamp_units or out_local:
381385
# TODO: the not-out_local case we could do without Timestamp;
382386
# avoid circular import
383387
from pandas import Timestamp
@@ -389,9 +393,7 @@ def parse_datetime_string_with_reso(
389393
# Match Timestamp and drop picoseconds, femtoseconds, attoseconds
390394
# The new resolution will just be nano
391395
# GH 50417
392-
if out_bestunit in {NPY_DATETIMEUNIT.NPY_FR_ps,
393-
NPY_DATETIMEUNIT.NPY_FR_fs,
394-
NPY_DATETIMEUNIT.NPY_FR_as}:
396+
if out_bestunit in timestamp_units:
395397
out_bestunit = NPY_DATETIMEUNIT.NPY_FR_ns
396398
reso = {
397399
NPY_DATETIMEUNIT.NPY_FR_Y: "year",

pandas/_libs/tslibs/period.pyx

+4-9
Original file line numberDiff line numberDiff line change
@@ -2592,18 +2592,13 @@ class Period(_Period):
25922592

25932593
freqstr = freq.rule_code if freq is not None else None
25942594
dt, reso = parse_datetime_string_with_reso(value, freqstr)
2595-
try:
2596-
ts = Timestamp(value)
2597-
except ValueError:
2598-
nanosecond = 0
2599-
else:
2600-
nanosecond = ts.nanosecond
2601-
if nanosecond != 0:
2602-
reso = "nanosecond"
2595+
if reso == "nanosecond":
2596+
nanosecond = dt.nanosecond
26032597
if dt is NaT:
26042598
ordinal = NPY_NAT
26052599

2606-
if freq is None:
2600+
if freq is None and ordinal != NPY_NAT:
2601+
# Skip NaT, since it doesn't have a resolution
26072602
try:
26082603
freq = attrname_to_abbrevs[reso]
26092604
except KeyError:

0 commit comments

Comments
 (0)