Skip to content

ENH: infer Timestamp unit in non-iso paths #51039

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Feb 1, 2023
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 14 additions & 14 deletions pandas/_libs/tslibs/conversion.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -50,29 +50,27 @@ from pandas._libs.tslibs.np_datetime cimport (

from pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime

from pandas._libs.tslibs.timezones cimport (
get_utcoffset,
is_utc,
maybe_get_tz,
)
from pandas._libs.tslibs.util cimport (
is_datetime64_object,
is_float_object,
is_integer_object,
)

from pandas._libs.tslibs.parsing import parse_datetime_string

from pandas._libs.tslibs.nattype cimport (
NPY_NAT,
c_NaT as NaT,
c_nat_strings as nat_strings,
)
from pandas._libs.tslibs.parsing cimport parse_datetime_string
from pandas._libs.tslibs.timestamps cimport _Timestamp
from pandas._libs.tslibs.timezones cimport (
get_utcoffset,
is_utc,
maybe_get_tz,
)
from pandas._libs.tslibs.tzconversion cimport (
Localizer,
tz_localize_to_utc_single,
)
from pandas._libs.tslibs.util cimport (
is_datetime64_object,
is_float_object,
is_integer_object,
)

# ----------------------------------------------------------------------
# Constants
Expand Down Expand Up @@ -552,8 +550,10 @@ cdef _TSObject convert_str_to_tsobject(str ts, tzinfo tz, str unit,
return obj

dt = parse_datetime_string(
ts, dayfirst=dayfirst, yearfirst=yearfirst
ts, dayfirst=dayfirst, yearfirst=yearfirst, out_bestunit=&out_bestunit
)
reso = get_supported_reso(out_bestunit)
return convert_datetime_to_tsobject(dt, tz, nanos=0, reso=reso)

return convert_datetime_to_tsobject(dt, tz)

Expand Down
11 changes: 11 additions & 0 deletions pandas/_libs/tslibs/parsing.pxd
Original file line number Diff line number Diff line change
@@ -1,3 +1,14 @@
from cpython.datetime cimport datetime

from pandas._libs.tslibs.np_datetime cimport NPY_DATETIMEUNIT


cpdef str get_rule_month(str source)
cpdef quarter_to_myear(int year, int quarter, str freq)

cdef datetime parse_datetime_string(
str date_string,
bint dayfirst,
bint yearfirst,
NPY_DATETIMEUNIT* out_bestunit
)
2 changes: 1 addition & 1 deletion pandas/_libs/tslibs/parsing.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ from pandas._typing import npt

class DateParseError(ValueError): ...

def parse_datetime_string(
def py_parse_datetime_string(
date_string: str,
dayfirst: bool = ...,
yearfirst: bool = ...,
Expand Down
27 changes: 19 additions & 8 deletions pandas/_libs/tslibs/parsing.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -264,14 +264,26 @@ cdef bint _does_string_look_like_time(str parse_string):
return 0 <= hour <= 23 and 0 <= minute <= 59


def parse_datetime_string(
def py_parse_datetime_string(
str date_string, bint dayfirst=False, bint yearfirst=False
):
# Python-accessible version for testing (we can't just make
# parse_datetime_string cpdef because it has a pointer argument)
cdef:
NPY_DATETIMEUNIT out_bestunit

return parse_datetime_string(date_string, dayfirst, yearfirst, &out_bestunit)


cdef datetime parse_datetime_string(
# NB: This will break with np.str_ (GH#32264) even though
# isinstance(npstrobj, str) evaluates to True, so caller must ensure
# the argument is *exactly* 'str'
str date_string,
bint dayfirst=False,
bint yearfirst=False,
) -> datetime:
bint dayfirst,
bint yearfirst,
NPY_DATETIMEUNIT* out_bestunit
):
"""
Parse a datetime string, returning only the datetime.
Also applies special handling to strings that match time patterns.
Expand All @@ -287,7 +299,6 @@ def parse_datetime_string(

cdef:
datetime dt
NPY_DATETIMEUNIT out_bestunit
bint is_quarter = 0

if not _does_string_look_like_datetime(date_string):
Expand All @@ -299,13 +310,13 @@ def parse_datetime_string(
yearfirst=yearfirst)
return dt

dt = _parse_delimited_date(date_string, dayfirst, &out_bestunit)
dt = _parse_delimited_date(date_string, dayfirst, out_bestunit)
if dt is not None:
return dt

try:
dt = _parse_dateabbr_string(
date_string, _DEFAULT_DATETIME, None, &out_bestunit, &is_quarter
date_string, _DEFAULT_DATETIME, None, out_bestunit, &is_quarter
)
return dt
except DateParseError:
Expand All @@ -315,7 +326,7 @@ def parse_datetime_string(

dt = dateutil_parse(date_string, default=_DEFAULT_DATETIME,
dayfirst=dayfirst, yearfirst=yearfirst,
ignoretz=False, out_bestunit=&out_bestunit)
ignoretz=False, out_bestunit=out_bestunit)
return dt


Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/io/parser/test_parse_dates.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
import pytz

from pandas._libs.tslibs import parsing
from pandas._libs.tslibs.parsing import parse_datetime_string
from pandas._libs.tslibs.parsing import py_parse_datetime_string
from pandas.compat.pyarrow import (
pa_version_under6p0,
pa_version_under7p0,
Expand Down Expand Up @@ -1760,7 +1760,7 @@ def test_hypothesis_delimited_date(
date_string = test_datetime.strftime(date_format.replace(" ", delimiter))

except_out_dateutil, result = _helper_hypothesis_delimited_date(
parse_datetime_string, date_string, dayfirst=dayfirst
py_parse_datetime_string, date_string, dayfirst=dayfirst
)
except_in_dateutil, expected = _helper_hypothesis_delimited_date(
du_parse,
Expand Down
18 changes: 18 additions & 0 deletions pandas/tests/scalar/timestamp/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,24 @@ def test_construct_from_string_invalid_raises(self):
with pytest.raises(ValueError, match="gives an invalid tzoffset"):
Timestamp("200622-12-31")

def test_constructor_str_infer_reso(self):
# non-iso8601 path

# _parse_delimited_date path
ts = Timestamp("01/30/2023")
assert ts.unit == "s"

# _parse_dateabbr_string path
ts = Timestamp("2015Q1")
assert ts.unit == "s"

# dateutil_parse path
ts = Timestamp("2016-01-01 1:30:01 PM")
assert ts.unit == "s"

ts = Timestamp("2016 June 3 15:25:01.345")
assert ts.unit == "ms"

def test_constructor_from_iso8601_str_with_offset_reso(self):
# GH#49737
ts = Timestamp("2016-01-01 04:05:06-01:00")
Expand Down