From a6d2638f8f9d68865a29e72bc2f8e074777d8bd5 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 27 Jan 2023 19:48:39 -0800 Subject: [PATCH 1/3] ENH: infer Timestamp unit in non-iso paths --- pandas/_libs/tslibs/conversion.pyx | 28 +++++++++---------- pandas/_libs/tslibs/parsing.pxd | 11 ++++++++ pandas/_libs/tslibs/parsing.pyi | 2 +- pandas/_libs/tslibs/parsing.pyx | 27 ++++++++++++------ pandas/tests/io/parser/test_parse_dates.py | 4 +-- .../scalar/timestamp/test_constructors.py | 8 ++++++ 6 files changed, 55 insertions(+), 25 deletions(-) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 5b636ff69a6a6..c24563054655c 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -50,29 +50,27 @@ from pandas._libs.tslibs.np_datetime cimport ( from pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime -from pandas._libs.tslibs.timezones cimport ( - get_utcoffset, - is_utc, - maybe_get_tz, -) -from pandas._libs.tslibs.util cimport ( - is_datetime64_object, - is_float_object, - is_integer_object, -) - -from pandas._libs.tslibs.parsing import parse_datetime_string - from pandas._libs.tslibs.nattype cimport ( NPY_NAT, c_NaT as NaT, c_nat_strings as nat_strings, ) +from pandas._libs.tslibs.parsing cimport parse_datetime_string from pandas._libs.tslibs.timestamps cimport _Timestamp +from pandas._libs.tslibs.timezones cimport ( + get_utcoffset, + is_utc, + maybe_get_tz, +) from pandas._libs.tslibs.tzconversion cimport ( Localizer, tz_localize_to_utc_single, ) +from pandas._libs.tslibs.util cimport ( + is_datetime64_object, + is_float_object, + is_integer_object, +) # ---------------------------------------------------------------------- # Constants @@ -552,8 +550,10 @@ cdef _TSObject convert_str_to_tsobject(str ts, tzinfo tz, str unit, return obj dt = parse_datetime_string( - ts, dayfirst=dayfirst, yearfirst=yearfirst + ts, dayfirst=dayfirst, yearfirst=yearfirst, out_bestunit=&out_bestunit ) + reso = get_supported_reso(out_bestunit) + return convert_datetime_to_tsobject(dt, tz, nanos=0, reso=reso) return convert_datetime_to_tsobject(dt, tz) diff --git a/pandas/_libs/tslibs/parsing.pxd b/pandas/_libs/tslibs/parsing.pxd index 25667f00e42b5..8809c81b530d0 100644 --- a/pandas/_libs/tslibs/parsing.pxd +++ b/pandas/_libs/tslibs/parsing.pxd @@ -1,3 +1,14 @@ +from cpython.datetime cimport datetime + +from pandas._libs.tslibs.np_datetime cimport NPY_DATETIMEUNIT + cpdef str get_rule_month(str source) cpdef quarter_to_myear(int year, int quarter, str freq) + +cdef datetime parse_datetime_string( + str date_string, + bint dayfirst, + bint yearfirst, + NPY_DATETIMEUNIT* out_bestunit +) diff --git a/pandas/_libs/tslibs/parsing.pyi b/pandas/_libs/tslibs/parsing.pyi index c5d53f77762f9..83a5b0085f0b4 100644 --- a/pandas/_libs/tslibs/parsing.pyi +++ b/pandas/_libs/tslibs/parsing.pyi @@ -6,7 +6,7 @@ from pandas._typing import npt class DateParseError(ValueError): ... -def parse_datetime_string( +def py_parse_datetime_string( date_string: str, dayfirst: bool = ..., yearfirst: bool = ..., diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index e84b5114df074..c6d8e0e8eb4ee 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -264,14 +264,26 @@ cdef bint _does_string_look_like_time(str parse_string): return 0 <= hour <= 23 and 0 <= minute <= 59 -def parse_datetime_string( +def py_parse_datetime_string( + str date_string, bint dayfirst=False, bint yearfirst=False +): + # Python-accessible version for testing (we can't just make + # parse_datetime_string cpdef bc it has a pointer argument) + cdef: + NPY_DATETIMEUNIT out_bestunit + + return parse_datetime_string(date_string, dayfirst, yearfirst, &out_bestunit) + + +cdef datetime parse_datetime_string( # NB: This will break with np.str_ (GH#32264) even though # isinstance(npstrobj, str) evaluates to True, so caller must ensure # the argument is *exactly* 'str' str date_string, - bint dayfirst=False, - bint yearfirst=False, -) -> datetime: + bint dayfirst, + bint yearfirst, + NPY_DATETIMEUNIT* out_bestunit +): """ Parse datetime string, only returns datetime. Also cares special handling matching time patterns. @@ -287,7 +299,6 @@ def parse_datetime_string( cdef: datetime dt - NPY_DATETIMEUNIT out_bestunit bint is_quarter = 0 if not _does_string_look_like_datetime(date_string): @@ -299,13 +310,13 @@ def parse_datetime_string( yearfirst=yearfirst) return dt - dt = _parse_delimited_date(date_string, dayfirst, &out_bestunit) + dt = _parse_delimited_date(date_string, dayfirst, out_bestunit) if dt is not None: return dt try: dt = _parse_dateabbr_string( - date_string, _DEFAULT_DATETIME, None, &out_bestunit, &is_quarter + date_string, _DEFAULT_DATETIME, None, out_bestunit, &is_quarter ) return dt except DateParseError: @@ -315,7 +326,7 @@ def parse_datetime_string( dt = dateutil_parse(date_string, default=_DEFAULT_DATETIME, dayfirst=dayfirst, yearfirst=yearfirst, - ignoretz=False, out_bestunit=&out_bestunit) + ignoretz=False, out_bestunit=out_bestunit) return dt diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py index fc477a899d089..ad13d676e903d 100644 --- a/pandas/tests/io/parser/test_parse_dates.py +++ b/pandas/tests/io/parser/test_parse_dates.py @@ -18,7 +18,7 @@ import pytz from pandas._libs.tslibs import parsing -from pandas._libs.tslibs.parsing import parse_datetime_string +from pandas._libs.tslibs.parsing import py_parse_datetime_string from pandas.compat.pyarrow import ( pa_version_under6p0, pa_version_under7p0, @@ -1755,7 +1755,7 @@ def test_hypothesis_delimited_date( date_string = test_datetime.strftime(date_format.replace(" ", delimiter)) except_out_dateutil, result = _helper_hypothesis_delimited_date( - parse_datetime_string, date_string, dayfirst=dayfirst + py_parse_datetime_string, date_string, dayfirst=dayfirst ) except_in_dateutil, expected = _helper_hypothesis_delimited_date( du_parse, diff --git a/pandas/tests/scalar/timestamp/test_constructors.py b/pandas/tests/scalar/timestamp/test_constructors.py index c6ceb2fcb0ebd..a80a3cd576587 100644 --- a/pandas/tests/scalar/timestamp/test_constructors.py +++ b/pandas/tests/scalar/timestamp/test_constructors.py @@ -34,6 +34,14 @@ def test_construct_from_string_invalid_raises(self): with pytest.raises(ValueError, match="gives an invalid tzoffset"): Timestamp("200622-12-31") + def test_constructor_str_infer_reso(self): + # non-iso8601 path + ts = Timestamp("2016-01-01 1:30:01 PM") + assert ts.unit == "s" + + ts = Timestamp("2016 June 3 15:25:01.345") + assert ts.unit == "ms" + def test_constructor_from_iso8601_str_with_offset_reso(self): # GH#49737 ts = Timestamp("2016-01-01 04:05:06-01:00") From 1ae7d20ef8ab784a49bb0305219235270a67425f Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 30 Jan 2023 20:10:07 -0800 Subject: [PATCH 2/3] test for each of three paths --- pandas/tests/scalar/timestamp/test_constructors.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/pandas/tests/scalar/timestamp/test_constructors.py b/pandas/tests/scalar/timestamp/test_constructors.py index a80a3cd576587..3f7db83b9f99a 100644 --- a/pandas/tests/scalar/timestamp/test_constructors.py +++ b/pandas/tests/scalar/timestamp/test_constructors.py @@ -36,6 +36,16 @@ def test_construct_from_string_invalid_raises(self): def test_constructor_str_infer_reso(self): # non-iso8601 path + + # _parse_delimited_date path + ts = Timestamp("2023/01/30") + assert ts.unit == "s" + + # _parse_dateabbr_string path + ts = Timestamp("2015Q1") + assert ts.unit == "s" + + # dateutil_parse path ts = Timestamp("2016-01-01 1:30:01 PM") assert ts.unit == "s" From 6f22a6f597eabf2ad08e8472d681461d5c19a4ef Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 31 Jan 2023 08:00:39 -0800 Subject: [PATCH 3/3] fix test to use desired path --- pandas/tests/scalar/timestamp/test_constructors.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/scalar/timestamp/test_constructors.py b/pandas/tests/scalar/timestamp/test_constructors.py index 3f7db83b9f99a..8129985ef9bea 100644 --- a/pandas/tests/scalar/timestamp/test_constructors.py +++ b/pandas/tests/scalar/timestamp/test_constructors.py @@ -38,7 +38,7 @@ def test_constructor_str_infer_reso(self): # non-iso8601 path # _parse_delimited_date path - ts = Timestamp("2023/01/30") + ts = Timestamp("01/30/2023") assert ts.unit == "s" # _parse_dateabbr_string path