From 2b6e9f66c9b3783bca96bc6228d03ce77a631985 Mon Sep 17 00:00:00 2001 From: akiyuki ishikawa Date: Thu, 17 Jun 2021 18:40:33 +0900 Subject: [PATCH 1/9] fix to_datetime with infer_datetime_format drop tz --- pandas/_libs/tslibs/parsing.pyx | 4 ++++ pandas/tests/tools/test_to_datetime.py | 17 +++++++++++++++++ pandas/tests/tslibs/test_parsing.py | 26 ++++++++++++++++++++++++++ 3 files changed, 47 insertions(+) diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index 9892671f5c18c..faf04d6be972c 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -825,6 +825,10 @@ def format_is_iso(f: str) -> bint: iso_template = '%Y{date_sep}%m{date_sep}%d{time_sep}%H:%M:%S.%f'.format excluded_formats = ['%Y%m%d', '%Y%m', '%Y'] + if (f is not None) and (f[-2:] in ["SZ", "fZ"]): + # remove last 'Z' + f = f[:-1] + for date_sep in [' ', '/', '\\', '-', '.', '']: for time_sep in [' ', 'T']: if (iso_template(date_sep=date_sep, diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index 121ca99785831..589aeeb46e019 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -1941,6 +1941,23 @@ def test_infer_datetime_format_tz_name(self, tz_name, offset): ) tm.assert_series_equal(result, expected) + @pytest.mark.parametrize( + "ts,zero_tz,is_utc", + [ + ("2019-02-02 08:07:13", "Z", True), + ("2019-02-02 08:07:13", "", False), + ("2019-02-02 08:07:13.012345", "Z", True), + ("2019-02-02 08:07:13.012345", "", False), + ], + ) + def test_infer_datetime_format_zero_tz(self, ts, zero_tz, is_utc): + # GH 4104 + s = Series([ts + zero_tz]) + result = to_datetime(s, infer_datetime_format=True) + tz = pytz.utc if is_utc else None + expected = Series([Timestamp(ts, tz=tz)]) + tm.assert_series_equal(result, expected) + @pytest.mark.parametrize("cache", [True, False]) def test_to_datetime_iso8601_noleading_0s(self, cache): # GH 11871 diff --git 
a/pandas/tests/tslibs/test_parsing.py b/pandas/tests/tslibs/test_parsing.py index e580b9112f3ec..f9b5b9cb791d9 100644 --- a/pandas/tests/tslibs/test_parsing.py +++ b/pandas/tests/tslibs/test_parsing.py @@ -226,3 +226,29 @@ def test_parse_time_string_check_instance_type_raise_exception(): result = parse_time_string("2019") expected = (datetime(2019, 1, 1), "year") assert result == expected + + +@pytest.mark.parametrize( + "fmt,expected", + [ + ("%Y %m %d %H:%M:%S", True), + ("%Y/%m/%d %H:%M:%S", True), + (r"%Y\%m\%d %H:%M:%S", True), + ("%Y-%m-%d %H:%M:%S", True), + ("%Y.%m.%d %H:%M:%S", True), + ("%Y%m%d %H:%M:%S", True), + ("%Y-%m-%dT%H:%M:%S", True), + ("%Y-%m-%dT%H:%M:%SZ", True), + ("%Y-%m-%dT%H:%M:%S.%f", True), + ("%Y-%m-%dT%H:%M:%S.%fZ", True), + ("%Y%m%d", False), + ("%Y%m", False), + ("%Y", False), + ("%Y-%m-%d", True), + ("%Y-%m", True), + ], +) +def test_is_iso_format(fmt, expected): + # see gh-41047 + result = parsing.format_is_iso(fmt) + assert result == expected From 09a510882540b0eaf007ad96105dd5cd9c01aafa Mon Sep 17 00:00:00 2001 From: akiyuki ishikawa Date: Thu, 17 Jun 2021 20:09:11 +0900 Subject: [PATCH 2/9] Fix referenced issue number --- pandas/tests/tools/test_to_datetime.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index 589aeeb46e019..14ddc625cffe8 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -1951,7 +1951,7 @@ def test_infer_datetime_format_tz_name(self, tz_name, offset): ], ) def test_infer_datetime_format_zero_tz(self, ts, zero_tz, is_utc): - # GH 4104 + # GH 41047 s = Series([ts + zero_tz]) result = to_datetime(s, infer_datetime_format=True) tz = pytz.utc if is_utc else None From 1886b22de890fcc82158054f378db10e87abd756 Mon Sep 17 00:00:00 2001 From: akiyuki ishikawa Date: Sat, 19 Jun 2021 11:39:00 +0900 Subject: [PATCH 3/9] make guess_datetime_format parse more timezone patterns
--- doc/source/whatsnew/v1.3.0.rst | 1 + pandas/_libs/tslibs/parsing.pyx | 23 ++++++++++++++++++++--- pandas/tests/tslibs/test_parsing.py | 14 ++++++++++++-- 3 files changed, 33 insertions(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 1a5a9980e5e96..a8e19272b10aa 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -932,6 +932,7 @@ Timezones ^^^^^^^^^ - Bug in different ``tzinfo`` objects representing UTC not being treated as equivalent (:issue:`39216`) - Bug in ``dateutil.tz.gettz("UTC")`` not being recognized as equivalent to other UTC-representing tzinfos (:issue:`39276`) +- Bug in :func:`to_datetime` with ``infer_datetime_format=True`` failing to parse zero UTC offset (``Z``) correctly (:issue:`41047`) Numeric ^^^^^^^ diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index faf04d6be972c..03fa214e04590 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -825,9 +825,9 @@ def format_is_iso(f: str) -> bint: iso_template = '%Y{date_sep}%m{date_sep}%d{time_sep}%H:%M:%S.%f'.format excluded_formats = ['%Y%m%d', '%Y%m', '%Y'] - if (f is not None) and (f[-2:] in ["SZ", "fZ"]): - # remove last 'Z' - f = f[:-1] + if (f is not None) and (f[-3:] in ["S%z", "S%Z", "f%z", "f%Z"]): + # remove the last '%z' or '%Z'. + f = f[:-2] for date_sep in [' ', '/', '\\', '-', '.', '']: for time_sep in [' ', 'T']: @@ -888,6 +888,7 @@ def guess_datetime_format( (('second',), '%S', 2), (('microsecond',), '%f', 6), (('second', 'microsecond'), '%S.%f', 0), + (('tzinfo',), '%z', 0), (('tzinfo',), '%Z', 0), ] @@ -908,6 +909,22 @@ def guess_datetime_format( # that any user-provided function will not either. 
tokens = dt_str_split(dt_str) + # Normalize timezone tokens + if (parsed_datetime.tzinfo is not None) and len(tokens) > 1: + if tokens[-1] == "Z": + # the last "Z" means zero offset + tokens[-1] = "+0000" + else: + # If the input string has a timezone offset like '+0900', + # the offset is separated into two tokens, ex. ['+', '0900’]. + # This separation will prevent subsequent processing + # from correctly parsing the time zone format. + # So rejoin them here. + offset_candidate = ''.join(tokens[-2:]) + if re.match(r"(\+|-)\d{4}$", offset_candidate): + tokens[-2] = offset_candidate + tokens = tokens[:-1] + format_guess = [None] * len(tokens) found_attrs = set() diff --git a/pandas/tests/tslibs/test_parsing.py b/pandas/tests/tslibs/test_parsing.py index f9b5b9cb791d9..ba0e677bfdf84 100644 --- a/pandas/tests/tslibs/test_parsing.py +++ b/pandas/tests/tslibs/test_parsing.py @@ -144,6 +144,14 @@ def test_parsers_month_freq(date_str, expected): ("30-12-2011", "%d-%m-%Y"), ("2011-12-30 00:00:00", "%Y-%m-%d %H:%M:%S"), ("2011-12-30T00:00:00", "%Y-%m-%dT%H:%M:%S"), + ("2011-12-30T00:00:00UTC", "%Y-%m-%dT%H:%M:%S%Z"), + ("2011-12-30T00:00:00Z", "%Y-%m-%dT%H:%M:%S%z"), + ("2011-12-30T00:00:00+0900", "%Y-%m-%dT%H:%M:%S%z"), + ("2011-12-30T00:00:00-0900", "%Y-%m-%dT%H:%M:%S%z"), + ("2011-12-30T00:00:00.000000UTC", "%Y-%m-%dT%H:%M:%S.%f%Z"), + ("2011-12-30T00:00:00.000000Z", "%Y-%m-%dT%H:%M:%S.%f%z"), + ("2011-12-30T00:00:00.000000+0900", "%Y-%m-%dT%H:%M:%S.%f%z"), + ("2011-12-30T00:00:00.000000-0900", "%Y-%m-%dT%H:%M:%S.%f%z"), ("2011-12-30 00:00:00.000000", "%Y-%m-%d %H:%M:%S.%f"), ], ) @@ -238,9 +246,11 @@ def test_parse_time_string_check_instance_type_raise_exception(): ("%Y.%m.%d %H:%M:%S", True), ("%Y%m%d %H:%M:%S", True), ("%Y-%m-%dT%H:%M:%S", True), - ("%Y-%m-%dT%H:%M:%SZ", True), + ("%Y-%m-%dT%H:%M:%S%z", True), + ("%Y-%m-%dT%H:%M:%S%Z", True), ("%Y-%m-%dT%H:%M:%S.%f", True), - ("%Y-%m-%dT%H:%M:%S.%fZ", True), + ("%Y-%m-%dT%H:%M:%S.%f%z", True), + 
("%Y-%m-%dT%H:%M:%S.%f%Z", True), ("%Y%m%d", False), ("%Y%m", False), ("%Y", False), From 309b5d916f134a206188cf0c09df39424f2b9726 Mon Sep 17 00:00:00 2001 From: akiyuki ishikawa Date: Wed, 23 Jun 2021 13:19:45 +0900 Subject: [PATCH 4/9] make guess_datetime_format can parse offset with separator --- pandas/_libs/tslibs/parsing.pyx | 67 +++++++++++++++++++++-------- pandas/tests/tslibs/test_parsing.py | 14 ++++++ 2 files changed, 62 insertions(+), 19 deletions(-) diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index 03fa214e04590..c61d3da8c7ea0 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -822,19 +822,17 @@ def format_is_iso(f: str) -> bint: Generally of form YYYY-MM-DDTHH:MM:SS - date separator can be different but must be consistent. Leading 0s in dates and times are optional. """ - iso_template = '%Y{date_sep}%m{date_sep}%d{time_sep}%H:%M:%S.%f'.format + iso_template = '%Y{date_sep}%m{date_sep}%d{time_sep}%H:%M:%S{micro_or_tz}'.format excluded_formats = ['%Y%m%d', '%Y%m', '%Y'] - if (f is not None) and (f[-3:] in ["S%z", "S%Z", "f%z", "f%Z"]): - # remove the last '%z' or '%Z'. - f = f[:-2] - for date_sep in [' ', '/', '\\', '-', '.', '']: for time_sep in [' ', 'T']: - if (iso_template(date_sep=date_sep, - time_sep=time_sep - ).startswith(f) and f not in excluded_formats): - return True + for micro_or_tz in ['', '%z', '%Z', '.%f', '.%f%z', '.%f%Z']: + if (iso_template(date_sep=date_sep, + time_sep=time_sep, + micro_or_tz=micro_or_tz, + ).startswith(f) and f not in excluded_formats): + return True return False @@ -909,21 +907,52 @@ def guess_datetime_format( # that any user-provided function will not either. tokens = dt_str_split(dt_str) - # Normalize timezone tokens - if (parsed_datetime.tzinfo is not None) and len(tokens) > 1: - if tokens[-1] == "Z": - # the last "Z" means zero offset - tokens[-1] = "+0000" + # Normalize offset part of tokens. 
+ # There are multiple formats for the timezone offset. + # To pass the comparison condition between the output of `strftime` and + # joined tokens, which is carried out at the final step of the function, + # the offset part of the tokens must match the '%z' format like '+0900' + # instead of ‘+09:00’. + if (parsed_datetime.tzinfo is not None): + if (len(tokens) > 0) and (tokens[-1] == 'Z'): + # the last 'Z' means zero offset + tokens[-1] = '+0000' else: # If the input string has a timezone offset like '+0900', # the offset is separated into two tokens, ex. ['+', '0900’]. # This separation will prevent subsequent processing # from correctly parsing the time zone format. - # So rejoin them here. - offset_candidate = ''.join(tokens[-2:]) - if re.match(r"(\+|-)\d{4}$", offset_candidate): - tokens[-2] = offset_candidate - tokens = tokens[:-1] + # So in addition to the format nomalization, we rejoin them here. + if ( + (len(tokens) > 3) + and tokens[-1].isdigit() + and (tokens[-2] == ':') + and tokens[-3].isdigit() + and (tokens[-4] in ('+', '-')) + ): + # ex. [..., '+', '9', ':', '5'] -> [..., '+0905'] + offset_idx = -4 + sign, hour_offset, _, min_offset = tokens[offset_idx:] + tokens[offset_idx] = ( + f'{sign}{int(hour_offset):02d}{int(min_offset):02d}' + ) + tokens = tokens[:offset_idx + 1] + elif ( + (len(tokens) > 1) + and tokens[-1].isdigit() + and (tokens[-2] in ('+', '-')) + ): + # ex. [..., '+', '0905'] -> [..., '+0905'] + offset_idx = -2 + sign, offset = tokens[offset_idx:] + if len(offset) <= 2: + # '+09' -> '+0900' + tokens[offset_idx] = f'{sign}{int(offset):02d}00' + else: + tokens[offset_idx] = f'{sign}{int(offset):04d}' + tokens = tokens[:offset_idx + 1] + + # else: Other patterns are tried to parse as a timezone name. 
format_guess = [None] * len(tokens) found_attrs = set() diff --git a/pandas/tests/tslibs/test_parsing.py b/pandas/tests/tslibs/test_parsing.py index ba0e677bfdf84..3992457c9c361 100644 --- a/pandas/tests/tslibs/test_parsing.py +++ b/pandas/tests/tslibs/test_parsing.py @@ -146,12 +146,26 @@ def test_parsers_month_freq(date_str, expected): ("2011-12-30T00:00:00", "%Y-%m-%dT%H:%M:%S"), ("2011-12-30T00:00:00UTC", "%Y-%m-%dT%H:%M:%S%Z"), ("2011-12-30T00:00:00Z", "%Y-%m-%dT%H:%M:%S%z"), + ("2011-12-30T00:00:00+9", "%Y-%m-%dT%H:%M:%S%z"), + ("2011-12-30T00:00:00+09", "%Y-%m-%dT%H:%M:%S%z"), + ("2011-12-30T00:00:00+090", None), ("2011-12-30T00:00:00+0900", "%Y-%m-%dT%H:%M:%S%z"), ("2011-12-30T00:00:00-0900", "%Y-%m-%dT%H:%M:%S%z"), + ("2011-12-30T00:00:00+09:00", "%Y-%m-%dT%H:%M:%S%z"), + ("2011-12-30T00:00:00+09:000", "%Y-%m-%dT%H:%M:%S%z"), + ("2011-12-30T00:00:00+9:0", "%Y-%m-%dT%H:%M:%S%z"), + ("2011-12-30T00:00:00+09:", None), ("2011-12-30T00:00:00.000000UTC", "%Y-%m-%dT%H:%M:%S.%f%Z"), ("2011-12-30T00:00:00.000000Z", "%Y-%m-%dT%H:%M:%S.%f%z"), + ("2011-12-30T00:00:00.000000+9", "%Y-%m-%dT%H:%M:%S.%f%z"), + ("2011-12-30T00:00:00.000000+09", "%Y-%m-%dT%H:%M:%S.%f%z"), + ("2011-12-30T00:00:00.000000+090", None), ("2011-12-30T00:00:00.000000+0900", "%Y-%m-%dT%H:%M:%S.%f%z"), ("2011-12-30T00:00:00.000000-0900", "%Y-%m-%dT%H:%M:%S.%f%z"), + ("2011-12-30T00:00:00.000000+09:00", "%Y-%m-%dT%H:%M:%S.%f%z"), + ("2011-12-30T00:00:00.000000+09:000", "%Y-%m-%dT%H:%M:%S.%f%z"), + ("2011-12-30T00:00:00.000000+9:0", "%Y-%m-%dT%H:%M:%S.%f%z"), + ("2011-12-30T00:00:00.000000+09:", None), ("2011-12-30 00:00:00.000000", "%Y-%m-%d %H:%M:%S.%f"), ], ) From 7198d51b49388fc9301c639bd0b3dd89720c223a Mon Sep 17 00:00:00 2001 From: akiyuki ishikawa Date: Thu, 24 Jun 2021 15:59:04 +0900 Subject: [PATCH 5/9] remove redundant parentheses and separator check --- pandas/_libs/tslibs/parsing.pyx | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git 
a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index c61d3da8c7ea0..32ed16bfaf531 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -913,8 +913,8 @@ def guess_datetime_format( # joined tokens, which is carried out at the final step of the function, # the offset part of the tokens must match the '%z' format like '+0900' # instead of ‘+09:00’. - if (parsed_datetime.tzinfo is not None): - if (len(tokens) > 0) and (tokens[-1] == 'Z'): + if parsed_datetime.tzinfo is not None: + if len(tokens) > 0 and tokens[-1] == 'Z': # the last 'Z' means zero offset tokens[-1] = '+0000' else: @@ -924,11 +924,10 @@ def guess_datetime_format( # from correctly parsing the time zone format. # So in addition to the format nomalization, we rejoin them here. if ( - (len(tokens) > 3) + len(tokens) > 3 and tokens[-1].isdigit() - and (tokens[-2] == ':') and tokens[-3].isdigit() - and (tokens[-4] in ('+', '-')) + and tokens[-4] in ('+', '-') ): # ex. [..., '+', '9', ':', '5'] -> [..., '+0905'] offset_idx = -4 @@ -938,9 +937,9 @@ def guess_datetime_format( ) tokens = tokens[:offset_idx + 1] elif ( - (len(tokens) > 1) + len(tokens) > 1 and tokens[-1].isdigit() - and (tokens[-2] in ('+', '-')) + and tokens[-2] in ('+', '-') ): # ex. [..., '+', '0905'] -> [..., '+0905'] offset_idx = -2 From f42be84e944f59fcdd18eecf92827740566fc792 Mon Sep 17 00:00:00 2001 From: akiyuki ishikawa Date: Wed, 30 Jun 2021 17:27:22 +0900 Subject: [PATCH 6/9] Simplify timezone offset handling. --- pandas/_libs/tslibs/parsing.pyx | 43 +++++++++------------------------ 1 file changed, 12 insertions(+), 31 deletions(-) diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index 32ed16bfaf531..c4e95eaf1063d 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -914,44 +914,25 @@ def guess_datetime_format( # the offset part of the tokens must match the '%z' format like '+0900' # instead of ‘+09:00’. 
if parsed_datetime.tzinfo is not None: + offset_index = None if len(tokens) > 0 and tokens[-1] == 'Z': # the last 'Z' means zero offset - tokens[-1] = '+0000' - else: + offset_index = -1 + elif len(tokens) > 1 and tokens[-2] in ('+', '-'): + # ex. [..., '+', '0900'] + offset_index = -2 + elif len(tokens) > 3 and tokens[-4] in ('+', '-'): + # ex. [..., '+', '09', ':', '00'] + offset_index = -4 + + if offset_index is not None: # If the input string has a timezone offset like '+0900', # the offset is separated into two tokens, ex. ['+', '0900’]. # This separation will prevent subsequent processing # from correctly parsing the time zone format. # So in addition to the format nomalization, we rejoin them here. - if ( - len(tokens) > 3 - and tokens[-1].isdigit() - and tokens[-3].isdigit() - and tokens[-4] in ('+', '-') - ): - # ex. [..., '+', '9', ':', '5'] -> [..., '+0905'] - offset_idx = -4 - sign, hour_offset, _, min_offset = tokens[offset_idx:] - tokens[offset_idx] = ( - f'{sign}{int(hour_offset):02d}{int(min_offset):02d}' - ) - tokens = tokens[:offset_idx + 1] - elif ( - len(tokens) > 1 - and tokens[-1].isdigit() - and tokens[-2] in ('+', '-') - ): - # ex. [..., '+', '0905'] -> [..., '+0905'] - offset_idx = -2 - sign, offset = tokens[offset_idx:] - if len(offset) <= 2: - # '+09' -> '+0900' - tokens[offset_idx] = f'{sign}{int(offset):02d}00' - else: - tokens[offset_idx] = f'{sign}{int(offset):04d}' - tokens = tokens[:offset_idx + 1] - - # else: Other patterns are tried to parse as a timezone name. 
+ tokens[offset_index] = parsed_datetime.strftime("%z") + tokens = tokens[:offset_index + 1 or None] format_guess = [None] * len(tokens) found_attrs = set() From a031e39e50ced966df8078b1bc51f4aebb323c8c Mon Sep 17 00:00:00 2001 From: akiyuki ishikawa Date: Sat, 7 Aug 2021 09:35:59 +0900 Subject: [PATCH 7/9] fix whatsnew entry --- doc/source/whatsnew/v1.3.0.rst | 1 - doc/source/whatsnew/v1.4.0.rst | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 58836e788db27..ed66861efad93 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -944,7 +944,6 @@ Timezones ^^^^^^^^^ - Bug in different ``tzinfo`` objects representing UTC not being treated as equivalent (:issue:`39216`) - Bug in ``dateutil.tz.gettz("UTC")`` not being recognized as equivalent to other UTC-representing tzinfos (:issue:`39276`) -- Bug in :func:`to_datetime` with ``infer_datetime_format=True`` failing to parse zero UTC offset (``Z``) correctly (:issue:`41047`) Numeric ^^^^^^^ diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index fa9c424351b00..97054944883f3 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -205,7 +205,7 @@ Timedelta Timezones ^^^^^^^^^ -- +- Bug in :func:`to_datetime` with ``infer_datetime_format=True`` failing to parse zero UTC offset (``Z``) correctly (:issue:`41047`) - Numeric From a5136edcb108023768596a92d480aef7ae999c6e Mon Sep 17 00:00:00 2001 From: akiyuki ishikawa Date: Tue, 14 Sep 2021 13:33:16 +0900 Subject: [PATCH 8/9] Add to_datetime(,infer_datetime_format=True) bench --- asv_bench/benchmarks/inference.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/asv_bench/benchmarks/inference.py b/asv_bench/benchmarks/inference.py index 4cbaa184791b8..7544a80b3166c 100644 --- a/asv_bench/benchmarks/inference.py +++ b/asv_bench/benchmarks/inference.py @@ -173,6 +173,9 @@ def setup(self): 
self.strings_tz_space = [ x.strftime("%Y-%m-%d %H:%M:%S") + " -0800" for x in rng ] + self.strings_zero_tz = [ + x.strftime("%Y-%m-%d %H:%M:%S") + "Z" for x in rng + ] def time_iso8601(self): to_datetime(self.strings) @@ -189,6 +192,10 @@ def time_iso8601_format_no_sep(self): def time_iso8601_tz_spaceformat(self): to_datetime(self.strings_tz_space) + def time_iso8601_infer_zero_tz_fromat(self): + #GH 41047 + to_datetime(self.strings_zero_tz, infer_datetime_format=True) + class ToDatetimeNONISO8601: def setup(self): From 05381672b8d3c1b5c0f9e48ee69a5ae2124b2459 Mon Sep 17 00:00:00 2001 From: akiyuki ishikawa Date: Tue, 14 Sep 2021 13:50:01 +0900 Subject: [PATCH 9/9] Fix style --- asv_bench/benchmarks/inference.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/asv_bench/benchmarks/inference.py b/asv_bench/benchmarks/inference.py index 7544a80b3166c..769889dfbe75d 100644 --- a/asv_bench/benchmarks/inference.py +++ b/asv_bench/benchmarks/inference.py @@ -173,9 +173,7 @@ def setup(self): self.strings_tz_space = [ x.strftime("%Y-%m-%d %H:%M:%S") + " -0800" for x in rng ] - self.strings_zero_tz = [ - x.strftime("%Y-%m-%d %H:%M:%S") + "Z" for x in rng - ] + self.strings_zero_tz = [x.strftime("%Y-%m-%d %H:%M:%S") + "Z" for x in rng] def time_iso8601(self): to_datetime(self.strings) @@ -193,7 +191,7 @@ def time_iso8601_tz_spaceformat(self): to_datetime(self.strings_tz_space) def time_iso8601_infer_zero_tz_fromat(self): - #GH 41047 + # GH 41047 to_datetime(self.strings_zero_tz, infer_datetime_format=True)