From 82b75058eaffb08f2010df99b4913c185ccfaa79 Mon Sep 17 00:00:00 2001 From: Shashwat Date: Wed, 28 Dec 2022 20:35:23 +0530 Subject: [PATCH 01/17] performace: used regular expression 50465 --- pandas/_libs/tslibs/parsing.pyx | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index 26317da62c8d9..529cc5bdaeb4b 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -824,17 +824,11 @@ def format_is_iso(f: str) -> bint: Generally of form YYYY-MM-DDTHH:MM:SS - date separator can be different but must be consistent. Leading 0s in dates and times are optional. """ - iso_template = "%Y{date_sep}%m{date_sep}%d{time_sep}%H:%M:%S{micro_or_tz}".format + iso_regex = \ + r"^\d{4}(-\d{2}(-\d{2}(T\d{2}:\d{2}:\d{2}(\.\d+)?(([+-]\d{2}:\d{2})|Z)?)?)?)?$" excluded_formats = ["%Y%m%d", "%Y%m", "%Y"] - - for date_sep in [" ", "/", "\\", "-", ".", ""]: - for time_sep in [" ", "T"]: - for micro_or_tz in ["", "%z", ".%f", ".%f%z"]: - if (iso_template(date_sep=date_sep, - time_sep=time_sep, - micro_or_tz=micro_or_tz, - ).startswith(f) and f not in excluded_formats): - return True + if re.match(iso_regex, f) and f not in excluded_formats: + return True return False From 67d56121f6239e6cbc19a6d3f2f1a2d2562dca28 Mon Sep 17 00:00:00 2001 From: Shashwat Date: Wed, 28 Dec 2022 21:34:03 +0530 Subject: [PATCH 02/17] added comments for the code --- pandas/_libs/tslibs/parsing.pyx | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index 529cc5bdaeb4b..3c2bf9bf6eefe 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -826,7 +826,22 @@ def format_is_iso(f: str) -> bint: """ iso_regex = \ r"^\d{4}(-\d{2}(-\d{2}(T\d{2}:\d{2}:\d{2}(\.\d+)?(([+-]\d{2}:\d{2})|Z)?)?)?)?$" + + # ^ -> matches the start of the string + # \d{4} -> matches the four digit(0-9) which represent the year + # -\d{2} -> matches the dash followed by 2 digits + # T\d{2}:\d{2}:\d{2} \ + -> "T" followed by three groups seperated by colon: represent H:M:S + # (\.\d+)? -> represents the fractions of seconds + # (([+-]\d{2}:\d{2})|Z)? -> optional part match the time zone info + # ? -> The ? at the end makes the part of regex optional + # $ -> matches with the end + excluded_formats = ["%Y%m%d", "%Y%m", "%Y"] + + # uses the 're' module to check if the string matches the regular + # expression and not in the list of 'excluded_formats' then it will return true + if re.match(iso_regex, f) and f not in excluded_formats: return True return False From 9b13481fa82b2e98c0ec0779923476663383a8b6 Mon Sep 17 00:00:00 2001 From: Shashwat Date: Wed, 28 Dec 2022 21:41:41 +0530 Subject: [PATCH 03/17] fixed broken comment --- pandas/_libs/tslibs/parsing.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index 3c2bf9bf6eefe..70c7aade82e2f 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -831,7 +831,7 @@ def format_is_iso(f: str) -> bint: # \d{4} -> matches the four digit(0-9) which represent the year # -\d{2} -> matches the dash followed by 2 digits # T\d{2}:\d{2}:\d{2} \ - -> "T" followed by three groups seperated by colon: represent H:M:S + # -> "T" followed by three groups seperated by colon: represent H:M:S # (\.\d+)? -> represents the fractions of seconds # (([+-]\d{2}:\d{2})|Z)? -> optional part match the time zone info # ? -> The ? at the end makes the part of regex optional From 23679e44f18bbc6879200c36adbef75bfeb13760 Mon Sep 17 00:00:00 2001 From: Shashwat Date: Wed, 28 Dec 2022 23:14:27 +0530 Subject: [PATCH 04/17] verbose regex --- pandas/_libs/tslibs/parsing.pyx | 30 +++++++++++++----------------- 1 file changed, 13 insertions(+), 17 deletions(-) diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index 70c7aade82e2f..fb3254b291714 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -824,24 +824,20 @@ def format_is_iso(f: str) -> bint: Generally of form YYYY-MM-DDTHH:MM:SS - date separator can be different but must be consistent. Leading 0s in dates and times are optional. """ - iso_regex = \ - r"^\d{4}(-\d{2}(-\d{2}(T\d{2}:\d{2}:\d{2}(\.\d+)?(([+-]\d{2}:\d{2})|Z)?)?)?)?$" - - # ^ -> matches the start of the string - # \d{4} -> matches the four digit(0-9) which represent the year - # -\d{2} -> matches the dash followed by 2 digits - # T\d{2}:\d{2}:\d{2} \ - # -> "T" followed by three groups seperated by colon: represent H:M:S - # (\.\d+)? -> represents the fractions of seconds - # (([+-]\d{2}:\d{2})|Z)? -> optional part match the time zone info - # ? -> The ? at the end makes the part of regex optional - # $ -> matches with the end - + iso_regex = re.compile( + r""" + ^ # start of string + \d{4} # match a 4-digit year + (-\d{2})? # optionally match a 2-digit month + (-\d{2})? # optionally match a 2-digit day + (T\d{2}:\d{2}:\d{2} # match time in the format "THH:MM:SS" + (\.\d+)? # optionally match a decimal and fractional seconds + ([+-]\d{2}:\d{2}|Z)?)? # optional match timezone in the format "+HH:MM" or "Z" + $ # end of string + """, + re.VERBOSE, + ) excluded_formats = ["%Y%m%d", "%Y%m", "%Y"] - - # uses the 're' module to check if the string matches the regular - # expression and not in the list of 'excluded_formats' then it will return true - if re.match(iso_regex, f) and f not in excluded_formats: return True return False From f30c0c2efc11204f717147d932435ec6566106ed Mon Sep 17 00:00:00 2001 From: Shashwat Date: Thu, 29 Dec 2022 00:08:05 +0530 Subject: [PATCH 05/17] added more seperators --- pandas/_libs/tslibs/parsing.pyx | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index fb3254b291714..556470f6f632d 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -828,9 +828,9 @@ def format_is_iso(f: str) -> bint: r""" ^ # start of string \d{4} # match a 4-digit year - (-\d{2})? # optionally match a 2-digit month - (-\d{2})? # optionally match a 2-digit day - (T\d{2}:\d{2}:\d{2} # match time in the format "THH:MM:SS" + ([ -/\\.]\d{2}|\d{2})? # optionally match a 2-digit month + ([ -/\\.]\d{2}|\d{2})? # optionally match a 2-digit day + ([ T]\d{2}:\d{2}:\d{2} # match time in the format "THH:MM:SS" (\.\d+)? # optionally match a decimal and fractional seconds ([+-]\d{2}:\d{2}|Z)?)? # optional match timezone in the format "+HH:MM" or "Z" $ # end of string From 6070a00abcd8599569c53ad45b2201ed741214ee Mon Sep 17 00:00:00 2001 From: Shashwat Date: Thu, 29 Dec 2022 19:42:58 +0530 Subject: [PATCH 06/17] working excluding format --- pandas/_libs/tslibs/parsing.pyx | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index 556470f6f632d..0af89b88de07f 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -818,7 +818,7 @@ class _timelex: _DATEUTIL_LEXER_SPLIT = _timelex.split -def format_is_iso(f: str) -> bint: +def format_is_iso(f: str) -> bool: """ Does format match the iso8601 set that can be handled by the C parser? Generally of form YYYY-MM-DDTHH:MM:SS - date separator can be different @@ -831,16 +831,23 @@ def format_is_iso(f: str) -> bint: ([ -/\\.]\d{2}|\d{2})? # optionally match a 2-digit month ([ -/\\.]\d{2}|\d{2})? # optionally match a 2-digit day ([ T]\d{2}:\d{2}:\d{2} # match time in the format "THH:MM:SS" - (\.\d+)? # optionally match a decimal and fractional seconds - ([+-]\d{2}:\d{2}|Z)?)? # optional match timezone in the format "+HH:MM" or "Z" + (\.\d+)? # match a decimal and fractional seconds + ([+-]\d{2}:\d{2}|Z)?)? # match timezone in the format "+HH:MM" or "Z" $ # end of string """, re.VERBOSE, ) - excluded_formats = ["%Y%m%d", "%Y%m", "%Y"] - if re.match(iso_regex, f) and f not in excluded_formats: - return True - return False + + excluded_formats = [ + r"^\d{4}\d{2}\d{2}$", # %Y%m%d + r"^\d{4}\d{2}$", # %Y%m + r"^\d{4}$", # %Y + ] + + if any(re.match(pattern, f) for pattern in excluded_formats): + return False + + return bool(re.match(iso_regex, f)) def guess_datetime_format(dt_str: str, bint dayfirst=False) -> str | None: From a516dc03cbe2c2ff286d45b6b96739950a29f516 Mon Sep 17 00:00:00 2001 From: Shashwat Date: Fri, 30 Dec 2022 18:57:12 +0530 Subject: [PATCH 07/17] test passed --- pandas/_libs/tslibs/strptime.pyx | 40 +++++++++++++++++++++++++------- 1 file changed, 31 insertions(+), 9 deletions(-) diff --git a/pandas/_libs/tslibs/strptime.pyx b/pandas/_libs/tslibs/strptime.pyx index 27e99706137b6..133d8d42c82a9 100644 --- a/pandas/_libs/tslibs/strptime.pyx +++ b/pandas/_libs/tslibs/strptime.pyx @@ -14,6 +14,7 @@ from cpython.datetime cimport ( import_datetime() from _thread import allocate_lock as _thread_allocate_lock +import re import numpy as np import pytz @@ -43,13 +44,16 @@ from pandas._libs.tslibs.np_datetime cimport ( pydatetime_to_dt64, string_to_dts, ) + from pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime + from pandas._libs.tslibs.timestamps cimport _Timestamp from pandas._libs.util cimport ( is_datetime64_object, is_float_object, is_integer_object, ) + from pandas._libs.tslibs.timestamps import Timestamp cnp.import_array() @@ -60,15 +64,33 @@ cdef bint format_is_iso(f: str): Generally of form YYYY-MM-DDTHH:MM:SS - date separator can be different but must be consistent. Leading 0s in dates and times are optional. """ - excluded_formats = ["%Y%m"] - - for date_sep in [" ", "/", "\\", "-", ".", ""]: - for time_sep in [" ", "T"]: - for micro_or_tz in ["", "%z", ".%f", ".%f%z"]: - iso_fmt = f"%Y{date_sep}%m{date_sep}%d{time_sep}%H:%M:%S{micro_or_tz}" - if iso_fmt.startswith(f) and f not in excluded_formats: - return True - return False + iso_regex = re.compile( + r""" + ^ # start of string + (?: # group for alternative date formats + %Y[ -/\\.]%m[ -/\\.]%d # 4-digit year, 2-digit month, 2-digit day + | %Y%m%d # OR no separators + | %Y[ -/\\.]%m # OR year & month separated by [ /, \, -] + | %Y%m # OR 4-digit year, 2-digit month, no separators + | %Y # OR 4-digit year + ) + (?: # group for optional time and timezone + [ T]%H:%M:%S # time format THH:MM:SS, T is space of T + (?: # group for optional fraction second & timezone + %z # timezone in the format +HHMM or -HHMM + | .%f(?:%z|Z)? # OR format .%f%z or .%fZ + )? # make the group optional + )? # making the time and timezone grp optional + $ # end of string + """, + re.VERBOSE, + ) + excluded_formats = [ + r"^%Y%m$", + ] + if any(re.match(pattern, f) for pattern in excluded_formats): + return False + return bool(re.match(iso_regex, f)) def _test_format_is_iso(f: str) -> bool: From 055a9207d12d646fa882c7842b2c3dafdfea57b2 Mon Sep 17 00:00:00 2001 From: Shashwat Date: Fri, 30 Dec 2022 21:44:36 +0530 Subject: [PATCH 08/17] one seperator in a condition --- pandas/_libs/tslibs/strptime.pyx | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/pandas/_libs/tslibs/strptime.pyx b/pandas/_libs/tslibs/strptime.pyx index 133d8d42c82a9..86ac888d0c24b 100644 --- a/pandas/_libs/tslibs/strptime.pyx +++ b/pandas/_libs/tslibs/strptime.pyx @@ -68,13 +68,21 @@ cdef bint format_is_iso(f: str): r""" ^ # start of string (?: # group for alternative date formats - %Y[ -/\\.]%m[ -/\\.]%d # 4-digit year, 2-digit month, 2-digit day + %Y[-]%m[-]%d # year month & date separated by either hyphen + | %Y[/]%m[/]%d # OR separated by forward slash + | %Y[.]%m[.]%d # OR separated by period + | %Y[ ]%m[ ]%d # OR separated by space + | %Y[\\]%m[\\]%d # OR separated by backslash | %Y%m%d # OR no separators - | %Y[ -/\\.]%m # OR year & month separated by [ /, \, -] + | %Y[-]%m # OR year & month separated by hyphene + | %Y[/]%m # OR year & month separated by forward slash + | %Y[.]%m # OR year & month separated by period + | %Y[ ]%m # OR year & month separated by space + | %Y[\\]%m # OR year & month separated by backslash | %Y%m # OR 4-digit year, 2-digit month, no separators | %Y # OR 4-digit year ) - (?: # group for optional time and timezone + (?: # group for optional time and timezone [ T]%H:%M:%S # time format THH:MM:SS, T is space of T (?: # group for optional fraction second & timezone %z # timezone in the format +HHMM or -HHMM From 3f9226473bbc148a8a2731886251b93edc46488d Mon Sep 17 00:00:00 2001 From: Shashwat Date: Sat, 31 Dec 2022 11:02:47 +0530 Subject: [PATCH 09/17] removed unnecessary OR statements --- pandas/_libs/tslibs/strptime.pyx | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/pandas/_libs/tslibs/strptime.pyx b/pandas/_libs/tslibs/strptime.pyx index 86ac888d0c24b..81df27618af6c 100644 --- a/pandas/_libs/tslibs/strptime.pyx +++ b/pandas/_libs/tslibs/strptime.pyx @@ -74,13 +74,9 @@ cdef bint format_is_iso(f: str): | %Y[ ]%m[ ]%d # OR separated by space | %Y[\\]%m[\\]%d # OR separated by backslash | %Y%m%d # OR no separators - | %Y[-]%m # OR year & month separated by hyphene - | %Y[/]%m # OR year & month separated by forward slash - | %Y[.]%m # OR year & month separated by period - | %Y[ ]%m # OR year & month separated by space - | %Y[\\]%m # OR year & month separated by backslash - | %Y%m # OR 4-digit year, 2-digit month, no separators - | %Y # OR 4-digit year + | %Y[ /.\\-]%m # OR year & month separated by [ /.\\-] + | %Y%m # OR year & month no separators + | %Y # OR just year ) (?: # group for optional time and timezone [ T]%H:%M:%S # time format THH:MM:SS, T is space of T From 08ba66d5a97e49d735564793ea5780d949f6eb1c Mon Sep 17 00:00:00 2001 From: Shashwat Date: Sat, 31 Dec 2022 16:30:35 +0530 Subject: [PATCH 10/17] used backreferencing --- pandas/_libs/tslibs/strptime.pyx | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/pandas/_libs/tslibs/strptime.pyx b/pandas/_libs/tslibs/strptime.pyx index 81df27618af6c..ee02c2fab5901 100644 --- a/pandas/_libs/tslibs/strptime.pyx +++ b/pandas/_libs/tslibs/strptime.pyx @@ -68,17 +68,13 @@ cdef bint format_is_iso(f: str): r""" ^ # start of string (?: # group for alternative date formats - %Y[-]%m[-]%d # year month & date separated by either hyphen - | %Y[/]%m[/]%d # OR separated by forward slash - | %Y[.]%m[.]%d # OR separated by period - | %Y[ ]%m[ ]%d # OR separated by space - | %Y[\\]%m[\\]%d # OR separated by backslash + %Y([-/.\\ ])%m\1%d # year month & date separated by either [-/.\\ ] | %Y%m%d # OR no separators | %Y[ /.\\-]%m # OR year & month separated by [ /.\\-] | %Y%m # OR year & month no separators | %Y # OR just year ) - (?: # group for optional time and timezone + (?: # group for optional time and timezone [ T]%H:%M:%S # time format THH:MM:SS, T is space of T (?: # group for optional fraction second & timezone %z # timezone in the format +HHMM or -HHMM From ab0e64507886d64b342731ec7dbc270585dd9195 Mon Sep 17 00:00:00 2001 From: Shashwat Date: Sat, 31 Dec 2022 23:41:18 +0530 Subject: [PATCH 11/17] minor bug --- pandas/_libs/tslibs/strptime.pyx | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/_libs/tslibs/strptime.pyx b/pandas/_libs/tslibs/strptime.pyx index ee02c2fab5901..4278cc43a735f 100644 --- a/pandas/_libs/tslibs/strptime.pyx +++ b/pandas/_libs/tslibs/strptime.pyx @@ -87,6 +87,8 @@ cdef bint format_is_iso(f: str): ) excluded_formats = [ r"^%Y%m$", + r"^%Y[/.\\ -]%m[ |T]%H:%M.*$", + r"^%Y%m[ |T]%H:%M.*$", ] if any(re.match(pattern, f) for pattern in excluded_formats): return False From 5561743aad06bb6554ddcf3d51eca9284830d4f2 Mon Sep 17 00:00:00 2001 From: Shashwat Date: Sun, 1 Jan 2023 21:00:03 +0530 Subject: [PATCH 12/17] modified version --- pandas/_libs/tslibs/strptime.pyx | 28 ++++++++++------------------ 1 file changed, 10 insertions(+), 18 deletions(-) diff --git a/pandas/_libs/tslibs/strptime.pyx b/pandas/_libs/tslibs/strptime.pyx index 4278cc43a735f..6821ec264baa9 100644 --- a/pandas/_libs/tslibs/strptime.pyx +++ b/pandas/_libs/tslibs/strptime.pyx @@ -66,29 +66,21 @@ cdef bint format_is_iso(f: str): """ iso_regex = re.compile( r""" - ^ # start of string - (?: # group for alternative date formats - %Y([-/.\\ ])%m\1%d # year month & date separated by either [-/.\\ ] - | %Y%m%d # OR no separators - | %Y[ /.\\-]%m # OR year & month separated by [ /.\\-] - | %Y%m # OR year & month no separators - | %Y # OR just year - ) - (?: # group for optional time and timezone - [ T]%H:%M:%S # time format THH:MM:SS, T is space of T - (?: # group for optional fraction second & timezone - %z # timezone in the format +HHMM or -HHMM - | .%f(?:%z|Z)? # OR format .%f%z or .%fZ - )? # make the group optional - )? # making the time and timezone grp optional - $ # end of string + ^ # start of string + %Y # Year + (?:([-/ \\.]?)%m # month with or without separators + (?:\1%d # day with or without separators + (?:[ |T]%H # hour with separator + (?:\:%M # minute with separator + (?:\:%S # second with separator + (?:%z|.%f(?:%z|Z)? # timezone or fractional second + )?)?)?)?)?)? # optional + $ # end of string """, re.VERBOSE, ) excluded_formats = [ r"^%Y%m$", - r"^%Y[/.\\ -]%m[ |T]%H:%M.*$", - r"^%Y%m[ |T]%H:%M.*$", ] if any(re.match(pattern, f) for pattern in excluded_formats): return False From 42bc6997581a3853309df04a5850ac46fde1c38d Mon Sep 17 00:00:00 2001 From: Shashwat Date: Sun, 1 Jan 2023 21:30:18 +0530 Subject: [PATCH 13/17] non regex excluded format --- pandas/_libs/tslibs/strptime.pyx | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/pandas/_libs/tslibs/strptime.pyx b/pandas/_libs/tslibs/strptime.pyx index 6821ec264baa9..40ae4dd2fad38 100644 --- a/pandas/_libs/tslibs/strptime.pyx +++ b/pandas/_libs/tslibs/strptime.pyx @@ -44,9 +44,7 @@ from pandas._libs.tslibs.np_datetime cimport ( pydatetime_to_dt64, string_to_dts, ) - from pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime - from pandas._libs.tslibs.timestamps cimport _Timestamp from pandas._libs.util cimport ( is_datetime64_object, @@ -69,7 +67,7 @@ cdef bint format_is_iso(f: str): ^ # start of string %Y # Year (?:([-/ \\.]?)%m # month with or without separators - (?:\1%d # day with or without separators + (?: \1%d # day with same separator as for year-month (?:[ |T]%H # hour with separator (?:\:%M # minute with separator (?:\:%S # second with separator @@ -79,12 +77,8 @@ cdef bint format_is_iso(f: str): """, re.VERBOSE, ) - excluded_formats = [ - r"^%Y%m$", - ] - if any(re.match(pattern, f) for pattern in excluded_formats): - return False - return bool(re.match(iso_regex, f)) + excluded_formats = ["%Y%m"] + return re.match(iso_regex, f) is not None and f not in excluded_formats def _test_format_is_iso(f: str) -> bool: From a22783e25ff01d043770e68ab07329044d9e96e5 Mon Sep 17 00:00:00 2001 From: Marco Edward Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Sun, 1 Jan 2023 18:05:35 +0000 Subject: [PATCH 14/17] Update pandas/_libs/tslibs/strptime.pyx --- pandas/_libs/tslibs/strptime.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/strptime.pyx b/pandas/_libs/tslibs/strptime.pyx index 40ae4dd2fad38..94bfa960352d8 100644 --- a/pandas/_libs/tslibs/strptime.pyx +++ b/pandas/_libs/tslibs/strptime.pyx @@ -71,7 +71,7 @@ cdef bint format_is_iso(f: str): (?:[ |T]%H # hour with separator (?:\:%M # minute with separator (?:\:%S # second with separator - (?:%z|.%f(?:%z|Z)? # timezone or fractional second + (?:%z|\.%f(?:%z|Z)? # timezone or fractional second )?)?)?)?)?)? # optional $ # end of string """, From 8ba5d6b0178fe76b9273e5e3ec3ea78a6867cb7e Mon Sep 17 00:00:00 2001 From: Marco Edward Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Sun, 1 Jan 2023 18:05:51 +0000 Subject: [PATCH 15/17] Update pandas/_libs/tslibs/strptime.pyx --- pandas/_libs/tslibs/strptime.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/strptime.pyx b/pandas/_libs/tslibs/strptime.pyx index 94bfa960352d8..d098c34617a70 100644 --- a/pandas/_libs/tslibs/strptime.pyx +++ b/pandas/_libs/tslibs/strptime.pyx @@ -68,7 +68,7 @@ cdef bint format_is_iso(f: str): %Y # Year (?:([-/ \\.]?)%m # month with or without separators (?: \1%d # day with same separator as for year-month - (?:[ |T]%H # hour with separator + (?:[ T]%H # hour with separator (?:\:%M # minute with separator (?:\:%S # second with separator (?:%z|\.%f(?:%z|Z)? # timezone or fractional second From 287a1b9fc8263e1e25ba44b6344644c2df477fb1 Mon Sep 17 00:00:00 2001 From: Marco Edward Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Sun, 1 Jan 2023 18:08:04 +0000 Subject: [PATCH 16/17] Update pandas/_libs/tslibs/strptime.pyx --- pandas/_libs/tslibs/strptime.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/strptime.pyx b/pandas/_libs/tslibs/strptime.pyx index d098c34617a70..b88a43c7c2d93 100644 --- a/pandas/_libs/tslibs/strptime.pyx +++ b/pandas/_libs/tslibs/strptime.pyx @@ -71,7 +71,7 @@ cdef bint format_is_iso(f: str): (?:[ T]%H # hour with separator (?:\:%M # minute with separator (?:\:%S # second with separator - (?:%z|\.%f(?:%z|Z)? # timezone or fractional second + (?:%z|\.%f(?:%z)? # timezone or fractional second )?)?)?)?)?)? # optional $ # end of string """, From f3d3b3da3a90a38e8965e871ddd48d45af2b68ae Mon Sep 17 00:00:00 2001 From: Shashwat Date: Mon, 2 Jan 2023 16:58:42 +0530 Subject: [PATCH 17/17] added whatsnew --- doc/source/whatsnew/v2.0.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index b1387e9717079..22f6659367683 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -103,6 +103,7 @@ Other enhancements - :meth:`DataFrame.plot.hist` now recognizes ``xlabel`` and ``ylabel`` arguments (:issue:`49793`) - Improved error message in :func:`to_datetime` for non-ISO8601 formats, informing users about the position of the first error (:issue:`50361`) - Improved error message when trying to align :class:`DataFrame` objects (for example, in :func:`DataFrame.compare`) to clarify that "identically labelled" refers to both index and columns (:issue:`50083`) +- Performance improvement in :func:`to_datetime` when format is given or can be inferred (:issue:`50465`) - .. ---------------------------------------------------------------------------