-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
used regular expression in format_is_iso()
#50468
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 16 commits
82b7505
67d5612
9b13481
23679e4
f30c0c2
0d0d708
6070a00
3b5aa1d
a516dc0
bb5dd4d
055a920
3f92264
cb186e4
08ba66d
ab0e645
5561743
42bc699
a22783e
8ba5d6b
287a1b9
7cd2cc1
f3d3b3d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -14,6 +14,7 @@ from cpython.datetime cimport ( | |
import_datetime() | ||
|
||
from _thread import allocate_lock as _thread_allocate_lock | ||
import re | ||
|
||
import numpy as np | ||
import pytz | ||
|
@@ -43,13 +44,16 @@ from pandas._libs.tslibs.np_datetime cimport ( | |
pydatetime_to_dt64, | ||
string_to_dts, | ||
) | ||
|
||
from pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime | ||
|
||
from pandas._libs.tslibs.timestamps cimport _Timestamp | ||
from pandas._libs.util cimport ( | ||
is_datetime64_object, | ||
is_float_object, | ||
is_integer_object, | ||
) | ||
|
||
from pandas._libs.tslibs.timestamps import Timestamp | ||
|
||
cnp.import_array() | ||
|
@@ -60,15 +64,27 @@ cdef bint format_is_iso(f: str): | |
Generally of form YYYY-MM-DDTHH:MM:SS - date separator can be different | ||
but must be consistent. Leading 0s in dates and times are optional. | ||
""" | ||
excluded_formats = ["%Y%m"] | ||
|
||
for date_sep in [" ", "/", "\\", "-", ".", ""]: | ||
for time_sep in [" ", "T"]: | ||
for micro_or_tz in ["", "%z", ".%f", ".%f%z"]: | ||
iso_fmt = f"%Y{date_sep}%m{date_sep}%d{time_sep}%H:%M:%S{micro_or_tz}" | ||
if iso_fmt.startswith(f) and f not in excluded_formats: | ||
return True | ||
return False | ||
iso_regex = re.compile( | ||
r""" | ||
^ # start of string | ||
%Y # Year | ||
(?:([-/ \\.]?)%m # month with or without separators | ||
(?:\1%d # day with or without separators | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. perhaps let's update the comment to |
||
(?:[ |T]%H # hour with separator | ||
MarcoGorelli marked this conversation as resolved.
Show resolved
Hide resolved
|
||
(?:\:%M # minute with separator | ||
(?:\:%S # second with separator | ||
(?:%z|.%f(?:%z|Z)? # timezone or fractional second | ||
MarcoGorelli marked this conversation as resolved.
Show resolved
Hide resolved
|
||
)?)?)?)?)?)? # optional | ||
$ # end of string | ||
""", | ||
re.VERBOSE, | ||
) | ||
excluded_formats = [ | ||
r"^%Y%m$", | ||
] | ||
if any(re.match(pattern, f) for pattern in excluded_formats): | ||
return False | ||
return bool(re.match(iso_regex, f)) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I don't think we need a regex for the excluded formats - would `return re.match(iso_regex, f) is not None and f not in excluded_formats` work? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yeah, it worked fine without the regex for excluded_formats. |
||
|
||
|
||
def _test_format_is_iso(f: str) -> bool: | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
could you revert these unrelated changes please?