-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
WIP Share paths 2 #50258
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
WIP Share paths 2 #50258
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -39,6 +39,7 @@ from pandas._libs.tslibs.np_datetime cimport ( | |
pydatetime_to_dt64, | ||
string_to_dts, | ||
) | ||
from pandas._libs.tslibs.strptime cimport strptime | ||
from pandas._libs.util cimport ( | ||
is_datetime64_object, | ||
is_float_object, | ||
|
@@ -75,6 +76,19 @@ from pandas._libs.tslibs.timestamps import Timestamp | |
from pandas._libs.missing cimport checknull_with_nat_and_na | ||
from pandas._libs.tslibs.tzconversion cimport tz_localize_to_utc_single | ||
|
||
from _thread import allocate_lock as _thread_allocate_lock | ||
|
||
from _strptime import _getlang | ||
|
||
from pandas._libs.tslibs.strptime import TimeRE | ||
|
||
# Lock serialising access to the two module-level caches defined below.
_cache_lock = _thread_allocate_lock()
# DO NOT modify _TimeRE_cache or _regex_cache without acquiring the cache lock
# first!
# TimeRE instance used to compile format strings into regexes; it is replaced
# (and _regex_cache emptied) when _getlang() no longer matches its
# locale_time.lang.
_TimeRE_cache = TimeRE()
_CACHE_MAX_SIZE = 5  # Max number of regexes stored in _regex_cache
# Maps a format string to its compiled regex. It is cleared wholesale, rather
# than evicting single entries, once it holds more than _CACHE_MAX_SIZE items.
_regex_cache = {}
|
||
|
||
def _test_parse_iso8601(ts: str): | ||
""" | ||
|
@@ -524,6 +538,41 @@ cpdef array_to_datetime( | |
result = np.empty(n, dtype="M8[ns]") | ||
iresult = result.view("i8") | ||
|
||
if format is not None and not require_iso8601: | ||
if "%W" in format or "%U" in format: | ||
if "%Y" not in format and "%y" not in format: | ||
raise ValueError("Cannot use '%W' or '%U' without day and year") | ||
if "%A" not in format and "%a" not in format and "%w" not in format: | ||
raise ValueError("Cannot use '%W' or '%U' without day and year") | ||
elif "%Z" in format and "%z" in format: | ||
raise ValueError("Cannot parse both %Z and %z") | ||
|
||
global _TimeRE_cache, _regex_cache | ||
with _cache_lock: | ||
if _getlang() != _TimeRE_cache.locale_time.lang: | ||
_TimeRE_cache = TimeRE() | ||
_regex_cache.clear() | ||
if len(_regex_cache) > _CACHE_MAX_SIZE: | ||
_regex_cache.clear() | ||
locale_time = _TimeRE_cache.locale_time | ||
format_regex = _regex_cache.get(format) | ||
if not format_regex: | ||
try: | ||
format_regex = _TimeRE_cache.compile(format) | ||
# KeyError raised when a bad format is found; can be specified as | ||
# \\, in which case it was a stray % but with a space after it | ||
except KeyError, err: | ||
bad_directive = err.args[0] | ||
if bad_directive == "\\": | ||
bad_directive = "%" | ||
del err | ||
raise ValueError(f"'{bad_directive}' is a bad directive " | ||
f"in format '{format}'") | ||
# IndexError only occurs when the format string is "%" | ||
except IndexError: | ||
raise ValueError(f"stray % in format '{format}'") | ||
_regex_cache[format] = format_regex | ||
|
||
try: | ||
for i in range(n): | ||
val = values[i] | ||
|
@@ -556,17 +605,10 @@ cpdef array_to_datetime( | |
seen_datetime = True | ||
iresult[i] = get_datetime64_nanos(val, NPY_FR_ns) | ||
|
||
elif is_integer_object(val) or is_float_object(val): | ||
if require_iso8601: | ||
if is_coerce: | ||
iresult[i] = NPY_NAT | ||
continue | ||
elif is_raise: | ||
raise ValueError( | ||
f"time data \"{val}\" at position {i} doesn't " | ||
f"match format \"{format}\"" | ||
) | ||
return values, tz_out | ||
elif ( | ||
(is_integer_object(val) or is_float_object(val)) | ||
and format is None | ||
): | ||
Comment on lines
+608
to
+611
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. parsing integers/floats with |
||
# these must be ns unit by-definition | ||
seen_integer = True | ||
|
||
|
@@ -585,7 +627,15 @@ cpdef array_to_datetime( | |
except OverflowError: | ||
iresult[i] = NPY_NAT | ||
|
||
elif isinstance(val, str): | ||
elif ( | ||
(is_integer_object(val) or is_float_object(val)) | ||
or isinstance(val, str) | ||
): | ||
if not isinstance(val, str): | ||
if val != val or val == NPY_NAT: | ||
iresult[i] = NPY_NAT | ||
continue | ||
|
||
# string | ||
if type(val) is not str: | ||
# GH#32264 np.str_ object | ||
|
@@ -595,6 +645,42 @@ cpdef array_to_datetime( | |
iresult[i] = NPY_NAT | ||
continue | ||
|
||
if ( | ||
format is not None | ||
and ( | ||
not require_iso8601 | ||
or ( | ||
require_iso8601 and format == "%Y%m%d" and len(val) != 8 | ||
) | ||
Comment on lines
+652
to
+654
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. the fast ISO path handles |
||
) | ||
and val not in ("today", "now") | ||
): | ||
try: | ||
_iresult, _tzinfo = strptime( | ||
val, format, exact, format_regex, locale_time, dts | ||
) | ||
except (ValueError, OverflowError): | ||
if is_coerce: | ||
iresult[i] = NPY_NAT | ||
continue | ||
elif is_raise: | ||
raise | ||
return values, tz_out | ||
value = tz_localize_to_utc_single(_iresult, _tzinfo) | ||
if _tzinfo is not None: | ||
found_tz = True | ||
tz_out = convert_timezone( | ||
_tzinfo, | ||
tz_out, | ||
found_naive, | ||
found_tz, | ||
utc_convert, | ||
) | ||
else: | ||
found_naive = True | ||
Comment on lines
+669
to
+680
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. the thing with |
||
iresult[i] = value | ||
continue | ||
|
||
string_to_dts_failed = string_to_dts( | ||
val, &dts, &out_bestunit, &out_local, | ||
&out_tzoffset, False, format, exact | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
from pandas._libs.tslibs.np_datetime cimport npy_datetimestruct | ||
|
||
|
||
# Declaration of the single-value strptime parser so that other Cython
# modules can ``cimport`` it. The parameters must stay in sync with the
# ``cdef strptime`` implementation:
#   val          - the value to parse
#   fmt          - the format string (used in error messages)
#   exact        - True: regex must match from the start and consume all of
#                  ``val``; False: regex is searched for anywhere in ``val``
#   format_regex - compiled regex for ``fmt``
#   locale_time  - locale lookup tables (month/weekday names, AM/PM markers)
#   dts          - datetime struct, passed by value
cdef strptime(
    val,
    str fmt,
    bint exact,
    format_regex,
    locale_time,
    npy_datetimestruct dts,
)
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -71,6 +71,196 @@ cdef dict _parse_code_table = {"y": 0, | |
"V": 21, | ||
"u": 22} | ||
|
||
cdef strptime(
    val,
    str fmt,
    bint exact,
    format_regex,
    locale_time,
    npy_datetimestruct dts,
):
    """
    Parse one value against a pre-compiled strptime-style format.

    Parameters
    ----------
    val : object
        The value to parse. It is handed to ``format_regex`` and, in exact
        mode, measured with ``len`` and sliced, so it is expected to be a
        string.
    fmt : str
        The format string ``format_regex`` was compiled from. Only used to
        build error messages here.
    exact : bint
        If True, ``format_regex`` must match at the start of ``val`` and
        consume it entirely. If False, the first match found anywhere in
        ``val`` is used.
    format_regex : compiled regex
        Pattern whose named groups are the directive letters looked up in
        ``_parse_code_table``.
    locale_time : object
        Provides ``f_month``, ``a_month``, ``f_weekday``, ``a_weekday`` and
        ``am_pm`` for resolving locale-dependent names.
    dts : npy_datetimestruct
        Received by value and used as scratch space; the caller's struct is
        not modified.

    Returns
    -------
    tuple
        ``(iresult, tz)`` where ``iresult`` is the result of
        ``npy_datetimestruct_to_datetime(NPY_FR_ns, &dts)`` and ``tz`` is the
        timezone parsed from ``%Z``/``%z``, or None when neither was present.

    Raises
    ------
    ValueError
        If ``val`` does not match the format, if unconverted data remains in
        exact mode, if ISO directives are combined inconsistently, or if the
        parsed fields do not form a valid calendar date.
    """
    if exact:
        found = format_regex.match(val)
        if not found:
            raise ValueError(f"time data '{val}' does not match "
                             f"format '{fmt}' (match)")
        if len(val) != found.end():
            raise ValueError(f"unconverted data remains: {val[found.end():]}")

    # search
    else:
        found = format_regex.search(val)
        if not found:
            raise ValueError(f"time data {repr(val)} does not match format "
                             f"{repr(fmt)} (search)")

    iso_year = -1
    year = 1900
    month = day = 1
    hour = minute = second = ns = us = 0
    tz = None
    # Default to -1 to signify that values not known; not critical to have,
    # though
    iso_week = week_of_year = -1
    week_of_year_start = -1
    # weekday and julian defaulted to -1 so as to signal need to calculate
    # values
    weekday = julian = -1
    found_dict = found.groupdict()
    for group_key in found_dict:
        # Directives not explicitly handled below:
        #   c, x, X
        #      handled by making out of other directives
        #   U, W
        #      worthless without day of the week
        parse_code = _parse_code_table[group_key]

        if parse_code == 0:
            # %y
            year = int(found_dict["y"])
            # Open Group specification for strptime() states that a %y
            # value in the range of [00, 68] is in the century 2000, while
            # [69,99] is in the century 1900
            if year <= 68:
                year += 2000
            else:
                year += 1900
        elif parse_code == 1:
            # %Y
            year = int(found_dict["Y"])
        elif parse_code == 2:
            # %m
            month = int(found_dict["m"])
        elif parse_code == 3:
            # %B
            month = locale_time.f_month.index(found_dict["B"].lower())
        elif parse_code == 4:
            # %b
            month = locale_time.a_month.index(found_dict["b"].lower())
        elif parse_code == 5:
            # %d
            day = int(found_dict["d"])
        elif parse_code == 6:
            # %H
            hour = int(found_dict["H"])
        elif parse_code == 7:
            # %I, interpreted together with %p when present
            hour = int(found_dict["I"])
            ampm = found_dict.get("p", "").lower()
            # If there was no AM/PM indicator, we'll treat this like AM
            if ampm in ("", locale_time.am_pm[0]):
                # We're in AM so the hour is correct unless we're
                # looking at 12 midnight.
                # 12 midnight == 12 AM == hour 0
                if hour == 12:
                    hour = 0
            elif ampm == locale_time.am_pm[1]:
                # We're in PM so we need to add 12 to the hour unless
                # we're looking at 12 noon.
                # 12 noon == 12 PM == hour 12
                if hour != 12:
                    hour += 12
        elif parse_code == 8:
            # %M
            minute = int(found_dict["M"])
        elif parse_code == 9:
            # %S
            second = int(found_dict["S"])
        elif parse_code == 10:
            # %f
            s = found_dict["f"]
            # Pad to always return nanoseconds
            s += "0" * (9 - len(s))
            us = int(s)
            ns = us % 1000
            us = us // 1000
        elif parse_code == 11:
            # %A
            weekday = locale_time.f_weekday.index(found_dict["A"].lower())
        elif parse_code == 12:
            # %a
            weekday = locale_time.a_weekday.index(found_dict["a"].lower())
        elif parse_code == 13:
            # %w: 0 is Sunday; convert to Monday == 0
            weekday = int(found_dict["w"])
            if weekday == 0:
                weekday = 6
            else:
                weekday -= 1
        elif parse_code == 14:
            # %j
            julian = int(found_dict["j"])
        elif parse_code == 15 or parse_code == 16:
            # %U / %W
            week_of_year = int(found_dict[group_key])
            if group_key == "U":
                # U starts week on Sunday.
                week_of_year_start = 6
            else:
                # W starts week on Monday.
                week_of_year_start = 0
        elif parse_code == 17:
            # %Z
            tz = pytz.timezone(found_dict["Z"])
        elif parse_code == 19:
            # %z
            tz = parse_timezone_directive(found_dict["z"])
        elif parse_code == 20:
            # %G
            iso_year = int(found_dict["G"])
        elif parse_code == 21:
            # %V
            iso_week = int(found_dict["V"])
        elif parse_code == 22:
            # %u: 1 is Monday; convert to Monday == 0
            weekday = int(found_dict["u"])
            weekday -= 1

    # don't assume default values for ISO week/year
    if iso_year != -1:
        if iso_week == -1 or weekday == -1:
            raise ValueError("ISO year directive '%G' must be used with "
                             "the ISO week directive '%V' and a weekday "
                             "directive '%A', '%a', '%w', or '%u'.")
        if julian != -1:
            raise ValueError("Day of the year directive '%j' is not "
                             "compatible with ISO year directive '%G'. "
                             "Use '%Y' instead.")
    elif year != -1 and week_of_year == -1 and iso_week != -1:
        if weekday == -1:
            raise ValueError("ISO week directive '%V' must be used with "
                             "the ISO year directive '%G' and a weekday "
                             "directive '%A', '%a', '%w', or '%u'.")
        else:
            raise ValueError("ISO week directive '%V' is incompatible with "
                             "the year directive '%Y'. Use the ISO year "
                             "'%G' instead.")

    # If we know the wk of the year and what day of that wk, we can figure
    # out the Julian day of the year.
    if julian == -1 and weekday != -1:
        if week_of_year != -1:
            week_starts_Mon = week_of_year_start == 0
            julian = _calc_julian_from_U_or_W(year, week_of_year, weekday,
                                              week_starts_Mon)
        elif iso_year != -1 and iso_week != -1:
            year, julian = _calc_julian_from_V(iso_year, iso_week,
                                               weekday + 1)

    if julian == -1:
        # No day-of-year information: the parsed year/month/day are used
        # as-is. Constructing the date is kept purely for validation, since
        # ``date`` raises ValueError for an impossible combination.
        date(year, month, day)
    else:
        # Assume that if they bothered to include Julian day it will
        # be accurate.
        datetime_result = date.fromordinal(
            (julian - 1) + date(year, 1, 1).toordinal())
        year = datetime_result.year
        month = datetime_result.month
        day = datetime_result.day

    dts.year = year
    dts.month = month
    dts.day = day
    dts.hour = hour
    dts.min = minute
    dts.sec = second
    dts.us = us
    # ns holds the sub-microsecond remainder in nanoseconds; the struct
    # stores it as picoseconds.
    dts.ps = ns * 1000

    iresult = npy_datetimestruct_to_datetime(NPY_FR_ns, &dts)
    check_dts_bounds(&dts)
    return iresult, tz
|
||
|
||
def array_strptime( | ||
ndarray[object] values, | ||
Comment on lines
265
to
266
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. should be possible to delete this completely now |
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
taken from
pandas/pandas/_libs/tslibs/strptime.pyx
Lines 112 to 145 in 5a372d8