@@ -12,7 +12,10 @@ from cpython.datetime cimport (
12
12
datetime,
13
13
datetime_new,
14
14
import_datetime,
15
+ timedelta,
16
+ tzinfo,
15
17
)
18
+ from datetime import timezone
16
19
from cpython.object cimport PyObject_Str
17
20
from cython cimport Py_ssize_t
18
21
from libc.string cimport strchr
@@ -49,6 +52,7 @@ from dateutil.tz import (
49
52
from pandas._config import get_option
50
53
51
54
from pandas._libs.tslibs.ccalendar cimport c_MONTH_NUMBERS
55
+ from pandas._libs.tslibs.dtypes cimport npy_unit_to_attrname
52
56
from pandas._libs.tslibs.nattype cimport (
53
57
c_NaT as NaT,
54
58
c_nat_strings as nat_strings,
@@ -120,7 +124,9 @@ cdef int _parse_4digit(const char* s):
120
124
return result
121
125
122
126
123
- cdef object _parse_delimited_date(str date_string, bint dayfirst):
127
+ cdef datetime _parse_delimited_date(
128
+ str date_string, bint dayfirst, NPY_DATETIMEUNIT* creso
129
+ ):
124
130
"""
125
131
Parse special cases of dates: MM/DD/YYYY, DD/MM/YYYY, MM/YYYY.
126
132
@@ -138,12 +144,12 @@ cdef object _parse_delimited_date(str date_string, bint dayfirst):
138
144
----------
139
145
date_string : str
140
146
dayfirst : bool
147
+ creso : NPY_DATETIMEUNIT*
148
+ Output parameter; set to the resolution identified from date_string.
141
149
142
150
Returns:
143
151
--------
144
152
datetime or None
145
- str or None
146
- Describing resolution of the parsed string.
147
153
"""
148
154
cdef:
149
155
const char * buf
@@ -157,53 +163,53 @@ cdef object _parse_delimited_date(str date_string, bint dayfirst):
157
163
month = _parse_2digit(buf)
158
164
day = _parse_2digit(buf + 3 )
159
165
year = _parse_4digit(buf + 6 )
160
- reso = " day "
166
+ creso[ 0 ] = NPY_DATETIMEUNIT.NPY_FR_D
161
167
can_swap = 1
162
168
elif length == 9 and _is_delimiter(buf[1 ]) and _is_delimiter(buf[4 ]):
163
169
# parsing M?DD?YYYY and D?MM?YYYY dates
164
170
month = _parse_1digit(buf)
165
171
day = _parse_2digit(buf + 2 )
166
172
year = _parse_4digit(buf + 5 )
167
- reso = " day "
173
+ creso[ 0 ] = NPY_DATETIMEUNIT.NPY_FR_D
168
174
can_swap = 1
169
175
elif length == 9 and _is_delimiter(buf[2 ]) and _is_delimiter(buf[4 ]):
170
176
# parsing MM?D?YYYY and DD?M?YYYY dates
171
177
month = _parse_2digit(buf)
172
178
day = _parse_1digit(buf + 3 )
173
179
year = _parse_4digit(buf + 5 )
174
- reso = " day "
180
+ creso[ 0 ] = NPY_DATETIMEUNIT.NPY_FR_D
175
181
can_swap = 1
176
182
elif length == 8 and _is_delimiter(buf[1 ]) and _is_delimiter(buf[3 ]):
177
183
# parsing M?D?YYYY and D?M?YYYY dates
178
184
month = _parse_1digit(buf)
179
185
day = _parse_1digit(buf + 2 )
180
186
year = _parse_4digit(buf + 4 )
181
- reso = " day "
187
+ creso[ 0 ] = NPY_DATETIMEUNIT.NPY_FR_D
182
188
can_swap = 1
183
189
elif length == 7 and _is_delimiter(buf[2 ]):
184
190
# parsing MM?YYYY dates
185
191
if buf[2 ] == b" ." :
186
192
# we cannot reliably tell whether e.g. 10.2010 is a float
187
193
# or a date, thus we refuse to parse it here
188
- return None , None
194
+ return None
189
195
month = _parse_2digit(buf)
190
196
year = _parse_4digit(buf + 3 )
191
- reso = " month "
197
+ creso[ 0 ] = NPY_DATETIMEUNIT.NPY_FR_M
192
198
else :
193
- return None , None
199
+ return None
194
200
195
201
if month < 0 or day < 0 or year < 1000 :
196
202
# some part is not an integer, so
197
203
# date_string can't be converted to date, above format
198
- return None , None
204
+ return None
199
205
200
206
if 1 <= month <= MAX_DAYS_IN_MONTH and 1 <= day <= MAX_DAYS_IN_MONTH \
201
207
and (month <= MAX_MONTH or day <= MAX_MONTH):
202
208
if (month > MAX_MONTH or (day <= MAX_MONTH and dayfirst)) and can_swap:
203
209
day, month = month, day
204
210
# In Python <= 3.6.0 there is no range checking for invalid dates
205
211
# in C api, thus we call faster C version for 3.6.1 or newer
206
- return datetime_new(year, month, day, 0 , 0 , 0 , 0 , None ), reso
212
+ return datetime_new(year, month, day, 0 , 0 , 0 , 0 , None )
207
213
208
214
raise DateParseError(f" Invalid date specified ({month}/{day})" )
209
215
@@ -264,6 +270,7 @@ def parse_datetime_string(
264
270
265
271
cdef:
266
272
datetime dt
273
+ NPY_DATETIMEUNIT creso
267
274
268
275
if not _does_string_look_like_datetime(date_string ):
269
276
raise ValueError (f' Given date string "{date_string}" not likely a datetime' )
@@ -274,7 +281,7 @@ def parse_datetime_string(
274
281
yearfirst = yearfirst)
275
282
return dt
276
283
277
- dt, _ = _parse_delimited_date(date_string, dayfirst)
284
+ dt = _parse_delimited_date(date_string, dayfirst, & creso )
278
285
if dt is not None :
279
286
return dt
280
287
@@ -351,18 +358,19 @@ def parse_datetime_string_with_reso(
351
358
bint string_to_dts_failed
352
359
npy_datetimestruct dts
353
360
NPY_DATETIMEUNIT out_bestunit
354
- int out_local
361
+ int out_local = 0
355
362
int out_tzoffset
363
+ tzinfo tz
356
364
357
365
if not _does_string_look_like_datetime(date_string):
358
366
raise ValueError (f' Given date string "{date_string}" not likely a datetime' )
359
367
360
- parsed, reso = _parse_delimited_date(date_string, dayfirst)
368
+ parsed = _parse_delimited_date(date_string, dayfirst, & out_bestunit )
361
369
if parsed is not None :
370
+ reso = npy_unit_to_attrname[out_bestunit]
362
371
return parsed, reso
363
372
364
373
# Try iso8601 first, as it handles nanoseconds
365
- # TODO: does this render some/all of parse_delimited_date redundant?
366
374
string_to_dts_failed = string_to_dts(
367
375
date_string, & dts, & out_bestunit, & out_local,
368
376
& out_tzoffset, False
@@ -372,31 +380,25 @@ def parse_datetime_string_with_reso(
372
380
NPY_DATETIMEUNIT.NPY_FR_ps,
373
381
NPY_DATETIMEUNIT.NPY_FR_fs,
374
382
NPY_DATETIMEUNIT.NPY_FR_as}
375
- if out_bestunit in timestamp_units or out_local:
376
- # TODO: the not-out_local case we could do without Timestamp;
377
- # avoid circular import
383
+ if out_bestunit in timestamp_units:
384
+ # TODO: avoid circular import
378
385
from pandas import Timestamp
379
386
parsed = Timestamp(date_string)
380
387
else :
381
- parsed = datetime(
382
- dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us
388
+ if out_local:
389
+ tz = timezone(timedelta(minutes = out_tzoffset))
390
+ else :
391
+ tz = None
392
+ parsed = datetime_new(
393
+ dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us, tz
383
394
)
384
395
# Match Timestamp and drop picoseconds, femtoseconds, attoseconds
385
396
# The new resolution will just be nano
386
397
# GH 50417
387
398
if out_bestunit in timestamp_units:
388
399
out_bestunit = NPY_DATETIMEUNIT.NPY_FR_ns
389
- reso = {
390
- NPY_DATETIMEUNIT.NPY_FR_Y: " year" ,
391
- NPY_DATETIMEUNIT.NPY_FR_M: " month" ,
392
- NPY_DATETIMEUNIT.NPY_FR_D: " day" ,
393
- NPY_DATETIMEUNIT.NPY_FR_h: " hour" ,
394
- NPY_DATETIMEUNIT.NPY_FR_m: " minute" ,
395
- NPY_DATETIMEUNIT.NPY_FR_s: " second" ,
396
- NPY_DATETIMEUNIT.NPY_FR_ms: " millisecond" ,
397
- NPY_DATETIMEUNIT.NPY_FR_us: " microsecond" ,
398
- NPY_DATETIMEUNIT.NPY_FR_ns: " nanosecond" ,
399
- }[out_bestunit]
400
+
401
+ reso = npy_unit_to_attrname[out_bestunit]
400
402
return parsed, reso
401
403
402
404
try :
0 commit comments