@@ -3,17 +3,15 @@ Parsing functions for datetime and datetime-like strings.
3
3
"""
4
4
import re
5
5
import time
6
- from io import StringIO
7
6
8
7
from libc.string cimport strchr
9
8
10
9
import cython
11
10
from cython import Py_ssize_t
12
11
13
12
from cpython.object cimport PyObject_Str
14
- from cpython.unicode cimport PyUnicode_Join
15
13
16
- from cpython.datetime cimport datetime, datetime_new, import_datetime
14
+ from cpython.datetime cimport datetime, datetime_new, import_datetime, tzinfo
17
15
from cpython.version cimport PY_VERSION_HEX
18
16
import_datetime()
19
17
@@ -37,6 +35,7 @@ from pandas._config import get_option
37
35
from pandas._libs.tslibs.ccalendar import MONTH_NUMBERS
38
36
from pandas._libs.tslibs.nattype import nat_strings, NaT
39
37
from pandas._libs.tslibs.util cimport is_array, get_c_string_buf_and_size
38
+ from pandas._libs.tslibs.frequencies cimport get_rule_month
40
39
41
40
cdef extern from " ../src/headers/portable.h" :
42
41
int getdigit_ascii(char c, int default) nogil
@@ -86,16 +85,15 @@ cdef inline int _parse_4digit(const char* s):
86
85
return result
87
86
88
87
89
- cdef inline object _parse_delimited_date(object date_string, bint dayfirst):
88
+ cdef inline object _parse_delimited_date(str date_string, bint dayfirst):
90
89
"""
91
90
Parse special cases of dates: MM/DD/YYYY, DD/MM/YYYY, MM/YYYY.
91
+
92
92
At the beginning function tries to parse date in MM/DD/YYYY format, but
93
93
if month > 12 - in DD/MM/YYYY (`dayfirst == False`).
94
94
With `dayfirst == True` function makes an attempt to parse date in
95
95
DD/MM/YYYY, if an attempt is wrong - in MM/DD/YYYY
96
96
97
- Note
98
- ----
99
97
For MM/DD/YYYY, DD/MM/YYYY: delimiter can be a space or one of /-.
100
98
For MM/YYYY: delimiter can be a space or one of /-
101
99
If `date_string` can't be converted to date, then function returns
@@ -104,11 +102,13 @@ cdef inline object _parse_delimited_date(object date_string, bint dayfirst):
104
102
Parameters
105
103
----------
106
104
date_string : str
107
- dayfirst : bint
105
+ dayfirst : bool
108
106
109
107
Returns:
110
108
--------
111
- datetime, resolution
109
+ datetime or None
110
+ str or None
111
+ Describing resolution of the parsed string.
112
112
"""
113
113
cdef:
114
114
const char * buf
@@ -156,18 +156,19 @@ cdef inline object _parse_delimited_date(object date_string, bint dayfirst):
156
156
raise DateParseError(f" Invalid date specified ({month}/{day})" )
157
157
158
158
159
- cdef inline bint does_string_look_like_time(object parse_string):
159
+ cdef inline bint does_string_look_like_time(str parse_string):
160
160
"""
161
161
Checks whether given string is a time: it has to start either from
162
162
H:MM or from HH:MM, and hour and minute values must be valid.
163
163
164
164
Parameters
165
165
----------
166
- date_string : str
166
+ parse_string : str
167
167
168
168
Returns:
169
169
--------
170
- whether given string is a time
170
+ bool
171
+ Whether given string is potentially a time.
171
172
"""
172
173
cdef:
173
174
const char * buf
@@ -188,9 +189,10 @@ cdef inline bint does_string_look_like_time(object parse_string):
188
189
return 0 <= hour <= 23 and 0 <= minute <= 59
189
190
190
191
191
- def parse_datetime_string (date_string , freq = None , dayfirst = False ,
192
+ def parse_datetime_string (date_string: str , freq = None , dayfirst = False ,
192
193
yearfirst = False , **kwargs ):
193
- """ parse datetime string, only returns datetime.
194
+ """
195
+ Parse datetime string, only returns datetime.
194
196
Also takes care of special handling for strings matching time patterns.
195
197
196
198
Returns
@@ -270,16 +272,17 @@ def parse_time_string(arg: str, freq=None, dayfirst=None, yearfirst=None):
270
272
return res
271
273
272
274
273
- cdef parse_datetime_string_with_reso(date_string, freq = None , dayfirst = False ,
275
+ cdef parse_datetime_string_with_reso(str date_string, freq = None , dayfirst = False ,
274
276
yearfirst = False ):
275
- """ parse datetime string, only returns datetime
277
+ """
278
+ Parse datetime string and try to identify its resolution.
276
279
277
280
Returns
278
281
-------
279
- parsed : datetime
280
- parsed2 : datetime/dateutil.parser._result
281
- reso : str
282
- inferred resolution
282
+ datetime
283
+ datetime/dateutil.parser._result
284
+ str
285
+ Inferred resolution of the parsed string.
283
286
284
287
Raises
285
288
------
@@ -315,18 +318,19 @@ cdef parse_datetime_string_with_reso(date_string, freq=None, dayfirst=False,
315
318
return parsed, parsed, reso
316
319
317
320
318
- cpdef bint _does_string_look_like_datetime(object py_string):
321
+ cpdef bint _does_string_look_like_datetime(str py_string):
319
322
"""
320
323
Checks whether given string is a datetime: it has to start with '0' or
321
324
be greater than 1000.
322
325
323
326
Parameters
324
327
----------
325
- py_string: object
328
+ py_string: str
326
329
327
330
Returns
328
331
-------
329
- whether given string is a datetime
332
+ bool
333
+ Whether given string is potentially a datetime.
330
334
"""
331
335
cdef:
332
336
const char * buf
@@ -370,9 +374,6 @@ cdef inline object _parse_dateabbr_string(object date_string, object default,
370
374
# special handling for possibilities eg, 2Q2005, 2Q05, 2005Q1, 05Q1
371
375
assert isinstance (date_string, str )
372
376
373
- # len(date_string) == 0
374
- # should be NaT???
375
-
376
377
if date_string in nat_strings:
377
378
return NaT, NaT, ' '
378
379
@@ -427,7 +428,7 @@ cdef inline object _parse_dateabbr_string(object date_string, object default,
427
428
if freq is not None :
428
429
# hack attack, #1228
429
430
try :
430
- mnum = MONTH_NUMBERS[_get_rule_month (freq)] + 1
431
+ mnum = MONTH_NUMBERS[get_rule_month (freq)] + 1
431
432
except (KeyError , ValueError ):
432
433
raise DateParseError(f' Unable to retrieve month '
433
434
f' information from given '
@@ -467,21 +468,16 @@ cdef inline object _parse_dateabbr_string(object date_string, object default,
467
468
raise ValueError (f' Unable to parse {date_string}' )
468
469
469
470
470
- cdef dateutil_parse(object timestr, object default, ignoretz = False ,
471
+ cdef dateutil_parse(str timestr, object default, ignoretz = False ,
471
472
tzinfos = None , dayfirst = None , yearfirst = None ):
472
473
""" lifted from dateutil to get resolution"""
473
474
474
475
cdef:
475
- object fobj, res, attr, ret, tzdata
476
+ object res, attr, ret, tzdata
476
477
object reso = None
477
478
dict repl = {}
478
479
479
- fobj = StringIO(str (timestr))
480
- res = DEFAULTPARSER._parse(fobj, dayfirst = dayfirst, yearfirst = yearfirst)
481
-
482
- # dateutil 2.2 compat
483
- if isinstance (res, tuple ): # PyTuple_Check
484
- res, _ = res
480
+ res, _ = DEFAULTPARSER._parse(timestr, dayfirst = dayfirst, yearfirst = yearfirst)
485
481
486
482
if res is None :
487
483
raise ValueError (f" Unknown datetime string format, unable to parse: {timestr}" )
@@ -507,20 +503,22 @@ cdef dateutil_parse(object timestr, object default, ignoretz=False,
507
503
ret = ret + relativedelta.relativedelta(weekday = res.weekday)
508
504
if not ignoretz:
509
505
if callable (tzinfos) or tzinfos and res.tzname in tzinfos:
506
+ # Note: as of 1.0 this is not reached because
507
+ # we never pass tzinfos, see GH#22234
510
508
if callable (tzinfos):
511
509
tzdata = tzinfos(res.tzname, res.tzoffset)
512
510
else :
513
511
tzdata = tzinfos.get(res.tzname)
514
- if isinstance (tzdata, datetime. tzinfo):
515
- tzinfo = tzdata
512
+ if isinstance (tzdata, tzinfo):
513
+ new_tzinfo = tzdata
516
514
elif isinstance (tzdata, str ):
517
- tzinfo = _dateutil_tzstr(tzdata)
515
+ new_tzinfo = _dateutil_tzstr(tzdata)
518
516
elif isinstance (tzdata, int ):
519
- tzinfo = tzoffset(res.tzname, tzdata)
517
+ new_tzinfo = tzoffset(res.tzname, tzdata)
520
518
else :
521
519
raise ValueError (" offset must be tzinfo subclass, "
522
520
" tz string, or int offset" )
523
- ret = ret.replace(tzinfo = tzinfo )
521
+ ret = ret.replace(tzinfo = new_tzinfo )
524
522
elif res.tzname and res.tzname in time.tzname:
525
523
ret = ret.replace(tzinfo = _dateutil_tzlocal())
526
524
elif res.tzoffset == 0 :
@@ -530,27 +528,6 @@ cdef dateutil_parse(object timestr, object default, ignoretz=False,
530
528
return ret, reso
531
529
532
530
533
- cdef object _get_rule_month(object source, object default = ' DEC' ):
534
- """
535
- Return starting month of given freq, default is December.
536
-
537
- Example
538
- -------
539
- >>> _get_rule_month('D')
540
- 'DEC'
541
-
542
- >>> _get_rule_month('A-JAN')
543
- 'JAN'
544
- """
545
- if hasattr (source, ' freqstr' ):
546
- source = source.freqstr
547
- source = source.upper()
548
- if ' -' not in source:
549
- return default
550
- else :
551
- return source.split(' -' )[1 ]
552
-
553
-
554
531
# ----------------------------------------------------------------------
555
532
# Parsing for type-inference
556
533
@@ -939,14 +916,14 @@ def _concat_date_cols(tuple date_cols, bint keep_trivial_numbers=True):
939
916
940
917
Parameters
941
918
----------
942
- date_cols : tuple of numpy arrays
919
+ date_cols : tuple[ndarray]
943
920
keep_trivial_numbers : bool, default True
944
921
if True and len(date_cols) == 1, then
945
922
conversion (to string from integer/float zero) is not performed
946
923
947
924
Returns
948
925
-------
949
- arr_of_rows : ndarray (dtype= object)
926
+ arr_of_rows : ndarray[ object]
950
927
951
928
Examples
952
929
--------
@@ -1004,6 +981,6 @@ def _concat_date_cols(tuple date_cols, bint keep_trivial_numbers=True):
1004
981
item = PyArray_GETITEM(array, PyArray_ITER_DATA(it))
1005
982
list_to_join[col_idx] = convert_to_unicode(item, False )
1006
983
PyArray_ITER_NEXT(it)
1007
- result_view[row_idx] = PyUnicode_Join( ' ' , list_to_join)
984
+ result_view[row_idx] = " " .join( list_to_join)
1008
985
1009
986
return result
0 commit comments