Skip to content

Commit db62039

Browse files
jbrockmendelWillAyd
authored andcommitted
CLN: tslibs.parsing (#30394)
1 parent 9cefd65 commit db62039

File tree

9 files changed

+63
-83
lines changed

9 files changed

+63
-83
lines changed

pandas/_libs/tslib.pyx

+1-1
Original file line numberDiff line numberDiff line change
@@ -188,7 +188,7 @@ def ints_to_pydatetime(const int64_t[:] arr, object tz=None, object freq=None,
188188
return result
189189

190190

191-
def _test_parse_iso8601(object ts):
191+
def _test_parse_iso8601(ts: str):
192192
"""
193193
TESTING ONLY: Parse string into Timestamp using iso8601 parser. Used
194194
only for testing, actual construction uses `convert_str_to_tsobject`

pandas/_libs/tslibs/conversion.pyx

+3-3
Original file line numberDiff line numberDiff line change
@@ -444,15 +444,15 @@ cdef _TSObject convert_str_to_tsobject(object ts, object tz, object unit,
444444
bint dayfirst=False,
445445
bint yearfirst=False):
446446
"""
447-
Convert a string-like (bytes or unicode) input `ts`, along with optional
448-
timezone object `tz` to a _TSObject.
447+
Convert a string input `ts`, along with optional timezone object `tz`
448+
to a _TSObject.
449449
450450
The optional arguments `dayfirst` and `yearfirst` are passed to the
451451
dateutil parser.
452452
453453
Parameters
454454
----------
455-
ts : bytes or unicode
455+
ts : str
456456
Value to be converted to _TSObject
457457
tz : tzinfo or None
458458
timezone for the timezone-aware output

pandas/_libs/tslibs/frequencies.pxd

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# -*- coding: utf-8 -*-
22

3-
cpdef object get_rule_month(object source, object default=*)
3+
cpdef str get_rule_month(object source, str default=*)
44

55
cpdef get_freq_code(freqstr)
66
cpdef object get_freq(object freq)

pandas/_libs/tslibs/frequencies.pyx

+3-3
Original file line numberDiff line numberDiff line change
@@ -485,18 +485,18 @@ cdef bint _is_weekly(str rule):
485485

486486
# ----------------------------------------------------------------------
487487

488-
cpdef object get_rule_month(object source, object default='DEC'):
488+
cpdef str get_rule_month(object source, str default="DEC"):
489489
"""
490490
Return starting month of given freq, default is December.
491491
492492
Parameters
493493
----------
494494
source : object
495-
default : object (default "DEC")
495+
default : str, default "DEC"
496496
497497
Returns
498498
-------
499-
rule_month: object (usually string)
499+
rule_month: str
500500
501501
Examples
502502
--------

pandas/_libs/tslibs/np_datetime.pxd

+1-1
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,6 @@ cdef npy_datetime get_datetime64_value(object obj) nogil
7272
cdef npy_timedelta get_timedelta64_value(object obj) nogil
7373
cdef NPY_DATETIMEUNIT get_datetime64_unit(object obj) nogil
7474

75-
cdef int _string_to_dts(object val, npy_datetimestruct* dts,
75+
cdef int _string_to_dts(str val, npy_datetimestruct* dts,
7676
int* out_local, int* out_tzoffset,
7777
bint want_exc) except? -1

pandas/_libs/tslibs/np_datetime.pyx

+1-1
Original file line numberDiff line numberDiff line change
@@ -167,7 +167,7 @@ cdef inline int64_t pydate_to_dt64(date val, npy_datetimestruct *dts):
167167
return dtstruct_to_dt64(dts)
168168

169169

170-
cdef inline int _string_to_dts(object val, npy_datetimestruct* dts,
170+
cdef inline int _string_to_dts(str val, npy_datetimestruct* dts,
171171
int* out_local, int* out_tzoffset,
172172
bint want_exc) except? -1:
173173
cdef:

pandas/_libs/tslibs/parsing.pyx

+40-63
Original file line numberDiff line numberDiff line change
@@ -3,17 +3,15 @@ Parsing functions for datetime and datetime-like strings.
33
"""
44
import re
55
import time
6-
from io import StringIO
76

87
from libc.string cimport strchr
98

109
import cython
1110
from cython import Py_ssize_t
1211

1312
from cpython.object cimport PyObject_Str
14-
from cpython.unicode cimport PyUnicode_Join
1513

16-
from cpython.datetime cimport datetime, datetime_new, import_datetime
14+
from cpython.datetime cimport datetime, datetime_new, import_datetime, tzinfo
1715
from cpython.version cimport PY_VERSION_HEX
1816
import_datetime()
1917

@@ -37,6 +35,7 @@ from pandas._config import get_option
3735
from pandas._libs.tslibs.ccalendar import MONTH_NUMBERS
3836
from pandas._libs.tslibs.nattype import nat_strings, NaT
3937
from pandas._libs.tslibs.util cimport is_array, get_c_string_buf_and_size
38+
from pandas._libs.tslibs.frequencies cimport get_rule_month
4039

4140
cdef extern from "../src/headers/portable.h":
4241
int getdigit_ascii(char c, int default) nogil
@@ -86,16 +85,15 @@ cdef inline int _parse_4digit(const char* s):
8685
return result
8786

8887

89-
cdef inline object _parse_delimited_date(object date_string, bint dayfirst):
88+
cdef inline object _parse_delimited_date(str date_string, bint dayfirst):
9089
"""
9190
Parse special cases of dates: MM/DD/YYYY, DD/MM/YYYY, MM/YYYY.
91+
9292
At the beginning function tries to parse date in MM/DD/YYYY format, but
9393
if month > 12 - in DD/MM/YYYY (`dayfirst == False`).
9494
With `dayfirst == True` function makes an attempt to parse date in
9595
DD/MM/YYYY, if an attempt is wrong - in MM/DD/YYYY
9696
97-
Note
98-
----
9997
For MM/DD/YYYY, DD/MM/YYYY: delimiter can be a space or one of /-.
10098
For MM/YYYY: delimiter can be a space or one of /-
10199
If `date_string` can't be converted to date, then function returns
@@ -104,11 +102,13 @@ cdef inline object _parse_delimited_date(object date_string, bint dayfirst):
104102
Parameters
105103
----------
106104
date_string : str
107-
dayfirst : bint
105+
dayfirst : bool
108106
109107
Returns:
110108
--------
111-
datetime, resolution
109+
datetime or None
110+
str or None
111+
Describing resolution of the parsed string.
112112
"""
113113
cdef:
114114
const char* buf
@@ -156,18 +156,19 @@ cdef inline object _parse_delimited_date(object date_string, bint dayfirst):
156156
raise DateParseError(f"Invalid date specified ({month}/{day})")
157157

158158

159-
cdef inline bint does_string_look_like_time(object parse_string):
159+
cdef inline bint does_string_look_like_time(str parse_string):
160160
"""
161161
Checks whether given string is a time: it has to start either from
162162
H:MM or from HH:MM, and hour and minute values must be valid.
163163
164164
Parameters
165165
----------
166-
date_string : str
166+
parse_string : str
167167
168168
Returns:
169169
--------
170-
whether given string is a time
170+
bool
171+
Whether given string is potentially a time.
171172
"""
172173
cdef:
173174
const char* buf
@@ -188,9 +189,10 @@ cdef inline bint does_string_look_like_time(object parse_string):
188189
return 0 <= hour <= 23 and 0 <= minute <= 59
189190

190191

191-
def parse_datetime_string(date_string, freq=None, dayfirst=False,
192+
def parse_datetime_string(date_string: str, freq=None, dayfirst=False,
192193
yearfirst=False, **kwargs):
193-
"""parse datetime string, only returns datetime.
194+
"""
195+
Parse datetime string, only returns datetime.
194196
Also takes care of special handling for strings matching time patterns.
195197
196198
Returns
@@ -270,16 +272,17 @@ def parse_time_string(arg: str, freq=None, dayfirst=None, yearfirst=None):
270272
return res
271273

272274

273-
cdef parse_datetime_string_with_reso(date_string, freq=None, dayfirst=False,
275+
cdef parse_datetime_string_with_reso(str date_string, freq=None, dayfirst=False,
274276
yearfirst=False):
275-
"""parse datetime string, only returns datetime
277+
"""
278+
Parse datetime string and try to identify its resolution.
276279
277280
Returns
278281
-------
279-
parsed : datetime
280-
parsed2 : datetime/dateutil.parser._result
281-
reso : str
282-
inferred resolution
282+
datetime
283+
datetime/dateutil.parser._result
284+
str
285+
Inferred resolution of the parsed string.
283286
284287
Raises
285288
------
@@ -315,18 +318,19 @@ cdef parse_datetime_string_with_reso(date_string, freq=None, dayfirst=False,
315318
return parsed, parsed, reso
316319

317320

318-
cpdef bint _does_string_look_like_datetime(object py_string):
321+
cpdef bint _does_string_look_like_datetime(str py_string):
319322
"""
320323
Checks whether given string is a datetime: it has to start with '0' or
321324
be greater than 1000.
322325
323326
Parameters
324327
----------
325-
py_string: object
328+
py_string: str
326329
327330
Returns
328331
-------
329-
whether given string is a datetime
332+
bool
333+
Whether given string is potentially a datetime.
330334
"""
331335
cdef:
332336
const char *buf
@@ -370,9 +374,6 @@ cdef inline object _parse_dateabbr_string(object date_string, object default,
370374
# special handling for possibilities eg, 2Q2005, 2Q05, 2005Q1, 05Q1
371375
assert isinstance(date_string, str)
372376

373-
# len(date_string) == 0
374-
# should be NaT???
375-
376377
if date_string in nat_strings:
377378
return NaT, NaT, ''
378379

@@ -427,7 +428,7 @@ cdef inline object _parse_dateabbr_string(object date_string, object default,
427428
if freq is not None:
428429
# hack attack, #1228
429430
try:
430-
mnum = MONTH_NUMBERS[_get_rule_month(freq)] + 1
431+
mnum = MONTH_NUMBERS[get_rule_month(freq)] + 1
431432
except (KeyError, ValueError):
432433
raise DateParseError(f'Unable to retrieve month '
433434
f'information from given '
@@ -467,21 +468,16 @@ cdef inline object _parse_dateabbr_string(object date_string, object default,
467468
raise ValueError(f'Unable to parse {date_string}')
468469

469470

470-
cdef dateutil_parse(object timestr, object default, ignoretz=False,
471+
cdef dateutil_parse(str timestr, object default, ignoretz=False,
471472
tzinfos=None, dayfirst=None, yearfirst=None):
472473
""" lifted from dateutil to get resolution"""
473474

474475
cdef:
475-
object fobj, res, attr, ret, tzdata
476+
object res, attr, ret, tzdata
476477
object reso = None
477478
dict repl = {}
478479

479-
fobj = StringIO(str(timestr))
480-
res = DEFAULTPARSER._parse(fobj, dayfirst=dayfirst, yearfirst=yearfirst)
481-
482-
# dateutil 2.2 compat
483-
if isinstance(res, tuple): # PyTuple_Check
484-
res, _ = res
480+
res, _ = DEFAULTPARSER._parse(timestr, dayfirst=dayfirst, yearfirst=yearfirst)
485481

486482
if res is None:
487483
raise ValueError(f"Unknown datetime string format, unable to parse: {timestr}")
@@ -507,20 +503,22 @@ cdef dateutil_parse(object timestr, object default, ignoretz=False,
507503
ret = ret + relativedelta.relativedelta(weekday=res.weekday)
508504
if not ignoretz:
509505
if callable(tzinfos) or tzinfos and res.tzname in tzinfos:
506+
# Note: as of 1.0 this is not reached because
507+
# we never pass tzinfos, see GH#22234
510508
if callable(tzinfos):
511509
tzdata = tzinfos(res.tzname, res.tzoffset)
512510
else:
513511
tzdata = tzinfos.get(res.tzname)
514-
if isinstance(tzdata, datetime.tzinfo):
515-
tzinfo = tzdata
512+
if isinstance(tzdata, tzinfo):
513+
new_tzinfo = tzdata
516514
elif isinstance(tzdata, str):
517-
tzinfo = _dateutil_tzstr(tzdata)
515+
new_tzinfo = _dateutil_tzstr(tzdata)
518516
elif isinstance(tzdata, int):
519-
tzinfo = tzoffset(res.tzname, tzdata)
517+
new_tzinfo = tzoffset(res.tzname, tzdata)
520518
else:
521519
raise ValueError("offset must be tzinfo subclass, "
522520
"tz string, or int offset")
523-
ret = ret.replace(tzinfo=tzinfo)
521+
ret = ret.replace(tzinfo=new_tzinfo)
524522
elif res.tzname and res.tzname in time.tzname:
525523
ret = ret.replace(tzinfo=_dateutil_tzlocal())
526524
elif res.tzoffset == 0:
@@ -530,27 +528,6 @@ cdef dateutil_parse(object timestr, object default, ignoretz=False,
530528
return ret, reso
531529

532530

533-
cdef object _get_rule_month(object source, object default='DEC'):
534-
"""
535-
Return starting month of given freq, default is December.
536-
537-
Example
538-
-------
539-
>>> _get_rule_month('D')
540-
'DEC'
541-
542-
>>> _get_rule_month('A-JAN')
543-
'JAN'
544-
"""
545-
if hasattr(source, 'freqstr'):
546-
source = source.freqstr
547-
source = source.upper()
548-
if '-' not in source:
549-
return default
550-
else:
551-
return source.split('-')[1]
552-
553-
554531
# ----------------------------------------------------------------------
555532
# Parsing for type-inference
556533

@@ -939,14 +916,14 @@ def _concat_date_cols(tuple date_cols, bint keep_trivial_numbers=True):
939916
940917
Parameters
941918
----------
942-
date_cols : tuple of numpy arrays
919+
date_cols : tuple[ndarray]
943920
keep_trivial_numbers : bool, default True
944921
if True and len(date_cols) == 1, then
945922
conversion (to string from integer/float zero) is not performed
946923
947924
Returns
948925
-------
949-
arr_of_rows : ndarray (dtype=object)
926+
arr_of_rows : ndarray[object]
950927
951928
Examples
952929
--------
@@ -1004,6 +981,6 @@ def _concat_date_cols(tuple date_cols, bint keep_trivial_numbers=True):
1004981
item = PyArray_GETITEM(array, PyArray_ITER_DATA(it))
1005982
list_to_join[col_idx] = convert_to_unicode(item, False)
1006983
PyArray_ITER_NEXT(it)
1007-
result_view[row_idx] = PyUnicode_Join(' ', list_to_join)
984+
result_view[row_idx] = " ".join(list_to_join)
1008985

1009986
return result

0 commit comments

Comments
 (0)