From 2ba5426d041400966f5fd964de57bb47d2238b5d Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 21 Dec 2019 11:32:44 -0800 Subject: [PATCH 1/4] CLN: tslibs typing, docstrings --- pandas/_libs/tslib.pyx | 2 +- pandas/_libs/tslibs/conversion.pyx | 6 +-- pandas/_libs/tslibs/np_datetime.pxd | 2 +- pandas/_libs/tslibs/np_datetime.pyx | 2 +- pandas/_libs/tslibs/parsing.pyx | 55 +++++++++++++----------- pandas/_libs/tslibs/period.pyx | 21 ++++----- pandas/plotting/_matplotlib/converter.py | 2 + 7 files changed, 48 insertions(+), 42 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 598def4e1d9fa..cbe6dd6c2322d 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -188,7 +188,7 @@ def ints_to_pydatetime(const int64_t[:] arr, object tz=None, object freq=None, return result -def _test_parse_iso8601(object ts): +def _test_parse_iso8601(ts: str): """ TESTING ONLY: Parse string into Timestamp using iso8601 parser. Used only for testing, actual construction uses `convert_str_to_tsobject` diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index c5315219b8422..2988d7bae9a5e 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -444,15 +444,15 @@ cdef _TSObject convert_str_to_tsobject(object ts, object tz, object unit, bint dayfirst=False, bint yearfirst=False): """ - Convert a string-like (bytes or unicode) input `ts`, along with optional - timezone object `tz` to a _TSObject. + Convert a string input `ts`, along with optional timezone object`tz` + to a _TSObject. The optional arguments `dayfirst` and `yearfirst` are passed to the dateutil parser. 
Parameters ---------- - ts : bytes or unicode + ts : str Value to be converted to _TSObject tz : tzinfo or None timezone for the timezone-aware output diff --git a/pandas/_libs/tslibs/np_datetime.pxd b/pandas/_libs/tslibs/np_datetime.pxd index 020bcdf0a7b15..ebedee79405e5 100644 --- a/pandas/_libs/tslibs/np_datetime.pxd +++ b/pandas/_libs/tslibs/np_datetime.pxd @@ -72,6 +72,6 @@ cdef npy_datetime get_datetime64_value(object obj) nogil cdef npy_timedelta get_timedelta64_value(object obj) nogil cdef NPY_DATETIMEUNIT get_datetime64_unit(object obj) nogil -cdef int _string_to_dts(object val, npy_datetimestruct* dts, +cdef int _string_to_dts(str val, npy_datetimestruct* dts, int* out_local, int* out_tzoffset, bint want_exc) except? -1 diff --git a/pandas/_libs/tslibs/np_datetime.pyx b/pandas/_libs/tslibs/np_datetime.pyx index b9406074bb130..b59a1101e0bf7 100644 --- a/pandas/_libs/tslibs/np_datetime.pyx +++ b/pandas/_libs/tslibs/np_datetime.pyx @@ -167,7 +167,7 @@ cdef inline int64_t pydate_to_dt64(date val, npy_datetimestruct *dts): return dtstruct_to_dt64(dts) -cdef inline int _string_to_dts(object val, npy_datetimestruct* dts, +cdef inline int _string_to_dts(str val, npy_datetimestruct* dts, int* out_local, int* out_tzoffset, bint want_exc) except? -1: cdef: diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index ecf3e35c86d76..2bc0d3d29ca17 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -86,16 +86,15 @@ cdef inline int _parse_4digit(const char* s): return result -cdef inline object _parse_delimited_date(object date_string, bint dayfirst): +cdef inline object _parse_delimited_date(str date_string, bint dayfirst): """ Parse special cases of dates: MM/DD/YYYY, DD/MM/YYYY, MM/YYYY. + At the beginning function tries to parse date in MM/DD/YYYY format, but if month > 12 - in DD/MM/YYYY (`dayfirst == False`). 
With `dayfirst == True` function makes an attempt to parse date in DD/MM/YYYY, if an attempt is wrong - in DD/MM/YYYY - Note - ---- For MM/DD/YYYY, DD/MM/YYYY: delimiter can be a space or one of /-. For MM/YYYY: delimiter can be a space or one of /- If `date_string` can't be converted to date, then function returns @@ -104,11 +103,13 @@ cdef inline object _parse_delimited_date(object date_string, bint dayfirst): Parameters ---------- date_string : str - dayfirst : bint + dayfirst : bool Returns: -------- - datetime, resolution + datetime or Nont + str or None + Describing resolution of the parsed string. """ cdef: const char* buf @@ -156,18 +157,19 @@ cdef inline object _parse_delimited_date(object date_string, bint dayfirst): raise DateParseError(f"Invalid date specified ({month}/{day})") -cdef inline bint does_string_look_like_time(object parse_string): +cdef inline bint does_string_look_like_time(str parse_string): """ Checks whether given string is a time: it has to start either from H:MM or from HH:MM, and hour and minute values must be valid. Parameters ---------- - date_string : str + parse_string : str Returns: -------- - whether given string is a time + bool + Whether given string is potentially a time. """ cdef: const char* buf @@ -188,9 +190,10 @@ cdef inline bint does_string_look_like_time(object parse_string): return 0 <= hour <= 23 and 0 <= minute <= 59 -def parse_datetime_string(date_string, freq=None, dayfirst=False, +def parse_datetime_string(date_string: str, freq=None, dayfirst=False, yearfirst=False, **kwargs): - """parse datetime string, only returns datetime. + """ + Parse datetime string, only returns datetime. Also cares special handling matching time patterns. 
Returns @@ -270,16 +273,17 @@ def parse_time_string(arg: str, freq=None, dayfirst=None, yearfirst=None): return res -cdef parse_datetime_string_with_reso(date_string, freq=None, dayfirst=False, +cdef parse_datetime_string_with_reso(str date_string, freq=None, dayfirst=False, yearfirst=False): - """parse datetime string, only returns datetime + """ + Parse datetime string and try to identify its resolution. Returns ------- - parsed : datetime - parsed2 : datetime/dateutil.parser._result - reso : str - inferred resolution + datetime + datetime/dateutil.parser._result + str + Inferred resolution of the parsed string. Raises ------ @@ -315,18 +319,19 @@ cdef parse_datetime_string_with_reso(date_string, freq=None, dayfirst=False, return parsed, parsed, reso -cpdef bint _does_string_look_like_datetime(object py_string): +cpdef bint _does_string_look_like_datetime(str py_string): """ Checks whether given string is a datetime: it has to start with '0' or be greater than 1000. Parameters ---------- - py_string: object + py_string: str Returns ------- - whether given string is a datetime + bool + Whether given string is potentially a datetime. """ cdef: const char *buf @@ -370,9 +375,6 @@ cdef inline object _parse_dateabbr_string(object date_string, object default, # special handling for possibilities eg, 2Q2005, 2Q05, 2005Q1, 05Q1 assert isinstance(date_string, str) - # len(date_string) == 0 - # should be NaT??? - if date_string in nat_strings: return NaT, NaT, '' @@ -530,7 +532,7 @@ cdef dateutil_parse(object timestr, object default, ignoretz=False, return ret, reso -cdef object _get_rule_month(object source, object default='DEC'): +cdef str _get_rule_month(object source): """ Return starting month of given freq, default is December. 
@@ -546,7 +548,8 @@ cdef object _get_rule_month(object source, object default='DEC'): source = source.freqstr source = source.upper() if '-' not in source: - return default + # Default is December + return "DEC" else: return source.split('-')[1] @@ -939,14 +942,14 @@ def _concat_date_cols(tuple date_cols, bint keep_trivial_numbers=True): Parameters ---------- - date_cols : tuple of numpy arrays + date_cols : tuple[ndarray] keep_trivial_numbers : bool, default True if True and len(date_cols) == 1, then conversion (to string from integer/float zero) is not performed Returns ------- - arr_of_rows : ndarray (dtype=object) + arr_of_rows : ndarray[object] Examples -------- diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index a6503c00a41bb..a8dabac1527b5 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -1191,12 +1191,15 @@ cdef int64_t period_ordinal_to_dt64(int64_t ordinal, int freq) except? -1: return dtstruct_to_dt64(&dts) -def period_format(int64_t value, int freq, object fmt=None): +cdef str period_format(int64_t value, int freq, object fmt=None): cdef: int freq_group if value == NPY_NAT: - return repr(NaT) + return "NaT" + + if isinstance(fmt, str): + fmt = fmt.encode("utf-8") if fmt is None: freq_group = get_freq_group(freq) @@ -1242,24 +1245,22 @@ cdef list extra_fmts = [(b"%q", b"^`AB`^"), cdef list str_extra_fmts = ["^`AB`^", "^`CD`^", "^`EF`^", "^`GH`^", "^`IJ`^", "^`KL`^"] -cdef object _period_strftime(int64_t value, int freq, object fmt): +cdef str _period_strftime(int64_t value, int freq, bytes fmt): cdef: Py_ssize_t i npy_datetimestruct dts char *formatted - object pat, repl, result + bytes pat, brepl list found_pat = [False] * len(extra_fmts) int year, quarter - - if isinstance(fmt, unicode): - fmt = fmt.encode('utf-8') + str result, repl get_date_info(value, freq, &dts) for i in range(len(extra_fmts)): pat = extra_fmts[i][0] - repl = extra_fmts[i][1] + brepl = extra_fmts[i][1] if pat in fmt: 
- fmt = fmt.replace(pat, repl) + fmt = fmt.replace(pat, brepl) found_pat[i] = True formatted = c_strftime(&dts, fmt) @@ -2234,7 +2235,7 @@ cdef class _Period: object_state = None, self.freq, self.ordinal return (Period, object_state) - def strftime(self, fmt): + def strftime(self, fmt: str) -> str: """ Returns the string representation of the :class:`Period`, depending on the selected ``fmt``. ``fmt`` must be a string diff --git a/pandas/plotting/_matplotlib/converter.py b/pandas/plotting/_matplotlib/converter.py index 05aac976d54db..5b37ebb42aecc 100644 --- a/pandas/plotting/_matplotlib/converter.py +++ b/pandas/plotting/_matplotlib/converter.py @@ -1097,6 +1097,8 @@ def __call__(self, x, pos=0): return "" else: fmt = self.formatdict.pop(x, "") + if isinstance(fmt, np.bytes_): + fmt = fmt.decode("utf-8") return Period(ordinal=int(x), freq=self.freq).strftime(fmt) From 7312b1b49b26359b6306d6be203770e0f4644260 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 21 Dec 2019 11:58:01 -0800 Subject: [PATCH 2/4] de-duplicate get_rule_month, stronger typing --- pandas/_libs/tslibs/frequencies.pxd | 2 +- pandas/_libs/tslibs/frequencies.pyx | 6 +++--- pandas/_libs/tslibs/parsing.pyx | 27 +++------------------------ 3 files changed, 7 insertions(+), 28 deletions(-) diff --git a/pandas/_libs/tslibs/frequencies.pxd b/pandas/_libs/tslibs/frequencies.pxd index 4e7949e55c836..6ec67ce250505 100644 --- a/pandas/_libs/tslibs/frequencies.pxd +++ b/pandas/_libs/tslibs/frequencies.pxd @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -cpdef object get_rule_month(object source, object default=*) +cpdef str get_rule_month(object source, str default=*) cpdef get_freq_code(freqstr) cpdef object get_freq(object freq) diff --git a/pandas/_libs/tslibs/frequencies.pyx b/pandas/_libs/tslibs/frequencies.pyx index 660f4ddcec736..d60f5cfd3f8c1 100644 --- a/pandas/_libs/tslibs/frequencies.pyx +++ b/pandas/_libs/tslibs/frequencies.pyx @@ -485,18 +485,18 @@ cdef bint _is_weekly(str rule): #
---------------------------------------------------------------------- -cpdef object get_rule_month(object source, object default='DEC'): +cpdef str get_rule_month(object source, str default="DEC"): """ Return starting month of given freq, default is December. Parameters ---------- source : object - default : object (default "DEC") + default : str, default "DEC" Returns ------- - rule_month: object (usually string) + rule_month: str Examples -------- diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index 2bc0d3d29ca17..801511aa4ef68 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -37,6 +37,7 @@ from pandas._config import get_option from pandas._libs.tslibs.ccalendar import MONTH_NUMBERS from pandas._libs.tslibs.nattype import nat_strings, NaT from pandas._libs.tslibs.util cimport is_array, get_c_string_buf_and_size +from pandas._libs.tslibs.frequencies cimport get_rule_month cdef extern from "../src/headers/portable.h": int getdigit_ascii(char c, int default) nogil @@ -429,7 +430,7 @@ cdef inline object _parse_dateabbr_string(object date_string, object default, if freq is not None: # hack attack, #1228 try: - mnum = MONTH_NUMBERS[_get_rule_month(freq)] + 1 + mnum = MONTH_NUMBERS[get_rule_month(freq)] + 1 except (KeyError, ValueError): raise DateParseError(f'Unable to retrieve month ' f'information from given ' @@ -469,7 +470,7 @@ cdef inline object _parse_dateabbr_string(object date_string, object default, raise ValueError(f'Unable to parse {date_string}') -cdef dateutil_parse(object timestr, object default, ignoretz=False, +cdef dateutil_parse(str timestr, object default, ignoretz=False, tzinfos=None, dayfirst=None, yearfirst=None): """ lifted from dateutil to get resolution""" @@ -532,28 +533,6 @@ cdef dateutil_parse(object timestr, object default, ignoretz=False, return ret, reso -cdef str _get_rule_month(object source): - """ - Return starting month of given freq, default is December. 
- - Example - ------- - >>> _get_rule_month('D') - 'DEC' - - >>> _get_rule_month('A-JAN') - 'JAN' - """ - if hasattr(source, 'freqstr'): - source = source.freqstr - source = source.upper() - if '-' not in source: - # Default is December - return "DEC" - else: - return source.split('-')[1] - - # ---------------------------------------------------------------------- # Parsing for type-inference From 886e03556127bd89e498754ecf21d6b88a465fba Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 21 Dec 2019 13:28:27 -0800 Subject: [PATCH 3/4] cln, fix #22234 --- pandas/_libs/tslibs/parsing.pyx | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index 801511aa4ef68..4b85fd0c1723e 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -3,7 +3,6 @@ Parsing functions for datetime and datetime-like strings. """ import re import time -from io import StringIO from libc.string cimport strchr @@ -11,9 +10,8 @@ import cython from cython import Py_ssize_t from cpython.object cimport PyObject_Str -from cpython.unicode cimport PyUnicode_Join -from cpython.datetime cimport datetime, datetime_new, import_datetime +from cpython.datetime cimport datetime, datetime_new, import_datetime, tzinfo from cpython.version cimport PY_VERSION_HEX import_datetime() @@ -475,15 +473,14 @@ cdef dateutil_parse(str timestr, object default, ignoretz=False, """ lifted from dateutil to get resolution""" cdef: - object fobj, res, attr, ret, tzdata + object res, attr, ret, tzdata object reso = None dict repl = {} - fobj = StringIO(str(timestr)) - res = DEFAULTPARSER._parse(fobj, dayfirst=dayfirst, yearfirst=yearfirst) + res = DEFAULTPARSER._parse(timestr, dayfirst=dayfirst, yearfirst=yearfirst) # dateutil 2.2 compat - if isinstance(res, tuple): # PyTuple_Check + if isinstance(res, tuple): res, _ = res if res is None: @@ -510,20 +507,22 @@ cdef dateutil_parse(str timestr, 
object default, ignoretz=False, ret = ret + relativedelta.relativedelta(weekday=res.weekday) if not ignoretz: if callable(tzinfos) or tzinfos and res.tzname in tzinfos: + # Note: as of 1.0 this is not reached because + # we never pass tzinfos, see GH#22234 if callable(tzinfos): tzdata = tzinfos(res.tzname, res.tzoffset) else: tzdata = tzinfos.get(res.tzname) - if isinstance(tzdata, datetime.tzinfo): - tzinfo = tzdata + if isinstance(tzdata, tzinfo): + new_tzinfo = tzdata elif isinstance(tzdata, str): - tzinfo = _dateutil_tzstr(tzdata) + new_tzinfo = _dateutil_tzstr(tzdata) elif isinstance(tzdata, int): - tzinfo = tzoffset(res.tzname, tzdata) + new_tzinfo = tzoffset(res.tzname, tzdata) else: raise ValueError("offset must be tzinfo subclass, " "tz string, or int offset") - ret = ret.replace(tzinfo=tzinfo) + ret = ret.replace(tzinfo=new_tzinfo) elif res.tzname and res.tzname in time.tzname: ret = ret.replace(tzinfo=_dateutil_tzlocal()) elif res.tzoffset == 0: @@ -986,6 +985,6 @@ def _concat_date_cols(tuple date_cols, bint keep_trivial_numbers=True): item = PyArray_GETITEM(array, PyArray_ITER_DATA(it)) list_to_join[col_idx] = convert_to_unicode(item, False) PyArray_ITER_NEXT(it) - result_view[row_idx] = PyUnicode_Join(' ', list_to_join) + result_view[row_idx] = " ".join(list_to_join) return result From 7475d85422e6b7887505abd4818eb12665a90ff5 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 23 Dec 2019 08:00:12 -0800 Subject: [PATCH 4/4] typo, remove old dateutil compat code --- pandas/_libs/tslibs/parsing.pyx | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index 4b85fd0c1723e..3705b0a41fe55 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -106,7 +106,7 @@ cdef inline object _parse_delimited_date(str date_string, bint dayfirst): Returns: -------- - datetime or Nont + datetime or None str or None Describing resolution of the parsed 
string. """ @@ -477,11 +477,7 @@ cdef dateutil_parse(str timestr, object default, ignoretz=False, object reso = None dict repl = {} - res = DEFAULTPARSER._parse(timestr, dayfirst=dayfirst, yearfirst=yearfirst) - - # dateutil 2.2 compat - if isinstance(res, tuple): - res, _ = res + res, _ = DEFAULTPARSER._parse(timestr, dayfirst=dayfirst, yearfirst=yearfirst) if res is None: raise ValueError(f"Unknown datetime string format, unable to parse: {timestr}")