diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 598def4e1d9fa..cbe6dd6c2322d 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -188,7 +188,7 @@ def ints_to_pydatetime(const int64_t[:] arr, object tz=None, object freq=None, return result -def _test_parse_iso8601(object ts): +def _test_parse_iso8601(ts: str): """ TESTING ONLY: Parse string into Timestamp using iso8601 parser. Used only for testing, actual construction uses `convert_str_to_tsobject` diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index c5315219b8422..2988d7bae9a5e 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -444,15 +444,15 @@ cdef _TSObject convert_str_to_tsobject(object ts, object tz, object unit, bint dayfirst=False, bint yearfirst=False): """ - Convert a string-like (bytes or unicode) input `ts`, along with optional - timezone object `tz` to a _TSObject. + Convert a string input `ts`, along with optional timezone object `tz` + to a _TSObject. The optional arguments `dayfirst` and `yearfirst` are passed to the dateutil parser. 
Parameters ---------- - ts : bytes or unicode + ts : str Value to be converted to _TSObject tz : tzinfo or None timezone for the timezone-aware output diff --git a/pandas/_libs/tslibs/frequencies.pxd b/pandas/_libs/tslibs/frequencies.pxd index 4e7949e55c836..6ec67ce250505 100644 --- a/pandas/_libs/tslibs/frequencies.pxd +++ b/pandas/_libs/tslibs/frequencies.pxd @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -cpdef object get_rule_month(object source, object default=*) +cpdef str get_rule_month(object source, str default=*) cpdef get_freq_code(freqstr) cpdef object get_freq(object freq) diff --git a/pandas/_libs/tslibs/frequencies.pyx b/pandas/_libs/tslibs/frequencies.pyx index 660f4ddcec736..d60f5cfd3f8c1 100644 --- a/pandas/_libs/tslibs/frequencies.pyx +++ b/pandas/_libs/tslibs/frequencies.pyx @@ -485,18 +485,18 @@ cdef bint _is_weekly(str rule): # ---------------------------------------------------------------------- -cpdef object get_rule_month(object source, object default='DEC'): +cpdef str get_rule_month(object source, str default="DEC"): """ Return starting month of given freq, default is December. Parameters ---------- source : object - default : object (default "DEC") + default : str, default "DEC" Returns ------- - rule_month: object (usually string) + rule_month: str Examples -------- diff --git a/pandas/_libs/tslibs/np_datetime.pxd b/pandas/_libs/tslibs/np_datetime.pxd index 020bcdf0a7b15..ebedee79405e5 100644 --- a/pandas/_libs/tslibs/np_datetime.pxd +++ b/pandas/_libs/tslibs/np_datetime.pxd @@ -72,6 +72,6 @@ cdef npy_datetime get_datetime64_value(object obj) nogil cdef npy_timedelta get_timedelta64_value(object obj) nogil cdef NPY_DATETIMEUNIT get_datetime64_unit(object obj) nogil -cdef int _string_to_dts(object val, npy_datetimestruct* dts, +cdef int _string_to_dts(str val, npy_datetimestruct* dts, int* out_local, int* out_tzoffset, bint want_exc) except? 
-1 diff --git a/pandas/_libs/tslibs/np_datetime.pyx b/pandas/_libs/tslibs/np_datetime.pyx index b9406074bb130..b59a1101e0bf7 100644 --- a/pandas/_libs/tslibs/np_datetime.pyx +++ b/pandas/_libs/tslibs/np_datetime.pyx @@ -167,7 +167,7 @@ cdef inline int64_t pydate_to_dt64(date val, npy_datetimestruct *dts): return dtstruct_to_dt64(dts) -cdef inline int _string_to_dts(object val, npy_datetimestruct* dts, +cdef inline int _string_to_dts(str val, npy_datetimestruct* dts, int* out_local, int* out_tzoffset, bint want_exc) except? -1: cdef: diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index ecf3e35c86d76..3705b0a41fe55 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -3,7 +3,6 @@ Parsing functions for datetime and datetime-like strings. """ import re import time -from io import StringIO from libc.string cimport strchr @@ -11,9 +10,8 @@ import cython from cython import Py_ssize_t from cpython.object cimport PyObject_Str -from cpython.unicode cimport PyUnicode_Join -from cpython.datetime cimport datetime, datetime_new, import_datetime +from cpython.datetime cimport datetime, datetime_new, import_datetime, tzinfo from cpython.version cimport PY_VERSION_HEX import_datetime() @@ -37,6 +35,7 @@ from pandas._config import get_option from pandas._libs.tslibs.ccalendar import MONTH_NUMBERS from pandas._libs.tslibs.nattype import nat_strings, NaT from pandas._libs.tslibs.util cimport is_array, get_c_string_buf_and_size +from pandas._libs.tslibs.frequencies cimport get_rule_month cdef extern from "../src/headers/portable.h": int getdigit_ascii(char c, int default) nogil @@ -86,16 +85,15 @@ cdef inline int _parse_4digit(const char* s): return result -cdef inline object _parse_delimited_date(object date_string, bint dayfirst): +cdef inline object _parse_delimited_date(str date_string, bint dayfirst): """ Parse special cases of dates: MM/DD/YYYY, DD/MM/YYYY, MM/YYYY. 
+ At the beginning function tries to parse date in MM/DD/YYYY format, but if month > 12 - in DD/MM/YYYY (`dayfirst == False`). With `dayfirst == True` function makes an attempt to parse date in DD/MM/YYYY, if an attempt is wrong - in DD/MM/YYYY - Note - ---- For MM/DD/YYYY, DD/MM/YYYY: delimiter can be a space or one of /-. For MM/YYYY: delimiter can be a space or one of /- If `date_string` can't be converted to date, then function returns @@ -104,11 +102,13 @@ cdef inline object _parse_delimited_date(object date_string, bint dayfirst): Parameters ---------- date_string : str - dayfirst : bint + dayfirst : bool Returns: -------- - datetime, resolution + datetime or None + str or None + Describing resolution of the parsed string. """ cdef: const char* buf @@ -156,18 +156,19 @@ cdef inline object _parse_delimited_date(object date_string, bint dayfirst): raise DateParseError(f"Invalid date specified ({month}/{day})") -cdef inline bint does_string_look_like_time(object parse_string): +cdef inline bint does_string_look_like_time(str parse_string): """ Checks whether given string is a time: it has to start either from H:MM or from HH:MM, and hour and minute values must be valid. Parameters ---------- - date_string : str + parse_string : str Returns: -------- - whether given string is a time + bool + Whether given string is potentially a time. """ cdef: const char* buf @@ -188,9 +189,10 @@ cdef inline bint does_string_look_like_time(object parse_string): return 0 <= hour <= 23 and 0 <= minute <= 59 -def parse_datetime_string(date_string, freq=None, dayfirst=False, +def parse_datetime_string(date_string: str, freq=None, dayfirst=False, yearfirst=False, **kwargs): - """parse datetime string, only returns datetime. + """ + Parse datetime string, only returns datetime. Also cares special handling matching time patterns. 
Returns @@ -270,16 +272,17 @@ def parse_time_string(arg: str, freq=None, dayfirst=None, yearfirst=None): return res -cdef parse_datetime_string_with_reso(date_string, freq=None, dayfirst=False, +cdef parse_datetime_string_with_reso(str date_string, freq=None, dayfirst=False, yearfirst=False): - """parse datetime string, only returns datetime + """ + Parse datetime string and try to identify its resolution. Returns ------- - parsed : datetime - parsed2 : datetime/dateutil.parser._result - reso : str - inferred resolution + datetime + datetime/dateutil.parser._result + str + Inferred resolution of the parsed string. Raises ------ @@ -315,18 +318,19 @@ cdef parse_datetime_string_with_reso(date_string, freq=None, dayfirst=False, return parsed, parsed, reso -cpdef bint _does_string_look_like_datetime(object py_string): +cpdef bint _does_string_look_like_datetime(str py_string): """ Checks whether given string is a datetime: it has to start with '0' or be greater than 1000. Parameters ---------- - py_string: object + py_string: str Returns ------- - whether given string is a datetime + bool + Whether given string is potentially a datetime. """ cdef: const char *buf @@ -370,9 +374,6 @@ cdef inline object _parse_dateabbr_string(object date_string, object default, # special handling for possibilities eg, 2Q2005, 2Q05, 2005Q1, 05Q1 assert isinstance(date_string, str) - # len(date_string) == 0 - # should be NaT??? 
- if date_string in nat_strings: return NaT, NaT, '' @@ -427,7 +428,7 @@ cdef inline object _parse_dateabbr_string(object date_string, object default, if freq is not None: # hack attack, #1228 try: - mnum = MONTH_NUMBERS[_get_rule_month(freq)] + 1 + mnum = MONTH_NUMBERS[get_rule_month(freq)] + 1 except (KeyError, ValueError): raise DateParseError(f'Unable to retrieve month ' f'information from given ' @@ -467,21 +468,16 @@ cdef inline object _parse_dateabbr_string(object date_string, object default, raise ValueError(f'Unable to parse {date_string}') -cdef dateutil_parse(object timestr, object default, ignoretz=False, +cdef dateutil_parse(str timestr, object default, ignoretz=False, tzinfos=None, dayfirst=None, yearfirst=None): """ lifted from dateutil to get resolution""" cdef: - object fobj, res, attr, ret, tzdata + object res, attr, ret, tzdata object reso = None dict repl = {} - fobj = StringIO(str(timestr)) - res = DEFAULTPARSER._parse(fobj, dayfirst=dayfirst, yearfirst=yearfirst) - - # dateutil 2.2 compat - if isinstance(res, tuple): # PyTuple_Check - res, _ = res + res, _ = DEFAULTPARSER._parse(timestr, dayfirst=dayfirst, yearfirst=yearfirst) if res is None: raise ValueError(f"Unknown datetime string format, unable to parse: {timestr}") @@ -507,20 +503,22 @@ cdef dateutil_parse(object timestr, object default, ignoretz=False, ret = ret + relativedelta.relativedelta(weekday=res.weekday) if not ignoretz: if callable(tzinfos) or tzinfos and res.tzname in tzinfos: + # Note: as of 1.0 this is not reached because + # we never pass tzinfos, see GH#22234 if callable(tzinfos): tzdata = tzinfos(res.tzname, res.tzoffset) else: tzdata = tzinfos.get(res.tzname) - if isinstance(tzdata, datetime.tzinfo): - tzinfo = tzdata + if isinstance(tzdata, tzinfo): + new_tzinfo = tzdata elif isinstance(tzdata, str): - tzinfo = _dateutil_tzstr(tzdata) + new_tzinfo = _dateutil_tzstr(tzdata) elif isinstance(tzdata, int): - tzinfo = tzoffset(res.tzname, tzdata) + new_tzinfo = 
tzoffset(res.tzname, tzdata) else: raise ValueError("offset must be tzinfo subclass, " "tz string, or int offset") - ret = ret.replace(tzinfo=tzinfo) + ret = ret.replace(tzinfo=new_tzinfo) elif res.tzname and res.tzname in time.tzname: ret = ret.replace(tzinfo=_dateutil_tzlocal()) elif res.tzoffset == 0: @@ -530,27 +528,6 @@ cdef dateutil_parse(object timestr, object default, ignoretz=False, return ret, reso -cdef object _get_rule_month(object source, object default='DEC'): - """ - Return starting month of given freq, default is December. - - Example - ------- - >>> _get_rule_month('D') - 'DEC' - - >>> _get_rule_month('A-JAN') - 'JAN' - """ - if hasattr(source, 'freqstr'): - source = source.freqstr - source = source.upper() - if '-' not in source: - return default - else: - return source.split('-')[1] - - # ---------------------------------------------------------------------- # Parsing for type-inference @@ -939,14 +916,14 @@ def _concat_date_cols(tuple date_cols, bint keep_trivial_numbers=True): Parameters ---------- - date_cols : tuple of numpy arrays + date_cols : tuple[ndarray] keep_trivial_numbers : bool, default True if True and len(date_cols) == 1, then conversion (to string from integer/float zero) is not performed Returns ------- - arr_of_rows : ndarray (dtype=object) + arr_of_rows : ndarray[object] Examples -------- @@ -1004,6 +981,6 @@ def _concat_date_cols(tuple date_cols, bint keep_trivial_numbers=True): item = PyArray_GETITEM(array, PyArray_ITER_DATA(it)) list_to_join[col_idx] = convert_to_unicode(item, False) PyArray_ITER_NEXT(it) - result_view[row_idx] = PyUnicode_Join(' ', list_to_join) + result_view[row_idx] = " ".join(list_to_join) return result diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index a6503c00a41bb..a8dabac1527b5 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -1191,12 +1191,15 @@ cdef int64_t period_ordinal_to_dt64(int64_t ordinal, int freq) except? 
-1: return dtstruct_to_dt64(&dts) -def period_format(int64_t value, int freq, object fmt=None): +cdef str period_format(int64_t value, int freq, object fmt=None): cdef: int freq_group if value == NPY_NAT: - return repr(NaT) + return "NaT" + + if isinstance(fmt, str): + fmt = fmt.encode("utf-8") if fmt is None: freq_group = get_freq_group(freq) @@ -1242,24 +1245,22 @@ cdef list extra_fmts = [(b"%q", b"^`AB`^"), cdef list str_extra_fmts = ["^`AB`^", "^`CD`^", "^`EF`^", "^`GH`^", "^`IJ`^", "^`KL`^"] -cdef object _period_strftime(int64_t value, int freq, object fmt): +cdef str _period_strftime(int64_t value, int freq, bytes fmt): cdef: Py_ssize_t i npy_datetimestruct dts char *formatted - object pat, repl, result + bytes pat, brepl list found_pat = [False] * len(extra_fmts) int year, quarter - - if isinstance(fmt, unicode): - fmt = fmt.encode('utf-8') + str result, repl get_date_info(value, freq, &dts) for i in range(len(extra_fmts)): pat = extra_fmts[i][0] - repl = extra_fmts[i][1] + brepl = extra_fmts[i][1] if pat in fmt: - fmt = fmt.replace(pat, repl) + fmt = fmt.replace(pat, brepl) found_pat[i] = True formatted = c_strftime(&dts, fmt) @@ -2234,7 +2235,7 @@ cdef class _Period: object_state = None, self.freq, self.ordinal return (Period, object_state) - def strftime(self, fmt): + def strftime(self, fmt: str) -> str: """ Returns the string representation of the :class:`Period`, depending on the selected ``fmt``. ``fmt`` must be a string diff --git a/pandas/plotting/_matplotlib/converter.py b/pandas/plotting/_matplotlib/converter.py index 05aac976d54db..5b37ebb42aecc 100644 --- a/pandas/plotting/_matplotlib/converter.py +++ b/pandas/plotting/_matplotlib/converter.py @@ -1097,6 +1097,8 @@ def __call__(self, x, pos=0): return "" else: fmt = self.formatdict.pop(x, "") + if isinstance(fmt, np.bytes_): + fmt = fmt.decode("utf-8") return Period(ordinal=int(x), freq=self.freq).strftime(fmt)