diff --git a/pandas/_libs/tslib.pxd b/pandas/_libs/tslib.pxd index 5ceff32cfbac7..443b3867eb2b5 100644 --- a/pandas/_libs/tslib.pxd +++ b/pandas/_libs/tslib.pxd @@ -1,6 +1,7 @@ from numpy cimport ndarray, int64_t -cdef convert_to_tsobject(object, object, object, bint, bint) +from tslibs.conversion cimport convert_to_tsobject + cpdef convert_to_timedelta64(object, object) cdef bint _check_all_nulls(obj) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index d2492064c900c..bd42bebfd7aa4 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -94,7 +94,10 @@ from tslibs.timezones cimport ( from tslibs.fields import ( get_date_name_field, get_start_end_field, get_date_field, build_field_sarray) -from tslibs.conversion cimport tz_convert_single, _TSObject, _localize_tso +from tslibs.conversion cimport (tz_convert_single, _TSObject, + convert_to_tsobject, + convert_datetime_to_tsobject, + get_datetime64_nanos) from tslibs.conversion import ( tz_localize_to_utc, tz_convert, tz_convert_single) @@ -1235,215 +1238,6 @@ cdef inline bint is_timestamp(object o): return Py_TYPE(o) == ts_type # isinstance(o, Timestamp) -# helper to extract datetime and int64 from several different possibilities -cdef convert_to_tsobject(object ts, object tz, object unit, - bint dayfirst, bint yearfirst): - """ - Extract datetime and int64 from any of: - - np.int64 (with unit providing a possible modifier) - - np.datetime64 - - a float (with unit providing a possible modifier) - - python int or long object (with unit providing a possible modifier) - - iso8601 string object - - python datetime object - - another timestamp object - """ - cdef: - _TSObject obj - - if tz is not None: - tz = maybe_get_tz(tz) - - obj = _TSObject() - - if is_string_object(ts): - return convert_str_to_tsobject(ts, tz, unit, dayfirst, yearfirst) - - if ts is None or ts is NaT: - obj.value = NPY_NAT - elif is_datetime64_object(ts): - if ts.view('i8') == NPY_NAT: - obj.value = NPY_NAT - else: - obj.value = _get_datetime64_nanos(ts) - dt64_to_dtstruct(obj.value, &obj.dts) - elif is_integer_object(ts): - if ts == NPY_NAT: - obj.value = NPY_NAT - else: - ts = ts * cast_from_unit(None, unit) - obj.value = ts - dt64_to_dtstruct(ts, &obj.dts) - elif is_float_object(ts): - if ts != ts or ts == NPY_NAT: - obj.value = NPY_NAT - else: - ts = cast_from_unit(ts, unit) - obj.value = ts - dt64_to_dtstruct(ts, &obj.dts) - elif PyDateTime_Check(ts): - return convert_datetime_to_tsobject(ts, tz) - elif PyDate_Check(ts): - # Keep the converter same as PyDateTime's - ts = datetime.combine(ts, datetime_time()) - return convert_datetime_to_tsobject(ts, tz) - elif getattr(ts, '_typ', None) == 'period': - raise ValueError("Cannot convert Period to Timestamp " - "unambiguously. Use to_timestamp") - else: - raise TypeError('Cannot convert input [{}] of type {} to ' - 'Timestamp'.format(ts, type(ts))) - - if obj.value != NPY_NAT: - check_dts_bounds(&obj.dts) - - if tz is not None: - _localize_tso(obj, tz) - - return obj - - -cdef _TSObject convert_datetime_to_tsobject(datetime ts, object tz, - int32_t nanos=0): - """ - Convert a datetime (or Timestamp) input `ts`, along with optional timezone - object `tz` to a _TSObject. - - The optional argument `nanos` allows for cases where datetime input - needs to be supplemented with higher-precision information. - - Parameters - ---------- - ts : datetime or Timestamp - Value to be converted to _TSObject - tz : tzinfo or None - timezone for the timezone-aware output - nanos : int32_t, default is 0 - nanoseconds supplement the precision of the datetime input ts - - Returns - ------- - obj : _TSObject - """ - cdef: - _TSObject obj = _TSObject() - - if tz is not None: - tz = maybe_get_tz(tz) - - # sort of a temporary hack - if ts.tzinfo is not None: - if hasattr(tz, 'normalize') and hasattr(ts.tzinfo, '_utcoffset'): - ts = tz.normalize(ts) - obj.value = pydatetime_to_dt64(ts, &obj.dts) - obj.tzinfo = ts.tzinfo - else: - # tzoffset - try: - tz = ts.astimezone(tz).tzinfo - except: - pass - obj.value = pydatetime_to_dt64(ts, &obj.dts) - ts_offset = get_utcoffset(ts.tzinfo, ts) - obj.value -= int(ts_offset.total_seconds() * 1e9) - tz_offset = get_utcoffset(tz, ts) - obj.value += int(tz_offset.total_seconds() * 1e9) - dt64_to_dtstruct(obj.value, &obj.dts) - obj.tzinfo = tz - elif not is_utc(tz): - ts = _localize_pydatetime(ts, tz) - obj.value = pydatetime_to_dt64(ts, &obj.dts) - obj.tzinfo = ts.tzinfo - else: - # UTC - obj.value = pydatetime_to_dt64(ts, &obj.dts) - obj.tzinfo = pytz.utc - else: - obj.value = pydatetime_to_dt64(ts, &obj.dts) - obj.tzinfo = ts.tzinfo - - if obj.tzinfo is not None and not is_utc(obj.tzinfo): - offset = get_utcoffset(obj.tzinfo, ts) - obj.value -= int(offset.total_seconds() * 1e9) - - if is_timestamp(ts): - obj.value += ts.nanosecond - obj.dts.ps = ts.nanosecond * 1000 - - if nanos: - obj.value += nanos - obj.dts.ps = nanos * 1000 - - check_dts_bounds(&obj.dts) - return obj - - -cdef convert_str_to_tsobject(object ts, object tz, object unit, - bint dayfirst=False, bint yearfirst=False): - """ ts must be a string """ - - cdef: - _TSObject obj - int out_local = 0, out_tzoffset = 0 - datetime dt - - if tz is not None: - tz = maybe_get_tz(tz) - - obj = _TSObject() - - assert is_string_object(ts) - - if len(ts) == 0 or ts in nat_strings: - ts = NaT - elif ts == 'now': - # Issue 9000, we short-circuit rather than going - # into np_datetime_strings which returns utc - ts = datetime.now(tz) - elif ts == 'today': - # Issue 9000, we short-circuit rather than going - # into np_datetime_strings which returns a normalized datetime - ts = datetime.now(tz) - # equiv: datetime.today().replace(tzinfo=tz) - else: - try: - _string_to_dts(ts, &obj.dts, &out_local, &out_tzoffset) - obj.value = dtstruct_to_dt64(&obj.dts) - check_dts_bounds(&obj.dts) - if out_local == 1: - obj.tzinfo = pytz.FixedOffset(out_tzoffset) - obj.value = tz_convert_single(obj.value, obj.tzinfo, 'UTC') - if tz is None: - check_dts_bounds(&obj.dts) - return obj - else: - # Keep the converter same as PyDateTime's - obj = convert_to_tsobject(obj.value, obj.tzinfo, - None, 0, 0) - dt = datetime(obj.dts.year, obj.dts.month, obj.dts.day, - obj.dts.hour, obj.dts.min, obj.dts.sec, - obj.dts.us, obj.tzinfo) - obj = convert_datetime_to_tsobject(dt, tz, - nanos=obj.dts.ps / 1000) - return obj - - else: - ts = obj.value - if tz is not None: - # shift for _localize_tso - ts = tz_localize_to_utc(np.array([ts], dtype='i8'), tz, - ambiguous='raise', - errors='raise')[0] - except ValueError: - try: - ts = parse_datetime_string(ts, dayfirst=dayfirst, - yearfirst=yearfirst) - except Exception: - raise ValueError("could not convert string to Timestamp") - - return convert_to_tsobject(ts, tz, unit, dayfirst, yearfirst) - - def _test_parse_iso8601(object ts): """ TESTING ONLY: Parse string into Timestamp using iso8601 parser. Used @@ -1864,7 +1658,7 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', iresult[i] = NPY_NAT else: try: - iresult[i] = _get_datetime64_nanos(val) + iresult[i] = get_datetime64_nanos(val) seen_datetime = 1 except ValueError: if is_coerce: @@ -2802,23 +2596,6 @@ cpdef int64_t _delta_to_nanoseconds(delta) except? -1: delta.microseconds) * 1000 -cdef inline _get_datetime64_nanos(object val): - cdef: - pandas_datetimestruct dts - PANDAS_DATETIMEUNIT unit - npy_datetime ival - - unit = get_datetime64_unit(val) - ival = get_datetime64_value(val) - - if unit != PANDAS_FR_ns: - pandas_datetime_to_datetimestruct(ival, unit, &dts) - check_dts_bounds(&dts) - return dtstruct_to_dt64(&dts) - else: - return ival - - def cast_to_nanoseconds(ndarray arr): cdef: Py_ssize_t i, n = arr.size diff --git a/pandas/_libs/tslibs/conversion.pxd b/pandas/_libs/tslibs/conversion.pxd index a042ee8949192..843a688a2630c 100644 --- a/pandas/_libs/tslibs/conversion.pxd +++ b/pandas/_libs/tslibs/conversion.pxd @@ -1,9 +1,11 @@ # -*- coding: utf-8 -*- # cython: profile=False -from numpy cimport int64_t +from cpython.datetime cimport datetime -from datetime cimport pandas_datetimestruct +from numpy cimport int64_t, int32_t + +from np_datetime cimport pandas_datetimestruct cdef class _TSObject: @@ -12,6 +14,15 @@ cdef class _TSObject: int64_t value # numpy dt64 object tzinfo + +cdef convert_to_tsobject(object ts, object tz, object unit, + bint dayfirst, bint yearfirst) + +cdef _TSObject convert_datetime_to_tsobject(datetime ts, object tz, + int32_t nanos=*) + cdef void _localize_tso(_TSObject obj, object tz) cpdef int64_t tz_convert_single(int64_t val, object tz1, object tz2) + +cdef int64_t get_datetime64_nanos(object val) except? -1 diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 478d3bba80b00..61efc865112a9 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -6,23 +6,41 @@ from cython cimport Py_ssize_t import numpy as np cimport numpy as np -from numpy cimport int64_t, ndarray +from numpy cimport int64_t, int32_t, ndarray np.import_array() import pytz -from cpython.datetime cimport datetime +# stdlib datetime imports +from datetime import time as datetime_time +from cpython.datetime cimport (datetime, tzinfo, + PyDateTime_Check, PyDate_Check, + PyDateTime_CheckExact, PyDateTime_IMPORT) +PyDateTime_IMPORT from np_datetime cimport (check_dts_bounds, pandas_datetimestruct, - dt64_to_dtstruct, dtstruct_to_dt64) + dt64_to_dtstruct, dtstruct_to_dt64, + pydatetime_to_dt64) + +from datetime cimport (pandas_datetime_to_datetimestruct, + PANDAS_DATETIMEUNIT, PANDAS_FR_ns, npy_datetime, + _string_to_dts, + get_datetime64_unit, get_datetime64_value) cimport util +from util cimport (is_string_object, + is_datetime64_object, + is_integer_object, is_float_object) +from timedeltas cimport cast_from_unit from timezones cimport ( is_utc, is_tzlocal, is_fixed_offset, treat_tz_as_dateutil, treat_tz_as_pytz, - get_utcoffset, get_dst_info, get_timezone) + get_utcoffset, get_dst_info, get_timezone, maybe_get_tz) +from parsing import parse_datetime_string + +from nattype import nat_strings, NaT # ---------------------------------------------------------------------- # Constants @@ -32,6 +50,30 @@ cdef int64_t DAY_NS = 86400000000000LL UTC = pytz.UTC +# ---------------------------------------------------------------------- +# Misc Helpers + + +# TODO: How to declare np.datetime64 as the input type? +cdef inline int64_t get_datetime64_nanos(object val) except? -1: + """ + Extract the value and unit from a np.datetime64 object, then convert the + value to nanoseconds if necessary. + """ + cdef: + pandas_datetimestruct dts + PANDAS_DATETIMEUNIT unit + npy_datetime ival + + unit = get_datetime64_unit(val) + ival = get_datetime64_value(val) + + if unit != PANDAS_FR_ns: + pandas_datetime_to_datetimestruct(ival, unit, &dts) + check_dts_bounds(&dts) + ival = dtstruct_to_dt64(&dts) + + return ival # ---------------------------------------------------------------------- # _TSObject Conversion @@ -48,6 +90,241 @@ cdef class _TSObject: return self.value +cdef convert_to_tsobject(object ts, object tz, object unit, + bint dayfirst, bint yearfirst): + """ + Extract datetime and int64 from any of: + - np.int64 (with unit providing a possible modifier) + - np.datetime64 + - a float (with unit providing a possible modifier) + - python int or long object (with unit providing a possible modifier) + - iso8601 string object + - python datetime object + - another timestamp object + """ + cdef: + _TSObject obj + + if tz is not None: + tz = maybe_get_tz(tz) + + obj = _TSObject() + + if is_string_object(ts): + return convert_str_to_tsobject(ts, tz, unit, dayfirst, yearfirst) + + if ts is None or ts is NaT: + obj.value = NPY_NAT + elif is_datetime64_object(ts): + if ts.view('i8') == NPY_NAT: + obj.value = NPY_NAT + else: + obj.value = get_datetime64_nanos(ts) + dt64_to_dtstruct(obj.value, &obj.dts) + elif is_integer_object(ts): + if ts == NPY_NAT: + obj.value = NPY_NAT + else: + ts = ts * cast_from_unit(None, unit) + obj.value = ts + dt64_to_dtstruct(ts, &obj.dts) + elif is_float_object(ts): + if ts != ts or ts == NPY_NAT: + obj.value = NPY_NAT + else: + ts = cast_from_unit(ts, unit) + obj.value = ts + dt64_to_dtstruct(ts, &obj.dts) + elif PyDateTime_Check(ts): + return convert_datetime_to_tsobject(ts, tz) + elif PyDate_Check(ts): + # Keep the converter same as PyDateTime's + ts = datetime.combine(ts, datetime_time()) + return convert_datetime_to_tsobject(ts, tz) + elif getattr(ts, '_typ', None) == 'period': + raise ValueError("Cannot convert Period to Timestamp " + "unambiguously. Use to_timestamp") + else: + raise TypeError('Cannot convert input [{}] of type {} to ' + 'Timestamp'.format(ts, type(ts))) + + if obj.value != NPY_NAT: + check_dts_bounds(&obj.dts) + + if tz is not None: + _localize_tso(obj, tz) + + return obj + + +cdef _TSObject convert_datetime_to_tsobject(datetime ts, object tz, + int32_t nanos=0): + """ + Convert a datetime (or Timestamp) input `ts`, along with optional timezone + object `tz` to a _TSObject. + + The optional argument `nanos` allows for cases where datetime input + needs to be supplemented with higher-precision information. + + Parameters + ---------- + ts : datetime or Timestamp + Value to be converted to _TSObject + tz : tzinfo or None + timezone for the timezone-aware output + nanos : int32_t, default is 0 + nanoseconds supplement the precision of the datetime input ts + + Returns + ------- + obj : _TSObject + """ + cdef: + _TSObject obj = _TSObject() + + if tz is not None: + tz = maybe_get_tz(tz) + + # sort of a temporary hack + if ts.tzinfo is not None: + if hasattr(tz, 'normalize') and hasattr(ts.tzinfo, '_utcoffset'): + ts = tz.normalize(ts) + obj.value = pydatetime_to_dt64(ts, &obj.dts) + obj.tzinfo = ts.tzinfo + else: + # tzoffset + try: + tz = ts.astimezone(tz).tzinfo + except: + pass + obj.value = pydatetime_to_dt64(ts, &obj.dts) + ts_offset = get_utcoffset(ts.tzinfo, ts) + obj.value -= int(ts_offset.total_seconds() * 1e9) + tz_offset = get_utcoffset(tz, ts) + obj.value += int(tz_offset.total_seconds() * 1e9) + dt64_to_dtstruct(obj.value, &obj.dts) + obj.tzinfo = tz + elif not is_utc(tz): + ts = _localize_pydatetime(ts, tz) + obj.value = pydatetime_to_dt64(ts, &obj.dts) + obj.tzinfo = ts.tzinfo + else: + # UTC + obj.value = pydatetime_to_dt64(ts, &obj.dts) + obj.tzinfo = pytz.utc + else: + obj.value = pydatetime_to_dt64(ts, &obj.dts) + obj.tzinfo = ts.tzinfo + + if obj.tzinfo is not None and not is_utc(obj.tzinfo): + offset = get_utcoffset(obj.tzinfo, ts) + obj.value -= int(offset.total_seconds() * 1e9) + + if not PyDateTime_CheckExact(ts): + # datetime instance but not datetime type --> Timestamp + obj.value += ts.nanosecond + obj.dts.ps = ts.nanosecond * 1000 + + if nanos: + obj.value += nanos + obj.dts.ps = nanos * 1000 + + check_dts_bounds(&obj.dts) + return obj + + +cdef _TSObject convert_str_to_tsobject(object ts, object tz, object unit, + bint dayfirst=False, + bint yearfirst=False): + """ + Convert a string-like (bytes or unicode) input `ts`, along with optional + timezone object `tz` to a _TSObject. + + The optional arguments `dayfirst` and `yearfirst` are passed to the + dateutil parser. + + Parameters + ---------- + ts : bytes or unicode + Value to be converted to _TSObject + tz : tzinfo or None + timezone for the timezone-aware output + dayfirst : bool, default False + When parsing an ambiguous date string, interpret e.g. "3/4/1975" as + April 3, as opposed to the standard US interpretation March 4. + yearfirst : bool, default False + When parsing an ambiguous date string, interpret e.g. "01/05/09" + as "May 9, 2001", as opposed to the default "Jan 5, 2009" + + Returns + ------- + obj : _TSObject + """ + cdef: + _TSObject obj + int out_local = 0, out_tzoffset = 0 + datetime dt + + if tz is not None: + tz = maybe_get_tz(tz) + + obj = _TSObject() + + assert is_string_object(ts) + + if len(ts) == 0 or ts in nat_strings: + ts = NaT + elif ts == 'now': + # Issue 9000, we short-circuit rather than going + # into np_datetime_strings which returns utc + ts = datetime.now(tz) + elif ts == 'today': + # Issue 9000, we short-circuit rather than going + # into np_datetime_strings which returns a normalized datetime + ts = datetime.now(tz) + # equiv: datetime.today().replace(tzinfo=tz) + else: + try: + _string_to_dts(ts, &obj.dts, &out_local, &out_tzoffset) + obj.value = dtstruct_to_dt64(&obj.dts) + check_dts_bounds(&obj.dts) + if out_local == 1: + obj.tzinfo = pytz.FixedOffset(out_tzoffset) + obj.value = tz_convert_single(obj.value, obj.tzinfo, 'UTC') + if tz is None: + check_dts_bounds(&obj.dts) + return obj + else: + # Keep the converter same as PyDateTime's + obj = convert_to_tsobject(obj.value, obj.tzinfo, + None, 0, 0) + dt = datetime(obj.dts.year, obj.dts.month, obj.dts.day, + obj.dts.hour, obj.dts.min, obj.dts.sec, + obj.dts.us, obj.tzinfo) + obj = convert_datetime_to_tsobject(dt, tz, + nanos=obj.dts.ps / 1000) + return obj + + else: + ts = obj.value + if tz is not None: + # shift for _localize_tso + ts = tz_localize_to_utc(np.array([ts], dtype='i8'), tz, + ambiguous='raise', + errors='raise')[0] + except ValueError: + try: + ts = parse_datetime_string(ts, dayfirst=dayfirst, + yearfirst=yearfirst) + except Exception: + raise ValueError("could not convert string to Timestamp") + + return convert_to_tsobject(ts, tz, unit, dayfirst, yearfirst) + + +# ---------------------------------------------------------------------- +# Localization + cdef inline void _localize_tso(_TSObject obj, object tz): """ Take a TSObject in UTC and localizes to timezone tz. @@ -55,6 +332,7 @@ cdef inline void _localize_tso(_TSObject obj, object tz): cdef: ndarray[int64_t] trans, deltas Py_ssize_t delta, posn + datetime dt if is_utc(tz): obj.tzinfo = tz @@ -99,8 +377,24 @@ cdef inline void _localize_tso(_TSObject obj, object tz): obj.tzinfo = tz +cdef inline datetime _localize_pydatetime(datetime dt, tzinfo tz): + """ + Take a datetime/Timestamp in UTC and localizes to timezone tz. + + NB: Unlike the version in tslib, this treats datetime and Timestamp objects + identically, i.e. discards nanos from Timestamps. + It also assumes that the `tz` input is not None. + """ + if tz == 'UTC' or tz is UTC: + return UTC.localize(dt) + try: + # datetime.replace with pytz may be incorrect result + return tz.localize(dt) + except AttributeError: + return dt.replace(tzinfo=tz) + # ---------------------------------------------------------------------- -# Localization / Timezone Conversion +# Timezone Conversion cpdef int64_t tz_convert_single(int64_t val, object tz1, object tz2): @@ -126,6 +420,7 @@ cpdef int64_t tz_convert_single(int64_t val, object tz1, object tz2): Py_ssize_t pos int64_t v, offset, utc_date pandas_datetimestruct dts + datetime dt if val == NPY_NAT: return val @@ -190,6 +485,7 @@ def tz_convert(ndarray[int64_t] vals, object tz1, object tz2): ndarray[Py_ssize_t] posn int64_t v, offset, delta pandas_datetimestruct dts + datetime dt if len(vals) == 0: return np.array([], dtype=np.int64) @@ -281,6 +577,7 @@ def tz_convert(ndarray[int64_t] vals, object tz1, object tz2): return result +# TODO: cdef scalar version to call from convert_str_to_tsobject @cython.boundscheck(False) @cython.wraparound(False) def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, @@ -303,6 +600,7 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, pandas_datetimestruct dts bint infer_dst = False, is_dst = False, fill = False bint is_coerce = errors == 'coerce', is_raise = errors == 'raise' + datetime dt # Vectorized version of DstTzInfo.localize @@ -323,7 +621,7 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, result[i] = v - delta return result - if util.is_string_object(ambiguous): + if is_string_object(ambiguous): if ambiguous == 'infer': infer_dst = True elif ambiguous == 'NaT':