From a9f2778d3b755592c5ab7cc3546416b46651c4be Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 28 Sep 2017 19:19:11 -0700 Subject: [PATCH 1/7] Separate out _convert_datetime_to_tsobject --- pandas/_libs/tslib.pyx | 124 ++++++++++++++++++++++++----------------- 1 file changed, 73 insertions(+), 51 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 4c34d0fcb1e5f..3adc20cffb3d9 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -728,7 +728,7 @@ class Timestamp(_Timestamp): # reconstruct & check bounds ts_input = datetime(dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us, tzinfo=_tzinfo) - ts = convert_to_tsobject(ts_input, _tzinfo, None, 0, 0) + ts = _convert_datetime_to_tsobject(ts_input, _tzinfo) value = ts.value + (dts.ps // 1000) if value != NPY_NAT: _check_dts_bounds(&dts) @@ -1475,52 +1475,11 @@ cdef convert_to_tsobject(object ts, object tz, object unit, obj.value = ts pandas_datetime_to_datetimestruct(ts, PANDAS_FR_ns, &obj.dts) elif PyDateTime_Check(ts): - if tz is not None: - # sort of a temporary hack - if ts.tzinfo is not None: - if (hasattr(tz, 'normalize') and - hasattr(ts.tzinfo, '_utcoffset')): - ts = tz.normalize(ts) - obj.value = _pydatetime_to_dts(ts, &obj.dts) - obj.tzinfo = ts.tzinfo - else: #tzoffset - try: - tz = ts.astimezone(tz).tzinfo - except: - pass - obj.value = _pydatetime_to_dts(ts, &obj.dts) - ts_offset = get_utcoffset(ts.tzinfo, ts) - obj.value -= _delta_to_nanoseconds(ts_offset) - tz_offset = get_utcoffset(tz, ts) - obj.value += _delta_to_nanoseconds(tz_offset) - pandas_datetime_to_datetimestruct(obj.value, - PANDAS_FR_ns, &obj.dts) - obj.tzinfo = tz - elif not is_utc(tz): - ts = _localize_pydatetime(ts, tz) - obj.value = _pydatetime_to_dts(ts, &obj.dts) - obj.tzinfo = ts.tzinfo - else: - # UTC - obj.value = _pydatetime_to_dts(ts, &obj.dts) - obj.tzinfo = pytz.utc - else: - obj.value = _pydatetime_to_dts(ts, &obj.dts) - obj.tzinfo = ts.tzinfo - - if obj.tzinfo is not None and not is_utc(obj.tzinfo): - offset = get_utcoffset(obj.tzinfo, ts) - obj.value -= _delta_to_nanoseconds(offset) - - if is_timestamp(ts): - obj.value += ts.nanosecond - obj.dts.ps = ts.nanosecond * 1000 - _check_dts_bounds(&obj.dts) - return obj + return _convert_datetime_to_tsobject(ts, tz) elif PyDate_Check(ts): # Keep the converter same as PyDateTime's ts = datetime.combine(ts, datetime_time()) - return convert_to_tsobject(ts, tz, None, 0, 0) + return _convert_datetime_to_tsobject(ts, tz) elif getattr(ts, '_typ', None) == 'period': raise ValueError( "Cannot convert Period to Timestamp " @@ -1538,12 +1497,64 @@ cdef convert_to_tsobject(object ts, object tz, object unit, return obj +cdef _TSObject _convert_datetime_to_tsobject(datetime ts, object tz): + cdef: + _TSObject obj = _TSObject() + + if tz is not None: + tz = maybe_get_tz(tz) + + # sort of a temporary hack + if ts.tzinfo is not None: + if (hasattr(tz, 'normalize') and + hasattr(ts.tzinfo, '_utcoffset')): + ts = tz.normalize(ts) + obj.value = _pydatetime_to_dts(ts, &obj.dts) + obj.tzinfo = ts.tzinfo + else: + # tzoffset + try: + tz = ts.astimezone(tz).tzinfo + except: + pass + obj.value = _pydatetime_to_dts(ts, &obj.dts) + ts_offset = get_utcoffset(ts.tzinfo, ts) + obj.value -= int(ts_offset.total_seconds() * 1e9) + tz_offset = get_utcoffset(tz, ts) + obj.value += int(tz_offset.total_seconds() * 1e9) + pandas_datetime_to_datetimestruct(obj.value, + PANDAS_FR_ns, &obj.dts) + obj.tzinfo = tz + elif not is_utc(tz): + ts = _localize_pydatetime(ts, tz) + obj.value = _pydatetime_to_dts(ts, &obj.dts) + obj.tzinfo = ts.tzinfo + else: + # UTC + obj.value = _pydatetime_to_dts(ts, &obj.dts) + obj.tzinfo = pytz.utc + else: + obj.value = _pydatetime_to_dts(ts, &obj.dts) + obj.tzinfo = ts.tzinfo + + if obj.tzinfo is not None and not is_utc(obj.tzinfo): + offset = get_utcoffset(obj.tzinfo, ts) + obj.value -= int(offset.total_seconds() * 1e9) + + if is_timestamp(ts): + obj.value += ts.nanosecond + obj.dts.ps = ts.nanosecond * 1000 + + _check_dts_bounds(&obj.dts) + return obj + + cpdef convert_str_to_tsobject(object ts, object tz, object unit, dayfirst=False, yearfirst=False): """ ts must be a string """ cdef: - _TSObject obj + _TSObject obj, obj2 int out_local = 0, out_tzoffset = 0 if tz is not None: @@ -1558,11 +1569,12 @@ cpdef convert_str_to_tsobject(object ts, object tz, object unit, elif ts == 'now': # Issue 9000, we short-circuit rather than going # into np_datetime_strings which returns utc - ts = Timestamp.now(tz) + ts = datetime.now(tz) elif ts == 'today': # Issue 9000, we short-circuit rather than going # into np_datetime_strings which returns a normalized datetime - ts = Timestamp.today(tz) + ts = datetime.today(tz) + # equiv: datetime.today().replace(tzinfo=tz) else: try: _string_to_dts(ts, &obj.dts, &out_local, &out_tzoffset) @@ -1577,7 +1589,17 @@ cpdef convert_str_to_tsobject(object ts, object tz, object unit, return obj else: # Keep the converter same as PyDateTime's - ts = Timestamp(obj.value, tz=obj.tzinfo) + obj2 = convert_to_tsobject(obj.value, obj.tzinfo, + None, 0, 0) + dtime = datetime(obj2.dts.year, obj2.dts.month, + obj2.dts.day, + obj2.dts.hour, obj2.dts.min, obj2.dts.sec, + obj2.dts.us, obj2.tzinfo) + obj2 = _convert_datetime_to_tsobject(dtime, tz) + obj2.value += obj.dts.ps / 1000 + obj2.dts.ps = obj.dts.ps + return obj2 + else: ts = obj.value if tz is not None: @@ -1726,7 +1748,7 @@ def datetime_to_datetime64(ndarray[object] values): else: inferred_tz = get_timezone(val.tzinfo) - _ts = convert_to_tsobject(val, None, None, 0, 0) + _ts = _convert_datetime_to_tsobject(val, None) iresult[i] = _ts.value _check_dts_bounds(&_ts.dts) else: @@ -2046,7 +2068,7 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', seen_datetime=1 if val.tzinfo is not None: if utc_convert: - _ts = convert_to_tsobject(val, None, 'ns', 0, 0) + _ts = _convert_datetime_to_tsobject(val, None) iresult[i] = _ts.value try: _check_dts_bounds(&_ts.dts) @@ -2155,7 +2177,7 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', raise TypeError("invalid string coercion to datetime") try: - _ts = convert_to_tsobject(py_dt, None, None, 0, 0) + _ts = _convert_datetime_to_tsobject(py_dt, None) iresult[i] = _ts.value except ValueError: if is_coerce: From 6ced748e629d28bf970d982f4bde231f49ad75f9 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 28 Sep 2017 21:09:09 -0700 Subject: [PATCH 2/7] typo fixup --- pandas/_libs/tslib.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 3adc20cffb3d9..6c0c2b0b4e020 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -1573,7 +1573,7 @@ cpdef convert_str_to_tsobject(object ts, object tz, object unit, elif ts == 'today': # Issue 9000, we short-circuit rather than going # into np_datetime_strings which returns a normalized datetime - ts = datetime.today(tz) + ts = datetime.now(tz) # equiv: datetime.today().replace(tzinfo=tz) else: try: From a7b66c1a4b534c2b2fd61712a30c8cc3babfebb6 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Fri, 29 Sep 2017 08:55:06 -0700 Subject: [PATCH 3/7] Simplifications per reviewer suggestions --- pandas/_libs/tslib.pyx | 44 +++++++++++++++++++++++++----------------- 1 file changed, 26 insertions(+), 18 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 6c0c2b0b4e020..2010468a00814 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -728,7 +728,7 @@ class Timestamp(_Timestamp): # reconstruct & check bounds ts_input = datetime(dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us, tzinfo=_tzinfo) - ts = _convert_datetime_to_tsobject(ts_input, _tzinfo) + ts = convert_datetime_to_tsobject(ts_input, _tzinfo) value = ts.value + (dts.ps // 1000) if value != NPY_NAT: _check_dts_bounds(&dts) @@ -1475,11 +1475,11 @@ cdef convert_to_tsobject(object ts, object tz, object unit, obj.value = ts pandas_datetime_to_datetimestruct(ts, PANDAS_FR_ns, &obj.dts) elif PyDateTime_Check(ts): - return _convert_datetime_to_tsobject(ts, tz) + return convert_datetime_to_tsobject(ts, tz) elif PyDate_Check(ts): # Keep the converter same as PyDateTime's ts = datetime.combine(ts, datetime_time()) - return _convert_datetime_to_tsobject(ts, tz) + return convert_datetime_to_tsobject(ts, tz) elif getattr(ts, '_typ', None) == 'period': raise ValueError( "Cannot convert Period to Timestamp " @@ -1497,7 +1497,13 @@ cdef convert_to_tsobject(object ts, object tz, object unit, return obj -cdef _TSObject _convert_datetime_to_tsobject(datetime ts, object tz): +cdef _TSObject convert_datetime_to_tsobject(datetime ts, object tz, + int32_t nanos=0): + """ + Extract datetime and int64 from any of: + - python datetime object + - another timestamp object + """ cdef: _TSObject obj = _TSObject() @@ -1545,6 +1551,10 @@ cdef _TSObject _convert_datetime_to_tsobject(datetime ts, object tz): obj.value += ts.nanosecond obj.dts.ps = ts.nanosecond * 1000 + if nanos: + obj.value += nanos + obj.dts.ps = nanos * 1000 + _check_dts_bounds(&obj.dts) return obj @@ -1554,7 +1564,7 @@ cpdef convert_str_to_tsobject(object ts, object tz, object unit, """ ts must be a string """ cdef: - _TSObject obj, obj2 + _TSObject obj int out_local = 0, out_tzoffset = 0 if tz is not None: @@ -1589,16 +1599,14 @@ cpdef convert_str_to_tsobject(object ts, object tz, object unit, return obj else: # Keep the converter same as PyDateTime's - obj2 = convert_to_tsobject(obj.value, obj.tzinfo, - None, 0, 0) - dtime = datetime(obj2.dts.year, obj2.dts.month, - obj2.dts.day, - obj2.dts.hour, obj2.dts.min, obj2.dts.sec, - obj2.dts.us, obj2.tzinfo) - obj2 = _convert_datetime_to_tsobject(dtime, tz) - obj2.value += obj.dts.ps / 1000 - obj2.dts.ps = obj.dts.ps - return obj2 + obj = convert_to_tsobject(obj.value, obj.tzinfo, + None, 0, 0) + dtime = datetime(obj.dts.year, obj.dts.month, obj.dts.day, + obj.dts.hour, obj.dts.min, obj.dts.sec, + obj.dts.us, obj.tzinfo) + obj = convert_datetime_to_tsobject(dtime, tz, + nanos=obj.dts.ps / 1000) + return obj else: ts = obj.value @@ -1748,7 +1756,7 @@ def datetime_to_datetime64(ndarray[object] values): else: inferred_tz = get_timezone(val.tzinfo) - _ts = _convert_datetime_to_tsobject(val, None) + _ts = convert_datetime_to_tsobject(val, None) iresult[i] = _ts.value _check_dts_bounds(&_ts.dts) else: @@ -2068,7 +2076,7 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', seen_datetime=1 if val.tzinfo is not None: if utc_convert: - _ts = _convert_datetime_to_tsobject(val, None) + _ts = convert_datetime_to_tsobject(val, None) iresult[i] = _ts.value try: _check_dts_bounds(&_ts.dts) @@ -2177,7 +2185,7 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', raise TypeError("invalid string coercion to datetime") try: - _ts = _convert_datetime_to_tsobject(py_dt, None) + _ts = convert_datetime_to_tsobject(py_dt, None) iresult[i] = _ts.value except ValueError: if is_coerce: From 43925772995c789af5620f278103aeecb7a2258b Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sat, 30 Sep 2017 18:44:14 -0700 Subject: [PATCH 4/7] verbosify docstring per reviewer request --- pandas/_libs/tslib.pyx | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 2010468a00814..60b7c3cfcb6c7 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -1500,9 +1500,24 @@ cdef convert_to_tsobject(object ts, object tz, object unit, cdef _TSObject convert_datetime_to_tsobject(datetime ts, object tz, int32_t nanos=0): """ - Extract datetime and int64 from any of: - - python datetime object - - another timestamp object + Convert a datetime (or Timestamp) input `ts`, along with optional timezone + object `tz` to a _TSObject. + + The optional argument `nanos` allows for cases where datetime input + needs to be supplemented with higher-precision information. + + Parameters + ---------- + ts : datetime or Timestamp + Value to be converted to _TSObject + tz : tzinfo or None + timezone for the timezone-aware output + nanos : int32_t, default is 0 + nanoseconds supplement the precision of the datetime input ts + + Returns + ------- + obj : _TSObject """ cdef: _TSObject obj = _TSObject() From 21ec8663d947427884f4625e717750a7c2f0c709 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sun, 1 Oct 2017 11:01:13 -0700 Subject: [PATCH 5/7] at reviewer request, use inconvenient convenience routine --- pandas/_libs/tslib.pyx | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 60b7c3cfcb6c7..9f6aca0579176 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -1616,9 +1616,8 @@ cpdef convert_str_to_tsobject(object ts, object tz, object unit, # Keep the converter same as PyDateTime's obj = convert_to_tsobject(obj.value, obj.tzinfo, None, 0, 0) - dtime = datetime(obj.dts.year, obj.dts.month, obj.dts.day, - obj.dts.hour, obj.dts.min, obj.dts.sec, - obj.dts.us, obj.tzinfo) + dtime = create_datetime_from_ts(0, obj.dts, + obj.tzinfo, None) obj = convert_datetime_to_tsobject(dtime, tz, nanos=obj.dts.ps / 1000) return obj From 509699e45ffa11df4d1c15655bc71ebca2cfc9f1 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sun, 1 Oct 2017 12:52:01 -0700 Subject: [PATCH 6/7] Separate out _convert_tsobject_tz --- pandas/_libs/tslib.pyx | 35 +++++++++++++++++++++++++++-------- 1 file changed, 27 insertions(+), 8 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 9f6aca0579176..0057af072c746 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -1613,14 +1613,7 @@ cpdef convert_str_to_tsobject(object ts, object tz, object unit, _check_dts_bounds(&obj.dts) return obj else: - # Keep the converter same as PyDateTime's - obj = convert_to_tsobject(obj.value, obj.tzinfo, - None, 0, 0) - dtime = create_datetime_from_ts(0, obj.dts, - obj.tzinfo, None) - obj = convert_datetime_to_tsobject(dtime, tz, - nanos=obj.dts.ps / 1000) - return obj + return _convert_tsobject_tz(obj, tz) else: ts = obj.value @@ -1639,6 +1632,32 @@ cpdef convert_str_to_tsobject(object ts, object tz, object unit, return convert_to_tsobject(ts, tz, unit, dayfirst, yearfirst) +cdef _TSObject _convert_tsobject_tz(_TSObject obj, object tz): + """ + Given a _TSObject with a FixedOffset tzinfo, convert to a new _TSObject + instance with the given timezone `tz` (or tz-naive if tz is None). + + Parameters + ---------- + obj : _TSObject + tz : tzinfo or None + + Returns + ------- + obj : _TSObject + """ + # Keep the converter same as PyDateTime's + cdef: + datetime dtime + + obj = convert_to_tsobject(obj.value, obj.tzinfo, None, 0, 0) + dtime = datetime(obj.dts.year, obj.dts.month, obj.dts.day, + obj.dts.hour, obj.dts.min, obj.dts.sec, obj.dts.us, + tz) + obj = convert_datetime_to_tsobject(dtime, tz, nanos=obj.dts.ps / 1000) + return obj + + def _test_parse_iso8601(object ts): """ TESTING ONLY: Parse string into Timestamp using iso8601 parser. Used From 9deb75b3707f64623cb7943f800b4d314481c3b2 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sun, 1 Oct 2017 18:52:23 -0700 Subject: [PATCH 7/7] revert _convert_tsobject_tz --- pandas/_libs/tslib.pyx | 36 +++++++++--------------------------- 1 file changed, 9 insertions(+), 27 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 0057af072c746..60b7c3cfcb6c7 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -1613,7 +1613,15 @@ cpdef convert_str_to_tsobject(object ts, object tz, object unit, _check_dts_bounds(&obj.dts) return obj else: - return _convert_tsobject_tz(obj, tz) + # Keep the converter same as PyDateTime's + obj = convert_to_tsobject(obj.value, obj.tzinfo, + None, 0, 0) + dtime = datetime(obj.dts.year, obj.dts.month, obj.dts.day, + obj.dts.hour, obj.dts.min, obj.dts.sec, + obj.dts.us, obj.tzinfo) + obj = convert_datetime_to_tsobject(dtime, tz, + nanos=obj.dts.ps / 1000) + return obj else: ts = obj.value @@ -1632,32 +1640,6 @@ cpdef convert_str_to_tsobject(object ts, object tz, object unit, return convert_to_tsobject(ts, tz, unit, dayfirst, yearfirst) -cdef _TSObject _convert_tsobject_tz(_TSObject obj, object tz): - """ - Given a _TSObject with a FixedOffset tzinfo, convert to a new _TSObject - instance with the given timezone `tz` (or tz-naive if tz is None). - - Parameters - ---------- - obj : _TSObject - tz : tzinfo or None - - Returns - ------- - obj : _TSObject - """ - # Keep the converter same as PyDateTime's - cdef: - datetime dtime - - obj = convert_to_tsobject(obj.value, obj.tzinfo, None, 0, 0) - dtime = datetime(obj.dts.year, obj.dts.month, obj.dts.day, - obj.dts.hour, obj.dts.min, obj.dts.sec, obj.dts.us, - tz) - obj = convert_datetime_to_tsobject(dtime, tz, nanos=obj.dts.ps / 1000) - return obj - - def _test_parse_iso8601(object ts): """ TESTING ONLY: Parse string into Timestamp using iso8601 parser. Used