From bb5655382d52e5b2cce0d689f4d4821f43c61794 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Sat, 27 Jun 2020 05:24:03 +0000 Subject: [PATCH 01/52] add values.dtype.kind==f branch to array_with_unit_datetime --- pandas/_libs/tslib.pyx | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index e4128af62d06d..ac9c4cd524d04 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -227,7 +227,6 @@ def array_with_unit_to_datetime( m = cast_from_unit(None, unit) if is_raise: - # try a quick conversion to i8 # if we have nulls that are not type-compat # then need to iterate @@ -240,9 +239,17 @@ def array_with_unit_to_datetime( fvalues = iresult.astype('f8') * m need_to_iterate = False + # GH20445 + if values.dtype.kind == "f": + fresult = values.astype('f8', casting='same_kind', copy=False) + # fill by comparing to NPY_NAT constant + mask = fresult == NPY_NAT + fresult[mask] = 0.0 + fvalues = fvalues.astype('f8') * m # FIXME: this line segfaults rn + need_to_iterate = False + # check the bounds if not need_to_iterate: - if ((fvalues < Timestamp.min.value).any() or (fvalues > Timestamp.max.value).any()): raise OutOfBoundsDatetime(f"cannot convert input with unit '{unit}'") @@ -410,7 +417,6 @@ cpdef array_to_datetime( float offset_seconds, tz_offset set out_tzoffset_vals = set() bint string_to_dts_failed - # specify error conditions assert is_raise or is_ignore or is_coerce From de81148f3dbf8754449657de803e1c179975cf33 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Sat, 27 Jun 2020 05:35:09 +0000 Subject: [PATCH 02/52] remove unnecessary styling changes --- pandas/_libs/tslib.pyx | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index ac9c4cd524d04..b4684b8d36751 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -227,6 +227,7 @@ def array_with_unit_to_datetime( m = cast_from_unit(None, unit) if is_raise: + # try a quick conversion to i8 # if we have nulls that are not type-compat # then need to iterate @@ -250,6 +251,7 @@ def array_with_unit_to_datetime( # check the bounds if not need_to_iterate: + if ((fvalues < Timestamp.min.value).any() or (fvalues > Timestamp.max.value).any()): raise OutOfBoundsDatetime(f"cannot convert input with unit '{unit}'") @@ -417,6 +419,7 @@ cpdef array_to_datetime( float offset_seconds, tz_offset set out_tzoffset_vals = set() bint string_to_dts_failed + # specify error conditions assert is_raise or is_ignore or is_coerce From 98036709644c3a48ef9e11dc00a75002caccf91f Mon Sep 17 00:00:00 2001 From: arw2019 Date: Sat, 27 Jun 2020 05:48:15 +0000 Subject: [PATCH 03/52] added cast_from_unit definition for float --- pandas/_libs/tslibs/conversion.pyx | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index adf1dfbc1ac72..0f1804139aaad 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -119,6 +119,15 @@ cdef inline int64_t cast_from_unit(object ts, str unit) except? -1: frac = round(frac, p) return (base * m) + (frac * m) +cdef inline float cast_from_unit(object ts, str unit) except? -1: + """ return a casting of the unit represented to nanoseconds + round the fractional part of a float to our precision, p """ + cdef: + float m + int p + + # TO DO: fill in body + cpdef inline (int64_t, int) precision_from_unit(str unit): """ From a224e198f68e8185fde74cc45190db08f15af56e Mon Sep 17 00:00:00 2001 From: arw2019 Date: Mon, 29 Jun 2020 00:36:05 +0000 Subject: [PATCH 04/52] to_datetime: added astyping for floats --- pandas/_libs/tslib.pyx | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index b4684b8d36751..e65779b680adb 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -246,20 +246,25 @@ def array_with_unit_to_datetime( # fill by comparing to NPY_NAT constant mask = fresult == NPY_NAT fresult[mask] = 0.0 - fvalues = fvalues.astype('f8') * m # FIXME: this line segfaults rn + m_as_float = m + fvalues = fresult.astype('f8') * m_as_float need_to_iterate = False # check the bounds if not need_to_iterate: - if ((fvalues < Timestamp.min.value).any() or (fvalues > Timestamp.max.value).any()): raise OutOfBoundsDatetime(f"cannot convert input with unit '{unit}'") - result = (iresult * m).astype('M8[ns]') - iresult = result.view('i8') - iresult[mask] = NPY_NAT - return result, tz - + if values.dtype.kind == 'i': + result = (iresult * m).astype('M8[ns]') + iresult = result.view('i8') + iresult[mask] = NPY_NAT + return result, tz + elif values.dtype.kind == 'f': + result = (fresult * m_as_float).astype('M8[ns]') + fresult = result.view('f8') + fresult[mask] = NPY_NAT + return result, tz result = np.empty(n, dtype='M8[ns]') iresult = result.view('i8') @@ -419,7 +424,6 @@ cpdef array_to_datetime( float offset_seconds, tz_offset set out_tzoffset_vals = set() bint string_to_dts_failed - # specify error conditions assert is_raise or is_ignore or is_coerce From a7bb0d1b72bcd9e8e30d00367f645da3aaebea41 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Mon, 29 Jun 2020 00:38:59 +0000 Subject: [PATCH 05/52] revert changes --- pandas/_libs/tslibs/conversion.pyx | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 0f1804139aaad..ee04922eac043 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -119,17 +119,7 @@ cdef inline int64_t cast_from_unit(object ts, str unit) except? -1: frac = round(frac, p) return (base * m) + (frac * m) -cdef inline float cast_from_unit(object ts, str unit) except? -1: - """ return a casting of the unit represented to nanoseconds - round the fractional part of a float to our precision, p """ - cdef: - float m - int p - - # TO DO: fill in body - - -cpdef inline (int64_t, int) precision_from_unit(str unit): +cpdef inline object precision_from_unit(str unit): """ Return a casting of the unit represented to nanoseconds + the precision to round the fractional part. From 20162fef04c3e49e45002363e81e4c45be5bf9b3 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Mon, 29 Jun 2020 00:39:35 +0000 Subject: [PATCH 06/52] revert changes --- pandas/_libs/tslibs/conversion.pyx | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index ee04922eac043..15de374716827 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -119,6 +119,7 @@ cdef inline int64_t cast_from_unit(object ts, str unit) except? -1: frac = round(frac, p) return (base * m) + (frac * m) + cpdef inline object precision_from_unit(str unit): """ Return a casting of the unit represented to nanoseconds + the precision From a332e3736ae0af1fcf5b79ec275b6dc71aea1326 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Mon, 29 Jun 2020 00:40:45 +0000 Subject: [PATCH 07/52] revert styling change --- pandas/_libs/tslib.pyx | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index e65779b680adb..03fbc6dabd16d 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -424,6 +424,7 @@ cpdef array_to_datetime( float offset_seconds, tz_offset set out_tzoffset_vals = set() bint string_to_dts_failed + # specify error conditions assert is_raise or is_ignore or is_coerce From 41f22fa1dc813d2f4e5665e10d50bdcd38f32d09 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Mon, 29 Jun 2020 00:42:29 +0000 Subject: [PATCH 08/52] _libs/tslib.pyx added comments --- pandas/_libs/tslib.pyx | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 03fbc6dabd16d..ff926f9ad9ba2 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -255,6 +255,7 @@ def array_with_unit_to_datetime( if ((fvalues < Timestamp.min.value).any() or (fvalues > Timestamp.max.value).any()): raise OutOfBoundsDatetime(f"cannot convert input with unit '{unit}'") + # GH20445 if values.dtype.kind == 'i': result = (iresult * m).astype('M8[ns]') iresult = result.view('i8') @@ -424,7 +425,7 @@ cpdef array_to_datetime( float offset_seconds, tz_offset set out_tzoffset_vals = set() bint string_to_dts_failed - + # specify error conditions assert is_raise or is_ignore or is_coerce From 0617b2a1952929774afcb57333dd37a36cc37180 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Wed, 1 Jul 2020 02:02:56 +0000 Subject: [PATCH 09/52] fixed string quotes --- pandas/_libs/tslib.pyx | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index ff926f9ad9ba2..160b3b6d3e610 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -233,21 +233,21 @@ def array_with_unit_to_datetime( # then need to iterate if values.dtype.kind == "i": # Note: this condition makes the casting="same_kind" redundant - iresult = values.astype('i8', casting='same_kind', copy=False) + iresult = values.astype("i8", casting="same_kind", copy=False) # fill by comparing to NPY_NAT constant mask = iresult == NPY_NAT iresult[mask] = 0 - fvalues = iresult.astype('f8') * m + fvalues = iresult.astype("f8") * m need_to_iterate = False # GH20445 if values.dtype.kind == "f": - fresult = values.astype('f8', casting='same_kind', copy=False) - # fill by comparing to NPY_NAT constant - mask = fresult == NPY_NAT + fresult = values.astype("f8", casting="same_kind", copy=False) + # fill by comparing to np.nan constant + mask = fresult == np.nan fresult[mask] = 0.0 - m_as_float = m - fvalues = fresult.astype('f8') * m_as_float + m_as_float = m + fvalues = fresult.astype("f8") * m_as_float need_to_iterate = False # check the bounds @@ -256,18 +256,18 @@ def array_with_unit_to_datetime( or (fvalues > Timestamp.max.value).any()): raise OutOfBoundsDatetime(f"cannot convert input with unit '{unit}'") # GH20445 - if values.dtype.kind == 'i': - result = (iresult * m).astype('M8[ns]') - iresult = result.view('i8') + if values.dtype.kind == "i": + result = (iresult * m).astype("M8[ns]") + iresult = result.view("i8") iresult[mask] = NPY_NAT return result, tz - elif values.dtype.kind == 'f': - result = (fresult * m_as_float).astype('M8[ns]') - fresult = result.view('f8') + elif values.dtype.kind == "f": + result = (fresult * m_as_float).astype("M8[ns]") + fresult = result.view("f8") fresult[mask] = NPY_NAT return result, tz - result = np.empty(n, dtype='M8[ns]') - iresult = result.view('i8') + result = np.empty(n, dtype="M8[ns]") + iresult = result.view("i8") try: for i in range(n): From a501aa08ff4eff2e4afb7925389043a9addfb73a Mon Sep 17 00:00:00 2001 From: arw2019 Date: Wed, 8 Jul 2020 17:53:03 +0000 Subject: [PATCH 10/52] removed xfail tests --- pandas/tests/io/json/test_pandas.py | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index c4db0170ecc90..b012755bae3c5 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -1256,25 +1256,6 @@ def test_to_json_large_numbers(self, bigNum): expected = '{"0":{"articleId":' + str(bigNum) + "}}" assert json == expected - @pytest.mark.parametrize("bigNum", [sys.maxsize + 1, -(sys.maxsize + 2)]) - @pytest.mark.skipif(not compat.IS64, reason="GH-35279") - def test_read_json_large_numbers(self, bigNum): - # GH20599 - - series = Series(bigNum, dtype=object, index=["articleId"]) - json = '{"articleId":' + str(bigNum) + "}" - with pytest.raises(ValueError): - json = StringIO(json) - result = read_json(json) - tm.assert_series_equal(series, result) - - df = DataFrame(bigNum, dtype=object, index=["articleId"], columns=[0]) - json = '{"0":{"articleId":' + str(bigNum) + "}}" - with pytest.raises(ValueError): - json = StringIO(json) - result = read_json(json) - tm.assert_frame_equal(df, result) - def test_read_json_large_numbers2(self): # GH18842 json = '{"articleId": "1404366058080022500245"}' From 9be156742eeaec8c3d0ce03b09b7b365b512aa29 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Wed, 8 Jul 2020 20:59:58 +0000 Subject: [PATCH 11/52] change _libs/tslib.pyx --- pandas/_libs/tslib.pyx | 199 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 193 insertions(+), 6 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 160b3b6d3e610..50ee10313f171 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -13,8 +13,12 @@ PyDateTime_IMPORT cimport numpy as cnp +<<<<<<< HEAD from numpy cimport float64_t, int64_t, ndarray +======= +from numpy cimport float64_t, int64_t, ndarray, uint8_t +>>>>>>> change _libs/tslib.pyx import numpy as np cnp.import_array() @@ -36,6 +40,12 @@ from pandas._libs.util cimport is_datetime64_object, is_float_object, is_integer from pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime from pandas._libs.tslibs.parsing import parse_datetime_string +from pandas._libs.tslibs.timezones cimport ( + get_dst_info, + is_utc, + is_tzlocal, + utc_pytz as UTC, +) from pandas._libs.tslibs.conversion cimport ( _TSObject, cast_from_unit, @@ -51,9 +61,186 @@ from pandas._libs.tslibs.timestamps cimport _Timestamp from pandas._libs.tslibs.timestamps import Timestamp +from pandas._libs.tslibs.tzconversion cimport ( + tz_convert_single, + tz_convert_utc_to_tzlocal, +) + # Note: this is the only non-tslibs intra-pandas dependency here from pandas._libs.missing cimport checknull_with_nat_and_na -from pandas._libs.tslibs.tzconversion cimport tz_localize_to_utc_single + + +cdef inline object create_datetime_from_ts( + int64_t value, + npy_datetimestruct dts, + tzinfo tz, + object freq, + bint fold, +): + """ + Convenience routine to construct a datetime.datetime from its parts. + """ + return datetime( + dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us, tz, fold=fold + ) + + +cdef inline object create_date_from_ts( + int64_t value, + npy_datetimestruct dts, + tzinfo tz, + object freq, + bint fold +): + """ + Convenience routine to construct a datetime.date from its parts. + """ + # GH 25057 add fold argument to match other func_create signatures + return date(dts.year, dts.month, dts.day) + + +cdef inline object create_time_from_ts( + int64_t value, + npy_datetimestruct dts, + tzinfo tz, + object freq, + bint fold +): + """ + Convenience routine to construct a datetime.time from its parts. + """ + return time(dts.hour, dts.min, dts.sec, dts.us, tz, fold=fold) + + +@cython.wraparound(False) +@cython.boundscheck(False) +def ints_to_pydatetime( + const int64_t[:] arr, + tzinfo tz=None, + object freq=None, + bint fold=False, + str box="datetime" +): + """ + Convert an i8 repr to an ndarray of datetimes, date, time or Timestamp. + + Parameters + ---------- + arr : array of i8 + tz : str, optional + convert to this timezone + freq : str/Offset, optional + freq to convert + fold : bint, default is 0 + Due to daylight saving time, one wall clock time can occur twice + when shifting from summer to winter time; fold describes whether the + datetime-like corresponds to the first (0) or the second time (1) + the wall clock hits the ambiguous time + + .. versionadded:: 1.1.0 + box : {'datetime', 'timestamp', 'date', 'time'}, default 'datetime' + * If datetime, convert to datetime.datetime + * If date, convert to datetime.date + * If time, convert to datetime.time + * If Timestamp, convert to pandas.Timestamp + + Returns + ------- + ndarray of dtype specified by box + """ + cdef: + Py_ssize_t i, n = len(arr) + ndarray[int64_t] trans + int64_t[:] deltas + Py_ssize_t pos + npy_datetimestruct dts + object dt, new_tz + str typ + int64_t value, delta, local_value + ndarray[object] result = np.empty(n, dtype=object) + object (*func_create)(int64_t, npy_datetimestruct, tzinfo, object, bint) + + if box == "date": + assert (tz is None), "tz should be None when converting to date" + + func_create = create_date_from_ts + elif box == "timestamp": + func_create = create_timestamp_from_ts + + if isinstance(freq, str): + freq = to_offset(freq) + elif box == "time": + func_create = create_time_from_ts + elif box == "datetime": + func_create = create_datetime_from_ts + else: + raise ValueError( + "box must be one of 'datetime', 'date', 'time' or 'timestamp'" + ) + + if is_utc(tz) or tz is None: + for i in range(n): + value = arr[i] + if value == NPY_NAT: + result[i] = NaT + else: + dt64_to_dtstruct(value, &dts) + result[i] = func_create(value, dts, tz, freq, fold) + elif is_tzlocal(tz): + for i in range(n): + value = arr[i] + if value == NPY_NAT: + result[i] = NaT + else: + # Python datetime objects do not support nanosecond + # resolution (yet, PEP 564). Need to compute new value + # using the i8 representation. + local_value = tz_convert_utc_to_tzlocal(value, tz) + dt64_to_dtstruct(local_value, &dts) + result[i] = func_create(value, dts, tz, freq, fold) + else: + trans, deltas, typ = get_dst_info(tz) + + if typ not in ['pytz', 'dateutil']: + # static/fixed; in this case we know that len(delta) == 1 + delta = deltas[0] + for i in range(n): + value = arr[i] + if value == NPY_NAT: + result[i] = NaT + else: + # Adjust datetime64 timestamp, recompute datetimestruct + dt64_to_dtstruct(value + delta, &dts) + result[i] = func_create(value, dts, tz, freq, fold) + + elif typ == 'dateutil': + # no zone-name change for dateutil tzs - dst etc + # represented in single object. + for i in range(n): + value = arr[i] + if value == NPY_NAT: + result[i] = NaT + else: + # Adjust datetime64 timestamp, recompute datetimestruct + pos = trans.searchsorted(value, side='right') - 1 + dt64_to_dtstruct(value + deltas[pos], &dts) + result[i] = func_create(value, dts, tz, freq, fold) + else: + # pytz + for i in range(n): + value = arr[i] + if value == NPY_NAT: + result[i] = NaT + else: + # Adjust datetime64 timestamp, recompute datetimestruct + pos = trans.searchsorted(value, side='right') - 1 + # find right representation of dst etc in pytz timezone + new_tz = tz._tzinfos[tz._transition_info[pos]] + + dt64_to_dtstruct(value + deltas[pos], &dts) + result[i] = func_create(value, dts, new_tz, freq, fold) + + return result def _test_parse_iso8601(ts: str): @@ -77,7 +264,7 @@ def _test_parse_iso8601(ts: str): check_dts_bounds(&obj.dts) if out_local == 1: obj.tzinfo = pytz.FixedOffset(out_tzoffset) - obj.value = tz_localize_to_utc_single(obj.value, obj.tzinfo) + obj.value = tz_convert_single(obj.value, obj.tzinfo, UTC) return Timestamp(obj.value, tz=obj.tzinfo) else: return Timestamp(obj.value) @@ -171,8 +358,8 @@ def format_array_from_datetime( def array_with_unit_to_datetime( ndarray values, - str unit, - str errors="coerce" + object unit, + str errors='coerce' ): """ Convert the ndarray to datetime according to the time unit. @@ -192,7 +379,7 @@ def array_with_unit_to_datetime( ---------- values : ndarray of object Date-like objects to convert. - unit : str + unit : object Time unit to use during conversion. errors : str, default 'raise' Error behavior when parsing. @@ -550,7 +737,7 @@ cpdef array_to_datetime( # dateutil.tz.tzoffset objects out_tzoffset_vals.add(out_tzoffset * 60.) tz = pytz.FixedOffset(out_tzoffset) - value = tz_localize_to_utc_single(value, tz) + value = tz_convert_single(value, tz, UTC) out_local = 0 out_tzoffset = 0 else: From 1030374322d4978fcbaa4b8f27816b69ef4b0ad6 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Wed, 8 Jul 2020 21:08:56 +0000 Subject: [PATCH 12/52] revert merge error --- pandas/tests/io/json/test_pandas.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index b012755bae3c5..483a1dd407291 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -1250,11 +1250,21 @@ def test_to_json_large_numbers(self, bigNum): json = series.to_json() expected = '{"articleId":' + str(bigNum) + "}" assert json == expected + # GH 20599 + with pytest.raises(ValueError): + json = StringIO(json) + result = read_json(json) + tm.assert_series_equal(series, result) df = DataFrame(bigNum, dtype=object, index=["articleId"], columns=[0]) json = df.to_json() expected = '{"0":{"articleId":' + str(bigNum) + "}}" assert json == expected + # GH 20599 + with pytest.raises(ValueError): + json = StringIO(json) + result = read_json(json) + tm.assert_frame_equal(df, result) def test_read_json_large_numbers2(self): # GH18842 From ea932a94aed08610232a8303d2b298ab1a2d2e1e Mon Sep 17 00:00:00 2001 From: arw2019 Date: Wed, 8 Jul 2020 21:10:35 +0000 Subject: [PATCH 13/52] revert merge error --- pandas/_libs/tslib.pyx | 141 ++++++++++++++++------------------------- 1 file changed, 55 insertions(+), 86 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 50ee10313f171..d63521d4c90ab 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -14,11 +14,15 @@ PyDateTime_IMPORT cimport numpy as cnp <<<<<<< HEAD +<<<<<<< HEAD from numpy cimport float64_t, int64_t, ndarray ======= from numpy cimport float64_t, int64_t, ndarray, uint8_t >>>>>>> change _libs/tslib.pyx +======= +from numpy cimport float64_t, int64_t, ndarray, uint8_t, intp_t +>>>>>>> revert merge error import numpy as np cnp.import_array() @@ -44,7 +48,6 @@ from pandas._libs.tslibs.timezones cimport ( get_dst_info, is_utc, is_tzlocal, - utc_pytz as UTC, ) from pandas._libs.tslibs.conversion cimport ( _TSObject, @@ -62,8 +65,8 @@ from pandas._libs.tslibs.timestamps cimport _Timestamp from pandas._libs.tslibs.timestamps import Timestamp from pandas._libs.tslibs.tzconversion cimport ( - tz_convert_single, tz_convert_utc_to_tzlocal, + tz_localize_to_utc_single, ) # Note: this is the only non-tslibs intra-pandas dependency here @@ -152,13 +155,15 @@ def ints_to_pydatetime( Py_ssize_t i, n = len(arr) ndarray[int64_t] trans int64_t[:] deltas - Py_ssize_t pos + intp_t[:] pos npy_datetimestruct dts object dt, new_tz str typ - int64_t value, delta, local_value + int64_t value, local_value, delta = NPY_NAT # dummy for delta ndarray[object] result = np.empty(n, dtype=object) object (*func_create)(int64_t, npy_datetimestruct, tzinfo, object, bint) + bint use_utc = False, use_tzlocal = False, use_fixed = False + bint use_pytz = False if box == "date": assert (tz is None), "tz should be None when converting to date" @@ -179,66 +184,45 @@ def ints_to_pydatetime( ) if is_utc(tz) or tz is None: - for i in range(n): - value = arr[i] - if value == NPY_NAT: - result[i] = NaT - else: - dt64_to_dtstruct(value, &dts) - result[i] = func_create(value, dts, tz, freq, fold) + use_utc = True elif is_tzlocal(tz): - for i in range(n): - value = arr[i] - if value == NPY_NAT: - result[i] = NaT - else: - # Python datetime objects do not support nanosecond - # resolution (yet, PEP 564). Need to compute new value - # using the i8 representation. - local_value = tz_convert_utc_to_tzlocal(value, tz) - dt64_to_dtstruct(local_value, &dts) - result[i] = func_create(value, dts, tz, freq, fold) + use_tzlocal = True else: trans, deltas, typ = get_dst_info(tz) - - if typ not in ['pytz', 'dateutil']: + if typ not in ["pytz", "dateutil"]: # static/fixed; in this case we know that len(delta) == 1 + use_fixed = True delta = deltas[0] - for i in range(n): - value = arr[i] - if value == NPY_NAT: - result[i] = NaT - else: - # Adjust datetime64 timestamp, recompute datetimestruct - dt64_to_dtstruct(value + delta, &dts) - result[i] = func_create(value, dts, tz, freq, fold) + else: + pos = trans.searchsorted(arr, side="right") - 1 + use_pytz = typ == "pytz" - elif typ == 'dateutil': - # no zone-name change for dateutil tzs - dst etc - # represented in single object. - for i in range(n): - value = arr[i] - if value == NPY_NAT: - result[i] = NaT - else: - # Adjust datetime64 timestamp, recompute datetimestruct - pos = trans.searchsorted(value, side='right') - 1 - dt64_to_dtstruct(value + deltas[pos], &dts) - result[i] = func_create(value, dts, tz, freq, fold) + for i in range(n): + new_tz = tz + value = arr[i] + + if value == NPY_NAT: + result[i] = NaT else: - # pytz - for i in range(n): - value = arr[i] - if value == NPY_NAT: - result[i] = NaT - else: - # Adjust datetime64 timestamp, recompute datetimestruct - pos = trans.searchsorted(value, side='right') - 1 - # find right representation of dst etc in pytz timezone - new_tz = tz._tzinfos[tz._transition_info[pos]] + if use_utc: + local_value = value + elif use_tzlocal: + local_value = tz_convert_utc_to_tzlocal(value, tz) + elif use_fixed: + local_value = value + delta + elif not use_pytz: + # i.e. dateutil + # no zone-name change for dateutil tzs - dst etc + # represented in single object. + local_value = value + deltas[pos[i]] + else: + # pytz + # find right representation of dst etc in pytz timezone + new_tz = tz._tzinfos[tz._transition_info[pos[i]]] + local_value = value + deltas[pos[i]] - dt64_to_dtstruct(value + deltas[pos], &dts) - result[i] = func_create(value, dts, new_tz, freq, fold) + dt64_to_dtstruct(local_value, &dts) + result[i] = func_create(value, dts, new_tz, freq, fold) return result @@ -264,7 +248,7 @@ def _test_parse_iso8601(ts: str): check_dts_bounds(&obj.dts) if out_local == 1: obj.tzinfo = pytz.FixedOffset(out_tzoffset) - obj.value = tz_convert_single(obj.value, obj.tzinfo, UTC) + obj.value = tz_localize_to_utc_single(obj.value, obj.tzinfo) return Timestamp(obj.value, tz=obj.tzinfo) else: return Timestamp(obj.value) @@ -358,8 +342,8 @@ def format_array_from_datetime( def array_with_unit_to_datetime( ndarray values, - object unit, - str errors='coerce' + str unit, + str errors="coerce" ): """ Convert the ndarray to datetime according to the time unit. @@ -379,7 +363,7 @@ def array_with_unit_to_datetime( ---------- values : ndarray of object Date-like objects to convert. - unit : object + unit : str Time unit to use during conversion. errors : str, default 'raise' Error behavior when parsing. @@ -420,41 +404,26 @@ def array_with_unit_to_datetime( # then need to iterate if values.dtype.kind == "i": # Note: this condition makes the casting="same_kind" redundant - iresult = values.astype("i8", casting="same_kind", copy=False) + iresult = values.astype('i8', casting='same_kind', copy=False) # fill by comparing to NPY_NAT constant mask = iresult == NPY_NAT iresult[mask] = 0 - fvalues = iresult.astype("f8") * m - need_to_iterate = False - - # GH20445 - if values.dtype.kind == "f": - fresult = values.astype("f8", casting="same_kind", copy=False) - # fill by comparing to np.nan constant - mask = fresult == np.nan - fresult[mask] = 0.0 - m_as_float = m - fvalues = fresult.astype("f8") * m_as_float + fvalues = iresult.astype('f8') * m need_to_iterate = False # check the bounds if not need_to_iterate: + if ((fvalues < Timestamp.min.value).any() or (fvalues > Timestamp.max.value).any()): raise OutOfBoundsDatetime(f"cannot convert input with unit '{unit}'") - # GH20445 - if values.dtype.kind == "i": - result = (iresult * m).astype("M8[ns]") - iresult = result.view("i8") - iresult[mask] = NPY_NAT - return result, tz - elif values.dtype.kind == "f": - result = (fresult * m_as_float).astype("M8[ns]") - fresult = result.view("f8") - fresult[mask] = NPY_NAT - return result, tz - result = np.empty(n, dtype="M8[ns]") - iresult = result.view("i8") + result = (iresult * m).astype('M8[ns]') + iresult = result.view('i8') + iresult[mask] = NPY_NAT + return result, tz + + result = np.empty(n, dtype='M8[ns]') + iresult = result.view('i8') try: for i in range(n): @@ -737,7 +706,7 @@ cpdef array_to_datetime( # dateutil.tz.tzoffset objects out_tzoffset_vals.add(out_tzoffset * 60.) tz = pytz.FixedOffset(out_tzoffset) - value = tz_convert_single(value, tz, UTC) + value = tz_localize_to_utc_single(value, tz) out_local = 0 out_tzoffset = 0 else: From 9d47f14922bbb087a94287d2c1d130d9112e02a6 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Wed, 8 Jul 2020 21:24:38 +0000 Subject: [PATCH 14/52] simplified 'if not need_to_iterate' branch --- pandas/_libs/tslib.pyx | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index d63521d4c90ab..7d14b22f99ef0 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -404,11 +404,21 @@ def array_with_unit_to_datetime( # then need to iterate if values.dtype.kind == "i": # Note: this condition makes the casting="same_kind" redundant - iresult = values.astype('i8', casting='same_kind', copy=False) + iresult = values.astype("i8", casting="same_kind", copy=False) # fill by comparing to NPY_NAT constant mask = iresult == NPY_NAT iresult[mask] = 0 - fvalues = iresult.astype('f8') * m + fvalues = iresult.astype("f8") * m + need_to_iterate = False + + # GH20445 + elif values.dtype.kind == "f": + # Note: this condition makes the casting="same_kind" redundant + fresult = values.astype("f8", casting="same_kind", copy=False) + # fill by comparing to NPY_NAT constant + mask = fresult == NPY_NAT + fresult[mask] = 0 + fvalues = fresult * (m) need_to_iterate = False # check the bounds @@ -417,7 +427,8 @@ def array_with_unit_to_datetime( if ((fvalues < Timestamp.min.value).any() or (fvalues > Timestamp.max.value).any()): raise OutOfBoundsDatetime(f"cannot convert input with unit '{unit}'") - result = (iresult * m).astype('M8[ns]') + # GH20445 + result = fvalues.astype('M8[ns]') iresult = result.view('i8') iresult[mask] = NPY_NAT return result, tz From a959535346d37bf62dd6deec3e34151ab11ebf67 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Wed, 8 Jul 2020 21:48:31 +0000 Subject: [PATCH 15/52] update whatsnew --- doc/source/whatsnew/v1.1.0.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index a49b29d691692..dd75be2f1bfef 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -866,10 +866,16 @@ Performance improvements - Performance improvement in reductions (``sum``, ``prod``, ``min``, ``max``) for nullable (integer and Boolean) dtypes (:issue:`30982`, :issue:`33261`, :issue:`33442`). - Performance improvement in arithmetic operations between two :class:`DataFrame` objects (:issue:`32779`) - Performance improvement in :class:`pandas.core.groupby.RollingGroupby` (:issue:`34052`) +<<<<<<< HEAD - Performance improvement in arithmetic operations (``sub``, ``add``, ``mul``, ``div``) for :class:`MultiIndex` (:issue:`34297`) - Performance improvement in ``DataFrame[bool_indexer]`` when ``bool_indexer`` is a ``list`` (:issue:`33924`) - Significant performance improvement of :meth:`io.formats.style.Styler.render` with styles added with various ways such as :meth:`io.formats.style.Styler.apply`, :meth:`io.formats.style.Styler.applymap` or :meth:`io.formats.style.Styler.bar` (:issue:`19917`) +======= +- Performance improvement in arithmetic operations (sub, add, mul, div) for MultiIndex (:issue:`34297`) +- Performance improvement in `DataFrame[bool_indexer]` when `bool_indexer` is a list (:issue:`33924`) +- Performance improvement in :func:`to_datetime`, unit='s' for float64 (:issue:`20445`) +>>>>>>> update whatsnew .. --------------------------------------------------------------------------- .. _whatsnew_110.bug_fixes: From efbd6ba9a9013f768d3b4dc27ee1c118ea6b954a Mon Sep 17 00:00:00 2001 From: arw2019 Date: Wed, 8 Jul 2020 22:04:44 +0000 Subject: [PATCH 16/52] fixed string quotes --- pandas/_libs/tslib.pyx | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 7d14b22f99ef0..c596dc9f7075c 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -428,13 +428,13 @@ def array_with_unit_to_datetime( or (fvalues > Timestamp.max.value).any()): raise OutOfBoundsDatetime(f"cannot convert input with unit '{unit}'") # GH20445 - result = fvalues.astype('M8[ns]') - iresult = result.view('i8') + result = fvalues.astype("M8[ns]") + iresult = result.view("i8") iresult[mask] = NPY_NAT return result, tz - result = np.empty(n, dtype='M8[ns]') - iresult = result.view('i8') + result = np.empty(n, dtype="M8[ns]") + iresult = result.view("i8") try: for i in range(n): From 859b9a55a174b89decd92edb7cc36b61adf2ed35 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Wed, 8 Jul 2020 22:25:00 +0000 Subject: [PATCH 17/52] removed trailing whitespace --- pandas/_libs/tslib.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index c596dc9f7075c..b0d3c5660a8e3 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -428,7 +428,7 @@ def array_with_unit_to_datetime( or (fvalues > Timestamp.max.value).any()): raise OutOfBoundsDatetime(f"cannot convert input with unit '{unit}'") # GH20445 - result = fvalues.astype("M8[ns]") + result = fvalues.astype("M8[ns]") iresult = result.view("i8") iresult[mask] = NPY_NAT return result, tz From a4606a0f986a11aa3f05bce37576f5727c796fe7 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Fri, 24 Jul 2020 17:06:08 +0000 Subject: [PATCH 18/52] rebase tslib.pyx to master --- pandas/_libs/tslib.pyx | 2 -- 1 file changed, 2 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index b0d3c5660a8e3..41eefe775a4bd 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -411,7 +411,6 @@ def array_with_unit_to_datetime( fvalues = iresult.astype("f8") * m need_to_iterate = False - # GH20445 elif values.dtype.kind == "f": # Note: this condition makes the casting="same_kind" redundant fresult = values.astype("f8", casting="same_kind", copy=False) @@ -427,7 +426,6 @@ def array_with_unit_to_datetime( if ((fvalues < Timestamp.min.value).any() or (fvalues > Timestamp.max.value).any()): raise OutOfBoundsDatetime(f"cannot convert input with unit '{unit}'") - # GH20445 result = fvalues.astype("M8[ns]") iresult = result.view("i8") iresult[mask] = NPY_NAT From 1597253ac6bc4a913a8610a42437e24bdf535b01 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Fri, 24 Jul 2020 19:11:48 +0000 Subject: [PATCH 19/52] clean up + NPY_NAT->np.nan --- pandas/_libs/tslib.pyx | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 41eefe775a4bd..59d3b34b46c14 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -399,23 +399,21 @@ def array_with_unit_to_datetime( if is_raise: - # try a quick conversion to i8 + # try a quick conversion to i8/f8 # if we have nulls that are not type-compat # then need to iterate + if values.dtype.kind == "i": - # Note: this condition makes the casting="same_kind" redundant - iresult = values.astype("i8", casting="same_kind", copy=False) - # fill by comparing to NPY_NAT constant - mask = iresult == NPY_NAT + iresult = values.astype("i8", copy=False) + # fill missing values by comparing to np.nan + mask = iresult == np.nan iresult[mask] = 0 fvalues = iresult.astype("f8") * m need_to_iterate = False - elif values.dtype.kind == "f": - # Note: this condition makes the casting="same_kind" redundant - fresult = values.astype("f8", casting="same_kind", copy=False) - # fill by comparing to NPY_NAT constant - mask = fresult == NPY_NAT + fresult = values.astype("f8", copy=False) + # fill missing values by comparing to np.nan + mask = fresult == np.nan fresult[mask] = 0 fvalues = fresult * (m) need_to_iterate = False @@ -426,13 +424,13 @@ def array_with_unit_to_datetime( if ((fvalues < Timestamp.min.value).any() or (fvalues > Timestamp.max.value).any()): raise OutOfBoundsDatetime(f"cannot convert input with unit '{unit}'") - result = fvalues.astype("M8[ns]") - iresult = result.view("i8") - iresult[mask] = NPY_NAT + result = fvalues.astype('M8[ns]') + iresult = result.view('i8') + iresult[mask] = np.nan return result, tz - result = np.empty(n, dtype="M8[ns]") - iresult = result.view("i8") + result = np.empty(n, dtype='M8[ns]') + iresult = result.view('i8') try: for i in range(n): From 28397b07f0c7a6cd01a1bcf4c442405e78ea9246 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Fri, 24 Jul 2020 19:28:46 +0000 Subject: [PATCH 20/52] added benchmarks --- asv_bench/benchmarks/timeseries.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/asv_bench/benchmarks/timeseries.py b/asv_bench/benchmarks/timeseries.py index b494dbd8a38fa..cf40e046e0cfd 100644 --- a/asv_bench/benchmarks/timeseries.py +++ b/asv_bench/benchmarks/timeseries.py @@ -263,6 +263,19 @@ def time_lookup_and_cleanup(self): self.ts.index._cleanup() +class ToDatetimeFromIntsFloats: + def setup(self): + timestamp_seconds_int = Series(range(1521080307, 1521685107), dtype="int64") + timestamp_seconds_float = timestamp_seconds_int.astype("float64") + + def to_datetime_int(self): + to_datetime(timestamp_seconds_int, unit="s") + + # float64 should about the same as int64 + def to_datetime_float(self): + to_datetime(timestamp_seconds_float, unit="s") + + class ToDatetimeYYYYMMDD: def setup(self): rng = date_range(start="1/1/2000", periods=10000, freq="D") From 188868138e8b0c4abb22ab33ff9b25974f290770 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Fri, 24 Jul 2020 19:34:02 +0000 Subject: [PATCH 21/52] revert changes to whatsnew --- doc/source/whatsnew/v1.1.0.rst | 6 ------ 1 file changed, 6 deletions(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index dd75be2f1bfef..a49b29d691692 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -866,16 +866,10 @@ Performance improvements - Performance improvement in reductions (``sum``, ``prod``, ``min``, ``max``) for nullable (integer and Boolean) dtypes (:issue:`30982`, :issue:`33261`, :issue:`33442`). - Performance improvement in arithmetic operations between two :class:`DataFrame` objects (:issue:`32779`) - Performance improvement in :class:`pandas.core.groupby.RollingGroupby` (:issue:`34052`) -<<<<<<< HEAD - Performance improvement in arithmetic operations (``sub``, ``add``, ``mul``, ``div``) for :class:`MultiIndex` (:issue:`34297`) - Performance improvement in ``DataFrame[bool_indexer]`` when ``bool_indexer`` is a ``list`` (:issue:`33924`) - Significant performance improvement of :meth:`io.formats.style.Styler.render` with styles added with various ways such as :meth:`io.formats.style.Styler.apply`, :meth:`io.formats.style.Styler.applymap` or :meth:`io.formats.style.Styler.bar` (:issue:`19917`) -======= -- Performance improvement in arithmetic operations (sub, add, mul, div) for MultiIndex (:issue:`34297`) -- Performance improvement in `DataFrame[bool_indexer]` when `bool_indexer` is a list (:issue:`33924`) -- Performance improvement in :func:`to_datetime`, unit='s' for float64 (:issue:`20445`) ->>>>>>> update whatsnew .. --------------------------------------------------------------------------- .. _whatsnew_110.bug_fixes: From ba5d3b5f841fbbc395c53de024e49074c7744d34 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Sat, 1 Aug 2020 17:13:52 +0000 Subject: [PATCH 22/52] fixes merge conflicts --- pandas/_libs/tslib.pyx | 175 +--------------------------- pandas/_libs/tslibs/conversion.pyx | 2 +- pandas/tests/io/json/test_pandas.py | 19 ++- 3 files changed, 17 insertions(+), 179 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 59d3b34b46c14..f0ddde25fd450 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -13,16 +13,8 @@ PyDateTime_IMPORT cimport numpy as cnp -<<<<<<< HEAD -<<<<<<< HEAD -from numpy cimport float64_t, int64_t, ndarray - -======= from numpy cimport float64_t, int64_t, ndarray, uint8_t ->>>>>>> change _libs/tslib.pyx -======= -from numpy cimport float64_t, int64_t, ndarray, uint8_t, intp_t ->>>>>>> revert merge error + import numpy as np cnp.import_array() @@ -44,11 +36,6 @@ from pandas._libs.util cimport is_datetime64_object, is_float_object, is_integer from pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime from pandas._libs.tslibs.parsing import parse_datetime_string -from pandas._libs.tslibs.timezones cimport ( - get_dst_info, - is_utc, - is_tzlocal, -) from pandas._libs.tslibs.conversion cimport ( _TSObject, cast_from_unit, @@ -64,167 +51,9 @@ from pandas._libs.tslibs.timestamps cimport _Timestamp from pandas._libs.tslibs.timestamps import Timestamp -from pandas._libs.tslibs.tzconversion cimport ( - tz_convert_utc_to_tzlocal, - tz_localize_to_utc_single, -) - # Note: this is the only non-tslibs intra-pandas dependency here from pandas._libs.missing cimport checknull_with_nat_and_na - - -cdef inline object create_datetime_from_ts( - int64_t value, - npy_datetimestruct dts, - tzinfo tz, - object freq, - bint fold, -): - """ - Convenience routine to construct a datetime.datetime from its parts. - """ - return datetime( - dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us, tz, fold=fold - ) - - -cdef inline object create_date_from_ts( - int64_t value, - npy_datetimestruct dts, - tzinfo tz, - object freq, - bint fold -): - """ - Convenience routine to construct a datetime.date from its parts. - """ - # GH 25057 add fold argument to match other func_create signatures - return date(dts.year, dts.month, dts.day) - - -cdef inline object create_time_from_ts( - int64_t value, - npy_datetimestruct dts, - tzinfo tz, - object freq, - bint fold -): - """ - Convenience routine to construct a datetime.time from its parts. - """ - return time(dts.hour, dts.min, dts.sec, dts.us, tz, fold=fold) - - -@cython.wraparound(False) -@cython.boundscheck(False) -def ints_to_pydatetime( - const int64_t[:] arr, - tzinfo tz=None, - object freq=None, - bint fold=False, - str box="datetime" -): - """ - Convert an i8 repr to an ndarray of datetimes, date, time or Timestamp. - - Parameters - ---------- - arr : array of i8 - tz : str, optional - convert to this timezone - freq : str/Offset, optional - freq to convert - fold : bint, default is 0 - Due to daylight saving time, one wall clock time can occur twice - when shifting from summer to winter time; fold describes whether the - datetime-like corresponds to the first (0) or the second time (1) - the wall clock hits the ambiguous time - - .. versionadded:: 1.1.0 - box : {'datetime', 'timestamp', 'date', 'time'}, default 'datetime' - * If datetime, convert to datetime.datetime - * If date, convert to datetime.date - * If time, convert to datetime.time - * If Timestamp, convert to pandas.Timestamp - - Returns - ------- - ndarray of dtype specified by box - """ - cdef: - Py_ssize_t i, n = len(arr) - ndarray[int64_t] trans - int64_t[:] deltas - intp_t[:] pos - npy_datetimestruct dts - object dt, new_tz - str typ - int64_t value, local_value, delta = NPY_NAT # dummy for delta - ndarray[object] result = np.empty(n, dtype=object) - object (*func_create)(int64_t, npy_datetimestruct, tzinfo, object, bint) - bint use_utc = False, use_tzlocal = False, use_fixed = False - bint use_pytz = False - - if box == "date": - assert (tz is None), "tz should be None when converting to date" - - func_create = create_date_from_ts - elif box == "timestamp": - func_create = create_timestamp_from_ts - - if isinstance(freq, str): - freq = to_offset(freq) - elif box == "time": - func_create = create_time_from_ts - elif box == "datetime": - func_create = create_datetime_from_ts - else: - raise ValueError( - "box must be one of 'datetime', 'date', 'time' or 'timestamp'" - ) - - if is_utc(tz) or tz is None: - use_utc = True - elif is_tzlocal(tz): - use_tzlocal = True - else: - trans, deltas, typ = get_dst_info(tz) - if typ not in ["pytz", "dateutil"]: - # static/fixed; in this case we know that len(delta) == 1 - use_fixed = True - delta = deltas[0] - else: - pos = trans.searchsorted(arr, side="right") - 1 - use_pytz = typ == "pytz" - - for i in range(n): - new_tz = tz - value = arr[i] - - if value == NPY_NAT: - result[i] = NaT - else: - if use_utc: - local_value = value - elif use_tzlocal: - local_value = tz_convert_utc_to_tzlocal(value, tz) - elif use_fixed: - local_value = value + delta - elif not use_pytz: - # i.e. dateutil - # no zone-name change for dateutil tzs - dst etc - # represented in single object. - local_value = value + deltas[pos[i]] - else: - # pytz - # find right representation of dst etc in pytz timezone - new_tz = tz._tzinfos[tz._transition_info[pos[i]]] - local_value = value + deltas[pos[i]] - - dt64_to_dtstruct(local_value, &dts) - result[i] = func_create(value, dts, new_tz, freq, fold) - - return result +from pandas._libs.tslibs.tzconversion cimport tz_localize_to_utc_single def _test_parse_iso8601(ts: str): diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 15de374716827..adf1dfbc1ac72 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -120,7 +120,7 @@ cdef inline int64_t cast_from_unit(object ts, str unit) except? -1: return (base * m) + (frac * m) -cpdef inline object precision_from_unit(str unit): +cpdef inline (int64_t, int) precision_from_unit(str unit): """ Return a casting of the unit represented to nanoseconds + the precision to round the fractional part. diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 483a1dd407291..c4db0170ecc90 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -1250,17 +1250,26 @@ def test_to_json_large_numbers(self, bigNum): json = series.to_json() expected = '{"articleId":' + str(bigNum) + "}" assert json == expected - # GH 20599 + + df = DataFrame(bigNum, dtype=object, index=["articleId"], columns=[0]) + json = df.to_json() + expected = '{"0":{"articleId":' + str(bigNum) + "}}" + assert json == expected + + @pytest.mark.parametrize("bigNum", [sys.maxsize + 1, -(sys.maxsize + 2)]) + @pytest.mark.skipif(not compat.IS64, reason="GH-35279") + def test_read_json_large_numbers(self, bigNum): + # GH20599 + + series = Series(bigNum, dtype=object, index=["articleId"]) + json = '{"articleId":' + str(bigNum) + "}" with pytest.raises(ValueError): json = StringIO(json) result = read_json(json) tm.assert_series_equal(series, result) df = DataFrame(bigNum, dtype=object, index=["articleId"], columns=[0]) - json = df.to_json() - expected = '{"0":{"articleId":' + str(bigNum) + "}}" - assert json == expected - # GH 20599 + json = '{"0":{"articleId":' + str(bigNum) + "}}" with pytest.raises(ValueError): json = StringIO(json) result = read_json(json) From c6d77465d483356ff51406b5247d15d45ee25f22 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Sat, 1 Aug 2020 17:15:17 +0000 Subject: [PATCH 23/52] fixes merge conflicts --- pandas/_libs/tslib.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index f0ddde25fd450..74345ca23b773 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -13,7 +13,7 @@ PyDateTime_IMPORT cimport numpy as cnp -from numpy cimport float64_t, int64_t, ndarray, uint8_t +from numpy cimport float64_t, int64_t, ndarray import numpy as np From 7f684481972c8e4ccf0a5ae31ce5e67239c04213 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Sun, 2 Aug 2020 04:16:16 +0000 Subject: [PATCH 24/52] rewrote cast in analogy to precision_from_unit --- pandas/_libs/tslib.pyx | 26 +++++++++++++++++--------- pandas/_libs/tslibs/conversion.pxd | 1 + 2 files changed, 18 insertions(+), 9 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 74345ca23b773..839854ebb3ade 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -39,6 +39,7 @@ from pandas._libs.tslibs.parsing import parse_datetime_string from pandas._libs.tslibs.conversion cimport ( _TSObject, cast_from_unit, + precision_from_unit, convert_datetime_to_tsobject, get_datetime64_nanos, ) @@ -205,6 +206,7 @@ def array_with_unit_to_datetime( cdef: Py_ssize_t i, j, n=len(values) int64_t m + int prec = 0 ndarray[float64_t] fvalues bint is_ignore = errors=='ignore' bint is_coerce = errors=='coerce' @@ -224,10 +226,9 @@ def array_with_unit_to_datetime( result, tz = array_to_datetime(values.astype(object), errors=errors) return result, tz - m = cast_from_unit(None, unit) + m, p = precision_from_unit(unit) if is_raise: - # try a quick conversion to i8/f8 # if we have nulls that are not type-compat # then need to iterate @@ -237,25 +238,32 @@ def array_with_unit_to_datetime( # fill missing values by comparing to np.nan mask = iresult == np.nan iresult[mask] = 0 - fvalues = iresult.astype("f8") * m + fvalues = iresult.astype("f8") need_to_iterate = False elif values.dtype.kind == "f": fresult = values.astype("f8", copy=False) # fill missing values by comparing to np.nan mask = fresult == np.nan fresult[mask] = 0 - fvalues = fresult * (m) + fvalues = fresult need_to_iterate = False - # check the bounds if not need_to_iterate: - + # check the bounds if ((fvalues < Timestamp.min.value).any() or (fvalues > Timestamp.max.value).any()): raise OutOfBoundsDatetime(f"cannot convert input with unit '{unit}'") - result = fvalues.astype('M8[ns]') - iresult = result.view('i8') - iresult[mask] = np.nan + + if values.dtype.kind == 'i': + result = iresult * m + + elif values.dtype.kind == 'f': + base = fresult.view("i8") + frac = fresult - base + if prec: + frac = round(frac, prec) + result = (base*m).astype("i8") + (frac*m).astype("i8") + return result, tz result = np.empty(n, dtype='M8[ns]') diff --git a/pandas/_libs/tslibs/conversion.pxd b/pandas/_libs/tslibs/conversion.pxd index 73772e5ab4577..56f5481b7e781 100644 --- a/pandas/_libs/tslibs/conversion.pxd +++ b/pandas/_libs/tslibs/conversion.pxd @@ -24,5 +24,6 @@ cdef int64_t get_datetime64_nanos(object val) except? -1 cpdef datetime localize_pydatetime(datetime dt, object tz) cdef int64_t cast_from_unit(object ts, str unit) except? -1 +cpdef (int64_t, int) precision_from_unit(str unit) cdef int64_t normalize_i8_stamp(int64_t local_val) nogil From d9fb88ff6ca79690fe1e9c4468e6d93acf25b896 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Sun, 2 Aug 2020 07:12:49 +0000 Subject: [PATCH 25/52] use np.isnan for floats --- pandas/_libs/tslib.pyx | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 839854ebb3ade..055a14d25ddb3 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -235,15 +235,15 @@ def array_with_unit_to_datetime( if values.dtype.kind == "i": iresult = values.astype("i8", copy=False) - # fill missing values by comparing to np.nan - mask = iresult == np.nan + # fill missing values by comparing to NPY_NAT + mask = iresult == NPY_NAT iresult[mask] = 0 fvalues = iresult.astype("f8") need_to_iterate = False elif values.dtype.kind == "f": fresult = values.astype("f8", copy=False) # fill missing values by comparing to np.nan - mask = fresult == np.nan + mask = np.isnan(fresult) fresult[mask] = 0 fvalues = fresult need_to_iterate = False @@ -258,12 +258,13 @@ def array_with_unit_to_datetime( result = iresult * m elif values.dtype.kind == 'f': - base = fresult.view("i8") + base = fresult.astype("i8") frac = fresult - base if prec: frac = round(frac, prec) result = (base*m).astype("i8") + (frac*m).astype("i8") + result = result.astype('M8[ns]') return result, tz result = np.empty(n, dtype='M8[ns]') From b2119b7d3d2c8c736b742c44471991b66e64764b Mon Sep 17 00:00:00 2001 From: arw2019 Date: Sun, 2 Aug 2020 07:31:21 +0000 Subject: [PATCH 26/52] revert to fill in mask in final result --- pandas/_libs/tslib.pyx | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 055a14d25ddb3..89e9091f31532 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -255,7 +255,7 @@ def array_with_unit_to_datetime( raise OutOfBoundsDatetime(f"cannot convert input with unit '{unit}'") if values.dtype.kind == 'i': - result = iresult * m + result = (iresult * m) elif values.dtype.kind == 'f': base = fresult.astype("i8") @@ -263,8 +263,11 @@ def array_with_unit_to_datetime( if prec: frac = round(frac, prec) result = (base*m).astype("i8") + (frac*m).astype("i8") - + result = result.astype('M8[ns]') + + iresult = result.view('i8') + iresult[mask] = NPY_NAT return result, tz result = np.empty(n, dtype='M8[ns]') From 2c39cd31c34bea1ec574288ce76ba2758ef8b6fa Mon Sep 17 00:00:00 2001 From: arw2019 Date: Sun, 2 Aug 2020 22:02:17 +0000 Subject: [PATCH 27/52] fix sas tests --- pandas/tests/io/sas/data/datetime.csv | 4 ++-- pandas/tests/io/sas/test_sas7bdat.py | 13 ++++++------- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/pandas/tests/io/sas/data/datetime.csv b/pandas/tests/io/sas/data/datetime.csv index 6126f6d04eaf0..f0d82f7fc494e 100644 --- a/pandas/tests/io/sas/data/datetime.csv +++ b/pandas/tests/io/sas/data/datetime.csv @@ -1,5 +1,5 @@ Date1,Date2,DateTime,DateTimeHi,Taiw -1677-09-22,1677-09-22,1677-09-21 00:12:44,1677-09-21 00:12:43.145226,1912-01-01 +1677-09-22,1677-09-22,1677-09-21 00:12:44,1677-09-21 00:12:43.145225,1912-01-01 1960-01-01,1960-01-01,1960-01-01 00:00:00,1960-01-01 00:00:00.000000,1960-01-01 2016-02-29,2016-02-29,2016-02-29 23:59:59,2016-02-29 23:59:59.123456,2016-02-29 -2262-04-11,2262-04-11,2262-04-11 23:47:16,2262-04-11 23:47:16.854774,2262-04-11 +2262-04-11,2262-04-11,2262-04-11 23:47:16,2262-04-11 23:47:16.854775,2262-04-11 diff --git a/pandas/tests/io/sas/test_sas7bdat.py b/pandas/tests/io/sas/test_sas7bdat.py index 8c14f9de9f61c..b7790be39800e 100644 --- a/pandas/tests/io/sas/test_sas7bdat.py +++ b/pandas/tests/io/sas/test_sas7bdat.py @@ -175,6 +175,8 @@ def test_date_time(datapath): ) # GH 19732: Timestamps imported from sas will incur floating point errors df.iloc[:, 3] = df.iloc[:, 3].dt.round("us") + df0["DateTimeHi"] = df0["DateTimeHi"].astype("object") + df["DateTimeHi"] = df["DateTimeHi"].astype("object") tm.assert_frame_equal(df, df0) @@ -250,12 +252,9 @@ def test_max_sas_date(datapath): { "text": ["max", "normal"], "dt_as_float": [253717747199.999, 1880323199.999], - "dt_as_dt": [ - datetime(9999, 12, 29, 23, 59, 59, 999000), - datetime(2019, 8, 1, 23, 59, 59, 999000), - ], + "dt_as_dt": [pd.NaT, datetime(2019, 8, 1, 23, 59, 59, 999000)], "date_as_float": [2936547.0, 21762.0], - "date_as_date": [datetime(9999, 12, 29), datetime(2019, 8, 1)], + "date_as_date": [pd.NaT, datetime(2019, 8, 1)], }, columns=["text", "dt_as_float", "dt_as_dt", "date_as_float", "date_as_date"], ) @@ -287,9 +286,9 @@ def test_max_sas_date_iterator(datapath): { "text": ["max"], "dt_as_float": [253717747199.999], - "dt_as_dt": [datetime(9999, 12, 29, 23, 59, 59, 999000)], + "dt_as_dt": pd.to_datetime([pd.NaT]), "date_as_float": [2936547.0], - "date_as_date": [datetime(9999, 12, 29)], + "date_as_date": pd.to_datetime([pd.NaT]), }, columns=col_order, ), From 64c94fb7e1d56fd94ffc450f75eef4d9d1239f00 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Mon, 3 Aug 2020 15:00:41 +0000 Subject: [PATCH 28/52] rewrite cast, rounding, missing values --- pandas/_libs/tslib.pyx | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 89e9091f31532..065376ab4fca5 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -242,8 +242,8 @@ def array_with_unit_to_datetime( need_to_iterate = False elif values.dtype.kind == "f": fresult = values.astype("f8", copy=False) - # fill missing values by comparing to np.nan - mask = np.isnan(fresult) + # fill missing values by comparing to NPY_NAT + mask = fresult == NPY_NAT fresult[mask] = 0 fvalues = fresult need_to_iterate = False @@ -255,19 +255,17 @@ def array_with_unit_to_datetime( raise OutOfBoundsDatetime(f"cannot convert input with unit '{unit}'") if values.dtype.kind == 'i': - result = (iresult * m) + result = (iresult * m).astype('M8[ns]') elif values.dtype.kind == 'f': - base = fresult.astype("i8") - frac = fresult - base + result = fresult * m if prec: - frac = round(frac, prec) - result = (base*m).astype("i8") + (frac*m).astype("i8") - - result = result.astype('M8[ns]') + result = round(result, prec) + result = result.astype("M8[ns]") iresult = result.view('i8') iresult[mask] = NPY_NAT + return result, tz result = np.empty(n, dtype='M8[ns]') From dd519dabff1d68e9844639390c44dd519bb23150 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Tue, 4 Aug 2020 00:53:28 +0000 Subject: [PATCH 29/52] change json test_date_unit --- pandas/tests/io/json/test_pandas.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index c4db0170ecc90..59e4cea9c4f1a 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -915,9 +915,10 @@ def test_date_unit(self, unit, datetime_frame): result = read_json(json, date_unit=unit) tm.assert_frame_equal(result, df) + # GH 35027 - this no longer works # detect date unit - result = read_json(json, date_unit=None) - tm.assert_frame_equal(result, df) + # result = read_json(json, date_unit=None) + # tm.assert_frame_equal(result, df) def test_weird_nested_json(self): # this used to core dump the parser From b69df7af2dab334bf245ffe4f0882603d81ed4fc Mon Sep 17 00:00:00 2001 From: arw2019 Date: Thu, 10 Sep 2020 19:30:33 +0000 Subject: [PATCH 30/52] revert changes to tests --- pandas/tests/io/json/test_pandas.py | 5 ++--- pandas/tests/io/sas/data/datetime.csv | 4 ++-- pandas/tests/io/sas/test_sas7bdat.py | 13 +++++++------ 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 962376a7ad112..13152f01abb04 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -912,10 +912,9 @@ def test_date_unit(self, unit, datetime_frame): result = read_json(json, date_unit=unit) tm.assert_frame_equal(result, df) - # GH 35027 - this no longer works # detect date unit - # result = read_json(json, date_unit=None) - # tm.assert_frame_equal(result, df) + result = read_json(json, date_unit=None) + tm.assert_frame_equal(result, df) def test_weird_nested_json(self): # this used to core dump the parser diff --git a/pandas/tests/io/sas/data/datetime.csv b/pandas/tests/io/sas/data/datetime.csv index f0d82f7fc494e..6126f6d04eaf0 100644 --- a/pandas/tests/io/sas/data/datetime.csv +++ b/pandas/tests/io/sas/data/datetime.csv @@ -1,5 +1,5 @@ Date1,Date2,DateTime,DateTimeHi,Taiw -1677-09-22,1677-09-22,1677-09-21 00:12:44,1677-09-21 00:12:43.145225,1912-01-01 +1677-09-22,1677-09-22,1677-09-21 00:12:44,1677-09-21 00:12:43.145226,1912-01-01 1960-01-01,1960-01-01,1960-01-01 00:00:00,1960-01-01 00:00:00.000000,1960-01-01 2016-02-29,2016-02-29,2016-02-29 23:59:59,2016-02-29 23:59:59.123456,2016-02-29 -2262-04-11,2262-04-11,2262-04-11 23:47:16,2262-04-11 23:47:16.854775,2262-04-11 +2262-04-11,2262-04-11,2262-04-11 23:47:16,2262-04-11 23:47:16.854774,2262-04-11 diff --git a/pandas/tests/io/sas/test_sas7bdat.py b/pandas/tests/io/sas/test_sas7bdat.py index b7790be39800e..8c14f9de9f61c 100644 --- a/pandas/tests/io/sas/test_sas7bdat.py +++ b/pandas/tests/io/sas/test_sas7bdat.py @@ -175,8 +175,6 @@ def test_date_time(datapath): ) # GH 19732: Timestamps imported from sas will incur floating point errors df.iloc[:, 3] = df.iloc[:, 3].dt.round("us") - df0["DateTimeHi"] = df0["DateTimeHi"].astype("object") - df["DateTimeHi"] = df["DateTimeHi"].astype("object") tm.assert_frame_equal(df, df0) @@ -252,9 +250,12 @@ def test_max_sas_date(datapath): { "text": ["max", "normal"], "dt_as_float": [253717747199.999, 1880323199.999], - "dt_as_dt": [pd.NaT, datetime(2019, 8, 1, 23, 59, 59, 999000)], + "dt_as_dt": [ + datetime(9999, 12, 29, 23, 59, 59, 999000), + datetime(2019, 8, 1, 23, 59, 59, 999000), + ], "date_as_float": [2936547.0, 21762.0], - "date_as_date": [pd.NaT, datetime(2019, 8, 1)], + "date_as_date": [datetime(9999, 12, 29), datetime(2019, 8, 1)], }, columns=["text", "dt_as_float", "dt_as_dt", "date_as_float", "date_as_date"], ) @@ -286,9 +287,9 @@ def test_max_sas_date_iterator(datapath): { "text": ["max"], "dt_as_float": [253717747199.999], - "dt_as_dt": pd.to_datetime([pd.NaT]), + "dt_as_dt": [datetime(9999, 12, 29, 23, 59, 59, 999000)], "date_as_float": [2936547.0], - "date_as_date": pd.to_datetime([pd.NaT]), + "date_as_date": [datetime(9999, 12, 29)], }, columns=col_order, ), From d37b45c93d0b40f2a83aafc8100d683a30b554f7 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Thu, 10 Sep 2020 21:08:18 +0000 Subject: [PATCH 31/52] more refactoring --- pandas/_libs/tslib.pyx | 36 ++++++++++++++---------------------- 1 file changed, 14 insertions(+), 22 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 065376ab4fca5..e676a0e1db09b 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -219,9 +219,11 @@ def array_with_unit_to_datetime( assert is_ignore or is_coerce or is_raise - if unit == 'ns': - if issubclass(values.dtype.type, np.integer): - result = values.astype('M8[ns]') + if unit == "ns": + if issubclass(values.dtype.type, np.integer) or issubclass( + values.dtype.type, np.float + ): + result = values.astype("M8[ns]") else: result, tz = array_to_datetime(values.astype(object), errors=errors) return result, tz @@ -233,37 +235,27 @@ def array_with_unit_to_datetime( # if we have nulls that are not type-compat # then need to iterate - if values.dtype.kind == "i": + if values.dtype.kind == "i" or values.dtype.kind == "f": iresult = values.astype("i8", copy=False) # fill missing values by comparing to NPY_NAT mask = iresult == NPY_NAT iresult[mask] = 0 - fvalues = iresult.astype("f8") - need_to_iterate = False - elif values.dtype.kind == "f": - fresult = values.astype("f8", copy=False) - # fill missing values by comparing to NPY_NAT - mask = fresult == NPY_NAT - fresult[mask] = 0 - fvalues = fresult + fvalues = iresult.astype("f8") * m need_to_iterate = False if not need_to_iterate: # check the bounds - if ((fvalues < Timestamp.min.value).any() - or (fvalues > Timestamp.max.value).any()): + if (fvalues < Timestamp.min.value).any() or ( + fvalues > Timestamp.max.value + ).any(): raise OutOfBoundsDatetime(f"cannot convert input with unit '{unit}'") - if values.dtype.kind == 'i': - result = (iresult * m).astype('M8[ns]') + if values.dtype.kind == "f" and prec: + fvalues = round(fvalues, prec) - elif values.dtype.kind == 'f': - result = fresult * m - if prec: - result = round(result, prec) - result = result.astype("M8[ns]") + result = fvalues.astype("M8[ns]") - iresult = result.view('i8') + iresult = result.view("i8") iresult[mask] = NPY_NAT return result, tz From 05fab52b69929f8d7f7d0465208fef61e8fc015c Mon Sep 17 00:00:00 2001 From: arw2019 Date: Thu, 10 Sep 2020 21:46:35 +0000 Subject: [PATCH 32/52] switch np.float -> np.float_ --- pandas/_libs/tslib.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index e676a0e1db09b..672ea6e60c568 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -221,7 +221,7 @@ def array_with_unit_to_datetime( if unit == "ns": if issubclass(values.dtype.type, np.integer) or issubclass( - values.dtype.type, np.float + values.dtype.type, np.float_ ): result = values.astype("M8[ns]") else: From 38a533f3f5851bfb85953bd9af952f8eb954869e Mon Sep 17 00:00:00 2001 From: arw2019 Date: Thu, 10 Sep 2020 21:47:00 +0000 Subject: [PATCH 33/52] rounding now works --- pandas/tests/tools/test_to_datetime.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index d2049892705ea..df16f37201719 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -1217,10 +1217,8 @@ def test_unit_mixed(self, cache): @pytest.mark.parametrize("cache", [True, False]) def test_unit_rounding(self, cache): - # GH 14156: argument will incur floating point errors but no - # premature rounding result = pd.to_datetime(1434743731.8770001, unit="s", cache=cache) - expected = pd.Timestamp("2015-06-19 19:55:31.877000093") + expected = pd.Timestamp("2015-06-19 19:55:31") assert result == expected @pytest.mark.parametrize("cache", [True, False]) From b1d8149a137669d5b8491f7b4a05e6dc60f05f03 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Thu, 10 Sep 2020 22:32:05 +0000 Subject: [PATCH 34/52] rewrite rounding step in array_with_unit_to_datetime --- pandas/_libs/tslib.pyx | 4 ++-- pandas/tests/tools/test_to_datetime.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 672ea6e60c568..5686040a03835 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -240,7 +240,7 @@ def array_with_unit_to_datetime( # fill missing values by comparing to NPY_NAT mask = iresult == NPY_NAT iresult[mask] = 0 - fvalues = iresult.astype("f8") * m + fvalues = values.astype("f8") * m need_to_iterate = False if not need_to_iterate: @@ -250,7 +250,7 @@ def array_with_unit_to_datetime( ).any(): raise OutOfBoundsDatetime(f"cannot convert input with unit '{unit}'") - if values.dtype.kind == "f" and prec: + if prec: fvalues = round(fvalues, prec) result = fvalues.astype("M8[ns]") diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index df16f37201719..b21f89fc6bc86 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -1218,7 +1218,7 @@ def test_unit_mixed(self, cache): @pytest.mark.parametrize("cache", [True, False]) def test_unit_rounding(self, cache): result = pd.to_datetime(1434743731.8770001, unit="s", cache=cache) - expected = pd.Timestamp("2015-06-19 19:55:31") + expected = pd.Timestamp("2015-06-19 19:55:31.877000192") assert result == expected @pytest.mark.parametrize("cache", [True, False]) From a6d8d9eae0796adcd2b3c9854026d20c4568acce Mon Sep 17 00:00:00 2001 From: Andrew Wieteska <48889395+arw2019@users.noreply.github.com> Date: Fri, 11 Sep 2020 12:54:11 -0400 Subject: [PATCH 35/52] Update pandas/_libs/tslib.pyx Co-authored-by: William Ayd --- pandas/_libs/tslib.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 5686040a03835..65b9efc202e62 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -220,7 +220,7 @@ def array_with_unit_to_datetime( assert is_ignore or is_coerce or is_raise if unit == "ns": - if issubclass(values.dtype.type, np.integer) or issubclass( + if issubclass(values.dtype.type, (np.integer, np.float)): values.dtype.type, np.float_ ): result = values.astype("M8[ns]") From e2e600b2c0df8850573680c30109279b6b4c7ac3 Mon Sep 17 00:00:00 2001 From: Andrew Wieteska <48889395+arw2019@users.noreply.github.com> Date: Fri, 11 Sep 2020 12:55:23 -0400 Subject: [PATCH 36/52] Update pandas/_libs/tslib.pyx Co-authored-by: William Ayd --- pandas/_libs/tslib.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 65b9efc202e62..70500128090c4 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -246,7 +246,7 @@ def array_with_unit_to_datetime( if not need_to_iterate: # check the bounds if (fvalues < Timestamp.min.value).any() or ( - fvalues > Timestamp.max.value + (fvalues > Timestamp.max.value).any() ).any(): raise OutOfBoundsDatetime(f"cannot convert input with unit '{unit}'") From c7a3b0876a93db231384c881f656510df7538d2c Mon Sep 17 00:00:00 2001 From: arw2019 Date: Fri, 11 Sep 2020 17:15:43 +0000 Subject: [PATCH 37/52] fix typo --- pandas/_libs/tslib.pyx | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 70500128090c4..68e339c3e772c 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -221,8 +221,6 @@ def array_with_unit_to_datetime( if unit == "ns": if issubclass(values.dtype.type, (np.integer, np.float)): - values.dtype.type, np.float_ - ): result = values.astype("M8[ns]") else: result, tz = array_to_datetime(values.astype(object), errors=errors) @@ -247,7 +245,7 @@ def array_with_unit_to_datetime( # check the bounds if (fvalues < Timestamp.min.value).any() or ( (fvalues > Timestamp.max.value).any() - ).any(): + ): raise OutOfBoundsDatetime(f"cannot convert input with unit '{unit}'") if prec: From c0c31cac1f16b6ab9e4cbf4ffb90d3d37746514f Mon Sep 17 00:00:00 2001 From: arw2019 Date: Fri, 11 Sep 2020 17:36:47 +0000 Subject: [PATCH 38/52] silence numpy-dev warning --- pandas/_libs/tslib.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 68e339c3e772c..45348ddf19c64 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -220,7 +220,7 @@ def array_with_unit_to_datetime( assert is_ignore or is_coerce or is_raise if unit == "ns": - if issubclass(values.dtype.type, (np.integer, np.float)): + if issubclass(values.dtype.type, (np.integer, np.float_)): result = values.astype("M8[ns]") else: result, tz = array_to_datetime(values.astype(object), errors=errors) From 59290a0b30c4558d31bf30bce88f35664ad32c3b Mon Sep 17 00:00:00 2001 From: arw2019 Date: Mon, 14 Sep 2020 02:22:00 +0000 Subject: [PATCH 39/52] feedback --- pandas/_libs/tslib.pyx | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 45348ddf19c64..c6a7a619c66a4 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -248,10 +248,13 @@ def array_with_unit_to_datetime( ): raise OutOfBoundsDatetime(f"cannot convert input with unit '{unit}'") - if prec: - fvalues = round(fvalues, prec) + if values.dtype.kind == "i": + result = (iresult * m).astype('M8[ns]') - result = fvalues.astype("M8[ns]") + if values.dtype.kind == "f": + if prec: + fvalues = round(fvalues, prec) + result = fvalues.astype("M8[ns]", copy=False) iresult = result.view("i8") iresult[mask] = NPY_NAT From 611dad0f80a733a2d10bdd6c7654e0efcf0988fa Mon Sep 17 00:00:00 2001 From: arw2019 Date: Mon, 14 Sep 2020 04:45:53 +0000 Subject: [PATCH 40/52] fix handling of iNaT with astype(float) --- pandas/_libs/tslib.pyx | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index c6a7a619c66a4..674075e02f896 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -238,7 +238,7 @@ def array_with_unit_to_datetime( # fill missing values by comparing to NPY_NAT mask = iresult == NPY_NAT iresult[mask] = 0 - fvalues = values.astype("f8") * m + fvalues = iresult.astype("f8") * m need_to_iterate = False if not need_to_iterate: @@ -252,10 +252,12 @@ def array_with_unit_to_datetime( result = (iresult * m).astype('M8[ns]') if values.dtype.kind == "f": + fresult = (values*m).astype("f8") + fresult[mask] = 0 if prec: - fvalues = round(fvalues, prec) - result = fvalues.astype("M8[ns]", copy=False) - + fresult = round(fresult, prec) + result = fresult.astype("M8[ns]", copy=False) + iresult = result.view("i8") iresult[mask] = NPY_NAT From 63fa94b777267ff0e90536e51372928e73efb064 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Mon, 14 Sep 2020 05:16:47 +0000 Subject: [PATCH 41/52] fix floating point errors in sas datetime test --- pandas/tests/io/sas/data/datetime.csv | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/io/sas/data/datetime.csv b/pandas/tests/io/sas/data/datetime.csv index 6126f6d04eaf0..f0d82f7fc494e 100644 --- a/pandas/tests/io/sas/data/datetime.csv +++ b/pandas/tests/io/sas/data/datetime.csv @@ -1,5 +1,5 @@ Date1,Date2,DateTime,DateTimeHi,Taiw -1677-09-22,1677-09-22,1677-09-21 00:12:44,1677-09-21 00:12:43.145226,1912-01-01 +1677-09-22,1677-09-22,1677-09-21 00:12:44,1677-09-21 00:12:43.145225,1912-01-01 1960-01-01,1960-01-01,1960-01-01 00:00:00,1960-01-01 00:00:00.000000,1960-01-01 2016-02-29,2016-02-29,2016-02-29 23:59:59,2016-02-29 23:59:59.123456,2016-02-29 -2262-04-11,2262-04-11,2262-04-11 23:47:16,2262-04-11 23:47:16.854774,2262-04-11 +2262-04-11,2262-04-11,2262-04-11 23:47:16,2262-04-11 23:47:16.854775,2262-04-11 From 76cd0eb3721c53e3c35692ac63d6ea07cfdf969f Mon Sep 17 00:00:00 2001 From: arw2019 Date: Tue, 15 Sep 2020 01:36:56 +0000 Subject: [PATCH 42/52] round floating point error manually in test --- pandas/tests/tools/test_to_datetime.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index b21f89fc6bc86..040a89aca8cca 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -1452,6 +1452,7 @@ def test_to_datetime_unit(self): ] + [NaT] ) + result = result.round("ms") tm.assert_series_equal(result, expected) s = pd.concat( From b308ba71da2f00e81948cb1c52443b5a263b2947 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Tue, 15 Sep 2020 01:44:47 +0000 Subject: [PATCH 43/52] add note in whatsnew 1.2 --- doc/source/whatsnew/v1.2.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 8b18b56929acd..77b6c717e7a86 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -222,6 +222,7 @@ Performance improvements - Performance improvement in :meth:`GroupBy.agg` with the ``numba`` engine (:issue:`35759`) - Performance improvements when creating :meth:`pd.Series.map` from a huge dictionary (:issue:`34717`) - Performance improvement in :meth:`GroupBy.transform` with the ``numba`` engine (:issue:`36240`) +- Performance improvement in :meth:`pd.to_datetime` with non-`ns` time unit for `float` `dtype` columns (:issue:`20445`) .. --------------------------------------------------------------------------- From 1aa7bb26b094aa2dd502f43e9cbaeded9aa22715 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Tue, 15 Sep 2020 03:21:56 +0000 Subject: [PATCH 44/52] remove trailing whitespaces --- pandas/_libs/tslib.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 674075e02f896..de232521dfa47 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -257,7 +257,7 @@ def array_with_unit_to_datetime( if prec: fresult = round(fresult, prec) result = fresult.astype("M8[ns]", copy=False) - + iresult = result.view("i8") iresult[mask] = NPY_NAT From a3f42df798b151691f8b492436d5a45682a8bda8 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Tue, 15 Sep 2020 03:24:49 +0000 Subject: [PATCH 45/52] fix typo in added benchmark --- asv_bench/benchmarks/timeseries.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/asv_bench/benchmarks/timeseries.py b/asv_bench/benchmarks/timeseries.py index cf40e046e0cfd..1d2f89ac0e273 100644 --- a/asv_bench/benchmarks/timeseries.py +++ b/asv_bench/benchmarks/timeseries.py @@ -265,15 +265,17 @@ def time_lookup_and_cleanup(self): class ToDatetimeFromIntsFloats: def setup(self): - timestamp_seconds_int = Series(range(1521080307, 1521685107), dtype="int64") - timestamp_seconds_float = timestamp_seconds_int.astype("float64") + self.timestamp_seconds_int = Series( + range(1521080307, 1521685107), dtype="int64" + ) + self.timestamp_seconds_float = timestamp_seconds_int.astype("float64") def to_datetime_int(self): - to_datetime(timestamp_seconds_int, unit="s") + to_datetime(self.timestamp_seconds_int, unit="s") # float64 should about the same as int64 def to_datetime_float(self): - to_datetime(timestamp_seconds_float, unit="s") + to_datetime(self.timestamp_seconds_float, unit="s") class ToDatetimeYYYYMMDD: From 8837ff4ef4a2629df99e52ed00c8eb12b76e9f64 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Tue, 15 Sep 2020 14:43:15 +0000 Subject: [PATCH 46/52] flake8 asv_bench --- asv_bench/benchmarks/timeseries.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/asv_bench/benchmarks/timeseries.py b/asv_bench/benchmarks/timeseries.py index 1d2f89ac0e273..a91e0386a0f3e 100644 --- a/asv_bench/benchmarks/timeseries.py +++ b/asv_bench/benchmarks/timeseries.py @@ -268,7 +268,7 @@ def setup(self): self.timestamp_seconds_int = Series( range(1521080307, 1521685107), dtype="int64" ) - self.timestamp_seconds_float = timestamp_seconds_int.astype("float64") + self.timestamp_seconds_float = self.timestamp_seconds_int.astype("float64") def to_datetime_int(self): to_datetime(self.timestamp_seconds_int, unit="s") From 1ff89d47b277a3723eab6c49ca93a5ef6920d5be Mon Sep 17 00:00:00 2001 From: arw2019 Date: Tue, 15 Sep 2020 15:46:53 +0000 Subject: [PATCH 47/52] reorder imports --- pandas/_libs/tslib.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index de232521dfa47..ca4006ef85403 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -39,9 +39,9 @@ from pandas._libs.tslibs.parsing import parse_datetime_string from pandas._libs.tslibs.conversion cimport ( _TSObject, cast_from_unit, - precision_from_unit, convert_datetime_to_tsobject, get_datetime64_nanos, + precision_from_unit, ) from pandas._libs.tslibs.nattype cimport ( NPY_NAT, From c238cece0bdb3b30e632d36d536c53ca311b5795 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Fri, 18 Sep 2020 17:41:52 +0000 Subject: [PATCH 48/52] styling fixes --- pandas/_libs/tslib.pyx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index ca4006ef85403..e1d3f8719fd28 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -249,10 +249,10 @@ def array_with_unit_to_datetime( raise OutOfBoundsDatetime(f"cannot convert input with unit '{unit}'") if values.dtype.kind == "i": - result = (iresult * m).astype('M8[ns]') + result = (iresult * m).astype("M8[ns]") if values.dtype.kind == "f": - fresult = (values*m).astype("f8") + fresult = (values * m).astype("f8") fresult[mask] = 0 if prec: fresult = round(fresult, prec) From 416035ba512c4af0d938de7072f980e6009a04d4 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Fri, 18 Sep 2020 17:59:59 +0000 Subject: [PATCH 49/52] restore/add comments re: floating point errors --- pandas/tests/tools/test_to_datetime.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index 040a89aca8cca..819474e1f32e7 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -1217,6 +1217,8 @@ def test_unit_mixed(self, cache): @pytest.mark.parametrize("cache", [True, False]) def test_unit_rounding(self, cache): + # GH 14156 & GH 20445: argument will incur floating point errors + # but no premature rounding result = pd.to_datetime(1434743731.8770001, unit="s", cache=cache) expected = pd.Timestamp("2015-06-19 19:55:31.877000192") assert result == expected @@ -1452,6 +1454,7 @@ def test_to_datetime_unit(self): ] + [NaT] ) + # GH20455 argument will incur floating point errors but no premature rounding result = result.round("ms") tm.assert_series_equal(result, expected) From 47c2b5ff2ab6cd8093249d8382de2aea98ce2aa7 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Fri, 18 Sep 2020 18:42:09 +0000 Subject: [PATCH 50/52] rewrote added benchmark --- asv_bench/benchmarks/timeseries.py | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/asv_bench/benchmarks/timeseries.py b/asv_bench/benchmarks/timeseries.py index a91e0386a0f3e..7b0a494886b2b 100644 --- a/asv_bench/benchmarks/timeseries.py +++ b/asv_bench/benchmarks/timeseries.py @@ -265,17 +265,25 @@ def time_lookup_and_cleanup(self): class ToDatetimeFromIntsFloats: def setup(self): - self.timestamp_seconds_int = Series( - range(1521080307, 1521685107), dtype="int64" - ) - self.timestamp_seconds_float = self.timestamp_seconds_int.astype("float64") + self.ts_sec = Series(range(1521080307, 1521685107), dtype="int64") + self.ts_sec_float = self.ts_sec.astype("float64") + + self.ts_nanosec = 1_000_000 * self.ts_sec + self.ts_nanosec.float = self.ts_nanosec.astype("float64") + + # speed of int64 and float64 paths should be comparable + + def time_nanosec_int64(self): + to_datetime(self.ts_nanosec, unit="ns") + + def time_nanosec_float64(self): + to_datetime(self.ts_nanosec_float, unit="ns") - def to_datetime_int(self): - to_datetime(self.timestamp_seconds_int, unit="s") + def time_sec_int64(self): + to_datetime(self.ts_sec, unit="s") - # float64 should about the same as int64 - def to_datetime_float(self): - to_datetime(self.timestamp_seconds_float, unit="s") + def time_sec_float64(self): + to_datetime(self.ts_sec_float, unit="s") class ToDatetimeYYYYMMDD: From f216a43d2796ed78b54c516bc448add6f50288db Mon Sep 17 00:00:00 2001 From: arw2019 Date: Fri, 18 Sep 2020 19:31:07 +0000 Subject: [PATCH 51/52] typo --- asv_bench/benchmarks/timeseries.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/asv_bench/benchmarks/timeseries.py b/asv_bench/benchmarks/timeseries.py index 7b0a494886b2b..27c904dda5b45 100644 --- a/asv_bench/benchmarks/timeseries.py +++ b/asv_bench/benchmarks/timeseries.py @@ -269,7 +269,7 @@ def setup(self): self.ts_sec_float = self.ts_sec.astype("float64") self.ts_nanosec = 1_000_000 * self.ts_sec - self.ts_nanosec.float = self.ts_nanosec.astype("float64") + self.ts_nanosec_float = self.ts_nanosec.astype("float64") # speed of int64 and float64 paths should be comparable From bb8c35bff4d654ec4194375c5027b8a7fe396cf5 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Sat, 19 Sep 2020 16:50:46 +0000 Subject: [PATCH 52/52] feedback --- pandas/_libs/tslib.pyx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index e1d3f8719fd28..b1b38505b9476 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -221,7 +221,7 @@ def array_with_unit_to_datetime( if unit == "ns": if issubclass(values.dtype.type, (np.integer, np.float_)): - result = values.astype("M8[ns]") + result = values.astype("M8[ns]", copy=False) else: result, tz = array_to_datetime(values.astype(object), errors=errors) return result, tz @@ -251,7 +251,7 @@ def array_with_unit_to_datetime( if values.dtype.kind == "i": result = (iresult * m).astype("M8[ns]") - if values.dtype.kind == "f": + elif values.dtype.kind == "f": fresult = (values * m).astype("f8") fresult[mask] = 0 if prec: