From 56b068ebf222048062c9d903a4795ec6df90e14f Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Tue, 8 Oct 2019 16:54:45 -0700 Subject: [PATCH] Working vectorized isoformat --- pandas/_libs/tslibs/fields.pyx | 38 ++++++++++++++++++- pandas/core/arrays/datetimes.py | 18 ++++++--- .../indexes/datetimes/test_scalar_compat.py | 33 ++++++++++++++++ 3 files changed, 82 insertions(+), 7 deletions(-) diff --git a/pandas/_libs/tslibs/fields.pyx b/pandas/_libs/tslibs/fields.pyx index 2ed85595f7e3a..1693a2e936a1f 100644 --- a/pandas/_libs/tslibs/fields.pyx +++ b/pandas/_libs/tslibs/fields.pyx @@ -18,10 +18,15 @@ from pandas._libs.tslibs.ccalendar cimport ( get_day_of_year) from pandas._libs.tslibs.np_datetime cimport ( npy_datetimestruct, pandas_timedeltastruct, dt64_to_dtstruct, - td64_to_tdstruct) + td64_to_tdstruct, NPY_DATETIMEUNIT, NPY_FR_ns) from pandas._libs.tslibs.nattype cimport NPY_NAT +cdef extern from "./src/datetime/np_datetime_strings.h": + int make_iso_8601_datetime(npy_datetimestruct *dts, char *outstr, int outlen, + NPY_DATETIMEUNIT base) + + def get_time_micros(ndarray[int64_t] dtindex): """ Return the number of microseconds in the time component of a @@ -43,6 +48,36 @@ def get_time_micros(ndarray[int64_t] dtindex): return micros +@cython.wraparound(False) +@cython.boundscheck(False) +def get_datetime_isoformats(ndarray[int64_t] dtindex) -> ndarray: + """ + Return isoformats for an array of datetimelike objects. + + Parameters + ---------- + dtindex : DatetimeArray + + Returns + ------- + Array of ISO formats + """ + cdef: + Py_ssize_t i, count = len(dtindex) + int64_t val, convert_status + npy_datetimestruct dts + char buf[34] # ns precision with UTC offset max length + + out = np.empty(count, dtype=object) + + for i in range(count): + dt64_to_dtstruct(dtindex[i], &dts); + # TODO: handle bad return + convert_status = make_iso_8601_datetime(&dts, buf, 34, NPY_FR_ns) + out[i] = buf.decode("UTF-8") + + return out + @cython.wraparound(False) @cython.boundscheck(False) def build_field_sarray(const int64_t[:] dtindex): @@ -128,7 +163,6 @@ def get_date_name_field(const int64_t[:] dtindex, object field, object locale=No dt64_to_dtstruct(dtindex[i], &dts) out[i] = names[dts.month].capitalize() - else: raise ValueError("Field {field} not supported".format(field=field)) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 0335058a69c63..16b4b9482721c 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -134,9 +134,11 @@ def f(self): return result if field in self._object_ops: - result = fields.get_date_name_field(values, field) - result = self._maybe_mask_results(result, fill_value=None) - + if field == "isoformat": + result = fields.get_datetime_isoformats(values) + else: + result = fields.get_date_name_field(values, field) + result = self._maybe_mask_results(result, fill_value=None) else: result = fields.get_date_field(values, field) result = self._maybe_mask_results( @@ -284,7 +286,7 @@ class DatetimeArray(dtl.DatetimeLikeArrayMixin, dtl.TimelikeOps, dtl.DatelikeOps "is_year_end", "is_leap_year", ] - _object_ops = ["weekday_name", "freq", "tz"] + _object_ops = ["weekday_name", "freq", "tz", "isoformat"] _field_ops = [ "year", "month", @@ -1522,7 +1524,13 @@ def date(self): The name of day in a week (ex: Friday)\n\n.. deprecated:: 0.23.0 """, ) - + isoformat = _field_accessor( + "isoformat", + "isoformat", + """ + ISO formatted string. + """, + ) dayofyear = _field_accessor( "dayofyear", "doy", diff --git a/pandas/tests/indexes/datetimes/test_scalar_compat.py b/pandas/tests/indexes/datetimes/test_scalar_compat.py index 00310f4fba7c7..201261cf70287 100644 --- a/pandas/tests/indexes/datetimes/test_scalar_compat.py +++ b/pandas/tests/indexes/datetimes/test_scalar_compat.py @@ -64,6 +64,39 @@ def test_dti_timestamp_fields(self, field): result = getattr(Timestamp(idx[-1]), field) assert result == expected + @pytest.mark.parametrize( + "tz,expected_vals", + [ + ( + "utc", + [ + "2000-01-01T00:00:00.000000000Z", + "2000-01-02T00:00:00.000000000Z", + "2000-01-03T00:00:00.000000000Z", + ], + ), + # "US/Eastern", + # [ + # "2000-01-01T00:00:00.000000000-05:00", + # "2000-01-02T00:00:00.000000000-05:00", + # "2000-01-03T00:00:00.000000000-05:00", + # ], + ], + ) + def test_dti_isoformat_datetimes(self, tz, expected_vals): + dts = pd.date_range(start="2000-01-1", periods=3, freq="D", tz=tz) + result = pd.Series(dts).dt.isoformat + expected = pd.Series(expected_vals) + tm.assert_series_equal(result, expected) + + @pytest.mark.skip + def test_dti_isoformat_timedelts(self): + ... + + @pytest.mark.skip + def test_dti_isoformat_period_raises(self): + ... + def test_dti_timestamp_freq_fields(self): # extra fields from DatetimeIndex like quarter and week idx = tm.makeDateIndex(100)