-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
Vectorized ISO Format #28863
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Vectorized ISO Format #28863
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,10 +18,15 @@ from pandas._libs.tslibs.ccalendar cimport ( | |
get_day_of_year) | ||
from pandas._libs.tslibs.np_datetime cimport ( | ||
npy_datetimestruct, pandas_timedeltastruct, dt64_to_dtstruct, | ||
td64_to_tdstruct) | ||
td64_to_tdstruct, NPY_DATETIMEUNIT, NPY_FR_ns) | ||
from pandas._libs.tslibs.nattype cimport NPY_NAT | ||
|
||
|
||
cdef extern from "./src/datetime/np_datetime_strings.h": | ||
int make_iso_8601_datetime(npy_datetimestruct *dts, char *outstr, int outlen, | ||
NPY_DATETIMEUNIT base) | ||
|
||
|
||
def get_time_micros(ndarray[int64_t] dtindex): | ||
""" | ||
Return the number of microseconds in the time component of a | ||
|
@@ -43,6 +48,36 @@ def get_time_micros(ndarray[int64_t] dtindex): | |
return micros | ||
|
||
|
||
@cython.wraparound(False) | ||
@cython.boundscheck(False) | ||
def get_datetime_isoformats(ndarray[int64_t] dtindex) -> ndarray: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. IIRC cython is not great about mixing and matching annotation formats. i.e. the |
||
""" | ||
Return isoformats for an array of datetimelike objects. | ||
|
||
Parameters | ||
---------- | ||
dtindex : DatetimeArray | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. DatetimeArray is inaccurate here. So is "datetimelike" above |
||
|
||
Returns | ||
------- | ||
Array of ISO formats | ||
""" | ||
cdef: | ||
Py_ssize_t i, count = len(dtindex) | ||
int64_t val, convert_status | ||
npy_datetimestruct dts | ||
char buf[34] # ns precision with UTC offset max length | ||
|
||
out = np.empty(count, dtype=object) | ||
|
||
for i in range(count): | ||
dt64_to_dtstruct(dtindex[i], &dts); | ||
# TODO: handle bad return | ||
convert_status = make_iso_8601_datetime(&dts, buf, 34, NPY_FR_ns) | ||
out[i] = buf.decode("UTF-8") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. is returning this as bytes essential? side-thought: could tweak perf by making There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
http://docs.cython.org/en/latest/src/tutorial/strings.html#decoding-bytes-to-text |
||
|
||
return out | ||
|
||
@cython.wraparound(False) | ||
@cython.boundscheck(False) | ||
def build_field_sarray(const int64_t[:] dtindex): | ||
|
@@ -128,7 +163,6 @@ def get_date_name_field(const int64_t[:] dtindex, object field, object locale=No | |
|
||
dt64_to_dtstruct(dtindex[i], &dts) | ||
out[i] = names[dts.month].capitalize() | ||
|
||
else: | ||
raise ValueError("Field {field} not supported".format(field=field)) | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -134,9 +134,11 @@ def f(self): | |
return result | ||
|
||
if field in self._object_ops: | ||
result = fields.get_date_name_field(values, field) | ||
result = self._maybe_mask_results(result, fill_value=None) | ||
|
||
if field == "isoformat": | ||
result = fields.get_datetime_isoformats(values) | ||
else: | ||
result = fields.get_date_name_field(values, field) | ||
result = self._maybe_mask_results(result, fill_value=None) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. is maybe_mask_results necessary for the isoformat case? i.e. does it handle NaT? |
||
else: | ||
result = fields.get_date_field(values, field) | ||
result = self._maybe_mask_results( | ||
|
@@ -284,7 +286,7 @@ class DatetimeArray(dtl.DatetimeLikeArrayMixin, dtl.TimelikeOps, dtl.DatelikeOps | |
"is_year_end", | ||
"is_leap_year", | ||
] | ||
_object_ops = ["weekday_name", "freq", "tz"] | ||
_object_ops = ["weekday_name", "freq", "tz", "isoformat"] | ||
_field_ops = [ | ||
"year", | ||
"month", | ||
|
@@ -1522,7 +1524,13 @@ def date(self): | |
The name of day in a week (ex: Friday)\n\n.. deprecated:: 0.23.0 | ||
""", | ||
) | ||
|
||
isoformat = _field_accessor( | ||
"isoformat", | ||
"isoformat", | ||
""" | ||
ISO formatted string. | ||
""", | ||
) | ||
dayofyear = _field_accessor( | ||
"dayofyear", | ||
"doy", | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -64,6 +64,39 @@ def test_dti_timestamp_fields(self, field): | |
result = getattr(Timestamp(idx[-1]), field) | ||
assert result == expected | ||
|
||
@pytest.mark.parametrize( | ||
"tz,expected_vals", | ||
[ | ||
( | ||
"utc", | ||
[ | ||
"2000-01-01T00:00:00.000000000Z", | ||
"2000-01-02T00:00:00.000000000Z", | ||
"2000-01-03T00:00:00.000000000Z", | ||
], | ||
), | ||
# "US/Eastern", | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. pls mangle to ensure this fails linting. want to avoid accidentally merging commented-out code There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yea my hope is to get this working; just a stub for now There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. FWIW I think should be possible to support timezones if we vendor updates from Numpy: There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For a while I've been wanting to make a PR at numpy to get them to expose these functions so we could There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Cool I’ll check out their repo. Would be nice instead of having to vendor |
||
# [ | ||
# "2000-01-01T00:00:00.000000000-05:00", | ||
# "2000-01-02T00:00:00.000000000-05:00", | ||
# "2000-01-03T00:00:00.000000000-05:00", | ||
# ], | ||
], | ||
) | ||
def test_dti_isoformat_datetimes(self, tz, expected_vals): | ||
dts = pd.date_range(start="2000-01-1", periods=3, freq="D", tz=tz) | ||
result = pd.Series(dts).dt.isoformat | ||
expected = pd.Series(expected_vals) | ||
tm.assert_series_equal(result, expected) | ||
|
||
@pytest.mark.skip | ||
def test_dti_isoformat_timedelts(self): | ||
... | ||
|
||
@pytest.mark.skip | ||
def test_dti_isoformat_period_raises(self): | ||
... | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. IIUC this is idiomatic for typing overriding, but There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same comment here |
||
|
||
def test_dti_timestamp_freq_fields(self): | ||
# extra fields from DatetimeIndex like quarter and week | ||
idx = tm.makeDateIndex(100) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
All else equal I'd rather do the "extern" in np_datetime and cimport from there. Trying to minimize the surface area of "src". That said, the ship may have sailed on that one, in which case never mind.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Cool I'll give that a try. Definitely sounds logical