Skip to content

Commit 22cb379

Browse files
authored
REF: Avoid ravel in DTA._format_native_types (pandas-dev#47411)
1 parent 2517b92 commit 22cb379

File tree

2 files changed

+27
-11
lines changed

2 files changed

+27
-11
lines changed

pandas/_libs/tslib.pyx

+27-10
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ def _test_parse_iso8601(ts: str):
105105
@cython.wraparound(False)
106106
@cython.boundscheck(False)
107107
def format_array_from_datetime(
108-
ndarray[int64_t] values,
108+
ndarray values,
109109
tzinfo tz=None,
110110
str format=None,
111111
object na_rep=None,
@@ -129,14 +129,21 @@ def format_array_from_datetime(
129129
np.ndarray[object]
130130
"""
131131
cdef:
132-
int64_t val, ns, N = len(values)
132+
int64_t val, ns, N = values.size
133133
bint show_ms = False, show_us = False, show_ns = False
134134
bint basic_format = False
135-
ndarray[object] result = cnp.PyArray_EMPTY(values.ndim, values.shape, cnp.NPY_OBJECT, 0)
136135
_Timestamp ts
137-
str res
136+
object res
138137
npy_datetimestruct dts
139138

139+
# Note that `result` (and thus `res_flat`) is C-order and
140+
# `it` iterates C-order as well, so the iteration matches
141+
# See discussion at
142+
# github.com/pandas-dev/pandas/pull/46886#discussion_r860261305
143+
ndarray result = cnp.PyArray_EMPTY(values.ndim, values.shape, cnp.NPY_OBJECT, 0)
144+
object[::1] res_flat = result.ravel() # should NOT be a copy
145+
cnp.flatiter it = cnp.PyArray_IterNew(values)
146+
140147
if na_rep is None:
141148
na_rep = 'NaT'
142149

@@ -150,10 +157,11 @@ def format_array_from_datetime(
150157
show_ms = reso_obj == Resolution.RESO_MS
151158

152159
for i in range(N):
153-
val = values[i]
160+
# Analogous to: val = values[i]
161+
val = (<int64_t*>cnp.PyArray_ITER_DATA(it))[0]
154162

155163
if val == NPY_NAT:
156-
result[i] = na_rep
164+
res = na_rep
157165
elif basic_format:
158166

159167
pandas_datetime_to_datetimestruct(val, reso, &dts)
@@ -168,22 +176,31 @@ def format_array_from_datetime(
168176
elif show_ms:
169177
res += f'.{dts.us // 1000:03d}'
170178

171-
result[i] = res
172179

173180
else:
174181

175182
ts = Timestamp._from_value_and_reso(val, reso=reso, tz=tz)
176183
if format is None:
177-
result[i] = str(ts)
184+
res = str(ts)
178185
else:
179186

180187
# invalid format string
181188
# requires dates > 1900
182189
try:
183190
# Note: dispatches to pydatetime
184-
result[i] = ts.strftime(format)
191+
res = ts.strftime(format)
185192
except ValueError:
186-
result[i] = str(ts)
193+
res = str(ts)
194+
195+
# Note: we can index res_flat directly instead of using PyArray_MultiIter_DATA
196+
# like we do for the other functions because result is known C-contiguous
197+
# and the flatiter `it` walks `values` in C order as well. The usual pattern
198+
# does not seem to work with object dtype.
199+
# See discussion at
200+
# github.com/pandas-dev/pandas/pull/46886#discussion_r860261305
201+
res_flat[i] = res
202+
203+
cnp.PyArray_ITER_NEXT(it)
187204

188205
return result
189206

pandas/core/arrays/datetimes.py

-1
Original file line numberDiff line numberDiff line change
@@ -687,7 +687,6 @@ def astype(self, dtype, copy: bool = True):
687687
# -----------------------------------------------------------------
688688
# Rendering Methods
689689

690-
@dtl.ravel_compat
691690
def _format_native_types(
692691
self, *, na_rep="NaT", date_format=None, **kwargs
693692
) -> npt.NDArray[np.object_]:

0 commit comments

Comments
 (0)