Skip to content

Commit 12f123d

Browse files
author
auderson
committed
Merge remote-tracking branch 'upstream/main' into roll_var_remove_floating_point_artifacts
# Conflicts: # doc/source/whatsnew/v1.5.0.rst
2 parents e57f86c + b6f21f3 commit 12f123d

File tree

18 files changed

+379
-233
lines changed

18 files changed

+379
-233
lines changed

doc/source/whatsnew/v1.5.0.rst

+2
Original file line numberDiff line numberDiff line change
@@ -601,8 +601,10 @@ Groupby/resample/rolling
601601
- Bug in :meth:`GroupBy.cummax` with ``int64`` dtype with leading value being the smallest possible int64 (:issue:`46382`)
602602
- Bug in :meth:`GroupBy.max` with empty groups and ``uint64`` dtype incorrectly raising ``RuntimeError`` (:issue:`46408`)
603603
- Bug in :meth:`.GroupBy.apply` would fail when ``func`` was a string and args or kwargs were supplied (:issue:`46479`)
604+
- Bug in :meth:`SeriesGroupBy.apply` would incorrectly name its result when there was a unique group (:issue:`46369`)
604605
- Bug in :meth:`Rolling.var` and :meth:`Rolling.std` would give non-zero result with window of same values (:issue:`42064`)
605606
- Bug in :meth:`.Rolling.var` would segfault calculating weighted variance when window size was larger than data size (:issue:`46760`)
607+
- Bug in :meth:`Grouper.__repr__` where ``dropna`` was not included in the output; it is now included (:issue:`46754`)
606608

607609
Reshaping
608610
^^^^^^^^^

pandas/_libs/tslibs/vectorized.pyx

+109-139
Original file line numberDiff line numberDiff line change
@@ -32,10 +32,7 @@ from .np_datetime cimport (
3232
)
3333
from .offsets cimport BaseOffset
3434
from .period cimport get_period_ordinal
35-
from .timestamps cimport (
36-
create_timestamp_from_ts,
37-
normalize_i8_stamp,
38-
)
35+
from .timestamps cimport create_timestamp_from_ts
3936
from .timezones cimport (
4037
get_dst_info,
4138
is_tzlocal,
@@ -47,6 +44,54 @@ from .tzconversion cimport (
4744
localize_tzinfo_api,
4845
)
4946

47+
48+
cdef const int64_t[::1] _deltas_placeholder = np.array([], dtype=np.int64)
49+
50+
51+
@cython.freelist(16)
52+
@cython.internal
53+
@cython.final
54+
cdef class Localizer:
55+
cdef:
56+
tzinfo tz
57+
bint use_utc, use_fixed, use_tzlocal, use_dst, use_pytz
58+
ndarray trans
59+
Py_ssize_t ntrans
60+
const int64_t[::1] deltas
61+
int64_t delta
62+
63+
@cython.initializedcheck(False)
64+
@cython.boundscheck(False)
65+
def __cinit__(self, tzinfo tz):
66+
self.tz = tz
67+
self.use_utc = self.use_tzlocal = self.use_fixed = False
68+
self.use_dst = self.use_pytz = False
69+
self.ntrans = -1 # placeholder
70+
self.delta = -1 # placeholder
71+
self.deltas = _deltas_placeholder
72+
73+
if is_utc(tz) or tz is None:
74+
self.use_utc = True
75+
76+
elif is_tzlocal(tz) or is_zoneinfo(tz):
77+
self.use_tzlocal = True
78+
79+
else:
80+
trans, deltas, typ = get_dst_info(tz)
81+
self.trans = trans
82+
self.ntrans = trans.shape[0]
83+
self.deltas = deltas
84+
85+
if typ != "pytz" and typ != "dateutil":
86+
# static/fixed; in this case we know that len(deltas) == 1
87+
self.use_fixed = True
88+
self.delta = deltas[0]
89+
else:
90+
self.use_dst = True
91+
if typ == "pytz":
92+
self.use_pytz = True
93+
94+
5095
# -------------------------------------------------------------------------
5196

5297

@@ -87,19 +132,14 @@ def ints_to_pydatetime(
87132
ndarray[object] of type specified by box
88133
"""
89134
cdef:
90-
Py_ssize_t i, ntrans = -1, n = stamps.shape[0]
91-
ndarray[int64_t] trans
92-
int64_t[::1] deltas
135+
Localizer info = Localizer(tz)
136+
int64_t utc_val, local_val
137+
Py_ssize_t pos, i, n = stamps.shape[0]
93138
int64_t* tdata = NULL
94-
intp_t pos
95-
int64_t utc_val, local_val, delta = NPY_NAT
96-
bint use_utc = False, use_tzlocal = False, use_fixed = False
97-
str typ
98139

99140
npy_datetimestruct dts
100141
tzinfo new_tz
101142
ndarray[object] result = np.empty(n, dtype=object)
102-
bint use_pytz = False
103143
bint use_date = False, use_time = False, use_ts = False, use_pydt = False
104144

105145
if box == "date":
@@ -116,20 +156,8 @@ def ints_to_pydatetime(
116156
"box must be one of 'datetime', 'date', 'time' or 'timestamp'"
117157
)
118158

119-
if is_utc(tz) or tz is None:
120-
use_utc = True
121-
elif is_tzlocal(tz) or is_zoneinfo(tz):
122-
use_tzlocal = True
123-
else:
124-
trans, deltas, typ = get_dst_info(tz)
125-
ntrans = trans.shape[0]
126-
if typ not in ["pytz", "dateutil"]:
127-
# static/fixed; in this case we know that len(delta) == 1
128-
use_fixed = True
129-
delta = deltas[0]
130-
else:
131-
tdata = <int64_t*>cnp.PyArray_DATA(trans)
132-
use_pytz = typ == "pytz"
159+
if info.use_dst:
160+
tdata = <int64_t*>cnp.PyArray_DATA(info.trans)
133161

134162
for i in range(n):
135163
utc_val = stamps[i]
@@ -139,17 +167,17 @@ def ints_to_pydatetime(
139167
result[i] = <object>NaT
140168
continue
141169

142-
if use_utc:
170+
if info.use_utc:
143171
local_val = utc_val
144-
elif use_tzlocal:
172+
elif info.use_tzlocal:
145173
local_val = utc_val + localize_tzinfo_api(utc_val, tz)
146-
elif use_fixed:
147-
local_val = utc_val + delta
174+
elif info.use_fixed:
175+
local_val = utc_val + info.delta
148176
else:
149-
pos = bisect_right_i8(tdata, utc_val, ntrans) - 1
150-
local_val = utc_val + deltas[pos]
177+
pos = bisect_right_i8(tdata, utc_val, info.ntrans) - 1
178+
local_val = utc_val + info.deltas[pos]
151179

152-
if use_pytz:
180+
if info.use_pytz:
153181
# find right representation of dst etc in pytz timezone
154182
new_tz = tz._tzinfos[tz._transition_info[pos]]
155183

@@ -191,46 +219,31 @@ cdef inline c_Resolution _reso_stamp(npy_datetimestruct *dts):
191219
@cython.boundscheck(False)
192220
def get_resolution(const int64_t[:] stamps, tzinfo tz=None) -> Resolution:
193221
cdef:
194-
Py_ssize_t i, ntrans = -1, n = stamps.shape[0]
195-
ndarray[int64_t] trans
196-
int64_t[::1] deltas
222+
Localizer info = Localizer(tz)
223+
int64_t utc_val, local_val
224+
Py_ssize_t pos, i, n = stamps.shape[0]
197225
int64_t* tdata = NULL
198-
intp_t pos
199-
int64_t utc_val, local_val, delta = NPY_NAT
200-
bint use_utc = False, use_tzlocal = False, use_fixed = False
201-
str typ
202226

203227
npy_datetimestruct dts
204228
c_Resolution reso = c_Resolution.RESO_DAY, curr_reso
205229

206-
if is_utc(tz) or tz is None:
207-
use_utc = True
208-
elif is_tzlocal(tz) or is_zoneinfo(tz):
209-
use_tzlocal = True
210-
else:
211-
trans, deltas, typ = get_dst_info(tz)
212-
ntrans = trans.shape[0]
213-
if typ not in ["pytz", "dateutil"]:
214-
# static/fixed; in this case we know that len(delta) == 1
215-
use_fixed = True
216-
delta = deltas[0]
217-
else:
218-
tdata = <int64_t*>cnp.PyArray_DATA(trans)
230+
if info.use_dst:
231+
tdata = <int64_t*>cnp.PyArray_DATA(info.trans)
219232

220233
for i in range(n):
221234
utc_val = stamps[i]
222235
if utc_val == NPY_NAT:
223236
continue
224237

225-
if use_utc:
238+
if info.use_utc:
226239
local_val = utc_val
227-
elif use_tzlocal:
240+
elif info.use_tzlocal:
228241
local_val = utc_val + localize_tzinfo_api(utc_val, tz)
229-
elif use_fixed:
230-
local_val = utc_val + delta
242+
elif info.use_fixed:
243+
local_val = utc_val + info.delta
231244
else:
232-
pos = bisect_right_i8(tdata, utc_val, ntrans) - 1
233-
local_val = utc_val + deltas[pos]
245+
pos = bisect_right_i8(tdata, utc_val, info.ntrans) - 1
246+
local_val = utc_val + info.deltas[pos]
234247

235248
dt64_to_dtstruct(local_val, &dts)
236249
curr_reso = _reso_stamp(&dts)
@@ -242,6 +255,8 @@ def get_resolution(const int64_t[:] stamps, tzinfo tz=None) -> Resolution:
242255

243256
# -------------------------------------------------------------------------
244257

258+
259+
@cython.cdivision(False)
245260
@cython.wraparound(False)
246261
@cython.boundscheck(False)
247262
cpdef ndarray[int64_t] normalize_i8_timestamps(const int64_t[:] stamps, tzinfo tz):
@@ -260,48 +275,33 @@ cpdef ndarray[int64_t] normalize_i8_timestamps(const int64_t[:] stamps, tzinfo t
260275
result : int64 ndarray of converted and normalized nanosecond timestamps
261276
"""
262277
cdef:
263-
Py_ssize_t i, ntrans = -1, n = stamps.shape[0]
264-
ndarray[int64_t] trans
265-
int64_t[::1] deltas
278+
Localizer info = Localizer(tz)
279+
int64_t utc_val, local_val
280+
Py_ssize_t pos, i, n = stamps.shape[0]
266281
int64_t* tdata = NULL
267-
intp_t pos
268-
int64_t utc_val, local_val, delta = NPY_NAT
269-
bint use_utc = False, use_tzlocal = False, use_fixed = False
270-
str typ
271282

272283
int64_t[::1] result = np.empty(n, dtype=np.int64)
273284

274-
if is_utc(tz) or tz is None:
275-
use_utc = True
276-
elif is_tzlocal(tz) or is_zoneinfo(tz):
277-
use_tzlocal = True
278-
else:
279-
trans, deltas, typ = get_dst_info(tz)
280-
ntrans = trans.shape[0]
281-
if typ not in ["pytz", "dateutil"]:
282-
# static/fixed; in this case we know that len(delta) == 1
283-
use_fixed = True
284-
delta = deltas[0]
285-
else:
286-
tdata = <int64_t*>cnp.PyArray_DATA(trans)
285+
if info.use_dst:
286+
tdata = <int64_t*>cnp.PyArray_DATA(info.trans)
287287

288288
for i in range(n):
289289
utc_val = stamps[i]
290290
if utc_val == NPY_NAT:
291291
result[i] = NPY_NAT
292292
continue
293293

294-
if use_utc:
294+
if info.use_utc:
295295
local_val = utc_val
296-
elif use_tzlocal:
296+
elif info.use_tzlocal:
297297
local_val = utc_val + localize_tzinfo_api(utc_val, tz)
298-
elif use_fixed:
299-
local_val = utc_val + delta
298+
elif info.use_fixed:
299+
local_val = utc_val + info.delta
300300
else:
301-
pos = bisect_right_i8(tdata, utc_val, ntrans) - 1
302-
local_val = utc_val + deltas[pos]
301+
pos = bisect_right_i8(tdata, utc_val, info.ntrans) - 1
302+
local_val = utc_val + info.deltas[pos]
303303

304-
result[i] = normalize_i8_stamp(local_val)
304+
result[i] = local_val - (local_val % DAY_NANOS)
305305

306306
return result.base # `.base` to access underlying ndarray
307307

@@ -324,40 +324,25 @@ def is_date_array_normalized(const int64_t[:] stamps, tzinfo tz=None) -> bool:
324324
is_normalized : bool True if all stamps are normalized
325325
"""
326326
cdef:
327-
Py_ssize_t i, ntrans = -1, n = stamps.shape[0]
328-
ndarray[int64_t] trans
329-
int64_t[::1] deltas
327+
Localizer info = Localizer(tz)
328+
int64_t utc_val, local_val
329+
Py_ssize_t pos, i, n = stamps.shape[0]
330330
int64_t* tdata = NULL
331-
intp_t pos
332-
int64_t utc_val, local_val, delta = NPY_NAT
333-
bint use_utc = False, use_tzlocal = False, use_fixed = False
334-
str typ
335-
336-
if is_utc(tz) or tz is None:
337-
use_utc = True
338-
elif is_tzlocal(tz) or is_zoneinfo(tz):
339-
use_tzlocal = True
340-
else:
341-
trans, deltas, typ = get_dst_info(tz)
342-
ntrans = trans.shape[0]
343-
if typ not in ["pytz", "dateutil"]:
344-
# static/fixed; in this case we know that len(delta) == 1
345-
use_fixed = True
346-
delta = deltas[0]
347-
else:
348-
tdata = <int64_t*>cnp.PyArray_DATA(trans)
331+
332+
if info.use_dst:
333+
tdata = <int64_t*>cnp.PyArray_DATA(info.trans)
349334

350335
for i in range(n):
351336
utc_val = stamps[i]
352-
if use_utc:
337+
if info.use_utc:
353338
local_val = utc_val
354-
elif use_tzlocal:
339+
elif info.use_tzlocal:
355340
local_val = utc_val + localize_tzinfo_api(utc_val, tz)
356-
elif use_fixed:
357-
local_val = utc_val + delta
341+
elif info.use_fixed:
342+
local_val = utc_val + info.delta
358343
else:
359-
pos = bisect_right_i8(tdata, utc_val, ntrans) - 1
360-
local_val = utc_val + deltas[pos]
344+
pos = bisect_right_i8(tdata, utc_val, info.ntrans) - 1
345+
local_val = utc_val + info.deltas[pos]
361346

362347
if local_val % DAY_NANOS != 0:
363348
return False
@@ -372,47 +357,32 @@ def is_date_array_normalized(const int64_t[:] stamps, tzinfo tz=None) -> bool:
372357
@cython.boundscheck(False)
373358
def dt64arr_to_periodarr(const int64_t[:] stamps, int freq, tzinfo tz):
374359
cdef:
375-
Py_ssize_t i, ntrans = -1, n = stamps.shape[0]
376-
ndarray[int64_t] trans
377-
int64_t[::1] deltas
360+
Localizer info = Localizer(tz)
361+
int64_t utc_val, local_val
362+
Py_ssize_t pos, i, n = stamps.shape[0]
378363
int64_t* tdata = NULL
379-
intp_t pos
380-
int64_t utc_val, local_val, delta = NPY_NAT
381-
bint use_utc = False, use_tzlocal = False, use_fixed = False
382-
str typ
383364

384365
npy_datetimestruct dts
385366
int64_t[::1] result = np.empty(n, dtype=np.int64)
386367

387-
if is_utc(tz) or tz is None:
388-
use_utc = True
389-
elif is_tzlocal(tz) or is_zoneinfo(tz):
390-
use_tzlocal = True
391-
else:
392-
trans, deltas, typ = get_dst_info(tz)
393-
ntrans = trans.shape[0]
394-
if typ not in ["pytz", "dateutil"]:
395-
# static/fixed; in this case we know that len(delta) == 1
396-
use_fixed = True
397-
delta = deltas[0]
398-
else:
399-
tdata = <int64_t*>cnp.PyArray_DATA(trans)
368+
if info.use_dst:
369+
tdata = <int64_t*>cnp.PyArray_DATA(info.trans)
400370

401371
for i in range(n):
402372
utc_val = stamps[i]
403373
if utc_val == NPY_NAT:
404374
result[i] = NPY_NAT
405375
continue
406376

407-
if use_utc:
377+
if info.use_utc:
408378
local_val = utc_val
409-
elif use_tzlocal:
379+
elif info.use_tzlocal:
410380
local_val = utc_val + localize_tzinfo_api(utc_val, tz)
411-
elif use_fixed:
412-
local_val = utc_val + delta
381+
elif info.use_fixed:
382+
local_val = utc_val + info.delta
413383
else:
414-
pos = bisect_right_i8(tdata, utc_val, ntrans) - 1
415-
local_val = utc_val + deltas[pos]
384+
pos = bisect_right_i8(tdata, utc_val, info.ntrans) - 1
385+
local_val = utc_val + info.deltas[pos]
416386

417387
dt64_to_dtstruct(local_val, &dts)
418388
result[i] = get_period_ordinal(&dts, freq)

0 commit comments

Comments
 (0)