Skip to content

Commit a60e325

Browse files
jbrockmendelharisbal
authored and
harisbal
committed
BUG: fix Period.asfreq conversion near datetime(1, 1, 1) (pandas-dev#19650)
1 parent 1e3ff82 commit a60e325

File tree

5 files changed

+133
-22
lines changed

5 files changed

+133
-22
lines changed

doc/source/whatsnew/v0.23.0.txt

+2-1
Original file line numberDiff line numberDiff line change
@@ -741,8 +741,9 @@ Timedelta
741741
- Bug in :class:`TimedeltaIndex` where division by a ``Series`` would return a ``TimedeltaIndex`` instead of a ``Series`` (:issue:`19042`)
742742
- Bug in :func:`Timedelta.__add__`, :func:`Timedelta.__sub__` where adding or subtracting a ``np.timedelta64`` object would return another ``np.timedelta64`` instead of a ``Timedelta`` (:issue:`19738`)
743743
- Bug in :func:`Timedelta.__floordiv__`, :func:`Timedelta.__rfloordiv__` where operating with a ``Tick`` object would raise a ``TypeError`` instead of returning a numeric value (:issue:`19738`)
744+
- Bug in :func:`Period.asfreq` where periods near ``datetime(1, 1, 1)`` could be converted incorrectly (:issue:`19643`)
744745
- Bug in :func:`Timedelta.total_seconds()` causing precision errors i.e. ``Timedelta('30S').total_seconds()==30.000000000000004`` (:issue:`19458`)
745-
746+
-
746747

747748
Timezones
748749
^^^^^^^^^

pandas/_libs/src/period_helper.c

+3-2
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,7 @@ PANDAS_INLINE npy_int64 transform_via_day(npy_int64 ordinal,
138138
}
139139

140140
static npy_int64 DtoB_weekday(npy_int64 absdate) {
141-
return (((absdate) / 7) * 5) + (absdate) % 7 - BDAY_OFFSET;
141+
return floordiv(absdate, 7) * 5 + mod_compat(absdate, 7) - BDAY_OFFSET;
142142
}
143143

144144
static npy_int64 DtoB(struct date_info *dinfo,
@@ -245,7 +245,8 @@ static npy_int64 asfreq_UpsampleWithinDay(npy_int64 ordinal,
245245
static npy_int64 asfreq_BtoDT(npy_int64 ordinal, asfreq_info *af_info) {
246246
ordinal += BDAY_OFFSET;
247247
ordinal =
248-
(((ordinal - 1) / 5) * 7 + mod_compat(ordinal - 1, 5) + 1 - ORD_OFFSET);
248+
(floordiv(ordinal - 1, 5) * 7 + mod_compat(ordinal - 1, 5) + 1 -
249+
ORD_OFFSET);
249250

250251
return upsample_daytime(ordinal, af_info);
251252
}

pandas/_libs/tslibs/period.pyx

+105-19
Original file line numberDiff line numberDiff line change
@@ -154,12 +154,32 @@ cdef inline int get_freq_group(int freq) nogil:
154154
return (freq // 1000) * 1000
155155

156156

157-
@cython.cdivision
157+
# specifically _dont_ use cdivision or else ordinals near -1 are assigned to
158+
# incorrect dates GH#19643
159+
@cython.cdivision(False)
158160
cdef int64_t get_period_ordinal(int year, int month, int day,
159161
int hour, int minute, int second,
160162
int microseconds, int picoseconds,
161163
int freq) nogil:
162-
"""generate an ordinal in period space"""
164+
"""
165+
Generate an ordinal in period space
166+
167+
Parameters
168+
----------
169+
year : int
170+
month : int
171+
day : int
172+
hour : int
173+
minute : int
174+
second : int
175+
microseconds : int
176+
picoseconds : int
177+
freq : int
178+
179+
Returns
180+
-------
181+
period_ordinal : int64_t
182+
"""
163183
cdef:
164184
int64_t absdays, unix_date, seconds, delta
165185
int64_t weeks
@@ -190,7 +210,7 @@ cdef int64_t get_period_ordinal(int year, int month, int day,
190210
if month >= fmonth:
191211
mdiff += 12
192212

193-
return (year - 1970) * 4 + (mdiff - 1) / 3
213+
return (year - 1970) * 4 + (mdiff - 1) // 3
194214

195215
elif freq == FR_MTH:
196216
return (year - 1970) * 12 + month - 1
@@ -202,14 +222,14 @@ cdef int64_t get_period_ordinal(int year, int month, int day,
202222
seconds = unix_date * 86400 + hour * 3600 + minute * 60 + second
203223

204224
if freq == FR_MS:
205-
return seconds * 1000 + microseconds / 1000
225+
return seconds * 1000 + microseconds // 1000
206226

207227
elif freq == FR_US:
208228
return seconds * 1000000 + microseconds
209229

210230
elif freq == FR_NS:
211231
return (seconds * 1000000000 +
212-
microseconds * 1000 + picoseconds / 1000)
232+
microseconds * 1000 + picoseconds // 1000)
213233

214234
else:
215235
return seconds
@@ -229,7 +249,7 @@ cdef int64_t get_period_ordinal(int year, int month, int day,
229249
elif freq == FR_BUS:
230250
# calculate the current week assuming sunday as last day of a week
231251
# Jan 1 0001 is a Monday, so subtract 1 to get to end-of-week
232-
weeks = (unix_date + ORD_OFFSET - 1) / 7
252+
weeks = (unix_date + ORD_OFFSET - 1) // 7
233253
# calculate the current weekday (in range 1 .. 7)
234254
delta = (unix_date + ORD_OFFSET - 1) % 7 + 1
235255
# return the number of business days in full weeks plus the business
@@ -241,12 +261,12 @@ cdef int64_t get_period_ordinal(int year, int month, int day,
241261

242262
elif freq_group == FR_WK:
243263
day_adj = freq - FR_WK
244-
return (unix_date + ORD_OFFSET - (1 + day_adj)) / 7 + 1 - WEEK_OFFSET
264+
return (unix_date + ORD_OFFSET - (1 + day_adj)) // 7 + 1 - WEEK_OFFSET
245265

246266
# raise ValueError
247267

248268

249-
cdef int get_date_info(int64_t ordinal, int freq, date_info *dinfo) nogil:
269+
cdef void get_date_info(int64_t ordinal, int freq, date_info *dinfo) nogil:
250270
cdef:
251271
int64_t absdate
252272
double abstime
@@ -263,7 +283,6 @@ cdef int get_date_info(int64_t ordinal, int freq, date_info *dinfo) nogil:
263283
absdate += 1
264284

265285
dInfoCalc_SetFromAbsDateTime(dinfo, absdate, abstime)
266-
return 0
267286

268287

269288
cdef int64_t get_python_ordinal(int64_t period_ordinal, int freq) nogil:
@@ -272,6 +291,15 @@ cdef int64_t get_python_ordinal(int64_t period_ordinal, int freq) nogil:
272291
This corresponds to the number of days since Jan., 1st, 1AD.
273292
When the instance has a frequency less than daily, the proleptic date
274293
is calculated for the last day of the period.
294+
295+
Parameters
296+
----------
297+
period_ordinal : int64_t
298+
freq : int
299+
300+
Returns
301+
-------
302+
absdate : int64_t number of days since datetime(1, 1, 1)
275303
"""
276304
cdef:
277305
asfreq_info af_info
@@ -285,11 +313,23 @@ cdef int64_t get_python_ordinal(int64_t period_ordinal, int freq) nogil:
285313
return toDaily(period_ordinal, &af_info) + ORD_OFFSET
286314

287315

288-
cdef int dInfoCalc_SetFromAbsDateTime(date_info *dinfo,
289-
int64_t absdate, double abstime) nogil:
316+
cdef void dInfoCalc_SetFromAbsDateTime(date_info *dinfo,
317+
int64_t absdate, double abstime) nogil:
290318
"""
291319
Set the instance's value using the given date and time.
292320
Assumes GREGORIAN_CALENDAR.
321+
322+
Parameters
323+
----------
324+
dinfo : date_info*
325+
absdate : int64_t
326+
days elapsed since datetime(1, 1, 1)
327+
abstime : double
328+
seconds elapsed since beginning of day described by absdate
329+
330+
Notes
331+
-----
332+
Updates dinfo inplace
293333
"""
294334
# Bounds check
295335
# The calling function is responsible for ensuring that
@@ -300,13 +340,21 @@ cdef int dInfoCalc_SetFromAbsDateTime(date_info *dinfo,
300340

301341
# Calculate the time
302342
dInfoCalc_SetFromAbsTime(dinfo, abstime)
303-
return 0
304343

305344

306-
cdef int dInfoCalc_SetFromAbsDate(date_info *dinfo, int64_t absdate) nogil:
345+
cdef void dInfoCalc_SetFromAbsDate(date_info *dinfo, int64_t absdate) nogil:
307346
"""
308347
Sets the date part of the date_info struct
309348
Assumes GREGORIAN_CALENDAR
349+
350+
Parameters
351+
----------
352+
dinfo : date_info*
353+
absdate : int64_t
354+
355+
Notes
356+
-----
357+
Updates dinfo inplace
310358
"""
311359
cdef:
312360
pandas_datetimestruct dts
@@ -315,13 +363,22 @@ cdef int dInfoCalc_SetFromAbsDate(date_info *dinfo, int64_t absdate) nogil:
315363
dinfo.year = dts.year
316364
dinfo.month = dts.month
317365
dinfo.day = dts.day
318-
return 0
319366

320367

321368
@cython.cdivision
322-
cdef int dInfoCalc_SetFromAbsTime(date_info *dinfo, double abstime) nogil:
369+
cdef void dInfoCalc_SetFromAbsTime(date_info *dinfo, double abstime) nogil:
323370
"""
324371
Sets the time part of the DateTime object.
372+
373+
Parameters
374+
----------
375+
dinfo : date_info*
376+
abstime : double
377+
seconds elapsed since beginning of day described by absdate
378+
379+
Notes
380+
-----
381+
Updates dinfo inplace
325382
"""
326383
cdef:
327384
int inttime
@@ -336,7 +393,6 @@ cdef int dInfoCalc_SetFromAbsTime(date_info *dinfo, double abstime) nogil:
336393
dinfo.hour = hour
337394
dinfo.minute = minute
338395
dinfo.second = second
339-
return 0
340396

341397

342398
@cython.cdivision
@@ -370,7 +426,19 @@ cdef int64_t absdate_from_ymd(int year, int month, int day) nogil:
370426
Find the absdate (days elapsed since datetime(1, 1, 1))
371427
for the given year/month/day.
372428
Assumes GREGORIAN_CALENDAR
429+
430+
Parameters
431+
----------
432+
year : int
433+
month : int
434+
day : int
435+
436+
Returns
437+
-------
438+
absdate : int
439+
days elapsed since datetime(1, 1, 1)
373440
"""
441+
374442
# /* Calculate the absolute date
375443
cdef:
376444
pandas_datetimestruct dts
@@ -385,6 +453,25 @@ cdef int64_t absdate_from_ymd(int year, int month, int day) nogil:
385453

386454

387455
cdef int get_yq(int64_t ordinal, int freq, int *quarter, int *year):
456+
"""
457+
Find the year and quarter of a Period with the given ordinal and frequency
458+
459+
Parameters
460+
----------
461+
ordinal : int64_t
462+
freq : int
463+
quarter : int*
464+
year : int*
465+
466+
Returns
467+
-------
468+
qtr_freq : int
469+
describes the implied quarterly frequency associated with `freq`
470+
471+
Notes
472+
-----
473+
Sets quarter and year inplace
474+
"""
388475
cdef:
389476
asfreq_info af_info
390477
int qtr_freq
@@ -403,8 +490,8 @@ cdef int get_yq(int64_t ordinal, int freq, int *quarter, int *year):
403490
return qtr_freq
404491

405492

406-
cdef int64_t DtoQ_yq(int64_t ordinal, asfreq_info *af_info,
407-
int *year, int *quarter):
493+
cdef void DtoQ_yq(int64_t ordinal, asfreq_info *af_info,
494+
int *year, int *quarter):
408495
cdef:
409496
date_info dinfo
410497

@@ -419,7 +506,6 @@ cdef int64_t DtoQ_yq(int64_t ordinal, asfreq_info *af_info,
419506

420507
year[0] = dinfo.year
421508
quarter[0] = monthToQuarter(dinfo.month)
422-
return 0
423509

424510

425511
cdef inline int monthToQuarter(int month):

pandas/tests/scalar/period/test_period_asfreq.py

+22
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
import pytest
2+
3+
from pandas.errors import OutOfBoundsDatetime
4+
15
import pandas as pd
26
from pandas import Period, offsets
37
from pandas.util import testing as tm
@@ -6,6 +10,24 @@
610

711
class TestFreqConversion(object):
812
"""Test frequency conversion of date objects"""
13+
@pytest.mark.parametrize('freq', ['A', 'Q', 'M', 'W', 'B', 'D'])
14+
def test_asfreq_near_zero(self, freq):
15+
# GH#19643, GH#19650
16+
per = Period('0001-01-01', freq=freq)
17+
tup1 = (per.year, per.hour, per.day)
18+
19+
prev = per - 1
20+
assert (per - 1).ordinal == per.ordinal - 1
21+
tup2 = (prev.year, prev.month, prev.day)
22+
assert tup2 < tup1
23+
24+
@pytest.mark.xfail(reason='GH#19643 period_helper asfreq functions fail '
25+
'to check for overflows')
26+
def test_to_timestamp_out_of_bounds(self):
27+
# GH#19643, currently gives Timestamp('1754-08-30 22:43:41.128654848')
28+
per = Period('0001-01-01', freq='B')
29+
with pytest.raises(OutOfBoundsDatetime):
30+
per.to_timestamp()
931

1032
def test_asfreq_corner(self):
1133
val = Period(freq='A', year=2007)

pandas/tests/tslibs/test_period_asfreq.py

+1
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66

77
class TestPeriodFreqConversion(object):
8+
89
def test_intraday_conversion_factors(self):
910
assert period_asfreq(1, get_freq('D'), get_freq('H'), False) == 24
1011
assert period_asfreq(1, get_freq('D'), get_freq('T'), False) == 1440

0 commit comments

Comments
 (0)