Skip to content

Commit 68ca401

Browse files
chris-b1 authored and jorisvandenbossche committed
[Backport 14433] BUG: underflow on Timestamp creation (pandas-dev#14433)
* BUG: underflow on Timestamp creation
* undo change to lower bound
* change lower bound; but keep rounding to us

(cherry picked from commit 65362aa)
1 parent 4ac3295 commit 68ca401

File tree

7 files changed

+51
-32
lines changed

7 files changed

+51
-32
lines changed

doc/source/whatsnew/v0.19.1.txt

+1
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ Bug Fixes
4646
- ``pd.merge()`` will raise ``ValueError`` with non-boolean parameters in passed boolean type arguments (:issue:`14434`)
4747

4848

49+
- Bug in ``Timestamp`` where dates very near the minimum (1677-09) could underflow on creation (:issue:`14415`)
4950

5051
- Bug in ``pd.concat`` where names of the ``keys`` were not propagated to the resulting ``MultiIndex`` (:issue:`14252`)
5152
- Bug in ``pd.concat`` where ``axis`` cannot take string parameters ``'rows'`` or ``'columns'`` (:issue:`14369`)

pandas/lib.pyx

+2-7
Original file line numberDiff line numberDiff line change
@@ -65,13 +65,8 @@ cdef int64_t NPY_NAT = util.get_nat()
6565
ctypedef unsigned char UChar
6666

6767
cimport util
68-
from util cimport is_array, _checknull, _checknan
69-
70-
cdef extern from "headers/stdint.h":
71-
enum: UINT8_MAX
72-
enum: INT64_MAX
73-
enum: INT64_MIN
74-
68+
from util cimport (is_array, _checknull, _checknan, INT64_MAX,
69+
INT64_MIN, UINT8_MAX)
7570

7671
cdef extern from "math.h":
7772
double sqrt(double x)

pandas/src/datetime/np_datetime.c

+14-7
Original file line numberDiff line numberDiff line change
@@ -846,7 +846,8 @@ convert_datetime_to_datetimestruct(pandas_datetime_metadata *meta,
846846
dt = dt % perday;
847847
}
848848
else {
849-
set_datetimestruct_days((dt - (perday-1)) / perday, out);
849+
set_datetimestruct_days(dt / perday - (dt % perday == 0 ? 0 : 1),
850+
out);
850851
dt = (perday-1) + (dt + 1) % perday;
851852
}
852853
out->hour = dt;
@@ -860,7 +861,8 @@ convert_datetime_to_datetimestruct(pandas_datetime_metadata *meta,
860861
dt = dt % perday;
861862
}
862863
else {
863-
set_datetimestruct_days((dt - (perday-1)) / perday, out);
864+
set_datetimestruct_days(dt / perday - (dt % perday == 0 ? 0 : 1),
865+
out);
864866
dt = (perday-1) + (dt + 1) % perday;
865867
}
866868
out->hour = dt / 60;
@@ -875,7 +877,8 @@ convert_datetime_to_datetimestruct(pandas_datetime_metadata *meta,
875877
dt = dt % perday;
876878
}
877879
else {
878-
set_datetimestruct_days((dt - (perday-1)) / perday, out);
880+
set_datetimestruct_days(dt / perday - (dt % perday == 0 ? 0 : 1),
881+
out);
879882
dt = (perday-1) + (dt + 1) % perday;
880883
}
881884
out->hour = dt / (60*60);
@@ -891,7 +894,8 @@ convert_datetime_to_datetimestruct(pandas_datetime_metadata *meta,
891894
dt = dt % perday;
892895
}
893896
else {
894-
set_datetimestruct_days((dt - (perday-1)) / perday, out);
897+
set_datetimestruct_days(dt / perday - (dt % perday == 0 ? 0 : 1),
898+
out);
895899
dt = (perday-1) + (dt + 1) % perday;
896900
}
897901
out->hour = dt / (60*60*1000LL);
@@ -908,7 +912,8 @@ convert_datetime_to_datetimestruct(pandas_datetime_metadata *meta,
908912
dt = dt % perday;
909913
}
910914
else {
911-
set_datetimestruct_days((dt - (perday-1)) / perday, out);
915+
set_datetimestruct_days(dt / perday - (dt % perday == 0 ? 0 : 1),
916+
out);
912917
dt = (perday-1) + (dt + 1) % perday;
913918
}
914919
out->hour = dt / (60*60*1000000LL);
@@ -925,7 +930,8 @@ convert_datetime_to_datetimestruct(pandas_datetime_metadata *meta,
925930
dt = dt % perday;
926931
}
927932
else {
928-
set_datetimestruct_days((dt - (perday-1)) / perday, out);
933+
set_datetimestruct_days(dt / perday - (dt % perday == 0 ? 0 : 1),
934+
out);
929935
dt = (perday-1) + (dt + 1) % perday;
930936
}
931937
out->hour = dt / (60*60*1000000000LL);
@@ -943,7 +949,8 @@ convert_datetime_to_datetimestruct(pandas_datetime_metadata *meta,
943949
dt = dt % perday;
944950
}
945951
else {
946-
set_datetimestruct_days((dt - (perday-1)) / perday, out);
952+
set_datetimestruct_days(dt / perday - (dt % perday == 0 ? 0 : 1),
953+
out);
947954
dt = (perday-1) + (dt + 1) % perday;
948955
}
949956
out->hour = dt / (60*60*1000000000000LL);

pandas/src/inference.pyx

+3-13
Original file line numberDiff line numberDiff line change
@@ -6,19 +6,9 @@ iNaT = util.get_nat()
66

77
cdef bint PY2 = sys.version_info[0] == 2
88

9-
cdef extern from "headers/stdint.h":
10-
enum: UINT8_MAX
11-
enum: UINT16_MAX
12-
enum: UINT32_MAX
13-
enum: UINT64_MAX
14-
enum: INT8_MIN
15-
enum: INT8_MAX
16-
enum: INT16_MIN
17-
enum: INT16_MAX
18-
enum: INT32_MAX
19-
enum: INT32_MIN
20-
enum: INT64_MAX
21-
enum: INT64_MIN
9+
from util cimport (UINT8_MAX, UINT16_MAX, UINT32_MAX, UINT64_MAX,
10+
INT8_MIN, INT8_MAX, INT16_MIN, INT16_MAX,
11+
INT32_MAX, INT32_MIN, INT64_MAX, INT64_MIN)
2212

2313
# core.common import for fast inference checks
2414

pandas/src/util.pxd

+14
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,20 @@ ctypedef fused numeric:
3838
cnp.float32_t
3939
cnp.float64_t
4040

41+
cdef extern from "headers/stdint.h":
42+
enum: UINT8_MAX
43+
enum: UINT16_MAX
44+
enum: UINT32_MAX
45+
enum: UINT64_MAX
46+
enum: INT8_MIN
47+
enum: INT8_MAX
48+
enum: INT16_MIN
49+
enum: INT16_MAX
50+
enum: INT32_MAX
51+
enum: INT32_MIN
52+
enum: INT64_MAX
53+
enum: INT64_MIN
54+
4155
cdef inline object get_value_at(ndarray arr, object loc):
4256
cdef:
4357
Py_ssize_t i, sz

pandas/tseries/tests/test_timeseries.py

+9
Original file line numberDiff line numberDiff line change
@@ -4463,6 +4463,15 @@ def test_basics_nanos(self):
44634463
self.assertEqual(stamp.microsecond, 0)
44644464
self.assertEqual(stamp.nanosecond, 500)
44654465

4466+
# GH 14415
4467+
val = np.iinfo(np.int64).min + 80000000000000
4468+
stamp = Timestamp(val)
4469+
self.assertEqual(stamp.year, 1677)
4470+
self.assertEqual(stamp.month, 9)
4471+
self.assertEqual(stamp.day, 21)
4472+
self.assertEqual(stamp.microsecond, 145224)
4473+
self.assertEqual(stamp.nanosecond, 192)
4474+
44664475
def test_unit(self):
44674476

44684477
def check(val, unit=None, h=1, s=1, us=0):

pandas/tslib.pyx

+8-5
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ from cpython cimport (
2424
PyUnicode_AsUTF8String,
2525
)
2626

27+
2728
# Cython < 0.17 doesn't have this in cpython
2829
cdef extern from "Python.h":
2930
cdef PyTypeObject *Py_TYPE(object)
@@ -37,7 +38,7 @@ from datetime cimport cmp_pandas_datetimestruct
3738
from libc.stdlib cimport free
3839

3940
from util cimport (is_integer_object, is_float_object, is_datetime64_object,
40-
is_timedelta64_object)
41+
is_timedelta64_object, INT64_MAX)
4142
cimport util
4243

4344
from datetime cimport *
@@ -904,10 +905,12 @@ cpdef object get_value_box(ndarray arr, object loc):
904905

905906

906907
# Add the min and max fields at the class level
907-
# These are defined as magic numbers due to strange
908-
# wraparound behavior when using the true int64 lower boundary
909-
cdef int64_t _NS_LOWER_BOUND = -9223285636854775000LL
910-
cdef int64_t _NS_UPPER_BOUND = 9223372036854775807LL
908+
cdef int64_t _NS_UPPER_BOUND = INT64_MAX
909+
# the smallest value we could actually represent is
910+
# INT64_MIN + 1 == -9223372036854775807
911+
# but to allow overflow free conversion with a microsecond resolution
912+
# use the smallest value with a 0 nanosecond unit (0s in last 3 digits)
913+
cdef int64_t _NS_LOWER_BOUND = -9223372036854775000
911914

912915
cdef pandas_datetimestruct _NS_MIN_DTS, _NS_MAX_DTS
913916
pandas_datetime_to_datetimestruct(_NS_LOWER_BOUND, PANDAS_FR_ns, &_NS_MIN_DTS)

0 commit comments

Comments
 (0)