Skip to content

BUG: Fix bound checking for Timestamp() with dt64 #4065 #4926

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Oct 7, 2013
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/source/release.rst
Original file line number Diff line number Diff line change
Expand Up @@ -555,7 +555,7 @@ Bug Fixes
type of headers (:issue:`5048`).
- Fixed a bug where ``DatetimeIndex`` joins with ``PeriodIndex`` caused a
stack overflow (:issue:`3899`).

- Fix bound checking for Timestamp() with datetime64 input (:issue:`4065`)

pandas 0.12.0
-------------
Expand Down
15 changes: 15 additions & 0 deletions pandas/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -348,6 +348,13 @@ def _pickle_array(arr):

def _unpickle_array(bytes):
    """Rebuild an array from its pickled byte string.

    The bytes are wrapped in a ``BytesIO`` and handed to ``read_array``.
    If the result has a datetime64 dtype it is re-viewed as ``_NS_DTYPE``
    before being returned; any other array is returned untouched.
    """
    stream = BytesIO(bytes)
    loaded = read_array(stream)

    if not is_datetime64_dtype(loaded):
        return loaded

    # All datetimes should be stored as M8[ns]. numpy 1.6 reads them back
    # as M8[us] when unpickling, so re-view the data to guarantee every
    # datetime64 array comes out as M8[ns].
    return loaded.view(_NS_DTYPE)


Expand Down Expand Up @@ -1780,6 +1787,14 @@ def is_datetime64_dtype(arr_or_dtype):
tipo = arr_or_dtype.dtype.type
return issubclass(tipo, np.datetime64)

def is_datetime64_ns_dtype(arr_or_dtype):
    """Report whether the argument's dtype equals ``_NS_DTYPE``.

    Accepts a ``np.dtype`` instance (compared directly), a type object
    (converted with ``np.dtype(...)`` first), or any other object, whose
    ``.dtype`` attribute is compared.
    """
    # Already a dtype instance: compare as-is.
    if isinstance(arr_or_dtype, np.dtype):
        return arr_or_dtype == _NS_DTYPE

    # A type object: let numpy turn it into a dtype before comparing.
    if isinstance(arr_or_dtype, type):
        return np.dtype(arr_or_dtype) == _NS_DTYPE

    # Anything else is expected to carry its own ``dtype`` attribute.
    return arr_or_dtype.dtype == _NS_DTYPE

def is_timedelta64_dtype(arr_or_dtype):
if isinstance(arr_or_dtype, np.dtype):
Expand Down
3 changes: 3 additions & 0 deletions pandas/src/datetime.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,9 @@ cdef extern from "datetime/np_datetime.h":
npy_int64 year
npy_int32 month, day, hour, min, sec, us, ps, as

int cmp_pandas_datetimestruct(pandas_datetimestruct *a,
pandas_datetimestruct *b)

int convert_pydatetime_to_datetimestruct(PyObject *obj,
pandas_datetimestruct *out,
PANDAS_DATETIMEUNIT *out_bestunit,
Expand Down
63 changes: 63 additions & 0 deletions pandas/src/datetime/np_datetime.c
Original file line number Diff line number Diff line change
Expand Up @@ -273,6 +273,69 @@ set_datetimestruct_days(npy_int64 days, pandas_datetimestruct *dts)
}
}

/*
 * Chronological three-way comparison of two pandas_datetimestruct values.
 *
 * Fields are examined in order: year, month, day, hour, min, sec, us, ps, as.
 * The first field that differs decides the result.
 *
 * Returns 1 when *a is later than *b, -1 when *a is earlier than *b,
 * and 0 when every field is equal.
 */
int
cmp_pandas_datetimestruct(pandas_datetimestruct *a, pandas_datetimestruct *b)
{
    if (a->year != b->year) {
        return (a->year > b->year) ? 1 : -1;
    }

    if (a->month != b->month) {
        return (a->month > b->month) ? 1 : -1;
    }

    if (a->day != b->day) {
        return (a->day > b->day) ? 1 : -1;
    }

    if (a->hour != b->hour) {
        return (a->hour > b->hour) ? 1 : -1;
    }

    if (a->min != b->min) {
        return (a->min > b->min) ? 1 : -1;
    }

    if (a->sec != b->sec) {
        return (a->sec > b->sec) ? 1 : -1;
    }

    if (a->us != b->us) {
        return (a->us > b->us) ? 1 : -1;
    }

    if (a->ps != b->ps) {
        return (a->ps > b->ps) ? 1 : -1;
    }

    if (a->as != b->as) {
        return (a->as > b->as) ? 1 : -1;
    }

    /* Every field matched. */
    return 0;
}

/*
*
* Tests for and converts a Python datetime.datetime or datetime.date
Expand Down
14 changes: 7 additions & 7 deletions pandas/tseries/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,7 @@ def __new__(cls, data=None,
data = _str_to_dt_array(data, offset, dayfirst=dayfirst,
yearfirst=yearfirst)
else:
data = tools.to_datetime(data)
data = tools.to_datetime(data, errors='raise')
data.offset = offset
if isinstance(data, DatetimeIndex):
if name is not None:
Expand Down Expand Up @@ -243,14 +243,14 @@ def __new__(cls, data=None,
subarr = data.view(_NS_DTYPE)
else:
try:
subarr = tools.to_datetime(data)
subarr = tools.to_datetime(data, box=False)
except ValueError:
# tz aware
subarr = tools.to_datetime(data, utc=True)
subarr = tools.to_datetime(data, box=False, utc=True)

if not np.issubdtype(subarr.dtype, np.datetime64):
raise TypeError('Unable to convert %s to datetime dtype'
% str(data))
raise ValueError('Unable to convert %s to datetime dtype'
% str(data))

if isinstance(subarr, DatetimeIndex):
if tz is None:
Expand Down Expand Up @@ -934,7 +934,7 @@ def join(self, other, how='left', level=None, return_indexers=False):
'mixed-integer-float', 'mixed')):
try:
other = DatetimeIndex(other)
except TypeError:
except (TypeError, ValueError):
pass

this, other = self._maybe_utc_convert(other)
Expand Down Expand Up @@ -1051,7 +1051,7 @@ def intersection(self, other):
if not isinstance(other, DatetimeIndex):
try:
other = DatetimeIndex(other)
except TypeError:
except (TypeError, ValueError):
pass
result = Index.intersection(self, other)
if isinstance(result, DatetimeIndex):
Expand Down
77 changes: 76 additions & 1 deletion pandas/tseries/tests/test_timeseries.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# pylint: disable-msg=E1101,W0612
from datetime import datetime, time, timedelta
from datetime import datetime, time, timedelta, date
import sys
import os
import unittest
Expand Down Expand Up @@ -952,6 +952,81 @@ def test_to_datetime_list_of_integers(self):

self.assert_(rng.equals(result))

def test_to_datetime_dt64s(self):
    """Check to_datetime() and Timestamp() on single datetime64 scalars."""
    # In-bounds scalars: both entry points must produce equal results.
    for in_bound in (np.datetime64('2000-01-01'),
                     np.datetime64('2000-01-02')):
        converted = pd.to_datetime(in_bound)
        self.assertEqual(converted, Timestamp(in_bound))

    # Out-of-bounds scalars: raising when asked to, NaT when coercing.
    for out_of_bound in (np.datetime64('1000-01-01'),
                         np.datetime64('5000-01-02')):
        self.assertRaises(ValueError, pd.to_datetime, out_of_bound,
                          errors='raise')
        self.assertRaises(ValueError, tslib.Timestamp, out_of_bound)
        coerced = pd.to_datetime(out_of_bound, coerce=True)
        self.assert_(coerced is NaT)

def test_to_datetime_array_of_dt64s(self):
    """Check to_datetime() on lists of datetime64 values, with and
    without an out-of-bounds entry."""
    dts = [np.datetime64(text) for text in ('2000-01-01', '2000-01-02')]

    # When every value is in bounds, to_datetime() must agree with
    # converting each element through Timestamp().
    converted = pd.to_datetime(dts, box=False)
    expected = np.array([Timestamp(dt).asm8 for dt in dts])
    self.assert_(np.array_equal(converted, expected))

    # Same list with one out-of-bounds value appended at the end.
    dts_with_oob = dts + [np.datetime64('9999-01-01')]

    # errors='raise' without coercion must reject the whole input.
    self.assertRaises(ValueError, pd.to_datetime, dts_with_oob,
                      coerce=False, errors='raise')

    # coerce=True keeps the two good values and yields iNaT for the bad one.
    coerced = pd.to_datetime(dts_with_oob, box=False, coerce=True)
    expected_coerced = np.array(
        [Timestamp(dt).asm8 for dt in dts_with_oob[:2]] + [iNaT],
        dtype='M8'
    )
    self.assert_(np.array_equal(coerced, expected_coerced))

    # With coerce=False (errors not passed here, so left at its default),
    # out of bounds datetime64s are converted to their .item(), which
    # depending on the version of numpy is either a python
    # datetime.datetime or datetime.date
    uncoerced = pd.to_datetime(dts_with_oob, box=False, coerce=False)
    expected_objects = np.array([dt.item() for dt in dts_with_oob],
                                dtype='O')
    self.assert_(np.array_equal(uncoerced, expected_objects))

def test_index_to_datetime(self):
idx = Index(['1/1/2000', '1/2/2000', '1/3/2000'])

Expand Down
108 changes: 86 additions & 22 deletions pandas/tseries/tests/test_tslib.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import numpy as np

from pandas import tslib
from datetime import datetime
import datetime

from pandas.core.api import Timestamp

Expand All @@ -15,19 +15,53 @@
from pandas import _np_version_under1p7


class TestDatetimeParsingWrappers(unittest.TestCase):
def test_verify_datetime_bounds(self):
for year in (1, 1000, 1677, 2262, 5000):
dt = datetime(year, 1, 1)
self.assertRaises(
ValueError,
tslib.verify_datetime_bounds,
dt
)
class TestTimestamp(unittest.TestCase):
def test_bounds_with_different_units(self):
out_of_bounds_dates = (
'1677-09-21',
'2262-04-12',
)

time_units = ('D', 'h', 'm', 's', 'ms', 'us')

for year in (1678, 2000, 2261):
tslib.verify_datetime_bounds(datetime(year, 1, 1))
for date_string in out_of_bounds_dates:
for unit in time_units:
self.assertRaises(
ValueError,
tslib.Timestamp,
np.datetime64(date_string, dtype='M8[%s]' % unit)
)

in_bounds_dates = (
'1677-09-23',
'2262-04-11',
)

for date_string in in_bounds_dates:
for unit in time_units:
tslib.Timestamp(
np.datetime64(date_string, dtype='M8[%s]' % unit)
)

def test_barely_oob_dts(self):
    """Check Timestamp() right at, and one step beyond, its min/max."""
    # By definition a [ns] value cannot be out of bounds, so the limits
    # are converted to [us] datetime64s, which can step past them.
    lower_us = np.datetime64(tslib.Timestamp.min).astype('M8[us]')
    upper_us = np.datetime64(tslib.Timestamp.max).astype('M8[us]')

    # Generic-unit step of 1; presumably takes on the [us] unit of the
    # value it is combined with -- TODO confirm.
    step = np.timedelta64(1)

    # The limits themselves must convert without error.
    for edge in (lower_us, upper_us):
        tslib.Timestamp(edge)

    # One step below the minimum is an error.
    self.assertRaises(ValueError, tslib.Timestamp, lower_us - step)

    # One step above the maximum is an error.
    self.assertRaises(ValueError, tslib.Timestamp, upper_us + step)

class TestDatetimeParsingWrappers(unittest.TestCase):
def test_does_not_convert_mixed_integer(self):
bad_date_strings = (
'-50000',
Expand Down Expand Up @@ -97,15 +131,45 @@ def test_number_looking_strings_not_into_datetime(self):
arr = np.array(['1', '2', '3', '4', '5'], dtype=object)
self.assert_(np.array_equal(tslib.array_to_datetime(arr), arr))

def test_dates_outside_of_datetime64_ns_bounds(self):
# These datetimes are outside of the bounds of the
# datetime64[ns] bounds, so they cannot be converted to
# datetimes
arr = np.array(['1/1/1676', '1/2/1676'], dtype=object)
self.assert_(np.array_equal(tslib.array_to_datetime(arr), arr))
def test_coercing_dates_outside_of_datetime64_ns_bounds(self):
invalid_dates = [
datetime.date(1000, 1, 1),
datetime.datetime(1000, 1, 1),
'1000-01-01',
'Jan 1, 1000',
np.datetime64('1000-01-01'),
]

arr = np.array(['1/1/2263', '1/2/2263'], dtype=object)
self.assert_(np.array_equal(tslib.array_to_datetime(arr), arr))
for invalid_date in invalid_dates:
self.assertRaises(
ValueError,
tslib.array_to_datetime,
np.array([invalid_date], dtype='object'),
coerce=False,
raise_=True,
)
self.assert_(
np.array_equal(
tslib.array_to_datetime(
np.array([invalid_date], dtype='object'), coerce=True
),
np.array([tslib.iNaT], dtype='M8[ns]')
)
)

arr = np.array(['1/1/1000', '1/1/2000'], dtype=object)
self.assert_(
np.array_equal(
tslib.array_to_datetime(arr, coerce=True),
np.array(
[
tslib.iNaT,
'2000-01-01T00:00:00.000000000-0000'
],
dtype='M8[ns]'
)
)
)

def test_coerce_of_invalid_datetimes(self):
arr = np.array(['01-01-2013', 'not_a_date', '1'], dtype=object)
Expand All @@ -130,11 +194,11 @@ def test_coerce_of_invalid_datetimes(self):
)


class TestTimestamp(unittest.TestCase):
class TestTimestampNsOperations(unittest.TestCase):
def setUp(self):
if _np_version_under1p7:
raise nose.SkipTest('numpy >= 1.7 required')
self.timestamp = Timestamp(datetime.utcnow())
self.timestamp = Timestamp(datetime.datetime.utcnow())

def assert_ns_timedelta(self, modified_timestamp, expected_value):
value = self.timestamp.value
Expand Down
Loading