Skip to content

Commit 0067b90

Browse files
sinhrks authored and jreback committed
BUG: value_counts may raise OutOfBoundsDatetime (#13772)
1 parent f919b9e commit 0067b90

File tree

5 files changed

+43
-13
lines changed

5 files changed

+43
-13
lines changed

doc/source/whatsnew/v0.19.0.txt

+2
Original file line numberDiff line numberDiff line change
@@ -747,6 +747,8 @@ Bug Fixes
747747
- Bug in invalid datetime parsing in ``to_datetime`` and ``DatetimeIndex`` may raise ``TypeError`` rather than ``ValueError`` (:issue:`11169`, :issue:`11287`)
748748
- Bug in ``Index`` created with tz-aware ``Timestamp`` and mismatched ``tz`` option incorrectly coerces timezone (:issue:`13692`)
749749
- Bug in ``DatetimeIndex`` with nanosecond frequency does not include timestamp specified with ``end`` (:issue:`13672`)
750+
- Bug in ``Index`` raises ``OutOfBoundsDatetime`` if ``datetime`` exceeds ``datetime64[ns]`` bounds, rather than coercing to ``object`` dtype (:issue:`13663`)
751+
- Bug in ``.value_counts`` raises ``OutOfBoundsDatetime`` if data exceeds ``datetime64[ns]`` bounds (:issue:`13663`)
750752

751753
- Bug in ``Categorical.remove_unused_categories()`` changes ``.codes`` dtype to platform int (:issue:`13261`)
752754
- Bug in ``groupby`` with ``as_index=False`` returns all NaN's when grouping on multiple columns including a categorical one (:issue:`13204`)

pandas/core/series.py

+7-5
Original file line numberDiff line numberDiff line change
@@ -291,11 +291,13 @@ def _set_axis(self, axis, labels, fastpath=False):
291291

292292
if not isinstance(labels,
293293
(DatetimeIndex, PeriodIndex, TimedeltaIndex)):
294-
labels = DatetimeIndex(labels)
295-
296-
# need to set here because we changed the index
297-
if fastpath:
298-
self._data.set_axis(axis, labels)
294+
try:
295+
labels = DatetimeIndex(labels)
296+
# need to set here because we changed the index
297+
if fastpath:
298+
self._data.set_axis(axis, labels)
299+
except tslib.OutOfBoundsDatetime:
300+
pass
299301
self._set_subtyp(is_all_dates)
300302

301303
object.__setattr__(self, '_index', labels)

pandas/indexes/base.py

+5-3
Original file line numberDiff line numberDiff line change
@@ -258,13 +258,15 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None,
258258
pass
259259
elif inferred != 'string':
260260
if inferred.startswith('datetime'):
261-
262261
if (lib.is_datetime_with_singletz_array(subarr) or
263262
'tz' in kwargs):
264263
# only when subarr has the same tz
265264
from pandas.tseries.index import DatetimeIndex
266-
return DatetimeIndex(subarr, copy=copy, name=name,
267-
**kwargs)
265+
try:
266+
return DatetimeIndex(subarr, copy=copy,
267+
name=name, **kwargs)
268+
except tslib.OutOfBoundsDatetime:
269+
pass
268270

269271
elif inferred.startswith('timedelta'):
270272
from pandas.tseries.tdi import TimedeltaIndex

pandas/tests/indexes/test_datetimelike.py

+14-2
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# -*- coding: utf-8 -*-
22

3-
from datetime import timedelta, time
3+
from datetime import datetime, timedelta, time
44

55
import numpy as np
66

@@ -12,7 +12,7 @@
1212
import pandas.util.testing as tm
1313

1414
import pandas as pd
15-
from pandas.lib import Timestamp
15+
from pandas.tslib import Timestamp, OutOfBoundsDatetime
1616

1717
from .common import Base
1818

@@ -336,6 +336,18 @@ def test_construction_base_constructor(self):
336336
tm.assert_index_equal(pd.Index(np.array(arr)),
337337
pd.DatetimeIndex(np.array(arr)))
338338

339+
def test_construction_outofbounds(self):
340+
# GH 13663
341+
dates = [datetime(3000, 1, 1), datetime(4000, 1, 1),
342+
datetime(5000, 1, 1), datetime(6000, 1, 1)]
343+
exp = Index(dates, dtype=object)
344+
# coerces to object
345+
tm.assert_index_equal(Index(dates), exp)
346+
347+
with tm.assertRaises(OutOfBoundsDatetime):
348+
# can't create DatetimeIndex
349+
DatetimeIndex(dates)
350+
339351
def test_astype(self):
340352
# GH 13149, GH 13209
341353
idx = DatetimeIndex(['2016-05-16', 'NaT', NaT, np.NaN])

pandas/tests/test_algos.py

+15-3
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
import numpy as np
55
from numpy.random import RandomState
66
from numpy import nan
7-
import datetime
7+
from datetime import datetime
88
from pandas import Series, Categorical, CategoricalIndex, Index
99
import pandas as pd
1010

@@ -121,7 +121,7 @@ def test_mixed_integer(self):
121121

122122
def test_unsortable(self):
123123
# GH 13714
124-
arr = np.array([1, 2, datetime.datetime.now(), 0, 3], dtype=object)
124+
arr = np.array([1, 2, datetime.now(), 0, 3], dtype=object)
125125
if compat.PY2 and not pd._np_version_under1p10:
126126
# RuntimeWarning: tp_compare didn't return -1 or -2 for exception
127127
with tm.assert_produces_warning(RuntimeWarning):
@@ -556,6 +556,18 @@ def test_value_counts_nat(self):
556556
tm.assert_series_equal(algos.value_counts(dt), exp_dt)
557557
# TODO same for (timedelta)
558558

559+
def test_value_counts_datetime_outofbounds(self):
560+
# GH 13663
561+
s = pd.Series([datetime(3000, 1, 1), datetime(5000, 1, 1),
562+
datetime(5000, 1, 1), datetime(6000, 1, 1),
563+
datetime(3000, 1, 1), datetime(3000, 1, 1)])
564+
res = s.value_counts()
565+
566+
exp_index = pd.Index([datetime(3000, 1, 1), datetime(5000, 1, 1),
567+
datetime(6000, 1, 1)], dtype=object)
568+
exp = pd.Series([3, 2, 1], index=exp_index)
569+
tm.assert_series_equal(res, exp)
570+
559571
def test_categorical(self):
560572
s = Series(pd.Categorical(list('aaabbc')))
561573
result = s.value_counts()
@@ -818,7 +830,7 @@ def _check(arr):
818830
def test_pad_backfill_object_segfault():
819831

820832
old = np.array([], dtype='O')
821-
new = np.array([datetime.datetime(2010, 12, 31)], dtype='O')
833+
new = np.array([datetime(2010, 12, 31)], dtype='O')
822834

823835
result = _algos.pad_object(old, new)
824836
expected = np.array([-1], dtype=np.int64)

0 commit comments

Comments
 (0)