Skip to content

BUG: value_counts may raise OutOfBoundsDatetime #13772

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jul 24, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v0.19.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -747,6 +747,8 @@ Bug Fixes
- Bug in invalid datetime parsing in ``to_datetime`` and ``DatetimeIndex`` may raise ``TypeError`` rather than ``ValueError`` (:issue:`11169`, :issue:`11287`)
- Bug in ``Index`` created with tz-aware ``Timestamp`` and mismatched ``tz`` option incorrectly coerces timezone (:issue:`13692`)
- Bug in ``DatetimeIndex`` with nanosecond frequency does not include timestamp specified with ``end`` (:issue:`13672`)
- Bug in ``Index`` raises ``OutOfBoundsDatetime`` if ``datetime`` exceeds ``datetime64[ns]`` bounds, rather than coercing to ``object`` dtype (:issue:`13663`)
- Bug in ``.value_counts`` raises ``OutOfBoundsDatetime`` if data exceeds ``datetime64[ns]`` bounds (:issue:`13663`)

- Bug in ``Categorical.remove_unused_categories()`` changes ``.codes`` dtype to platform int (:issue:`13261`)
- Bug in ``groupby`` with ``as_index=False`` returns all NaN's when grouping on multiple columns including a categorical one (:issue:`13204`)
Expand Down
12 changes: 7 additions & 5 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -291,11 +291,13 @@ def _set_axis(self, axis, labels, fastpath=False):

if not isinstance(labels,
(DatetimeIndex, PeriodIndex, TimedeltaIndex)):
labels = DatetimeIndex(labels)

# need to set here because we changed the index
if fastpath:
self._data.set_axis(axis, labels)
try:
labels = DatetimeIndex(labels)
# need to set here because we changed the index
if fastpath:
self._data.set_axis(axis, labels)
except tslib.OutOfBoundsDatetime:
pass
self._set_subtyp(is_all_dates)

object.__setattr__(self, '_index', labels)
Expand Down
8 changes: 5 additions & 3 deletions pandas/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -258,13 +258,15 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None,
pass
elif inferred != 'string':
if inferred.startswith('datetime'):

if (lib.is_datetime_with_singletz_array(subarr) or
'tz' in kwargs):
# only when subarr has the same tz
from pandas.tseries.index import DatetimeIndex
return DatetimeIndex(subarr, copy=copy, name=name,
**kwargs)
try:
return DatetimeIndex(subarr, copy=copy,
name=name, **kwargs)
except tslib.OutOfBoundsDatetime:
pass

elif inferred.startswith('timedelta'):
from pandas.tseries.tdi import TimedeltaIndex
Expand Down
16 changes: 14 additions & 2 deletions pandas/tests/indexes/test_datetimelike.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-

from datetime import timedelta, time
from datetime import datetime, timedelta, time

import numpy as np

Expand All @@ -12,7 +12,7 @@
import pandas.util.testing as tm

import pandas as pd
from pandas.lib import Timestamp
from pandas.tslib import Timestamp, OutOfBoundsDatetime

from .common import Base

Expand Down Expand Up @@ -336,6 +336,18 @@ def test_construction_base_constructor(self):
tm.assert_index_equal(pd.Index(np.array(arr)),
pd.DatetimeIndex(np.array(arr)))

def test_construction_outofbounds(self):
# GH 13663
# Regression test: building a generic Index from datetimes that exceed the
# datetime64[ns] bounds should coerce to object dtype instead of raising.
dates = [datetime(3000, 1, 1), datetime(4000, 1, 1),
datetime(5000, 1, 1), datetime(6000, 1, 1)]
exp = Index(dates, dtype=object)
# coerces to object
tm.assert_index_equal(Index(dates), exp)

with tm.assertRaises(OutOfBoundsDatetime):
# can't create DatetimeIndex
# (the explicit DatetimeIndex constructor is still expected to raise)
DatetimeIndex(dates)

def test_astype(self):
# GH 13149, GH 13209
idx = DatetimeIndex(['2016-05-16', 'NaT', NaT, np.NaN])
Expand Down
18 changes: 15 additions & 3 deletions pandas/tests/test_algos.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import numpy as np
from numpy.random import RandomState
from numpy import nan
import datetime
from datetime import datetime
from pandas import Series, Categorical, CategoricalIndex, Index
import pandas as pd

Expand Down Expand Up @@ -121,7 +121,7 @@ def test_mixed_integer(self):

def test_unsortable(self):
# GH 13714
arr = np.array([1, 2, datetime.datetime.now(), 0, 3], dtype=object)
arr = np.array([1, 2, datetime.now(), 0, 3], dtype=object)
if compat.PY2 and not pd._np_version_under1p10:
# RuntimeWarning: tp_compare didn't return -1 or -2 for exception
with tm.assert_produces_warning(RuntimeWarning):
Expand Down Expand Up @@ -556,6 +556,18 @@ def test_value_counts_nat(self):
tm.assert_series_equal(algos.value_counts(dt), exp_dt)
# TODO same for (timedelta)

def test_value_counts_datetime_outofbounds(self):
# GH 13663
# Regression test: value_counts on datetimes that exceed the datetime64[ns]
# bounds should return a result rather than raising OutOfBoundsDatetime.
s = pd.Series([datetime(3000, 1, 1), datetime(5000, 1, 1),
datetime(5000, 1, 1), datetime(6000, 1, 1),
datetime(3000, 1, 1), datetime(3000, 1, 1)])
res = s.value_counts()

# Expected: one entry per distinct datetime on an object-dtype Index,
# listed by descending count (3000 -> 3, 5000 -> 2, 6000 -> 1).
exp_index = pd.Index([datetime(3000, 1, 1), datetime(5000, 1, 1),
datetime(6000, 1, 1)], dtype=object)
exp = pd.Series([3, 2, 1], index=exp_index)
tm.assert_series_equal(res, exp)

def test_categorical(self):
s = Series(pd.Categorical(list('aaabbc')))
result = s.value_counts()
Expand Down Expand Up @@ -818,7 +830,7 @@ def _check(arr):
def test_pad_backfill_object_segfault():

old = np.array([], dtype='O')
new = np.array([datetime.datetime(2010, 12, 31)], dtype='O')
new = np.array([datetime(2010, 12, 31)], dtype='O')

result = _algos.pad_object(old, new)
expected = np.array([-1], dtype=np.int64)
Expand Down