diff --git a/RELEASE.rst b/RELEASE.rst index d7bd7c22ce326..2b911b0ed8170 100644 --- a/RELEASE.rst +++ b/RELEASE.rst @@ -136,6 +136,8 @@ pandas 0.11.0 - Bug on in-place putmasking on an ``integer`` series that needs to be converted to ``float`` (GH2746_) - Bug in argsort of ``datetime64[ns]`` Series with ``NaT`` (GH2967_) + - Bug in value_counts of ``datetime64[ns]`` Series (GH3002_) + - Fixed printing of ``NaT`` in an index - Bug in idxmin/idxmax of ``datetime64[ns]`` Series with ``NaT`` (GH2982__) - Bug in ``icol`` with negative indicies was incorrect producing incorrect return values (see GH2922_) @@ -162,6 +164,7 @@ pandas 0.11.0 .. _GH2967: https://github.com/pydata/pandas/issues/2967 .. _GH2982: https://github.com/pydata/pandas/issues/2982 .. _GH2989: https://github.com/pydata/pandas/issues/2989 +.. _GH3002: https://github.com/pydata/pandas/issues/3002 pandas 0.10.1 diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 256a51b909a19..413923262c6b0 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -170,6 +170,14 @@ def value_counts(values, sort=True, ascending=False): if com.is_integer_dtype(values.dtype): values = com._ensure_int64(values) keys, counts = htable.value_count_int64(values) + elif issubclass(values.dtype.type, (np.datetime64,np.timedelta64)): + + dtype = values.dtype + values = values.view(np.int64) + keys, counts = htable.value_count_int64(values) + + # convert the keys back to the dtype we came in + keys = Series(keys,dtype=dtype) else: mask = com.isnull(values) values = com._ensure_object(values) diff --git a/pandas/core/index.py b/pandas/core/index.py index 3a5f1d8147a99..42fe1c4ccb928 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -13,6 +13,7 @@ from pandas.lib import Timestamp from pandas.util.decorators import cache_readonly +from pandas.core.common import isnull import pandas.core.common as com from pandas.util import py3compat from pandas.core.config import get_option @@ -94,6 
+95,8 @@ def __new__(cls, data, dtype=None, copy=False, name=None): return Index(result.to_pydatetime(), dtype=_o_dtype) else: return result + elif issubclass(data.dtype.type, np.timedelta64): + return Int64Index(data, copy=copy, name=name) if dtype is not None: try: @@ -435,9 +438,12 @@ def format(self, name=False, formatter=None): zero_time = time(0, 0) result = [] for dt in self: - if dt.time() != zero_time or dt.tzinfo is not None: - return header + [u'%s' % x for x in self] - result.append(u'%d-%.2d-%.2d' % (dt.year, dt.month, dt.day)) + if isnull(dt): + result.append(u'NaT') + else: + if dt.time() != zero_time or dt.tzinfo is not None: + return header + [u'%s' % x for x in self] + result.append(u'%d-%.2d-%.2d' % (dt.year, dt.month, dt.day)) return header + result values = self.values diff --git a/pandas/tests/test_format.py b/pandas/tests/test_format.py index c2a399b493d13..1b436bfd443fc 100644 --- a/pandas/tests/test_format.py +++ b/pandas/tests/test_format.py @@ -1226,6 +1226,18 @@ def test_float_trim_zeros(self): else: self.assert_('+10' in line) + def test_datetimeindex(self): + + from pandas import date_range, NaT, Timestamp + index = date_range('20130102',periods=6) + s = Series(1,index=index) + result = s.to_string() + self.assertTrue('2013-01-02' in result) + + s = Series(2, index=[ Timestamp('20130111'), NaT ]).append(s) + result = s.to_string() + self.assertTrue('NaT' in result) + def test_timedelta64(self): from pandas import date_range diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py index 9ea5e59447475..cef309fd59503 100644 --- a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -2396,6 +2396,27 @@ def test_value_counts_nunique(self): expected = Series([], dtype=np.int64) assert_series_equal(hist, expected) + # GH 3002, datetime64[ns] + import StringIO + import pandas as pd + f = StringIO.StringIO("xxyyzz20100101PIE\nxxyyzz20100101GUM\nxxyyww20090101EGG\nfoofoo20080909PIE") + df = pd.read_fwf(f, 
widths=[6,8,3], names=["person_id", "dt", "food"], parse_dates=["dt"]) + s = df.dt.copy() + result = s.value_counts() + self.assert_(result.index.dtype == 'datetime64[ns]') + + # with NaT + s = s.append(Series({ 4 : pd.NaT })) + result = s.value_counts() + self.assert_(result.index.dtype == 'datetime64[ns]') + + # timedelta64[ns] + from datetime import timedelta + td = df.dt-df.dt+timedelta(1) + result = td.value_counts() + #self.assert_(result.index.dtype == 'timedelta64[ns]') + self.assert_(result.index.dtype == 'int64') + def test_unique(self): # 714 also, dtype=float