Skip to content

Commit c9a7245

Browse files
committed
Merge pull request #3003 from jreback/value_counts_3002
BUG: fixed value_counts with datetime64[ns], GH 3002
2 parents e13306a + 338a721 commit c9a7245

File tree

5 files changed

+53
-3
lines changed

5 files changed

+53
-3
lines changed

RELEASE.rst

+3
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,8 @@ pandas 0.11.0
136136

137137
- Bug in in-place putmasking on an ``integer`` series that needs to be converted to ``float`` (GH2746_)
138138
- Bug in argsort of ``datetime64[ns]`` Series with ``NaT`` (GH2967_)
139+
- Bug in value_counts of ``datetime64[ns]`` Series (GH3002_)
140+
- Fixed printing of ``NaT`` in an index
139141
- Bug in idxmin/idxmax of ``datetime64[ns]`` Series with ``NaT`` (GH2982_)
140142
- Bug in ``icol`` with negative indices was producing incorrect return values (see GH2922_)
141143

@@ -162,6 +164,7 @@ pandas 0.11.0
162164
.. _GH2967: https://github.com/pydata/pandas/issues/2967
163165
.. _GH2982: https://github.com/pydata/pandas/issues/2982
164166
.. _GH2989: https://github.com/pydata/pandas/issues/2989
167+
.. _GH3002: https://github.com/pydata/pandas/issues/3002
165168

166169

167170
pandas 0.10.1

pandas/core/algorithms.py

+8
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,14 @@ def value_counts(values, sort=True, ascending=False):
170170
if com.is_integer_dtype(values.dtype):
171171
values = com._ensure_int64(values)
172172
keys, counts = htable.value_count_int64(values)
173+
elif issubclass(values.dtype.type, (np.datetime64,np.timedelta64)):
174+
175+
dtype = values.dtype
176+
values = values.view(np.int64)
177+
keys, counts = htable.value_count_int64(values)
178+
179+
# convert the keys back to the dtype we came in
180+
keys = Series(keys,dtype=dtype)
173181
else:
174182
mask = com.isnull(values)
175183
values = com._ensure_object(values)

pandas/core/index.py

+9-3
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
from pandas.lib import Timestamp
1414

1515
from pandas.util.decorators import cache_readonly
16+
from pandas.core.common import isnull
1617
import pandas.core.common as com
1718
from pandas.util import py3compat
1819
from pandas.core.config import get_option
@@ -94,6 +95,8 @@ def __new__(cls, data, dtype=None, copy=False, name=None):
9495
return Index(result.to_pydatetime(), dtype=_o_dtype)
9596
else:
9697
return result
98+
elif issubclass(data.dtype.type, np.timedelta64):
99+
return Int64Index(data, copy=copy, name=name)
97100

98101
if dtype is not None:
99102
try:
@@ -435,9 +438,12 @@ def format(self, name=False, formatter=None):
435438
zero_time = time(0, 0)
436439
result = []
437440
for dt in self:
438-
if dt.time() != zero_time or dt.tzinfo is not None:
439-
return header + [u'%s' % x for x in self]
440-
result.append(u'%d-%.2d-%.2d' % (dt.year, dt.month, dt.day))
441+
if isnull(dt):
442+
result.append(u'NaT')
443+
else:
444+
if dt.time() != zero_time or dt.tzinfo is not None:
445+
return header + [u'%s' % x for x in self]
446+
result.append(u'%d-%.2d-%.2d' % (dt.year, dt.month, dt.day))
441447
return header + result
442448

443449
values = self.values

pandas/tests/test_format.py

+12
Original file line numberDiff line numberDiff line change
@@ -1226,6 +1226,18 @@ def test_float_trim_zeros(self):
12261226
else:
12271227
self.assert_('+10' in line)
12281228

1229+
def test_datetimeindex(self):
1230+
1231+
from pandas import date_range, NaT, Timestamp
1232+
index = date_range('20130102',periods=6)
1233+
s = Series(1,index=index)
1234+
result = s.to_string()
1235+
self.assertTrue('2013-01-02' in result)
1236+
1237+
s = Series(2, index=[ Timestamp('20130111'), NaT ]).append(s)
1238+
result = s.to_string()
1239+
self.assertTrue('NaT' in result)
1240+
12291241
def test_timedelta64(self):
12301242

12311243
from pandas import date_range

pandas/tests/test_series.py

+21
Original file line numberDiff line numberDiff line change
@@ -2396,6 +2396,27 @@ def test_value_counts_nunique(self):
23962396
expected = Series([], dtype=np.int64)
23972397
assert_series_equal(hist, expected)
23982398

2399+
# GH 3002, datetime64[ns]
2400+
import StringIO
2401+
import pandas as pd
2402+
f = StringIO.StringIO("xxyyzz20100101PIE\nxxyyzz20100101GUM\nxxyyww20090101EGG\nfoofoo20080909PIE")
2403+
df = pd.read_fwf(f, widths=[6,8,3], names=["person_id", "dt", "food"], parse_dates=["dt"])
2404+
s = df.dt.copy()
2405+
result = s.value_counts()
2406+
self.assert_(result.index.dtype == 'datetime64[ns]')
2407+
2408+
# with NaT
2409+
s = s.append(Series({ 4 : pd.NaT }))
2410+
result = s.value_counts()
2411+
self.assert_(result.index.dtype == 'datetime64[ns]')
2412+
2413+
# timedelta64[ns]
2414+
from datetime import timedelta
2415+
td = df.dt-df.dt+timedelta(1)
2416+
result = td.value_counts()
2417+
#self.assert_(result.index.dtype == 'timedelta64[ns]')
2418+
self.assert_(result.index.dtype == 'int64')
2419+
23992420
def test_unique(self):
24002421

24012422
# 714 also, dtype=float

0 commit comments

Comments
 (0)