Skip to content

Commit 4b5a7c7

Browse files
committed
BUG: DTI.value_counts doesn't preserve tz
1 parent 415fbfc commit 4b5a7c7

File tree

4 files changed

+75
-16
lines changed

4 files changed

+75
-16
lines changed

doc/source/v0.15.0.txt

+6
Original file line numberDiff line numberDiff line change
@@ -290,6 +290,12 @@ Bug Fixes
290290

291291

292292

293+
- Bug in ``DatetimeIndex.value_counts`` where the timezone (``tz``) was not preserved (:issue:`7735`)
294+
- Bug in ``PeriodIndex.value_counts`` where the result index was an ``Int64Index`` instead of a ``PeriodIndex`` (:issue:`7735`)
295+
296+
297+
298+
293299

294300

295301

pandas/core/algorithms.py

+6-4
Original file line numberDiff line numberDiff line change
@@ -197,6 +197,7 @@ def value_counts(values, sort=True, ascending=False, normalize=False,
197197
from pandas.core.series import Series
198198
from pandas.tools.tile import cut
199199

200+
is_period = getattr(values, 'inferred_type', None) == 'period'
200201
values = Series(values).values
201202
is_category = com.is_categorical_dtype(values.dtype)
202203

@@ -212,11 +213,8 @@ def value_counts(values, sort=True, ascending=False, normalize=False,
212213
values = cat.codes
213214

214215
dtype = values.dtype
215-
if com.is_integer_dtype(dtype):
216-
values = com._ensure_int64(values)
217-
keys, counts = htable.value_count_int64(values)
218216

219-
elif issubclass(values.dtype.type, (np.datetime64, np.timedelta64)):
217+
if issubclass(values.dtype.type, (np.datetime64, np.timedelta64)) or is_period:
220218
values = values.view(np.int64)
221219
keys, counts = htable.value_count_int64(values)
222220

@@ -227,6 +225,10 @@ def value_counts(values, sort=True, ascending=False, normalize=False,
227225
# convert the keys back to the dtype we came in
228226
keys = keys.astype(dtype)
229227

228+
elif com.is_integer_dtype(dtype):
229+
values = com._ensure_int64(values)
230+
keys, counts = htable.value_count_int64(values)
231+
230232
else:
231233
values = com._ensure_object(values)
232234
mask = com.isnull(values)

pandas/core/base.py

+12-4
Original file line numberDiff line numberDiff line change
@@ -275,8 +275,18 @@ def value_counts(self, normalize=False, sort=True, ascending=False,
275275
counts : Series
276276
"""
277277
from pandas.core.algorithms import value_counts
278-
return value_counts(self.values, sort=sort, ascending=ascending,
279-
normalize=normalize, bins=bins, dropna=dropna)
278+
from pandas.tseries.api import DatetimeIndex, PeriodIndex
279+
result = value_counts(self, sort=sort, ascending=ascending,
280+
normalize=normalize, bins=bins, dropna=dropna)
281+
282+
if isinstance(self, PeriodIndex):
283+
# preserve freq
284+
result.index = self._simple_new(result.index.values, self.name,
285+
freq=self.freq)
286+
elif isinstance(self, DatetimeIndex):
287+
result.index = self._simple_new(result.index.values, self.name,
288+
tz=getattr(self, 'tz', None))
289+
return result
280290

281291
def unique(self):
282292
"""
@@ -542,5 +552,3 @@ def __sub__(self, other):
542552

543553
def _add_delta(self, other):
544554
return NotImplemented
545-
546-

pandas/tests/test_base.py

+51-8
Original file line numberDiff line numberDiff line change
@@ -267,8 +267,9 @@ def test_value_counts_unique_nunique(self):
267267
# skips int64 because it doesn't allow to include nan or None
268268
continue
269269

270-
if o.values.dtype == 'datetime64[ns]' and _np_version_under1p7:
271-
# Unable to assign None
270+
if ((isinstance(o, Int64Index) and not isinstance(o,
271+
(DatetimeIndex, PeriodIndex)))):
272+
# skip int64 because it cannot hold nan or None
272273
continue
273274

274275
# special assign to the numpy array
@@ -283,12 +284,8 @@ def test_value_counts_unique_nunique(self):
283284
else:
284285
o = klass(np.repeat(values, range(1, len(o) + 1)))
285286

286-
if isinstance(o, DatetimeIndex):
287-
expected_s_na = Series(list(range(10, 2, -1)) + [3], index=values[9:0:-1])
288-
expected_s = Series(list(range(10, 2, -1)), index=values[9:1:-1])
289-
else:
290-
expected_s_na = Series(list(range(10, 2, -1)) +[3], index=values[9:0:-1], dtype='int64')
291-
expected_s = Series(list(range(10, 2, -1)), index=values[9:1:-1], dtype='int64')
287+
expected_s_na = Series(list(range(10, 2, -1)) +[3], index=values[9:0:-1], dtype='int64')
288+
expected_s = Series(list(range(10, 2, -1)), index=values[9:1:-1], dtype='int64')
292289

293290
tm.assert_series_equal(o.value_counts(dropna=False), expected_s_na)
294291
tm.assert_series_equal(o.value_counts(), expected_s)
@@ -709,6 +706,28 @@ def test_sub_isub(self):
709706
rng -= 1
710707
tm.assert_index_equal(rng, expected)
711708

709+
def test_value_counts(self):
710+
# GH 7735
711+
for tz in [None, 'UTC', 'Asia/Tokyo', 'US/Eastern']:
712+
idx = pd.date_range('2011-01-01 09:00', freq='H', periods=10)
713+
# create repeated values: the n-th element (0-based) is repeated n+1 times
714+
idx = DatetimeIndex(np.repeat(idx.values, range(1, len(idx) + 1)), tz=tz)
715+
716+
exp_idx = pd.date_range('2011-01-01 18:00', freq='-1H', periods=10, tz=tz)
717+
expected = Series(range(10, 0, -1), index=exp_idx, dtype='int64')
718+
tm.assert_series_equal(idx.value_counts(), expected)
719+
720+
idx = DatetimeIndex(['2013-01-01 09:00', '2013-01-01 09:00', '2013-01-01 09:00',
721+
'2013-01-01 08:00', '2013-01-01 08:00', pd.NaT], tz=tz)
722+
723+
exp_idx = DatetimeIndex(['2013-01-01 09:00', '2013-01-01 08:00'], tz=tz)
724+
expected = Series([3, 2], index=exp_idx)
725+
tm.assert_series_equal(idx.value_counts(), expected)
726+
727+
exp_idx = DatetimeIndex(['2013-01-01 09:00', '2013-01-01 08:00', pd.NaT], tz=tz)
728+
expected = Series([3, 2, 1], index=exp_idx)
729+
tm.assert_series_equal(idx.value_counts(dropna=False), expected)
730+
712731

713732
class TestPeriodIndexOps(Ops):
714733
_allowed = '_allow_period_index_ops'
@@ -968,6 +987,30 @@ def test_sub_isub(self):
968987
rng -= 1
969988
tm.assert_index_equal(rng, expected)
970989

990+
def test_value_counts(self):
991+
# GH 7735
992+
idx = pd.period_range('2011-01-01 09:00', freq='H', periods=10)
993+
# create repeated values: the n-th element (0-based) is repeated n+1 times
994+
idx = PeriodIndex(np.repeat(idx.values, range(1, len(idx) + 1)), freq='H')
995+
996+
exp_idx = PeriodIndex(['2011-01-01 18:00', '2011-01-01 17:00', '2011-01-01 16:00',
997+
'2011-01-01 15:00', '2011-01-01 14:00', '2011-01-01 13:00',
998+
'2011-01-01 12:00', '2011-01-01 11:00', '2011-01-01 10:00',
999+
'2011-01-01 09:00'], freq='H')
1000+
expected = Series(range(10, 0, -1), index=exp_idx, dtype='int64')
1001+
tm.assert_series_equal(idx.value_counts(), expected)
1002+
1003+
idx = PeriodIndex(['2013-01-01 09:00', '2013-01-01 09:00', '2013-01-01 09:00',
1004+
'2013-01-01 08:00', '2013-01-01 08:00', pd.NaT], freq='H')
1005+
1006+
exp_idx = PeriodIndex(['2013-01-01 09:00', '2013-01-01 08:00'], freq='H')
1007+
expected = Series([3, 2], index=exp_idx)
1008+
tm.assert_series_equal(idx.value_counts(), expected)
1009+
1010+
exp_idx = PeriodIndex(['2013-01-01 09:00', '2013-01-01 08:00', pd.NaT], freq='H')
1011+
expected = Series([3, 2, 1], index=exp_idx)
1012+
tm.assert_series_equal(idx.value_counts(dropna=False), expected)
1013+
9711014

9721015
if __name__ == '__main__':
9731016
import nose

0 commit comments

Comments
 (0)