From d95e2e21b6daf256a53cf5d3c122f825d522a5e0 Mon Sep 17 00:00:00 2001 From: sinhrks Date: Sat, 18 Apr 2015 10:08:59 +0900 Subject: [PATCH] BUG: GroupBy.size doesn't attach index name properly if grouped by TimeGrouper --- doc/source/whatsnew/v0.16.1.txt | 2 +- pandas/core/groupby.py | 6 ++++-- pandas/tseries/tests/test_resample.py | 16 +++++++++------- 3 files changed, 14 insertions(+), 10 deletions(-) diff --git a/doc/source/whatsnew/v0.16.1.txt b/doc/source/whatsnew/v0.16.1.txt index 659aa6786b366..7639408d2c644 100755 --- a/doc/source/whatsnew/v0.16.1.txt +++ b/doc/source/whatsnew/v0.16.1.txt @@ -125,7 +125,7 @@ Bug Fixes - Bug in ``to_msgpack`` and ``read_msgpack`` zlib and blosc compression support (:issue:`9783`) - Bug in unequal comparisons between a ``Series`` of dtype `"category"` and a scalar (e.g. ``Series(Categorical(list("abc"), categories=list("cba"), ordered=True)) > "b"``, which wouldn't use the order of the categories but use the lexicographical order. (:issue:`9848`) - +- Bug in ``GroupBy.size`` where the index name was not attached to the result when grouped by ``TimeGrouper`` (:issue:`9925`) diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 6b2c9639ac71f..4ef3bbce85467 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -1780,12 +1780,14 @@ def size(self): Compute group sizes """ - base = Series(np.zeros(len(self.result_index), dtype=np.int64), - index=self.result_index) + index = self.result_index + base = Series(np.zeros(len(index), dtype=np.int64), index=index) indices = self.indices for k, v in compat.iteritems(indices): indices[k] = len(v) bin_counts = Series(indices, dtype=np.int64) + # give bin_counts.index the same name as the result index so the addition preserves it + bin_counts.index.name = index.name result = base.add(bin_counts, fill_value=0) # addition with fill_value changes dtype to float64 result = result.astype(np.int64) diff --git a/pandas/tseries/tests/test_resample.py b/pandas/tseries/tests/test_resample.py index c338bbeae79c7..2ae311e044a75 
100644 --- a/pandas/tseries/tests/test_resample.py +++ b/pandas/tseries/tests/test_resample.py @@ -875,23 +875,23 @@ def test_resmaple_dst_anchor(self): # 5172 dti = DatetimeIndex([datetime(2012, 11, 4, 23)], tz='US/Eastern') df = DataFrame([5], index=dti) - assert_frame_equal(df.resample(rule='D', how='sum'), + assert_frame_equal(df.resample(rule='D', how='sum'), DataFrame([5], index=df.index.normalize())) df.resample(rule='MS', how='sum') assert_frame_equal(df.resample(rule='MS', how='sum'), - DataFrame([5], index=DatetimeIndex([datetime(2012, 11, 1)], + DataFrame([5], index=DatetimeIndex([datetime(2012, 11, 1)], tz='US/Eastern'))) dti = date_range('2013-09-30', '2013-11-02', freq='30Min', tz='Europe/Paris') values = range(dti.size) df = DataFrame({"a": values, "b": values, "c": values}, index=dti, dtype='int64') how = {"a": "min", "b": "max", "c": "count"} - + assert_frame_equal(df.resample("W-MON", how=how)[["a", "b", "c"]], DataFrame({"a": [0, 48, 384, 720, 1056, 1394], "b": [47, 383, 719, 1055, 1393, 1586], "c": [48, 336, 336, 336, 338, 193]}, - index=date_range('9/30/2013', '11/4/2013', + index=date_range('9/30/2013', '11/4/2013', freq='W-MON', tz='Europe/Paris')), 'W-MON Frequency') @@ -899,7 +899,7 @@ def test_resmaple_dst_anchor(self): DataFrame({"a": [0, 48, 720, 1394], "b": [47, 719, 1393, 1586], "c": [48, 672, 674, 193]}, - index=date_range('9/30/2013', '11/11/2013', + index=date_range('9/30/2013', '11/11/2013', freq='2W-MON', tz='Europe/Paris')), '2W-MON Frequency') @@ -907,7 +907,7 @@ def test_resmaple_dst_anchor(self): DataFrame({"a": [0, 48, 1538], "b": [47, 1537, 1586], "c": [48, 1490, 49]}, - index=date_range('9/1/2013', '11/1/2013', + index=date_range('9/1/2013', '11/1/2013', freq='MS', tz='Europe/Paris')), 'MS Frequency') @@ -915,7 +915,7 @@ def test_resmaple_dst_anchor(self): DataFrame({"a": [0, 1538], "b": [1537, 1586], "c": [1538, 49]}, - index=date_range('9/1/2013', '11/1/2013', + index=date_range('9/1/2013', '11/1/2013', freq='2MS', 
tz='Europe/Paris')), '2MS Frequency') @@ -1553,6 +1553,8 @@ def test_aggregate_with_nat(self): expected.index = date_range(start='2013-01-01', freq='D', periods=5, name='key') dt_result = getattr(dt_grouped, func)() assert_series_equal(expected, dt_result) + # GH 9925 + self.assertEqual(dt_result.index.name, 'key') # if NaT is included, 'var', 'std', 'mean', 'first','last' and 'nth' doesn't work yet