diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 691345ad26e58..c609cb04db028 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -316,6 +316,7 @@ Timezones - Bug in :class:`Series` constructor which would coerce tz-aware and tz-naive :class:`Timestamp`s to tz-aware (:issue:`13051`) - Bug in :class:`Index` with ``datetime64[ns, tz]`` dtype that did not localize integer data correctly (:issue:`20964`) - Bug in :class:`DatetimeIndex` where constructing with an integer and tz would not localize correctly (:issue:`12619`) +- Bug in :meth:`DataFrame.describe` and :meth:`Series.describe` where tz-aware datetimes did not show the ``first`` and ``last`` results (:issue:`21328`) Offsets ^^^^^^^ diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 818dd1b408518..65ca467a05840 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -20,7 +20,7 @@ is_bool_dtype, is_categorical_dtype, is_numeric_dtype, - is_datetime64_dtype, + is_datetime64_any_dtype, is_timedelta64_dtype, is_datetime64tz_dtype, is_list_like, @@ -8531,12 +8531,13 @@ def describe_categorical_1d(data): if result[1] > 0: top, freq = objcounts.index[0], objcounts.iloc[0] - if is_datetime64_dtype(data): + if is_datetime64_any_dtype(data): + tz = data.dt.tz asint = data.dropna().values.view('i8') names += ['top', 'freq', 'first', 'last'] - result += [tslib.Timestamp(top), freq, - tslib.Timestamp(asint.min()), - tslib.Timestamp(asint.max())] + result += [tslib.Timestamp(top, tz=tz), freq, + tslib.Timestamp(asint.min(), tz=tz), + tslib.Timestamp(asint.max(), tz=tz)] else: names += ['top', 'freq'] result += [top, freq] diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index d357208813dd8..c0e9b89c1877f 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -417,6 +417,28 @@ def test_describe_timedelta_values(self): "max 5 days 00:00:00 0 days 
05:00:00") assert repr(res) == exp_repr + def test_describe_tz_values(self, tz_naive_fixture): + # GH 21332 + tz = tz_naive_fixture + s1 = Series(range(5)) + start = Timestamp(2018, 1, 1) + end = Timestamp(2018, 1, 5) + s2 = Series(date_range(start, end, tz=tz)) + df = pd.DataFrame({'s1': s1, 's2': s2}) + + expected = DataFrame({'s1': [5, np.nan, np.nan, np.nan, np.nan, np.nan, + 2, 1.581139, 0, 1, 2, 3, 4], + 's2': [5, 5, s2.value_counts().index[0], 1, + start.tz_localize(tz), + end.tz_localize(tz), np.nan, np.nan, + np.nan, np.nan, np.nan, np.nan, np.nan]}, + index=['count', 'unique', 'top', 'freq', 'first', + 'last', 'mean', 'std', 'min', '25%', '50%', + '75%', 'max'] + ) + res = df.describe(include='all') + tm.assert_frame_equal(res, expected) + def test_reduce_mixed_frame(self): # GH 6806 df = DataFrame({ diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index fcfaff9b11002..b574b6dce930c 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -336,6 +336,23 @@ def test_describe(self): index=['count', 'unique', 'top', 'freq']) tm.assert_series_equal(result, expected) + def test_describe_with_tz(self, tz_naive_fixture): + # GH 21332 + tz = tz_naive_fixture + name = tz_naive_fixture + start = Timestamp(2018, 1, 1) + end = Timestamp(2018, 1, 5) + s = Series(date_range(start, end, tz=tz), name=name) + result = s.describe() + expected = Series( + [5, 5, s.value_counts().index[0], 1, start.tz_localize(tz), + end.tz_localize(tz) + ], + name=name, + index=['count', 'unique', 'top', 'freq', 'first', 'last'] + ) + tm.assert_series_equal(result, expected) + def test_argsort(self): self._check_accum_op('argsort', check_dtype=False) argsorted = self.ts.argsort()