From 39f0e676c463cbb072364797cd8e0fe1021f80b1 Mon Sep 17 00:00:00 2001 From: Louis Potok Date: Tue, 5 Jun 2018 14:26:26 -0700 Subject: [PATCH 1/8] Series.describe returns first and last for tz-aware datetimes GH issue 21328 --- pandas/core/generic.py | 11 ++++++----- pandas/tests/series/test_analytics.py | 25 +++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 5 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 818dd1b408518..65ca467a05840 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -20,7 +20,7 @@ is_bool_dtype, is_categorical_dtype, is_numeric_dtype, - is_datetime64_dtype, + is_datetime64_any_dtype, is_timedelta64_dtype, is_datetime64tz_dtype, is_list_like, @@ -8531,12 +8531,13 @@ def describe_categorical_1d(data): if result[1] > 0: top, freq = objcounts.index[0], objcounts.iloc[0] - if is_datetime64_dtype(data): + if is_datetime64_any_dtype(data): + tz = data.dt.tz asint = data.dropna().values.view('i8') names += ['top', 'freq', 'first', 'last'] - result += [tslib.Timestamp(top), freq, - tslib.Timestamp(asint.min()), - tslib.Timestamp(asint.max())] + result += [tslib.Timestamp(top, tz=tz), freq, + tslib.Timestamp(asint.min(), tz=tz), + tslib.Timestamp(asint.max(), tz=tz)] else: names += ['top', 'freq'] result += [top, freq] diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index fcfaff9b11002..4edafae6020ee 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -336,6 +336,31 @@ def test_describe(self): index=['count', 'unique', 'top', 'freq']) tm.assert_series_equal(result, expected) + start = Timestamp(2018, 1, 1) + end = Timestamp(2018, 1, 5) + s = Series(date_range(start, end), name="tz-naive") + result = s.describe() + expected = Series( + [5, 5, start, 1, start, end], + name="tz-naive", + index=['count', 'unique', 'top', 'freq', 'first', 'last'] + ) + tm.assert_series_equal(result, expected) + + start = 
Timestamp(2018, 1, 1) + end = Timestamp(2018, 1, 5) + tz = "US/Eastern" + s = Series(date_range(start, end, tz=tz), name="tz-aware") + result = s.describe() + expected = Series( + [5, 5, start.tz_localize(tz), 1, start.tz_localize(tz), + end.tz_localize(tz) + ], + name="tz-aware", + index=['count', 'unique', 'top', 'freq', 'first', 'last'] + ) + tm.assert_series_equal(result, expected) + def test_argsort(self): self._check_accum_op('argsort', check_dtype=False) argsorted = self.ts.argsort() From be19766120ce37f659ef812ab8026c22c987612e Mon Sep 17 00:00:00 2001 From: Louis Potok Date: Tue, 5 Jun 2018 15:00:11 -0700 Subject: [PATCH 2/8] parameterize tests --- pandas/tests/series/test_analytics.py | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index 4edafae6020ee..41d12bc0935f8 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -336,27 +336,21 @@ def test_describe(self): index=['count', 'unique', 'top', 'freq']) tm.assert_series_equal(result, expected) + @pytest.mark.parametrize("tz", [None, "US/Eastern"]) + def test_describe_dt(self, tz): + if tz is None: + name = "tz-naive" + else: + name = "tz-aware" start = Timestamp(2018, 1, 1) end = Timestamp(2018, 1, 5) - s = Series(date_range(start, end), name="tz-naive") - result = s.describe() - expected = Series( - [5, 5, start, 1, start, end], - name="tz-naive", - index=['count', 'unique', 'top', 'freq', 'first', 'last'] - ) - tm.assert_series_equal(result, expected) - - start = Timestamp(2018, 1, 1) - end = Timestamp(2018, 1, 5) - tz = "US/Eastern" - s = Series(date_range(start, end, tz=tz), name="tz-aware") + s = Series(date_range(start, end, tz=tz), name=name) result = s.describe() expected = Series( [5, 5, start.tz_localize(tz), 1, start.tz_localize(tz), end.tz_localize(tz) ], - name="tz-aware", + name=name, index=['count', 'unique', 'top', 'freq', 
'first', 'last'] ) tm.assert_series_equal(result, expected) From 7ddf70603a664aafaaaaf73fdbdf07009c6a88b2 Mon Sep 17 00:00:00 2001 From: Louis Potok Date: Wed, 6 Jun 2018 09:17:19 -0700 Subject: [PATCH 3/8] parameterize names --- pandas/tests/series/test_analytics.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index 41d12bc0935f8..561c473d2cb96 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -336,12 +336,11 @@ def test_describe(self): index=['count', 'unique', 'top', 'freq']) tm.assert_series_equal(result, expected) - @pytest.mark.parametrize("tz", [None, "US/Eastern"]) - def test_describe_dt(self, tz): - if tz is None: - name = "tz-naive" - else: - name = "tz-aware" + @pytest.mark.parametrize("tz, name", [ + (None, "tz-naive"), + ("US/Eastern", "tz-aware") + ]) + def test_describe_dt(self, tz, name): start = Timestamp(2018, 1, 1) end = Timestamp(2018, 1, 5) s = Series(date_range(start, end, tz=tz), name=name) From 3eeec04fae6d5b742d886b4ecd6f74cf54aa2f7b Mon Sep 17 00:00:00 2001 From: Louis Potok Date: Tue, 19 Jun 2018 10:52:12 -0700 Subject: [PATCH 4/8] use tz_naive_fixture and fix top --- pandas/tests/series/test_analytics.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index 561c473d2cb96..210db831a79b9 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -336,17 +336,15 @@ def test_describe(self): index=['count', 'unique', 'top', 'freq']) tm.assert_series_equal(result, expected) - @pytest.mark.parametrize("tz, name", [ - (None, "tz-naive"), - ("US/Eastern", "tz-aware") - ]) - def test_describe_dt(self, tz, name): + def test_describe_with_tz(self, tz_naive_fixture): + tz = tz_naive_fixture + name = tz_naive_fixture start = Timestamp(2018, 1, 1) end = 
Timestamp(2018, 1, 5) s = Series(date_range(start, end, tz=tz), name=name) result = s.describe() expected = Series( - [5, 5, start.tz_localize(tz), 1, start.tz_localize(tz), + [5, 5, s.value_counts().index[0], 1, start.tz_localize(tz), end.tz_localize(tz) ], name=name, From 0a9e0c65b9e32ea7b4f9ad287318172fb593cc48 Mon Sep 17 00:00:00 2001 From: Louis Potok Date: Tue, 19 Jun 2018 10:52:30 -0700 Subject: [PATCH 5/8] add tz describe test for df --- pandas/tests/frame/test_analytics.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index d357208813dd8..a6f79dca0f7cf 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -417,6 +417,27 @@ def test_describe_timedelta_values(self): "max 5 days 00:00:00 0 days 05:00:00") assert repr(res) == exp_repr + def test_describe_tz_values(self, tz_naive_fixture): + tz = tz_naive_fixture + s1 = Series(range(5)) + start = Timestamp(2018, 1, 1) + end = Timestamp(2018, 1, 5) + s2 = Series(date_range(start, end, tz=tz)) + df = pd.DataFrame({'s1': s1, 's2': s2}) + + expected = DataFrame({'s1': [5, np.nan, np.nan, np.nan, np.nan, np.nan, + 2, 1.581139, 0, 1, 2, 3, 4], + 's2': [5, 5, s2.value_counts().index[0], 1, + start.tz_localize(tz), + end.tz_localize(tz), np.nan, np.nan, + np.nan, np.nan, np.nan, np.nan, np.nan]}, + index=['count', 'unique', 'top', 'freq', 'first', + 'last', 'mean', 'std', 'min', '25%', '50%', + '75%', 'max'] + ) + res = df.describe(include='all') + tm.assert_frame_equal(res, expected) + def test_reduce_mixed_frame(self): # GH 6806 df = DataFrame({ From 7fe331deac7e93a77d72da89fd071050111892ff Mon Sep 17 00:00:00 2001 From: Louis Potok Date: Tue, 19 Jun 2018 10:54:47 -0700 Subject: [PATCH 6/8] add bugfix to whatsnew --- doc/source/whatsnew/v0.24.0.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 
691345ad26e58..aa173f23c6dd8 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -295,6 +295,8 @@ Datetimelike ^^^^^^^^^^^^ - Fixed bug where two :class:`DateOffset` objects with different ``normalize`` attributes could evaluate as equal (:issue:`21404`) +- Bug in :class:`Index` with ``datetime64[ns, tz]`` dtype that did not localize integer data correctly (:issue:`20964`) +- Fixed bug where `describe` method on tz-aware datetimes did not show `first` and `last` result (:issue:`21328`) Timedelta ^^^^^^^^^ From dd13740a90acd43b574608724619c52a0e0ff72f Mon Sep 17 00:00:00 2001 From: Louis Potok Date: Thu, 5 Jul 2018 17:50:10 -0700 Subject: [PATCH 7/8] fix formatting in whatsnew and add issue number to tests --- doc/source/whatsnew/v0.24.0.txt | 2 +- pandas/tests/frame/test_analytics.py | 1 + pandas/tests/series/test_analytics.py | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index aa173f23c6dd8..f0d62627462bb 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -296,7 +296,7 @@ Datetimelike - Fixed bug where two :class:`DateOffset` objects with different ``normalize`` attributes could evaluate as equal (:issue:`21404`) - Bug in :class:`Index` with ``datetime64[ns, tz]`` dtype that did not localize integer data correctly (:issue:`20964`) -- Fixed bug where `describe` method on tz-aware datetimes did not show `first` and `last` result (:issue:`21328`) +- Fixed bug where :meth:`DataFrame.describe` and :meth:`Series.describe` on tz-aware datetimes did not show `first` and `last` result (:issue:`21328`) Timedelta ^^^^^^^^^ diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index a6f79dca0f7cf..c0e9b89c1877f 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -418,6 +418,7 @@ def test_describe_timedelta_values(self): assert repr(res) == 
exp_repr def test_describe_tz_values(self, tz_naive_fixture): + # GH 21332 tz = tz_naive_fixture s1 = Series(range(5)) start = Timestamp(2018, 1, 1) diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index 210db831a79b9..b574b6dce930c 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -337,6 +337,7 @@ def test_describe(self): tm.assert_series_equal(result, expected) def test_describe_with_tz(self, tz_naive_fixture): + # GH 21332 tz = tz_naive_fixture name = tz_naive_fixture start = Timestamp(2018, 1, 1) From 0af475822bd69ec074a97682f8520b4da1fe29b1 Mon Sep 17 00:00:00 2001 From: Louis Potok Date: Fri, 6 Jul 2018 07:29:17 -0700 Subject: [PATCH 8/8] final bugfix --- doc/source/whatsnew/v0.24.0.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index f0d62627462bb..c609cb04db028 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -295,8 +295,6 @@ Datetimelike ^^^^^^^^^^^^ - Fixed bug where two :class:`DateOffset` objects with different ``normalize`` attributes could evaluate as equal (:issue:`21404`) -- Bug in :class:`Index` with ``datetime64[ns, tz]`` dtype that did not localize integer data correctly (:issue:`20964`) -- Fixed bug where :meth:`DataFrame.describe` and :meth:`Series.describe` on tz-aware datetimes did not show `first` and `last` result (:issue:`21328`) Timedelta ^^^^^^^^^ @@ -318,6 +316,7 @@ Timezones - Bug in :class:`Series` constructor which would coerce tz-aware and tz-naive :class:`Timestamp`s to tz-aware (:issue:`13051`) - Bug in :class:`Index` with ``datetime64[ns, tz]`` dtype that did not localize integer data correctly (:issue:`20964`) - Bug in :class:`DatetimeIndex` where constructing with an integer and tz would not localize correctly (:issue:`12619`) +- Fixed bug where :meth:`DataFrame.describe` and :meth:`Series.describe` on tz-aware 
datetimes did not show ``first`` and ``last`` results (:issue:`21328`) Offsets ^^^^^^^