ENH: show percentiles in timestamp describe (#30164)

david-cortes · david-cortes · commit c72764a618d9 · 2019-12-12T15:17:59.000+02:00
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -28,7 +28,7 @@
 
 from pandas._config import config
 
-from pandas._libs import Timestamp, iNaT, properties
+from pandas._libs import Timestamp, iNaT, NaT, properties
 from pandas.compat import set_function_name
 from pandas.compat._optional import import_optional_dependency
 from pandas.compat.numpy import function as nv
@@ -9776,26 +9776,8 @@ def describe_categorical_1d(data):
             dtype = None
             if result[1] > 0:
                 top, freq = objcounts.index[0], objcounts.iloc[0]
-
-                if is_datetime64_any_dtype(data):
-                    tz = data.dt.tz
-                    asint = data.dropna().values.view("i8")
-                    top = Timestamp(top)
-                    if top.tzinfo is not None and tz is not None:
-                        # Don't tz_localize(None) if key is already tz-aware
-                        top = top.tz_convert(tz)
-                    else:
-                        top = top.tz_localize(tz)
-                    names += ["top", "freq", "first", "last"]
-                    result += [
-                        top,
-                        freq,
-                        Timestamp(asint.min(), tz=tz),
-                        Timestamp(asint.max(), tz=tz),
-                    ]
-                else:
-                    names += ["top", "freq"]
-                    result += [top, freq]
+                names += ["top", "freq"]
+                result += [top, freq]
 
             # If the DataFrame is empty, set 'top' and 'freq' to None
             # to maintain output shape consistency
@@ -9806,11 +9788,30 @@ def describe_categorical_1d(data):
 
             return pd.Series(result, index=names, name=data.name, dtype=dtype)
 
+        def describe_timestamp_1d(data):
+            # GH-30164
+            tz = data.dt.tz
+            asint = data.dropna().values.view("i8")
+            is_empty = asint.shape[0] == 0
+            stat_index = ["count", "mean", "min"] + formatted_percentiles + ["max"]
+            d = (
+                [
+                    asint.shape[0],
+                    Timestamp(asint.mean(), tz=tz) if not is_empty else NaT,
+                    Timestamp(asint.min(), tz=tz) if not is_empty else NaT,
+                ]
+                + data.quantile(percentiles).tolist()
+                + [Timestamp(asint.max(), tz=tz) if not is_empty else NaT]
+            )
+            return pd.Series(d, index=stat_index, name=data.name)
+
         def describe_1d(data):
             if is_bool_dtype(data):
                 return describe_categorical_1d(data)
             elif is_numeric_dtype(data):
                 return describe_numeric_1d(data)
+            elif is_datetime64_any_dtype(data):
+                return describe_timestamp_1d(data)
             elif is_timedelta64_dtype(data):
                 return describe_numeric_1d(data)
             else:
diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py
@@ -781,52 +781,19 @@ def test_describe_tz_values(self, tz_naive_fixture):
 
         expected = DataFrame(
             {
-                "s1": [
-                    5,
-                    np.nan,
-                    np.nan,
-                    np.nan,
-                    np.nan,
-                    np.nan,
-                    2,
-                    1.581139,
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                ],
+                "s1": [5, 2, 0, 1, 2, 3, 4, 1.581139,],
                 "s2": [
                     5,
-                    5,
-                    s2.value_counts().index[0],
-                    1,
+                    Timestamp(2018, 1, 3).tz_localize(tz),
                     start.tz_localize(tz),
+                    s2[1],
+                    s2[2],
+                    s2[3],
                     end.tz_localize(tz),
                     np.nan,
-                    np.nan,
-                    np.nan,
-                    np.nan,
-                    np.nan,
-                    np.nan,
-                    np.nan,
                 ],
             },
-            index=[
-                "count",
-                "unique",
-                "top",
-                "freq",
-                "first",
-                "last",
-                "mean",
-                "std",
-                "min",
-                "25%",
-                "50%",
-                "75%",
-                "max",
-            ],
+            index=["count", "mean", "min", "25%", "50%", "75%", "max", "std",],
         )
         result = df.describe(include="all")
         tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py
@@ -77,14 +77,15 @@ def test_describe_with_tz(self, tz_naive_fixture):
         expected = Series(
             [
                 5,
-                5,
-                s.value_counts().index[0],
-                1,
+                Timestamp(2018, 1, 3).tz_localize(tz),
                 start.tz_localize(tz),
+                s[1],
+                s[2],
+                s[3],
                 end.tz_localize(tz),
             ],
             name=name,
-            index=["count", "unique", "top", "freq", "first", "last"],
+            index=["count", "mean", "min", "25%", "50%", "75%", "max"],
         )
         tm.assert_series_equal(result, expected)