ENH: show percentiles in timestamp describe (#30164)

david-cortes · david-cortes · commit 1c6a0e0f3031 · 2019-12-11T16:26:23.000+02:00
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -9776,26 +9776,8 @@ def describe_categorical_1d(data):
             dtype = None
             if result[1] > 0:
                 top, freq = objcounts.index[0], objcounts.iloc[0]
-
-                if is_datetime64_any_dtype(data):
-                    tz = data.dt.tz
-                    asint = data.dropna().values.view("i8")
-                    top = Timestamp(top)
-                    if top.tzinfo is not None and tz is not None:
-                        # Don't tz_localize(None) if key is already tz-aware
-                        top = top.tz_convert(tz)
-                    else:
-                        top = top.tz_localize(tz)
-                    names += ["top", "freq", "first", "last"]
-                    result += [
-                        top,
-                        freq,
-                        Timestamp(asint.min(), tz=tz),
-                        Timestamp(asint.max(), tz=tz),
-                    ]
-                else:
-                    names += ["top", "freq"]
-                    result += [top, freq]
+                names += ["top", "freq"]
+                result += [top, freq]
 
             # If the DataFrame is empty, set 'top' and 'freq' to None
             # to maintain output shape consistency
@@ -9806,11 +9788,28 @@ def describe_categorical_1d(data):
 
             return pd.Series(result, index=names, name=data.name, dtype=dtype)
 
+        def describe_timestamp_1d(data):
+            """
+            Summarize a datetime64 Series: count, mean, min, percentiles, max.
+
+            Uses the Series' own NaT-skipping reductions, so the time zone is
+            kept and empty or all-NaT data yields NaT instead of raising.
+            """
+            stat_index = ["count", "mean", "min"] + formatted_percentiles + ["max"]
+            d = (
+                [data.count(), data.mean(), data.min()]
+                + data.quantile(percentiles).tolist()
+                + [data.max()]
+            )
+            return pd.Series(d, index=stat_index, name=data.name)
+
         def describe_1d(data):
             if is_bool_dtype(data):
                 return describe_categorical_1d(data)
             elif is_numeric_dtype(data):
                 return describe_numeric_1d(data)
+            elif is_datetime64_any_dtype(data):
+                return describe_timestamp_1d(data)
             elif is_timedelta64_dtype(data):
                 return describe_numeric_1d(data)
             else:
@@ -9819,8 +9818,8 @@ def describe_1d(data):
         if self.ndim == 1:
             return describe_1d(self)
         elif (include is None) and (exclude is None):
-            # when some numerics are found, keep only numerics
-            data = self.select_dtypes(include=[np.number])
+            # when some numerics or timestamps are found, keep only those
+            data = self.select_dtypes(include=[np.number, np.datetime64])
             if len(data.columns) == 0:
                 data = self
         elif include == "all":