Skip to content

Commit 1c6a0e0

Browse files
committed
ENH: show percentiles in timestamp describe (#30164)
1 parent f287794 commit 1c6a0e0

File tree

1 file changed

+21
-22
lines changed

1 file changed

+21
-22
lines changed

pandas/core/generic.py

+21-22
Original file line numberDiff line numberDiff line change
@@ -9776,26 +9776,8 @@ def describe_categorical_1d(data):
97769776
dtype = None
97779777
if result[1] > 0:
97789778
top, freq = objcounts.index[0], objcounts.iloc[0]
9779-
9780-
if is_datetime64_any_dtype(data):
9781-
tz = data.dt.tz
9782-
asint = data.dropna().values.view("i8")
9783-
top = Timestamp(top)
9784-
if top.tzinfo is not None and tz is not None:
9785-
# Don't tz_localize(None) if key is already tz-aware
9786-
top = top.tz_convert(tz)
9787-
else:
9788-
top = top.tz_localize(tz)
9789-
names += ["top", "freq", "first", "last"]
9790-
result += [
9791-
top,
9792-
freq,
9793-
Timestamp(asint.min(), tz=tz),
9794-
Timestamp(asint.max(), tz=tz),
9795-
]
9796-
else:
9797-
names += ["top", "freq"]
9798-
result += [top, freq]
9779+
names += ["top", "freq"]
9780+
result += [top, freq]
97999781

98009782
# If the DataFrame is empty, set 'top' and 'freq' to None
98019783
# to maintain output shape consistency
@@ -9806,11 +9788,28 @@ def describe_categorical_1d(data):
98069788

98079789
return pd.Series(result, index=names, name=data.name, dtype=dtype)
98089790

9791+
def describe_timestamp_1d(data):
    """Summarize datetime-like ``data``: count, mean, min, percentiles, max.

    ``percentiles`` and ``formatted_percentiles`` are free variables that
    must be defined in the surrounding scope; the former is passed to
    ``data.quantile`` and the latter labels the resulting rows.

    Parameters
    ----------
    data : Series
        Datetime-like values, timezone-naive or timezone-aware.

    Returns
    -------
    Series
        The statistics, indexed by
        ``["count", "mean", "min"] + formatted_percentiles + ["max"]`` and
        carrying the same ``name`` as ``data``.
    """
    stat_index = ["count", "mean", "min"] + formatted_percentiles + ["max"]
    # Reduce through the Series itself instead of a raw int64 view of the
    # non-missing values: ndarray.min()/max() raise ValueError on a
    # zero-size array, so empty or all-missing input used to crash here.
    # The Series reductions skip missing values by default and yield NaT
    # when nothing is left to reduce.
    d = (
        [data.count(), data.mean(), data.min()]
        + data.quantile(percentiles).tolist()
        + [data.max()]
    )
    return pd.Series(d, index=stat_index, name=data.name)
9805+
98099806
def describe_1d(data):
98109807
if is_bool_dtype(data):
98119808
return describe_categorical_1d(data)
98129809
elif is_numeric_dtype(data):
98139810
return describe_numeric_1d(data)
9811+
elif is_datetime64_any_dtype(data):
9812+
return describe_timestamp_1d(data)
98149813
elif is_timedelta64_dtype(data):
98159814
return describe_numeric_1d(data)
98169815
else:
@@ -9819,8 +9818,8 @@ def describe_1d(data):
98199818
if self.ndim == 1:
98209819
return describe_1d(self)
98219820
elif (include is None) and (exclude is None):
9822-
# when some numerics are found, keep only numerics
9823-
data = self.select_dtypes(include=[np.number])
9821+
# when some numerics or timestamps are found, keep only those
9822+
data = self.select_dtypes(include=[np.number, np.datetime64])
98249823
if len(data.columns) == 0:
98259824
data = self
98269825
elif include == "all":

0 commit comments

Comments
 (0)