Skip to content

Commit c72764a

Browse files
committed
ENH: show percentiles in timestamp describe (#30164)
1 parent f287794 commit c72764a

File tree

3 files changed

+33
-64
lines changed

3 files changed

+33
-64
lines changed

pandas/core/generic.py

+22-21
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828

2929
from pandas._config import config
3030

31-
from pandas._libs import Timestamp, iNaT, properties
31+
from pandas._libs import Timestamp, iNaT, NaT, properties
3232
from pandas.compat import set_function_name
3333
from pandas.compat._optional import import_optional_dependency
3434
from pandas.compat.numpy import function as nv
@@ -9776,26 +9776,8 @@ def describe_categorical_1d(data):
97769776
dtype = None
97779777
if result[1] > 0:
97789778
top, freq = objcounts.index[0], objcounts.iloc[0]
9779-
9780-
if is_datetime64_any_dtype(data):
9781-
tz = data.dt.tz
9782-
asint = data.dropna().values.view("i8")
9783-
top = Timestamp(top)
9784-
if top.tzinfo is not None and tz is not None:
9785-
# Don't tz_localize(None) if key is already tz-aware
9786-
top = top.tz_convert(tz)
9787-
else:
9788-
top = top.tz_localize(tz)
9789-
names += ["top", "freq", "first", "last"]
9790-
result += [
9791-
top,
9792-
freq,
9793-
Timestamp(asint.min(), tz=tz),
9794-
Timestamp(asint.max(), tz=tz),
9795-
]
9796-
else:
9797-
names += ["top", "freq"]
9798-
result += [top, freq]
9779+
names += ["top", "freq"]
9780+
result += [top, freq]
97999781

98009782
# If the DataFrame is empty, set 'top' and 'freq' to None
98019783
# to maintain output shape consistency
@@ -9806,11 +9788,30 @@ def describe_categorical_1d(data):
98069788

98079789
return pd.Series(result, index=names, name=data.name, dtype=dtype)
98089790

9791+
def describe_timestamp_1d(data):
9792+
# GH-30164: describe datetime data with count, mean, min, percentiles and max
9793+
tz = data.dt.tz
9794+
asint = data.dropna().values.view("i8")
9795+
is_empty = asint.shape[0] == 0
9796+
stat_index = ["count", "mean", "min"] + formatted_percentiles + ["max"]
9797+
d = (
9798+
[
9799+
asint.shape[0],
9800+
Timestamp(asint.mean(), tz=tz) if not is_empty else NaT,
9801+
Timestamp(asint.min(), tz=tz) if not is_empty else NaT,
9802+
]
9803+
+ data.quantile(percentiles).tolist()
9804+
+ [Timestamp(asint.max(), tz=tz) if not is_empty else NaT]
9805+
)
9806+
return pd.Series(d, index=stat_index, name=data.name)
9807+
98099808
def describe_1d(data):
98109809
if is_bool_dtype(data):
98119810
return describe_categorical_1d(data)
98129811
elif is_numeric_dtype(data):
98139812
return describe_numeric_1d(data)
9813+
elif is_datetime64_any_dtype(data):
9814+
return describe_timestamp_1d(data)
98149815
elif is_timedelta64_dtype(data):
98159816
return describe_numeric_1d(data)
98169817
else:

pandas/tests/frame/test_analytics.py

+6-39
Original file line numberDiff line numberDiff line change
@@ -781,52 +781,19 @@ def test_describe_tz_values(self, tz_naive_fixture):
781781

782782
expected = DataFrame(
783783
{
784-
"s1": [
785-
5,
786-
np.nan,
787-
np.nan,
788-
np.nan,
789-
np.nan,
790-
np.nan,
791-
2,
792-
1.581139,
793-
0,
794-
1,
795-
2,
796-
3,
797-
4,
798-
],
784+
"s1": [5, 2, 0, 1, 2, 3, 4, 1.581139,],
799785
"s2": [
800786
5,
801-
5,
802-
s2.value_counts().index[0],
803-
1,
787+
Timestamp(2018, 1, 3).tz_localize(tz),
804788
start.tz_localize(tz),
789+
s2[1],
790+
s2[2],
791+
s2[3],
805792
end.tz_localize(tz),
806793
np.nan,
807-
np.nan,
808-
np.nan,
809-
np.nan,
810-
np.nan,
811-
np.nan,
812-
np.nan,
813794
],
814795
},
815-
index=[
816-
"count",
817-
"unique",
818-
"top",
819-
"freq",
820-
"first",
821-
"last",
822-
"mean",
823-
"std",
824-
"min",
825-
"25%",
826-
"50%",
827-
"75%",
828-
"max",
829-
],
796+
index=["count", "mean", "min", "25%", "50%", "75%", "max", "std",],
830797
)
831798
result = df.describe(include="all")
832799
tm.assert_frame_equal(result, expected)

pandas/tests/series/test_analytics.py

+5-4
Original file line numberDiff line numberDiff line change
@@ -77,14 +77,15 @@ def test_describe_with_tz(self, tz_naive_fixture):
7777
expected = Series(
7878
[
7979
5,
80-
5,
81-
s.value_counts().index[0],
82-
1,
80+
Timestamp(2018, 1, 3).tz_localize(tz),
8381
start.tz_localize(tz),
82+
s[1],
83+
s[2],
84+
s[3],
8485
end.tz_localize(tz),
8586
],
8687
name=name,
87-
index=["count", "unique", "top", "freq", "first", "last"],
88+
index=["count", "mean", "min", "25%", "50%", "75%", "max"],
8889
)
8990
tm.assert_series_equal(result, expected)
9091

0 commit comments

Comments
 (0)