Skip to content

Commit 7dc6f70

Browse files
DataOmbudsman authored and jorisvandenbossche committed
PERF: improve performance of NDFrame.describe (#21274)
1 parent 67e6e6f commit 7dc6f70

File tree

3 files changed

+20
-3
lines changed

3 files changed

+20
-3
lines changed

asv_bench/benchmarks/frame_methods.py

+18
Original file line number | Diff line number | Diff line change
@@ -512,3 +512,21 @@ def time_nlargest(self, keep):
512512

513513
def time_nsmallest(self, keep):
514514
self.df.nsmallest(100, 'A', keep=keep)
515+
516+
517+
class Describe(object):
518+
519+
goal_time = 0.2
520+
521+
def setup(self):
522+
self.df = DataFrame({
523+
'a': np.random.randint(0, 100, int(1e6)),
524+
'b': np.random.randint(0, 100, int(1e6)),
525+
'c': np.random.randint(0, 100, int(1e6))
526+
})
527+
528+
def time_series_describe(self):
529+
self.df['a'].describe()
530+
531+
def time_dataframe_describe(self):
532+
self.df.describe()

doc/source/whatsnew/v0.24.0.txt

+1-2
Original file line number | Diff line number | Diff line change
@@ -63,8 +63,7 @@ Removal of prior version deprecations/changes
6363
Performance Improvements
6464
~~~~~~~~~~~~~~~~~~~~~~~~
6565

66-
-
67-
-
66+
- Improved performance of :func:`Series.describe` in case of numeric dtypes (:issue:`21274`)
6867
-
6968

7069
.. _whatsnew_0240.docs:

pandas/core/generic.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -8519,7 +8519,7 @@ def describe_numeric_1d(series):
85198519
stat_index = (['count', 'mean', 'std', 'min'] +
85208520
formatted_percentiles + ['max'])
85218521
d = ([series.count(), series.mean(), series.std(), series.min()] +
8522-
[series.quantile(x) for x in percentiles] + [series.max()])
8522+
series.quantile(percentiles).tolist() + [series.max()])
85238523
return pd.Series(d, index=stat_index, name=series.name)
85248524

85258525
def describe_categorical_1d(data):

0 commit comments

Comments
 (0)