Skip to content

Commit aad72cb

Browse files
committed
PERF: quantile now operates per block boosting perf
REGR: series quantile with nan closes pandas-dev#11623 closes pandas-dev#13098
1 parent 4aa6323 commit aad72cb

File tree

12 files changed

+352
-103
lines changed

12 files changed

+352
-103
lines changed

asv_bench/benchmarks/frame_methods.py

+12-1
Original file line numberDiff line numberDiff line change
@@ -423,7 +423,7 @@ class frame_get_dtype_counts(object):
423423
goal_time = 0.2
424424

425425
def setup(self):
426-
self.df = pandas.DataFrame(np.random.randn(10, 10000))
426+
self.df = DataFrame(np.random.randn(10, 10000))
427427

428428
def time_frame_get_dtype_counts(self):
429429
self.df.get_dtype_counts()
@@ -985,3 +985,14 @@ def setup(self):
985985

986986
def time_series_string_vector_slice(self):
987987
self.s.str[:5]
988+
989+
990+
class frame_quantile_axis1(object):
991+
goal_time = 0.2
992+
993+
def setup(self):
994+
self.df = DataFrame(np.random.randn(1000, 3),
995+
columns=list('ABC'))
996+
997+
def time_frame_quantile_axis1(self):
998+
self.df.quantile([0.1, 0.5], axis=1)

codecov.yml

+2-1
Original file line numberDiff line numberDiff line change
@@ -9,4 +9,5 @@ coverage:
99
branches: null
1010
changes:
1111
default:
12-
branches: null
12+
branches:
13+
- master

doc/source/whatsnew/v0.18.1.txt

-1
Original file line numberDiff line numberDiff line change
@@ -563,7 +563,6 @@ Performance Improvements
563563
- Improved speed of SAS reader (:issue:`12656`, :issue:`12961`)
564564
- Performance improvements in ``.groupby(..).cumcount()`` (:issue:`11039`)
565565
- Improved memory usage in ``pd.read_csv()`` when using ``skiprows=an_integer`` (:issue:`13005`)
566-
567566
- Improved performance of ``DataFrame.to_sql`` when checking case sensitivity for tables. Now only checks if table has been created correctly when table name is not lower case. (:issue:`12876`)
568567
- Improved performance of ``Period`` construction and time series plotting (:issue:`12903`, :issue:`11831`).
569568
- Improved performance of ``.str.encode()`` and ``.str.decode()`` methods (:issue:`13008`)

doc/source/whatsnew/v0.18.2.txt

+4
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,9 @@ Performance Improvements
9797

9898
- Improved performance of sparse ``IntIndex.intersect`` (:issue:`13082`)
9999
- Improved performance of sparse arithmetic with ``BlockIndex`` when the number of blocks is large, though it is recommended to use ``IntIndex`` in such cases (:issue:`13082`)
100+
- Improved performance of ``DataFrame.quantile()``, which now operates per-block (:issue:`11623`)
101+
102+
100103

101104

102105

@@ -110,6 +113,7 @@ Bug Fixes
110113

111114

112115

116+
- Regression in ``Series.quantile`` with ``NaN`` values (:issue:`13098`)
113117

114118

115119

pandas/core/frame.py

+15-19
Original file line numberDiff line numberDiff line change
@@ -4989,31 +4989,27 @@ def quantile(self, q=0.5, axis=0, numeric_only=True,
49894989
0.5 2.5 55.0
49904990
"""
49914991
self._check_percentile(q)
4992-
if not com.is_list_like(q):
4993-
q = [q]
4994-
squeeze = True
4995-
else:
4996-
squeeze = False
49974992

49984993
data = self._get_numeric_data() if numeric_only else self
49994994
axis = self._get_axis_number(axis)
4995+
is_transposed = axis == 1
50004996

5001-
def _quantile(series):
5002-
res = series.quantile(q, interpolation=interpolation)
5003-
return series.name, res
5004-
5005-
if axis == 1:
4997+
if is_transposed:
50064998
data = data.T
50074999

5008-
# unable to use DataFrame.apply, because data may be empty
5009-
result = dict(_quantile(s) for (_, s) in data.iteritems())
5010-
result = self._constructor(result, columns=data.columns)
5011-
if squeeze:
5012-
if result.shape == (1, 1):
5013-
result = result.T.iloc[:, 0] # don't want scalar
5014-
else:
5015-
result = result.T.squeeze()
5016-
result.name = None # For groupby, so it can set an index name
5000+
result = data._data.quantile(qs=q,
5001+
axis=1,
5002+
interpolation=interpolation,
5003+
transposed=is_transposed)
5004+
5005+
if result.ndim == 2:
5006+
result = self._constructor(result)
5007+
else:
5008+
result = self._constructor_sliced(result, name=q)
5009+
5010+
if is_transposed:
5011+
result = result.T
5012+
50175013
return result
50185014

50195015
def to_timestamp(self, freq=None, how='start', axis=0, copy=True):

0 commit comments

Comments
 (0)