Skip to content

Commit 55836e6

Browse files
mayankasthanamkasthana-cs
authored and committed
Closes issue pandas-dev#10174. Added 'interpolation' keyword in Dataframe.quantile and Series.quantile
1 parent 816a51f commit 55836e6

File tree

5 files changed

+179
-12
lines changed

5 files changed

+179
-12
lines changed

doc/source/whatsnew/v0.18.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,7 @@ Other enhancements
111111
- ``sys.getsizeof(obj)`` returns the memory usage of a pandas object, including the
112112
values it contains (:issue:`11597`)
113113
- ``Series`` gained an ``is_unique`` attribute (:issue:`11946`)
114+
- ``DataFrame.quantile`` and ``Series.quantile`` now accept an ``interpolation`` keyword (:issue:`10174`).
114115

115116
.. _whatsnew_0180.enhancements.rounding:
116117

pandas/core/frame.py

+23-5
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@
6464
import pandas.algos as _algos
6565

6666
from pandas.core.config import get_option
67+
from pandas import _np_version_under1p9
6768

6869
#----------------------------------------------------------------------
6970
# Docstring templates
@@ -4874,7 +4875,7 @@ def mode(self, axis=0, numeric_only=False):
48744875
f = lambda s: s.mode()
48754876
return data.apply(f, axis=axis)
48764877

4877-
def quantile(self, q=0.5, axis=0, numeric_only=True):
4878+
def quantile(self, q=0.5, axis=0, numeric_only=True, interpolation='linear'):
48784879
"""
48794880
Return values at the given quantile over requested axis, a la
48804881
numpy.percentile.
@@ -4885,7 +4886,16 @@ def quantile(self, q=0.5, axis=0, numeric_only=True):
48854886
0 <= q <= 1, the quantile(s) to compute
48864887
axis : {0, 1, 'index', 'columns'} (default 0)
48874888
0 or 'index' for row-wise, 1 or 'columns' for column-wise
4888-
4889+
interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'}
4890+
.. versionadded:: 0.18.0
4891+
This optional parameter specifies the interpolation method to use,
4892+
when the desired quantile lies between two data points `i` and `j`:
4893+
* linear: `i + (j - i) * fraction`, where `fraction` is the
4894+
fractional part of the index surrounded by `i` and `j`.
4895+
* lower: `i`.
4896+
* higher: `j`.
4897+
* nearest: `i` or `j` whichever is nearest.
4898+
* midpoint: (`i` + `j`) / 2.
48894899
48904900
Returns
48914901
-------
@@ -4920,7 +4930,12 @@ def quantile(self, q=0.5, axis=0, numeric_only=True):
49204930
else:
49214931
squeeze = False
49224932

4923-
def f(arr, per):
4933+
if _np_version_under1p9:
4934+
if interpolation != 'linear':
4935+
raise ValueError("Interpolation methods"
4936+
" other than linear not supported in numpy < 1.9")
4937+
4938+
def f(arr, per,interpolation):
49244939
if arr._is_datelike_mixed_type:
49254940
values = _values_from_object(arr).view('i8')
49264941
else:
@@ -4929,7 +4944,10 @@ def f(arr, per):
49294944
if len(values) == 0:
49304945
return NA
49314946
else:
4932-
return _quantile(values, per)
4947+
if _np_version_under1p9:
4948+
return _quantile(values, per)
4949+
else:
4950+
return _quantile(values, per, interpolation=interpolation)
49334951

49344952
data = self._get_numeric_data() if numeric_only else self
49354953

@@ -4943,7 +4961,7 @@ def f(arr, per):
49434961
is_dt_col = data.dtypes.map(com.is_datetime64_dtype)
49444962
is_dt_col = is_dt_col[is_dt_col].index
49454963

4946-
quantiles = [[f(vals, x) for x in per]
4964+
quantiles = [[f(vals, x, interpolation) for x in per]
49474965
for (_, vals) in data.iteritems()]
49484966

49494967
result = self._constructor(quantiles, index=data._info_axis,

pandas/core/series.py

+27-6
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,8 @@
5858
from numpy import percentile as _quantile
5959
from pandas.core.config import get_option
6060

61+
from pandas import _np_version_under1p9
62+
6163
__all__ = ['Series']
6264

6365

@@ -1261,14 +1263,24 @@ def round(self, decimals=0):
12611263

12621264
return result
12631265

1264-
def quantile(self, q=0.5):
1266+
def quantile(self, q=0.5, interpolation='linear'):
12651267
"""
12661268
Return value at the given quantile, a la numpy.percentile.
12671269
12681270
Parameters
12691271
----------
12701272
q : float or array-like, default 0.5 (50% quantile)
12711273
0 <= q <= 1, the quantile(s) to compute
1274+
interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'}
1275+
.. versionadded:: 0.18.0
1276+
This optional parameter specifies the interpolation method to use,
1277+
when the desired quantile lies between two data points `i` and `j`:
1278+
* linear: `i + (j - i) * fraction`, where `fraction` is the
1279+
fractional part of the index surrounded by `i` and `j`.
1280+
* lower: `i`.
1281+
* higher: `j`.
1282+
* nearest: `i` or `j` whichever is nearest.
1283+
* midpoint: (`i` + `j`) / 2.
12721284
12731285
Returns
12741286
-------
@@ -1291,17 +1303,26 @@ def quantile(self, q=0.5):
12911303
valid = self.dropna()
12921304
self._check_percentile(q)
12931305

1294-
def multi(values, qs):
1306+
if _np_version_under1p9:
1307+
if interpolation != 'linear':
1308+
raise ValueError("Interpolation methods"
1309+
" other than linear not supported in numpy < 1.9.")
1310+
1311+
def multi(values,qs,**kwargs):
12951312
if com.is_list_like(qs):
1296-
values = [_quantile(values, x*100) for x in qs]
1313+
values = [_quantile(values, x*100, **kwargs) for x in qs]
12971314
# let empty result to be Float64Index
12981315
qs = Float64Index(qs)
12991316
return self._constructor(values, index=qs, name=self.name)
13001317
else:
1301-
return _quantile(values, qs*100)
1302-
1303-
return self._maybe_box(lambda values: multi(values, q), dropna=True)
1318+
return _quantile(values, qs*100, **kwargs)
1319+
1320+
kwargs = dict()
1321+
if not _np_version_under1p9:
1322+
kwargs.update({'interpolation':interpolation})
13041323

1324+
return self._maybe_box(lambda values: multi(values,q,**kwargs), dropna=True)
1325+
13051326
def corr(self, other, method='pearson',
13061327
min_periods=None):
13071328
"""

pandas/tests/test_frame.py

+88
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@
5555
import pandas.lib as lib
5656

5757
from numpy.testing.decorators import slow
58+
from pandas import _np_version_under1p9
5859

5960
#---------------------------------------------------------------------
6061
# DataFrame test cases
@@ -13642,6 +13643,93 @@ def test_quantile_axis_parameter(self):
1364213643
self.assertRaises(ValueError, df.quantile, 0.1, axis=-1)
1364313644
self.assertRaises(ValueError, df.quantile, 0.1, axis="column")
1364413645

13646+
def test_quantile_interpolation(self):
    # GH #10174
    # Exercises DataFrame.quantile with the ``interpolation`` keyword.
    # Non-linear interpolation needs numpy >= 1.9, so skip on older numpy.
    if _np_version_under1p9:
        raise nose.SkipTest("Numpy version under 1.9")

    from numpy import percentile

    # interpolation = linear (default case)
    q = self.tsframe.quantile(0.1, axis=0, interpolation='linear')
    self.assertEqual(q['A'], percentile(self.tsframe['A'], 10))

    # Passing 'linear' explicitly must give the same answer as leaving
    # the keyword out, so one call uses the keyword and the other doesn't.
    q = self.intframe.quantile(0.1, interpolation='linear')
    self.assertEqual(q['A'], percentile(self.intframe['A'], 10))

    q1 = self.intframe.quantile(0.1)
    self.assertEqual(q1['A'], percentile(self.intframe['A'], 10))
    assert_series_equal(q, q1)

    # interpolation method other than default linear
    df = DataFrame({"A": [1, 2, 3], "B": [2, 3, 4]}, index=[1, 2, 3])
    result = df.quantile(.5, axis=1, interpolation='nearest')
    expected = Series([1., 2., 3.], index=[1, 2, 3])
    assert_series_equal(result, expected)

    # axis
    result = df.quantile([.5, .75], axis=1, interpolation='lower')
    expected = DataFrame({1: [1., 1.], 2: [2., 2.],
                          3: [3., 3.]}, index=[0.5, 0.75])
    assert_frame_equal(result, expected)

    # test degenerate case: empty columns yield NaN
    df = DataFrame({'x': [], 'y': []})
    q = df.quantile(0.1, axis=0, interpolation='higher')
    self.assertTrue(np.isnan(q['x']) and np.isnan(q['y']))

    # multi
    # Both quantiles fall strictly between two data points, which is
    # the case 'midpoint' is defined for.  A quantile that lands on an
    # exact data point (e.g. .5 over [1, 2, 3]) is avoided on purpose:
    # its 'midpoint' result differs between numpy releases.
    df = DataFrame([[1, 1, 1], [2, 2, 2], [3, 3, 3]],
                   columns=['a', 'b', 'c'])
    result = df.quantile([.25, .75], interpolation='midpoint')
    expected = DataFrame([[1.5, 1.5, 1.5], [2.5, 2.5, 2.5]],
                         index=[.25, .75], columns=['a', 'b', 'c'])
    assert_frame_equal(result, expected)
13689+
13690+
13691+
def test_quantile_interpolation_np_lt_1p9(self):
    # GH #10174
    # Counterpart of test_quantile_interpolation for numpy < 1.9: only
    # 'linear' is usable there, every other method must raise ValueError.
    if not _np_version_under1p9:
        raise nose.SkipTest("Numpy version is 1.9 or greater")

    from numpy import percentile

    # interpolation = linear (default case)
    q = self.tsframe.quantile(0.1, axis=0, interpolation='linear')
    self.assertEqual(q['A'], percentile(self.tsframe['A'], 10))

    # Passing 'linear' explicitly must give the same answer as leaving
    # the keyword out, so one call uses the keyword and the other doesn't.
    q = self.intframe.quantile(0.1, interpolation='linear')
    self.assertEqual(q['A'], percentile(self.intframe['A'], 10))

    q1 = self.intframe.quantile(0.1)
    self.assertEqual(q1['A'], percentile(self.intframe['A'], 10))
    assert_series_equal(q, q1)

    # interpolation method other than default linear
    expErrMsg = ("Interpolation methods other than linear"
                 " not supported in numpy < 1.9")
    df = DataFrame({"A": [1, 2, 3], "B": [2, 3, 4]}, index=[1, 2, 3])
    with assertRaisesRegexp(ValueError, expErrMsg):
        df.quantile(.5, axis=1, interpolation='nearest')

    with assertRaisesRegexp(ValueError, expErrMsg):
        df.quantile([.5, .75], axis=1, interpolation='lower')

    # test degenerate case: the version check must fire even when
    # there is no data to compute a quantile from
    df = DataFrame({'x': [], 'y': []})
    with assertRaisesRegexp(ValueError, expErrMsg):
        df.quantile(0.1, axis=0, interpolation='higher')

    # multi
    df = DataFrame([[1, 1, 1], [2, 2, 2], [3, 3, 3]],
                   columns=['a', 'b', 'c'])
    with assertRaisesRegexp(ValueError, expErrMsg):
        df.quantile([.25, .5], interpolation='midpoint')
13730+
13731+
13732+
1364513733
def test_quantile_multi(self):
1364613734
df = DataFrame([[1, 1, 1], [2, 2, 2], [3, 3, 3]],
1364713735
columns=['a', 'b', 'c'])

pandas/tests/test_series.py

+40-1
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,8 @@
2020
import pandas as pd
2121

2222
from pandas import (Index, Series, DataFrame, isnull, notnull, bdate_range, NaT,
23-
date_range, period_range, timedelta_range, _np_version_under1p8)
23+
date_range, period_range, timedelta_range, _np_version_under1p8,
24+
_np_version_under1p9)
2425
from pandas.core.index import MultiIndex
2526
from pandas.core.indexing import IndexingError
2627
from pandas.tseries.period import PeriodIndex
@@ -3083,6 +3084,44 @@ def test_quantile_multi(self):
30833084
expected = pd.Series([], name=self.ts.name, index=Index([], dtype=float))
30843085
assert_series_equal(result, expected)
30853086

3087+
def test_quantile_interpolation(self):
    # GH #10174
    # Series.quantile must return the same value whether 'linear' is
    # requested explicitly or picked up as the default.  Only run where
    # the interpolation keyword is available (numpy >= 1.9).
    if _np_version_under1p9:
        raise nose.SkipTest("Numpy version is under 1.9")

    from numpy import percentile

    # keyword given explicitly
    with_keyword = self.ts.quantile(0.1, interpolation='linear')
    self.assertEqual(with_keyword, percentile(self.ts.valid(), 10))

    # keyword omitted -> default interpolation
    without_keyword = self.ts.quantile(0.1)
    self.assertEqual(without_keyword, percentile(self.ts.valid(), 10))

    # both spellings have to agree
    self.assertEqual(with_keyword, without_keyword)
3102+
3103+
def test_quantile_interpolation_np_lt_1p9(self):
    # GH #10174
    # Counterpart of test_quantile_interpolation for numpy < 1.9: only
    # 'linear' is usable there, every other method must raise ValueError.
    if not _np_version_under1p9:
        raise nose.SkipTest("Numpy version is 1.9 or greater")

    from numpy import percentile

    # interpolation = linear (default case)
    q = self.ts.quantile(0.1, interpolation='linear')
    self.assertEqual(q, percentile(self.ts.valid(), 10))
    q1 = self.ts.quantile(0.1)
    self.assertEqual(q1, percentile(self.ts.valid(), 10))

    # explicit 'linear' and the default must agree
    self.assertEqual(q, q1)

    # interpolation other than linear
    expErrMsg = ("Interpolation methods other than linear"
                 " not supported in numpy < 1.9")
    with tm.assertRaisesRegexp(ValueError, expErrMsg):
        self.ts.quantile(0.9, interpolation='nearest')

    # object dtype
    with tm.assertRaisesRegexp(ValueError, expErrMsg):
        Series(self.ts, dtype=object).quantile(0.7, interpolation='higher')
3124+
30863125
def test_append(self):
30873126
appendedSeries = self.series.append(self.objSeries)
30883127
for idx, value in compat.iteritems(appendedSeries):

0 commit comments

Comments
 (0)