Skip to content

Commit e05f66a

Browse files
mayankasthana authored and jreback committed
Closes issue #10174. Added 'interpolation' keyword in DataFrame.quantile and Series.quantile
1 parent 816a51f commit e05f66a

File tree

5 files changed

+196
-28
lines changed

5 files changed

+196
-28
lines changed

doc/source/whatsnew/v0.18.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,7 @@ Other enhancements
111111
- ``sys.getsizeof(obj)`` returns the memory usage of a pandas object, including the
112112
values it contains (:issue:`11597`)
113113
- ``Series`` gained an ``is_unique`` attribute (:issue:`11946`)
114+
- ``DataFrame.quantile`` and ``Series.quantile`` now accept ``interpolation`` keyword (:issue:`10174`).
114115

115116
.. _whatsnew_0180.enhancements.rounding:
116117

pandas/core/frame.py

+27-8
Original file line numberDiff line numberDiff line change
@@ -64,8 +64,9 @@
6464
import pandas.algos as _algos
6565

6666
from pandas.core.config import get_option
67+
from pandas import _np_version_under1p9
6768

68-
#----------------------------------------------------------------------
69+
# ----------------------------------------------------------------------
6970
# Docstring templates
7071

7172
_shared_doc_kwargs = dict(axes='index, columns', klass='DataFrame',
@@ -1578,7 +1579,7 @@ def to_latex(self, buf=None, columns=None, col_space=None, colSpace=None,
15781579
longtable = get_option("display.latex.longtable")
15791580
if escape is None:
15801581
escape = get_option("display.latex.escape")
1581-
1582+
15821583
formatter = fmt.DataFrameFormatter(self, buf=buf, columns=columns,
15831584
col_space=col_space, na_rep=na_rep,
15841585
header=header, index=index,
@@ -4430,7 +4431,7 @@ def round(self, decimals=0, out=None):
44304431
Returns
44314432
-------
44324433
DataFrame object
4433-
4434+
44344435
See Also
44354436
--------
44364437
numpy.around
@@ -4874,7 +4875,8 @@ def mode(self, axis=0, numeric_only=False):
48744875
f = lambda s: s.mode()
48754876
return data.apply(f, axis=axis)
48764877

4877-
def quantile(self, q=0.5, axis=0, numeric_only=True):
4878+
def quantile(self, q=0.5, axis=0, numeric_only=True,
4879+
interpolation='linear'):
48784880
"""
48794881
Return values at the given quantile over requested axis, a la
48804882
numpy.percentile.
@@ -4885,7 +4887,16 @@ def quantile(self, q=0.5, axis=0, numeric_only=True):
48854887
0 <= q <= 1, the quantile(s) to compute
48864888
axis : {0, 1, 'index', 'columns'} (default 0)
48874889
0 or 'index' for row-wise, 1 or 'columns' for column-wise
4888-
4890+
interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'}
4891+
.. versionadded:: 0.18.0
4892+
This optional parameter specifies the interpolation method to use,
4893+
when the desired quantile lies between two data points `i` and `j`:
4894+
* linear: `i + (j - i) * fraction`, where `fraction` is the
4895+
fractional part of the index surrounded by `i` and `j`.
4896+
* lower: `i`.
4897+
* higher: `j`.
4898+
* nearest: `i` or `j` whichever is nearest.
4899+
* midpoint: (`i` + `j`) / 2.
48894900
48904901
Returns
48914902
-------
@@ -4920,7 +4931,12 @@ def quantile(self, q=0.5, axis=0, numeric_only=True):
49204931
else:
49214932
squeeze = False
49224933

4923-
def f(arr, per):
4934+
if _np_version_under1p9:
4935+
if interpolation != 'linear':
4936+
raise ValueError("Interpolation methods other than linear "
4937+
"are not supported in numpy < 1.9")
4938+
4939+
def f(arr, per, interpolation):
49244940
if arr._is_datelike_mixed_type:
49254941
values = _values_from_object(arr).view('i8')
49264942
else:
@@ -4929,7 +4945,10 @@ def f(arr, per):
49294945
if len(values) == 0:
49304946
return NA
49314947
else:
4932-
return _quantile(values, per)
4948+
if _np_version_under1p9:
4949+
return _quantile(values, per)
4950+
else:
4951+
return _quantile(values, per, interpolation=interpolation)
49334952

49344953
data = self._get_numeric_data() if numeric_only else self
49354954

@@ -4943,7 +4962,7 @@ def f(arr, per):
49434962
is_dt_col = data.dtypes.map(com.is_datetime64_dtype)
49444963
is_dt_col = is_dt_col[is_dt_col].index
49454964

4946-
quantiles = [[f(vals, x) for x in per]
4965+
quantiles = [[f(vals, x, interpolation) for x in per]
49474966
for (_, vals) in data.iteritems()]
49484967

49494968
result = self._constructor(quantiles, index=data._info_axis,

pandas/core/series.py

+33-11
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,8 @@
5858
from numpy import percentile as _quantile
5959
from pandas.core.config import get_option
6060

61+
from pandas import _np_version_under1p9
62+
6163
__all__ = ['Series']
6264

6365

@@ -1238,18 +1240,18 @@ def idxmax(self, axis=None, out=None, skipna=True):
12381240
def round(self, decimals=0):
12391241
"""
12401242
Round each value in a Series to the given number of decimals.
1241-
1243+
12421244
Parameters
12431245
----------
12441246
decimals : int
1245-
Number of decimal places to round to (default: 0).
1246-
If decimals is negative, it specifies the number of
1247+
Number of decimal places to round to (default: 0).
1248+
If decimals is negative, it specifies the number of
12471249
positions to the left of the decimal point.
1248-
1250+
12491251
Returns
12501252
-------
12511253
Series object
1252-
1254+
12531255
See Also
12541256
--------
12551257
numpy.around
@@ -1261,14 +1263,24 @@ def round(self, decimals=0):
12611263

12621264
return result
12631265

1264-
def quantile(self, q=0.5):
1266+
def quantile(self, q=0.5, interpolation='linear'):
12651267
"""
12661268
Return value at the given quantile, a la numpy.percentile.
12671269
12681270
Parameters
12691271
----------
12701272
q : float or array-like, default 0.5 (50% quantile)
12711273
0 <= q <= 1, the quantile(s) to compute
1274+
interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'}
1275+
.. versionadded:: 0.18.0
1276+
This optional parameter specifies the interpolation method to use,
1277+
when the desired quantile lies between two data points `i` and `j`:
1278+
* linear: `i + (j - i) * fraction`, where `fraction` is the
1279+
fractional part of the index surrounded by `i` and `j`.
1280+
* lower: `i`.
1281+
* higher: `j`.
1282+
* nearest: `i` or `j` whichever is nearest.
1283+
* midpoint: (`i` + `j`) / 2.
12721284
12731285
Returns
12741286
-------
@@ -1288,19 +1300,29 @@ def quantile(self, q=0.5):
12881300
0.75 3.25
12891301
dtype: float64
12901302
"""
1291-
valid = self.dropna()
1303+
12921304
self._check_percentile(q)
12931305

1294-
def multi(values, qs):
1306+
if _np_version_under1p9:
1307+
if interpolation != 'linear':
1308+
raise ValueError("Interpolation methods other than linear "
1309+
"are not supported in numpy < 1.9.")
1310+
1311+
def multi(values, qs, **kwargs):
12951312
if com.is_list_like(qs):
1296-
values = [_quantile(values, x*100) for x in qs]
1313+
values = [_quantile(values, x * 100, **kwargs) for x in qs]
12971314
# let empty result to be Float64Index
12981315
qs = Float64Index(qs)
12991316
return self._constructor(values, index=qs, name=self.name)
13001317
else:
1301-
return _quantile(values, qs*100)
1318+
return _quantile(values, qs * 100, **kwargs)
1319+
1320+
kwargs = dict()
1321+
if not _np_version_under1p9:
1322+
kwargs.update({'interpolation': interpolation})
13021323

1303-
return self._maybe_box(lambda values: multi(values, q), dropna=True)
1324+
return self._maybe_box(lambda values: multi(values, q, **kwargs),
1325+
dropna=True)
13041326

13051327
def corr(self, other, method='pearson',
13061328
min_periods=None):

pandas/tests/test_frame.py

+90-6
Original file line numberDiff line numberDiff line change
@@ -55,8 +55,9 @@
5555
import pandas.lib as lib
5656

5757
from numpy.testing.decorators import slow
58+
from pandas import _np_version_under1p9
5859

59-
#---------------------------------------------------------------------
60+
# ---------------------------------------------------------------------
6061
# DataFrame test cases
6162

6263
JOIN_TYPES = ['inner', 'outer', 'left', 'right']
@@ -5457,10 +5458,10 @@ def test_repr_column_name_unicode_truncation_bug(self):
54575458
def test_head_tail(self):
54585459
assert_frame_equal(self.frame.head(), self.frame[:5])
54595460
assert_frame_equal(self.frame.tail(), self.frame[-5:])
5460-
5461+
54615462
assert_frame_equal(self.frame.head(0), self.frame[0:0])
54625463
assert_frame_equal(self.frame.tail(0), self.frame[0:0])
5463-
5464+
54645465
assert_frame_equal(self.frame.head(-1), self.frame[:-1])
54655466
assert_frame_equal(self.frame.tail(-1), self.frame[1:])
54665467
assert_frame_equal(self.frame.head(1), self.frame[:1])
@@ -13564,10 +13565,11 @@ def test_round_issue(self):
1356413565

1356513566
decimals = pd.Series([1, 0, 2], index=['A', 'B', 'A'])
1356613567
self.assertRaises(ValueError, df.round, decimals)
13567-
13568+
1356813569
def test_built_in_round(self):
1356913570
if not compat.PY3:
13570-
raise nose.SkipTest('build in round cannot be overriden prior to Python 3')
13571+
raise nose.SkipTest("build in round cannot be overriden "
13572+
"prior to Python 3")
1357113573

1357213574
# GH11763
1357313575
# Here's the test frame we'll be working with
@@ -13578,7 +13580,7 @@ def test_built_in_round(self):
1357813580
expected_rounded = DataFrame(
1357913581
{'col1': [1., 2., 3.], 'col2': [1., 2., 3.]})
1358013582
tm.assert_frame_equal(round(df), expected_rounded)
13581-
13583+
1358213584
def test_quantile(self):
1358313585
from numpy import percentile
1358413586

@@ -13642,6 +13644,88 @@ def test_quantile_axis_parameter(self):
1364213644
self.assertRaises(ValueError, df.quantile, 0.1, axis=-1)
1364313645
self.assertRaises(ValueError, df.quantile, 0.1, axis="column")
1364413646

13647+
def test_quantile_interpolation(self):
13648+
# GH #10174
13649+
if _np_version_under1p9:
13650+
raise nose.SkipTest("Numpy version under 1.9")
13651+
13652+
from numpy import percentile
13653+
13654+
# interpolation = linear (default case)
13655+
q = self.tsframe.quantile(0.1, axis=0, interpolation='linear')
13656+
self.assertEqual(q['A'], percentile(self.tsframe['A'], 10))
13657+
q = self.intframe.quantile(0.1)
13658+
self.assertEqual(q['A'], percentile(self.intframe['A'], 10))
13659+
13660+
# test with and without interpolation keyword
13661+
q1 = self.intframe.quantile(0.1)
13662+
self.assertEqual(q1['A'], np.percentile(self.intframe['A'], 10))
13663+
assert_series_equal(q, q1)
13664+
13665+
# interpolation method other than default linear
13666+
df = DataFrame({"A": [1, 2, 3], "B": [2, 3, 4]}, index=[1, 2, 3])
13667+
result = df.quantile(.5, axis=1, interpolation='nearest')
13668+
expected = Series([1., 2., 3.], index=[1, 2, 3])
13669+
assert_series_equal(result, expected)
13670+
13671+
# axis
13672+
result = df.quantile([.5, .75], axis=1, interpolation='lower')
13673+
expected = DataFrame({1: [1., 1.], 2: [2., 2.],
13674+
3: [3., 3.]}, index=[0.5, 0.75])
13675+
assert_frame_equal(result, expected)
13676+
13677+
# test degenerate case
13678+
df = DataFrame({'x': [], 'y': []})
13679+
q = df.quantile(0.1, axis=0, interpolation='higher')
13680+
assert(np.isnan(q['x']) and np.isnan(q['y']))
13681+
13682+
# multi
13683+
df = DataFrame([[1, 1, 1], [2, 2, 2], [3, 3, 3]],
13684+
columns=['a', 'b', 'c'])
13685+
result = df.quantile([.25, .5], interpolation='midpoint')
13686+
expected = DataFrame([[1.5, 1.5, 1.5], [2.5, 2.5, 2.5]],
13687+
index=[.25, .5], columns=['a', 'b', 'c'])
13688+
assert_frame_equal(result, expected)
13689+
13690+
def test_quantile_interpolation_np_lt_1p9(self):
13691+
# GH #10174
13692+
if not _np_version_under1p9:
13693+
raise nose.SkipTest("Numpy version is greater than 1.9")
13694+
13695+
from numpy import percentile
13696+
13697+
# interpolation = linear (default case)
13698+
q = self.tsframe.quantile(0.1, axis=0, interpolation='linear')
13699+
self.assertEqual(q['A'], percentile(self.tsframe['A'], 10))
13700+
q = self.intframe.quantile(0.1)
13701+
self.assertEqual(q['A'], percentile(self.intframe['A'], 10))
13702+
13703+
# test with and without interpolation keyword
13704+
q1 = self.intframe.quantile(0.1)
13705+
self.assertEqual(q1['A'], np.percentile(self.intframe['A'], 10))
13706+
assert_series_equal(q, q1)
13707+
13708+
# interpolation method other than default linear
13709+
expErrMsg = ("Interpolation methods other than linear"
13710+
" not supported in numpy < 1.9")
13711+
df = DataFrame({"A": [1, 2, 3], "B": [2, 3, 4]}, index=[1, 2, 3])
13712+
with assertRaisesRegexp(ValueError, expErrMsg):
13713+
df.quantile(.5, axis=1, interpolation='nearest')
13714+
13715+
with assertRaisesRegexp(ValueError, expErrMsg):
13716+
df.quantile([.5, .75], axis=1, interpolation='lower')
13717+
13718+
# test degenerate case
13719+
df = DataFrame({'x': [], 'y': []})
13720+
with assertRaisesRegexp(ValueError, expErrMsg):
13721+
q = df.quantile(0.1, axis=0, interpolation='higher')
13722+
13723+
# multi
13724+
df = DataFrame([[1, 1, 1], [2, 2, 2], [3, 3, 3]],
13725+
columns=['a', 'b', 'c'])
13726+
with assertRaisesRegexp(ValueError, expErrMsg):
13727+
df.quantile([.25, .5], interpolation='midpoint')
13728+
1364513729
def test_quantile_multi(self):
1364613730
df = DataFrame([[1, 1, 1], [2, 2, 2], [3, 3, 3]],
1364713731
columns=['a', 'b', 'c'])

pandas/tests/test_series.py

+45-3
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,9 @@
1919
import numpy.ma as ma
2020
import pandas as pd
2121

22-
from pandas import (Index, Series, DataFrame, isnull, notnull, bdate_range, NaT,
23-
date_range, period_range, timedelta_range, _np_version_under1p8)
22+
from pandas import (Index, Series, DataFrame, isnull, notnull, bdate_range,
23+
NaT, date_range, period_range, timedelta_range,
24+
_np_version_under1p8, _np_version_under1p9)
2425
from pandas.core.index import MultiIndex
2526
from pandas.core.indexing import IndexingError
2627
from pandas.tseries.period import PeriodIndex
@@ -3080,9 +3081,50 @@ def test_quantile_multi(self):
30803081
assert_series_equal(result, expected)
30813082

30823083
result = self.ts.quantile([])
3083-
expected = pd.Series([], name=self.ts.name, index=Index([], dtype=float))
3084+
expected = pd.Series([], name=self.ts.name,
3085+
index=Index([], dtype=float))
30843086
assert_series_equal(result, expected)
30853087

3088+
def test_quantile_interpolation(self):
3089+
# GH #10174
3090+
if _np_version_under1p9:
3091+
raise nose.SkipTest("Numpy version is under 1.9")
3092+
3093+
from numpy import percentile
3094+
3095+
# interpolation = linear (default case)
3096+
q = self.ts.quantile(0.1, interpolation='linear')
3097+
self.assertEqual(q, percentile(self.ts.valid(), 10))
3098+
q1 = self.ts.quantile(0.1)
3099+
self.assertEqual(q1, percentile(self.ts.valid(), 10))
3100+
3101+
# test with and without interpolation keyword
3102+
self.assertEqual(q, q1)
3103+
3104+
def test_quantile_interpolation_np_lt_1p9(self):
3105+
# GH #10174
3106+
if not _np_version_under1p9:
3107+
raise nose.SkipTest("Numpy version is greater than 1.9")
3108+
3109+
from numpy import percentile
3110+
3111+
# interpolation = linear (default case)
3112+
q = self.ts.quantile(0.1, interpolation='linear')
3113+
self.assertEqual(q, percentile(self.ts.valid(), 10))
3114+
q1 = self.ts.quantile(0.1)
3115+
self.assertEqual(q1, percentile(self.ts.valid(), 10))
3116+
3117+
# interpolation other than linear
3118+
expErrMsg = "Interpolation methods other than " \
3119+
"linear not supported in numpy < 1.9"
3120+
with tm.assertRaisesRegexp(ValueError, expErrMsg):
3121+
self.ts.quantile(0.9, interpolation='nearest')
3122+
3123+
# object dtype
3124+
with tm.assertRaisesRegexp(ValueError, expErrMsg):
3125+
q = Series(self.ts, dtype=object).quantile(0.7,
3126+
interpolation='higher')
3127+
30863128
def test_append(self):
30873129
appendedSeries = self.series.append(self.objSeries)
30883130
for idx, value in compat.iteritems(appendedSeries):

0 commit comments

Comments
 (0)