Skip to content

Commit 0ff3a5d

Browse files
committed
Merge pull request pandas-dev#10881 from sinhrks/quantiles
ERR/DEPR: Fix quantile error message / remove percentile_width
2 parents 873d007 + 41929a9 commit 0ff3a5d

File tree

7 files changed

+70
-63
lines changed

7 files changed

+70
-63
lines changed

doc/source/whatsnew/v0.17.0.txt

+2
Original file line numberDiff line numberDiff line change
@@ -669,6 +669,7 @@ Removal of prior version deprecations/changes
669669

670670
- Remove use of some deprecated numpy comparison operations, mainly in tests. (:issue:`10569`)
671671
- Removal of ``na_last`` parameters from ``Series.order()`` and ``Series.sort()``, in favor of ``na_position``, xref (:issue:`5231`)
672+
- Removal of ``percentile_width`` from ``.describe()``, in favor of ``percentiles``. (:issue:`7088`)
672673

673674
.. _whatsnew_0170.performance:
674675

@@ -694,6 +695,7 @@ Bug Fixes
694695
- Bug in ``DataFrame.apply`` when function returns categorical series. (:issue:`9573`)
695696
- Bug in ``to_datetime`` with invalid dates and formats supplied (:issue:`10154`)
696697
- Bug in ``Index.drop_duplicates`` dropping name(s) (:issue:`10115`)
698+
- Bug in ``Series.quantile`` dropping name (:issue:`10881`)
697699
- Bug in ``pd.Series`` when setting a value on an empty ``Series`` whose index has a frequency. (:issue:`10193`)
698700
- Bug in ``pd.Series.interpolate`` with invalid ``order`` keyword values. (:issue:`10633`)
699701
- Bug in ``DataFrame.plot`` raises ``ValueError`` when color name is specified by multiple characters (:issue:`10387`)

pandas/core/frame.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -4684,6 +4684,7 @@ def quantile(self, q=0.5, axis=0, numeric_only=True):
46844684
0.1 1.3 3.7
46854685
0.5 2.5 55.0
46864686
"""
4687+
self._check_percentile(q)
46874688
per = np.asarray(q) * 100
46884689

46894690
if not com.is_list_like(per):
@@ -4718,7 +4719,9 @@ def f(arr, per):
47184719

47194720
quantiles = [[f(vals, x) for x in per]
47204721
for (_, vals) in data.iteritems()]
4721-
result = DataFrame(quantiles, index=data._info_axis, columns=q).T
4722+
4723+
result = self._constructor(quantiles, index=data._info_axis,
4724+
columns=q).T
47224725
if len(is_dt_col) > 0:
47234726
result[is_dt_col] = result[is_dt_col].applymap(lib.Timestamp)
47244727
if squeeze:

pandas/core/generic.py

+18-28
Original file line numberDiff line numberDiff line change
@@ -2989,7 +2989,7 @@ def interpolate(self, method='linear', axis=0, limit=None, inplace=False,
29892989
* 'nearest', 'zero', 'slinear', 'quadratic', 'cubic',
29902990
'barycentric', 'polynomial' is passed to
29912991
``scipy.interpolate.interp1d``. Both 'polynomial' and 'spline'
2992-
require that you also specify an `order` (int),
2992+
require that you also specify an `order` (int),
29932993
e.g. df.interpolate(method='polynomial', order=4).
29942994
These use the actual numerical values of the index.
29952995
* 'krogh', 'piecewise_polynomial', 'spline', and 'pchip' are all
@@ -4096,11 +4096,6 @@ def abs(self):
40964096
40974097
Parameters
40984098
----------
4099-
percentile_width : float, deprecated
4100-
The ``percentile_width`` argument will be removed in a future
4101-
version. Use ``percentiles`` instead.
4102-
width of the desired uncertainty interval, default is 50,
4103-
which corresponds to lower=25, upper=75
41044099
percentiles : array-like, optional
41054100
The percentiles to include in the output. Should all
41064101
be in the interval [0, 1]. By default `percentiles` is
@@ -4149,36 +4144,17 @@ def abs(self):
41494144
"""
41504145

41514146
@Appender(_shared_docs['describe'] % _shared_doc_kwargs)
4152-
def describe(self, percentile_width=None, percentiles=None, include=None, exclude=None ):
4147+
def describe(self, percentiles=None, include=None, exclude=None ):
41534148
if self.ndim >= 3:
41544149
msg = "describe is not implemented on on Panel or PanelND objects."
41554150
raise NotImplementedError(msg)
41564151

4157-
if percentile_width is not None and percentiles is not None:
4158-
msg = "Cannot specify both 'percentile_width' and 'percentiles.'"
4159-
raise ValueError(msg)
41604152
if percentiles is not None:
41614153
# get them all to be in [0, 1]
4154+
self._check_percentile(percentiles)
41624155
percentiles = np.asarray(percentiles)
4163-
if (percentiles > 1).any():
4164-
percentiles = percentiles / 100.0
4165-
msg = ("percentiles should all be in the interval [0, 1]. "
4166-
"Try {0} instead.")
4167-
raise ValueError(msg.format(list(percentiles)))
41684156
else:
4169-
# only warn if they change the default
4170-
if percentile_width is not None:
4171-
do_warn = True
4172-
else:
4173-
do_warn = False
4174-
percentile_width = percentile_width or 50
4175-
lb = .5 * (1. - percentile_width / 100.)
4176-
ub = 1. - lb
4177-
percentiles = np.array([lb, 0.5, ub])
4178-
if do_warn:
4179-
msg = ("The `percentile_width` keyword is deprecated. "
4180-
"Use percentiles={0} instead".format(list(percentiles)))
4181-
warnings.warn(msg, FutureWarning)
4157+
percentiles = np.array([0.25, 0.5, 0.75])
41824158

41834159
# median should always be included
41844160
if (percentiles != 0.5).all(): # median isn't included
@@ -4256,6 +4232,20 @@ def describe_1d(data, percentiles):
42564232
d = pd.concat(ldesc, join_axes=pd.Index([names]), axis=1)
42574233
return d
42584234

4235+
def _check_percentile(self, q):
4236+
""" Validate percentiles. Used by describe and quantile """
4237+
4238+
msg = ("percentiles should all be in the interval [0, 1]. "
4239+
"Try {0} instead.")
4240+
q = np.asarray(q)
4241+
if q.ndim == 0:
4242+
if not 0 <= q <= 1:
4243+
raise ValueError(msg.format(q / 100.0))
4244+
else:
4245+
if not all(0 <= qs <= 1 for qs in q):
4246+
raise ValueError(msg.format(q / 100.0))
4247+
return q
4248+
42594249
_shared_docs['pct_change'] = """
42604250
Percent change over given number of periods.
42614251

pandas/core/series.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -1266,11 +1266,12 @@ def quantile(self, q=0.5):
12661266
dtype: float64
12671267
"""
12681268
valid = self.dropna()
1269+
self._check_percentile(q)
12691270

12701271
def multi(values, qs):
12711272
if com.is_list_like(qs):
1272-
return Series([_quantile(values, x*100)
1273-
for x in qs], index=qs)
1273+
values = [_quantile(values, x*100) for x in qs]
1274+
return self._constructor(values, index=qs, name=self.name)
12741275
else:
12751276
return _quantile(values, qs*100)
12761277

pandas/tests/test_frame.py

+6
Original file line numberDiff line numberDiff line change
@@ -12837,6 +12837,12 @@ def test_quantile_datetime(self):
1283712837
index=[0.5], columns=[0, 1])
1283812838
assert_frame_equal(result, expected)
1283912839

12840+
def test_quantile_invalid(self):
12841+
msg = 'percentiles should all be in the interval \\[0, 1\\]'
12842+
for invalid in [-1, 2, [0.5, -1], [0.5, 2]]:
12843+
with tm.assertRaisesRegexp(ValueError, msg):
12844+
self.tsframe.quantile(invalid)
12845+
1284012846
def test_cumsum(self):
1284112847
self.tsframe.ix[5:10, 0] = nan
1284212848
self.tsframe.ix[10:15, 1] = nan

pandas/tests/test_generic.py

+22-28
Original file line numberDiff line numberDiff line change
@@ -909,17 +909,6 @@ def test_describe(self):
909909
_ = self.series.describe()
910910
_ = self.ts.describe()
911911

912-
def test_describe_percentiles(self):
913-
with tm.assert_produces_warning(FutureWarning):
914-
desc = self.series.describe(percentile_width=50)
915-
assert '75%' in desc.index
916-
assert '25%' in desc.index
917-
918-
with tm.assert_produces_warning(FutureWarning):
919-
desc = self.series.describe(percentile_width=95)
920-
assert '97.5%' in desc.index
921-
assert '2.5%' in desc.index
922-
923912
def test_describe_objects(self):
924913
s = Series(['a', 'b', 'b', np.nan, np.nan, np.nan, 'c', 'd', 'a', 'a'])
925914
result = s.describe()
@@ -1181,27 +1170,19 @@ def test_describe(self):
11811170
desc = tm.makeMixedDataFrame().describe()
11821171
desc = tm.makeTimeDataFrame().describe()
11831172

1184-
def test_describe_percentiles(self):
1185-
with tm.assert_produces_warning(FutureWarning):
1186-
desc = tm.makeDataFrame().describe(percentile_width=50)
1187-
assert '75%' in desc.index
1188-
assert '25%' in desc.index
1189-
1190-
with tm.assert_produces_warning(FutureWarning):
1191-
desc = tm.makeDataFrame().describe(percentile_width=95)
1192-
assert '97.5%' in desc.index
1193-
assert '2.5%' in desc.index
1194-
1195-
def test_describe_quantiles_both(self):
1196-
with tm.assertRaises(ValueError):
1197-
tm.makeDataFrame().describe(percentile_width=50,
1198-
percentiles=[25, 75])
1199-
12001173
def test_describe_percentiles_percent_or_raw(self):
1174+
msg = 'percentiles should all be in the interval \\[0, 1\\]'
1175+
12011176
df = tm.makeDataFrame()
1202-
with tm.assertRaises(ValueError):
1177+
with tm.assertRaisesRegexp(ValueError, msg):
12031178
df.describe(percentiles=[10, 50, 100])
12041179

1180+
with tm.assertRaisesRegexp(ValueError, msg):
1181+
df.describe(percentiles=[2])
1182+
1183+
with tm.assertRaisesRegexp(ValueError, msg):
1184+
df.describe(percentiles=[-2])
1185+
12051186
def test_describe_percentiles_equivalence(self):
12061187
df = tm.makeDataFrame()
12071188
d1 = df.describe()
@@ -1213,16 +1194,29 @@ def test_describe_percentiles_insert_median(self):
12131194
d1 = df.describe(percentiles=[.25, .75])
12141195
d2 = df.describe(percentiles=[.25, .5, .75])
12151196
assert_frame_equal(d1, d2)
1197+
self.assertTrue('25%' in d1.index)
1198+
self.assertTrue('75%' in d2.index)
12161199

12171200
# none above
12181201
d1 = df.describe(percentiles=[.25, .45])
12191202
d2 = df.describe(percentiles=[.25, .45, .5])
12201203
assert_frame_equal(d1, d2)
1204+
self.assertTrue('25%' in d1.index)
1205+
self.assertTrue('45%' in d2.index)
12211206

12221207
# none below
12231208
d1 = df.describe(percentiles=[.75, 1])
12241209
d2 = df.describe(percentiles=[.5, .75, 1])
12251210
assert_frame_equal(d1, d2)
1211+
self.assertTrue('75%' in d1.index)
1212+
self.assertTrue('100%' in d2.index)
1213+
1214+
# edge
1215+
d1 = df.describe(percentiles=[0, 1])
1216+
d2 = df.describe(percentiles=[0, .5, 1])
1217+
assert_frame_equal(d1, d2)
1218+
self.assertTrue('0%' in d1.index)
1219+
self.assertTrue('100%' in d2.index)
12261220

12271221
def test_describe_no_numeric(self):
12281222
df = DataFrame({'A': ['foo', 'foo', 'bar'] * 8,

pandas/tests/test_series.py

+15-4
Original file line numberDiff line numberDiff line change
@@ -2854,21 +2854,32 @@ def test_quantile(self):
28542854
result = Series([np.timedelta64('NaT')]).sum()
28552855
self.assertTrue(result is pd.NaT)
28562856

2857+
msg = 'percentiles should all be in the interval \\[0, 1\\]'
2858+
for invalid in [-1, 2, [0.5, -1], [0.5, 2]]:
2859+
with tm.assertRaisesRegexp(ValueError, msg):
2860+
self.ts.quantile(invalid)
2861+
28572862
def test_quantile_multi(self):
28582863
from numpy import percentile
28592864

28602865
qs = [.1, .9]
28612866
result = self.ts.quantile(qs)
28622867
expected = pd.Series([percentile(self.ts.valid(), 10),
28632868
percentile(self.ts.valid(), 90)],
2864-
index=qs)
2869+
index=qs, name=self.ts.name)
28652870
assert_series_equal(result, expected)
28662871

28672872
dts = self.ts.index.to_series()
2873+
dts.name = 'xxx'
28682874
result = dts.quantile((.2, .2))
2869-
assert_series_equal(result, Series([Timestamp('2000-01-10 19:12:00'),
2870-
Timestamp('2000-01-10 19:12:00')],
2871-
index=[.2, .2]))
2875+
expected = Series([Timestamp('2000-01-10 19:12:00'),
2876+
Timestamp('2000-01-10 19:12:00')],
2877+
index=[.2, .2], name='xxx')
2878+
assert_series_equal(result, expected)
2879+
2880+
result = self.ts.quantile([])
2881+
expected = pd.Series([], name=self.ts.name)
2882+
assert_series_equal(result, expected)
28722883

28732884
def test_append(self):
28742885
appendedSeries = self.series.append(self.objSeries)

0 commit comments

Comments
 (0)