Skip to content

Commit e027df0

Browse files
CLN: consolidate Series.quantile and DataFrame.quantile
1 parent fe584e7 commit e027df0

File tree

4 files changed

+143
-196
lines changed

4 files changed

+143
-196
lines changed

pandas/core/frame.py

+10-97
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,8 @@
6262
import pandas.algos as _algos
6363

6464
from pandas.core.config import get_option
65-
from pandas import _np_version_under1p9
65+
66+
from textwrap import dedent
6667

6768
# ---------------------------------------------------------------------
6869
# Docstring templates
@@ -4919,108 +4920,20 @@ def f(s):
49194920

49204921
return data.apply(f, axis=axis)
49214922

4922-
def quantile(self, q=0.5, axis=0, numeric_only=True,
4923-
interpolation='linear'):
4924-
"""
4925-
Return values at the given quantile over requested axis, a la
4926-
numpy.percentile.
4927-
4928-
Parameters
4929-
----------
4930-
q : float or array-like, default 0.5 (50% quantile)
4931-
0 <= q <= 1, the quantile(s) to compute
4932-
axis : {0, 1, 'index', 'columns'} (default 0)
4933-
0 or 'index' for row-wise, 1 or 'columns' for column-wise
4934-
interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'}
4935-
.. versionadded:: 0.18.0
4936-
This optional parameter specifies the interpolation method to use,
4937-
when the desired quantile lies between two data points `i` and `j`:
4938-
4939-
* linear: `i + (j - i) * fraction`, where `fraction` is the
4940-
fractional part of the index surrounded by `i` and `j`.
4941-
* lower: `i`.
4942-
* higher: `j`.
4943-
* nearest: `i` or `j` whichever is nearest.
4944-
* midpoint: (`i` + `j`) / 2.
4945-
4946-
Returns
4947-
-------
4923+
@Substitution(dedent("""
49484924
quantiles : Series or DataFrame
49494925
If ``q`` is an array, a DataFrame will be returned where the
49504926
index is ``q``, the columns are the columns of self, and the
49514927
values are the quantiles.
49524928
If ``q`` is a float, a Series will be returned where the
49534929
index is the columns of self and the values are the quantiles.
4954-
4955-
Examples
4956-
--------
4957-
4958-
>>> df = DataFrame(np.array([[1, 1], [2, 10], [3, 100], [4, 100]]),
4959-
columns=['a', 'b'])
4960-
>>> df.quantile(.1)
4961-
a 1.3
4962-
b 3.7
4963-
dtype: float64
4964-
>>> df.quantile([.1, .5])
4965-
a b
4966-
0.1 1.3 3.7
4967-
0.5 2.5 55.0
4968-
"""
4969-
self._check_percentile(q)
4970-
per = np.asarray(q) * 100
4971-
4972-
if not com.is_list_like(per):
4973-
per = [per]
4974-
q = [q]
4975-
squeeze = True
4976-
else:
4977-
squeeze = False
4978-
4979-
if _np_version_under1p9:
4980-
if interpolation != 'linear':
4981-
raise ValueError("Interpolation methods other than linear "
4982-
"are not supported in numpy < 1.9")
4983-
4984-
def f(arr, per, interpolation):
4985-
if arr._is_datelike_mixed_type:
4986-
values = _values_from_object(arr).view('i8')
4987-
else:
4988-
values = arr.astype(float)
4989-
values = values[notnull(values)]
4990-
if len(values) == 0:
4991-
return NA
4992-
else:
4993-
if _np_version_under1p9:
4994-
return _quantile(values, per)
4995-
else:
4996-
return _quantile(values, per, interpolation=interpolation)
4997-
4998-
data = self._get_numeric_data() if numeric_only else self
4999-
5000-
axis = self._get_axis_number(axis)
5001-
5002-
if axis == 1:
5003-
data = data.T
5004-
5005-
# need to know which cols are timestamp going in so that we can
5006-
# map timestamp over them after getting the quantile.
5007-
is_dt_col = data.dtypes.map(com.is_datetime64_dtype)
5008-
is_dt_col = is_dt_col[is_dt_col].index
5009-
5010-
quantiles = [[f(vals, x, interpolation) for x in per]
5011-
for (_, vals) in data.iteritems()]
5012-
5013-
result = self._constructor(quantiles, index=data._info_axis,
5014-
columns=q).T
5015-
if len(is_dt_col) > 0:
5016-
result[is_dt_col] = result[is_dt_col].applymap(lib.Timestamp)
5017-
if squeeze:
5018-
if result.shape == (1, 1):
5019-
result = result.T.iloc[:, 0] # don't want scalar
5020-
else:
5021-
result = result.T.squeeze()
5022-
result.name = None # For groupby, so it can set an index name
5023-
return result
4930+
"""))
4931+
@Appender(_shared_docs['quantile'])
4932+
def quantile(self, q=0.5, axis=0, numeric_only=True,
4933+
interpolation='linear'):
4934+
return super(DataFrame,
4935+
self).quantile(q=q, axis=axis, numeric_only=numeric_only,
4936+
interpolation=interpolation)
50244937

50254938
def to_timestamp(self, freq=None, how='start', axis=0, copy=True):
50264939
"""

pandas/core/generic.py

+122-37
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,10 @@
2727
SettingWithCopyError, SettingWithCopyWarning,
2828
AbstractMethodError)
2929
import pandas.core.nanops as nanops
30+
from numpy import percentile as _quantile
3031
from pandas.util.decorators import Appender, Substitution, deprecate_kwarg
3132
from pandas.core import config
33+
from pandas import _np_version_under1p9
3234

3335
# goal is to be able to define the docs close to function, while still being
3436
# able to share
@@ -842,43 +844,7 @@ def __contains__(self, key):
842844

843845
@property
844846
def empty(self):
845-
"""True if NDFrame is entirely empty [no items], meaning any of the
846-
axes are of length 0.
847-
848-
Notes
849-
-----
850-
If NDFrame contains only NaNs, it is still not considered empty. See
851-
the example below.
852-
853-
Examples
854-
--------
855-
An example of an actual empty DataFrame. Notice the index is empty:
856-
857-
>>> df_empty = pd.DataFrame({'A' : []})
858-
>>> df_empty
859-
Empty DataFrame
860-
Columns: [A]
861-
Index: []
862-
>>> df_empty.empty
863-
True
864-
865-
If we only have NaNs in our DataFrame, it is not considered empty! We
866-
will need to drop the NaNs to make the DataFrame empty:
867-
868-
>>> df = pd.DataFrame({'A' : [np.nan]})
869-
>>> df
870-
A
871-
0 NaN
872-
>>> df.empty
873-
False
874-
>>> df.dropna().empty
875-
True
876-
877-
See also
878-
--------
879-
pandas.Series.dropna
880-
pandas.DataFrame.dropna
881-
"""
847+
"""True if NDFrame is entirely empty [no items]"""
882848
return not all(len(self._get_axis(a)) > 0 for a in self._AXIS_ORDERS)
883849

884850
def __nonzero__(self):
@@ -4110,6 +4076,125 @@ def ranker(data):
41104076

41114077
return ranker(data)
41124078

4079+
_shared_docs['quantile'] = ("""
4080+
Return values at the given quantile over requested axis, a la
4081+
numpy.percentile.
4082+
4083+
Parameters
4084+
----------
4085+
q : float or array-like, default 0.5 (50th percentile)
4086+
0 <= q <= 1, the quantile(s) to compute
4087+
axis : {0, 1, 'index', 'columns'} (default 0)
4088+
0 or 'index' for row-wise, 1 or 'columns' for column-wise
4089+
numeric_only : boolean, default None
4090+
Include only float, int, boolean data. If None or False, all data is
4091+
used without first restricting it to the numeric columns
4092+
interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'}
4093+
.. versionadded:: 0.18.0
4094+
This optional parameter specifies the interpolation method to use,
4095+
when the desired quantile lies between two data points `i` and `j`:
4096+
4097+
* linear: `i + (j - i) * fraction`, where `fraction` is the
4098+
fractional part of the index surrounded by `i` and `j`.
4099+
* lower: `i`.
4100+
* higher: `j`.
4101+
* nearest: `i` or `j` whichever is nearest.
4102+
* midpoint: (`i` + `j`) / 2.
4103+
4104+
Returns
4105+
-------
4106+
%s
4107+
4108+
Examples
4109+
--------
4110+
4111+
>>> s = Series([1, 2, 3, 4])
4112+
>>> s.quantile(.5)
4113+
2.5
4114+
>>> s.quantile([.25, .5, .75])
4115+
0.25 1.75
4116+
0.50 2.50
4117+
0.75 3.25
4118+
dtype: float64
4119+
>>> df = DataFrame(np.array([[1, 1], [2, 10], [3, 100], [4, 100]]),
4120+
columns=['a', 'b'])
4121+
>>> df.quantile(.1)
4122+
a 1.3
4123+
b 3.7
4124+
dtype: float64
4125+
>>> df.quantile([.1, .5])
4126+
a b
4127+
0.1 1.3 3.7
4128+
0.5 2.5 55.0
4129+
""")
4130+
4131+
@Appender(_shared_docs['quantile'] % '')
4132+
def quantile(self, q=0.5, axis=0, numeric_only=None,
4133+
interpolation='linear'):
4134+
if self.ndim >= 3:
4135+
msg = "quantile is not implemented on Panel or PanelND objects."
4136+
raise NotImplementedError(msg)
4137+
elif self.ndim == 1:
4138+
result = self.to_frame().quantile(q=q, axis=axis,
4139+
numeric_only=numeric_only,
4140+
interpolation=interpolation)
4141+
if not com.is_list_like(q):
4142+
return result.iloc[0]
4143+
else:
4144+
return result[result.columns[0]]
4145+
4146+
self._check_percentile(q)
4147+
per = np.asarray(q) * 100
4148+
4149+
if not com.is_list_like(per):
4150+
per = [per]
4151+
q = [q]
4152+
squeeze = True
4153+
else:
4154+
squeeze = False
4155+
4156+
if _np_version_under1p9:
4157+
if interpolation != 'linear':
4158+
raise ValueError("Interpolation methods other than linear "
4159+
"are not supported in numpy < 1.9")
4160+
4161+
def f(arr, per, interpolation):
4162+
boxer = com.i8_boxer(arr) \
4163+
if com.needs_i8_conversion(arr) else lambda x: x
4164+
if arr._is_datelike_mixed_type:
4165+
values = _values_from_object(arr).view('i8')
4166+
else:
4167+
values = arr.astype(float)
4168+
values = values[notnull(values)]
4169+
if len(values) == 0:
4170+
return boxer(np.nan)
4171+
else:
4172+
if _np_version_under1p9:
4173+
return boxer(_quantile(values, per))
4174+
else:
4175+
return boxer(_quantile(values, per,
4176+
interpolation=interpolation))
4177+
4178+
data = self._get_numeric_data() if numeric_only else self
4179+
4180+
axis = self._get_axis_number(axis)
4181+
4182+
if axis == 1:
4183+
data = data.T
4184+
4185+
quantiles = [[f(vals, x, interpolation) for x in per]
4186+
for (_, vals) in data.iteritems()]
4187+
4188+
result = self._constructor(quantiles, index=data._info_axis,
4189+
columns=q).T
4190+
if squeeze:
4191+
if result.shape == (1, 1):
4192+
result = result.T.iloc[:, 0] # don't want scalar
4193+
else:
4194+
result = result.T.squeeze()
4195+
result.name = None # For groupby, so it can set an index name
4196+
return result
4197+
41134198
_shared_docs['align'] = ("""
41144199
Align two objects on their axes with the
41154200
specified join method for each axis Index

0 commit comments

Comments
 (0)