|
27 | 27 | SettingWithCopyError, SettingWithCopyWarning,
|
28 | 28 | AbstractMethodError)
|
29 | 29 | import pandas.core.nanops as nanops
|
| 30 | +from numpy import percentile as _quantile |
30 | 31 | from pandas.util.decorators import Appender, Substitution, deprecate_kwarg
|
31 | 32 | from pandas.core import config
|
| 33 | +from pandas import _np_version_under1p9 |
32 | 34 |
|
33 | 35 | # goal is to be able to define the docs close to function, while still being
|
34 | 36 | # able to share
|
@@ -842,43 +844,7 @@ def __contains__(self, key):
|
842 | 844 |
|
@property
def empty(self):
    """True if any axis of the NDFrame has length 0.

    Notes
    -----
    Only the lengths of the axes are inspected; the stored values are
    never looked at. An object that holds nothing but NaNs therefore
    still has non-zero-length axes and is not reported as empty.
    """
    axis_lengths = (len(self._get_axis(name)) for name in self._AXIS_ORDERS)
    # A single zero-length axis is enough to make the object empty.
    return any(size == 0 for size in axis_lengths)
|
883 | 849 |
|
884 | 850 | def __nonzero__(self):
|
@@ -4110,6 +4076,125 @@ def ranker(data):
|
4110 | 4076 |
|
4111 | 4077 | return ranker(data)
|
4112 | 4078 |
|
_shared_docs['quantile'] = ("""
    Return values at the given quantile over requested axis, a la
    numpy.percentile.

    Parameters
    ----------
    q : float or array-like, default 0.5 (50 percentile)
        0 <= q <= 1, the quantile(s) to compute
    axis : {0, 1, 'index', 'columns'} (default 0)
        0 or 'index' for row-wise, 1 or 'columns' for column-wise
    numeric_only : boolean, default None
        If True, include only float, int, boolean data. If None or
        False, all of the data is used.
    interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'}
        This optional parameter specifies the interpolation method to use,
        when the desired quantile lies between two data points `i` and `j`
        (default 'linear'):

        * linear: `i + (j - i) * fraction`, where `fraction` is the
          fractional part of the index surrounded by `i` and `j`.
        * lower: `i`.
        * higher: `j`.
        * nearest: `i` or `j` whichever is nearest.
        * midpoint: (`i` + `j`) / 2.

        .. versionadded:: 0.18.0

    Returns
    -------
    %s

    Raises
    ------
    NotImplementedError
        If the object has three or more dimensions.
    ValueError
        If `interpolation` is not 'linear' and numpy is older than 1.9.

    Examples
    --------

    >>> s = Series([1, 2, 3, 4])
    >>> s.quantile(.5)
    2.5
    >>> s.quantile([.25, .5, .75])
    0.25    1.75
    0.50    2.50
    0.75    3.25
    dtype: float64
    >>> df = DataFrame(np.array([[1, 1], [2, 10], [3, 100], [4, 100]]),
                       columns=['a', 'b'])
    >>> df.quantile(.1)
    a    1.3
    b    3.7
    dtype: float64
    >>> df.quantile([.1, .5])
           a     b
    0.1  1.3   3.7
    0.5  2.5  55.0
    """)


@Appender(_shared_docs['quantile'] % '')
def quantile(self, q=0.5, axis=0, numeric_only=None,
             interpolation='linear'):
    # NOTE: no docstring here on purpose; the documentation is attached
    # by the Appender decorator from _shared_docs['quantile'].
    if self.ndim >= 3:
        msg = "quantile is not implemented on Panel or PanelND objects."
        raise NotImplementedError(msg)
    elif self.ndim == 1:
        # 1-d input: compute on a single-column frame, then unwrap the
        # result back to a scalar (scalar q) or a 1-d object (list q).
        result = self.to_frame().quantile(q=q, axis=axis,
                                          numeric_only=numeric_only,
                                          interpolation=interpolation)
        if not com.is_list_like(q):
            return result.iloc[0]
        else:
            return result[result.columns[0]]

    self._check_percentile(q)
    # numpy.percentile works on a 0-100 scale, q is given on 0-1.
    per = np.asarray(q) * 100

    if not com.is_list_like(per):
        # Scalar q: compute as a one-element list, squeeze at the end.
        per = [per]
        q = [q]
        squeeze = True
    else:
        squeeze = False

    # The numpy version cannot change between calls, so decide once
    # whether the ``interpolation`` keyword can be forwarded.
    if _np_version_under1p9:
        if interpolation != 'linear':
            raise ValueError("Interpolation methods other than linear "
                             "are not supported in numpy < 1.9")
        percentile_kwargs = {}
    else:
        percentile_kwargs = {'interpolation': interpolation}

    def f(arr, per):
        # Quantile of one column/row at one percentile, nulls dropped.
        boxer = com.i8_boxer(arr) \
            if com.needs_i8_conversion(arr) else lambda x: x
        if arr._is_datelike_mixed_type:
            values = _values_from_object(arr).view('i8')
        else:
            values = arr.astype(float)
        values = values[notnull(values)]
        if len(values) == 0:
            # Nothing left after dropping nulls.
            return boxer(np.nan)
        return boxer(_quantile(values, per, **percentile_kwargs))

    data = self._get_numeric_data() if numeric_only else self

    axis = self._get_axis_number(axis)

    if axis == 1:
        data = data.T

    quantiles = [[f(vals, x) for x in per]
                 for (_, vals) in data.iteritems()]

    result = self._constructor(quantiles, index=data._info_axis,
                               columns=q).T
    if squeeze:
        if result.shape == (1, 1):
            result = result.T.iloc[:, 0]  # don't want scalar
        else:
            result = result.T.squeeze()
        result.name = None  # For groupby, so it can set an index name
    return result
| 4197 | + |
4113 | 4198 | _shared_docs['align'] = ("""
|
4114 | 4199 | Align two object on their axes with the
|
4115 | 4200 | specified join method for each axis Index
|
|
0 commit comments