2
2
3
3
import numpy as np
4
4
5
+ from pandas ._libs import lib
5
6
from pandas ._typing import (
6
7
ArrayLike ,
8
+ Scalar ,
7
9
npt ,
8
10
)
11
+ from pandas .compat .numpy import np_percentile_argname
9
12
10
13
from pandas .core .dtypes .missing import (
11
14
isna ,
12
15
na_value_for_dtype ,
13
16
)
14
17
15
- from pandas .core .nanops import nanpercentile
16
-
17
18
18
19
def quantile_compat (
19
20
values : ArrayLike , qs : npt .NDArray [np .float64 ], interpolation : str
@@ -41,7 +42,7 @@ def quantile_compat(
41
42
42
43
def quantile_with_mask (
43
44
values : np .ndarray ,
44
- mask : np .ndarray ,
45
+ mask : npt . NDArray [ np .bool_ ] ,
45
46
fill_value ,
46
47
qs : npt .NDArray [np .float64 ],
47
48
interpolation : str ,
@@ -84,10 +85,9 @@ def quantile_with_mask(
84
85
flat = np .array ([fill_value ] * len (qs ))
85
86
result = np .repeat (flat , len (values )).reshape (len (values ), len (qs ))
86
87
else :
87
- # asarray needed for Sparse, see GH#24600
88
- result = nanpercentile (
88
+ result = _nanpercentile (
89
89
values ,
90
- np . array ( qs ) * 100 ,
90
+ qs * 100.0 ,
91
91
na_value = fill_value ,
92
92
mask = mask ,
93
93
interpolation = interpolation ,
@@ -97,3 +97,92 @@ def quantile_with_mask(
97
97
result = result .T
98
98
99
99
return result
100
+
101
+
102
def _nanpercentile_1d(
    values: np.ndarray,
    mask: npt.NDArray[np.bool_],
    qs: npt.NDArray[np.float64],
    na_value: Scalar,
    interpolation,
) -> Scalar | np.ndarray:
    """
    Wrapper for np.percentile that skips missing values, specialized to
    1-dimensional case.

    Parameters
    ----------
    values : array over which to find quantiles
    mask : ndarray[bool]
        locations in values that should be considered missing
    qs : np.ndarray[float64] of quantile indices to find
        These are forwarded unchanged to ``np.percentile``.
    na_value : scalar
        value to return for empty or all-null values
    interpolation : str
        Forwarded to ``np.percentile`` under the keyword named by
        ``np_percentile_argname``.

    Returns
    -------
    quantiles : scalar or array
        When no unmasked values remain, an array of ``len(qs)`` copies of
        ``na_value``; otherwise the result of ``np.percentile``.
    """
    # Drop the entries flagged as missing before computing anything.
    values = values[~mask]

    if len(values) == 0:
        # Do not force dtype=values.dtype here: na_value may not be
        # representable in it (e.g. na_value=np.nan with int64 values would
        # raise "cannot convert float NaN to integer").  np.full infers a
        # dtype that can hold na_value and avoids building a Python list.
        return np.full(len(qs), na_value)

    return np.percentile(values, qs, **{np_percentile_argname: interpolation})
134
+
135
+
136
def _nanpercentile(
    values: np.ndarray,
    qs: npt.NDArray[np.float64],
    *,
    na_value,
    mask: npt.NDArray[np.bool_],
    interpolation,
):
    """
    Wrapper for np.percentile that skips missing values.

    Parameters
    ----------
    values : np.ndarray[ndim=2] over which to find quantiles
    qs : np.ndarray[float64] of quantile indices to find
        These are forwarded unchanged to ``np.percentile``.
    na_value : scalar
        value to return for empty or all-null values
    mask : np.ndarray[bool]
        locations in values that should be considered missing
    interpolation : str
        Forwarded to ``np.percentile`` under the keyword named by
        ``np_percentile_argname``.

    Returns
    -------
    quantiles : scalar or array
        Percentiles computed along axis 1 of ``values``.
    """

    if values.dtype.kind in ["m", "M"]:
        # need to cast to integer to avoid rounding errors in numpy
        result = _nanpercentile(
            values.view("i8"),
            qs=qs,
            na_value=na_value.view("i8"),
            mask=mask,
            interpolation=interpolation,
        )

        # Note: we have to do `astype` and not view because in general we
        # have float result at this point, not i8
        return result.astype(values.dtype)

    if not lib.is_scalar(mask) and mask.any():
        # Caller is responsible for ensuring mask shape match
        assert mask.shape == values.shape
        # Iterating a 2D ndarray yields its rows, so each (row, row-mask)
        # pair is handled by the 1D helper.
        per_row = [
            _nanpercentile_1d(val, m, qs, na_value, interpolation=interpolation)
            for val, m in zip(values, mask)
        ]

        if values.dtype.kind == "f":
            # Float input: keep the original itemsize (e.g. float32).
            return np.asarray(per_row, dtype=values.dtype).T

        # Non-float input: let numpy infer the dtype first.  Forcing
        # values.dtype would silently truncate interpolated quantiles
        # (e.g. 1.5 -> 1) and would fail outright for a NaN na_value.
        result = np.asarray(per_row).T
        if result.dtype != values.dtype and not mask.all():
            # Cast back to the input dtype only when that loses nothing.
            with np.errstate(invalid="ignore"):
                roundtrip = result.astype(values.dtype, copy=False)
            if (result == roundtrip).all():
                result = roundtrip
        return result

    return np.percentile(
        values, qs, axis=1, **{np_percentile_argname: interpolation}
    )
0 commit comments