Skip to content

Commit 1daaef1

Browse files
committed
BUG: Fix quantile calculation. Only move the -1 (NaN) labels to the last group when `labels.any()` is true (pandas-dev#33200)
1 parent 8e6af0e commit 1daaef1

File tree

1 file changed

+39
-37
lines changed

1 file changed

+39
-37
lines changed

pandas/_libs/groupby.pyx

+39 -37
Original file line number | Diff line number | Diff line change
@@ -779,47 +779,49 @@ def group_quantile(ndarray[float64_t] out,
779779
non_na_counts[lab] += 1
780780

781781
if labels.any():
782-
# Get an index of values sorted by labels and then values
782+
# Put '-1' (NaN) labels as the last group so it does not interfere
783+
# with the calculations.
783784
labels[labels==-1] = np.max(labels) + 1
784-
order = (values, labels)
785-
sort_arr= np.lexsort(order).astype(np.int64, copy=False)
786-
with nogil:
787-
for i in range(ngroups):
788-
# Figure out how many group elements there are
789-
grp_sz = counts[i]
790-
non_na_sz = non_na_counts[i]
785+
# Get an index of values sorted by labels and then values
786+
order = (values, labels)
787+
sort_arr= np.lexsort(order).astype(np.int64, copy=False)
788+
with nogil:
789+
for i in range(ngroups):
790+
# Figure out how many group elements there are
791+
grp_sz = counts[i]
792+
non_na_sz = non_na_counts[i]
791793

792-
if non_na_sz == 0:
793-
out[i] = NaN
794+
if non_na_sz == 0:
795+
out[i] = NaN
796+
else:
797+
# Calculate where to retrieve the desired value
798+
# Casting to int will intentionally truncate result
799+
idx = grp_start + <int64_t>(q * <float64_t>(non_na_sz - 1))
800+
801+
val = values[sort_arr[idx]]
802+
# If requested quantile falls evenly on a particular index
803+
# then write that index's value out. Otherwise interpolate
804+
q_idx = q * (non_na_sz - 1)
805+
frac = q_idx % 1
806+
807+
if frac == 0.0 or interp == INTERPOLATION_LOWER:
808+
out[i] = val
794809
else:
795-
# Calculate where to retrieve the desired value
796-
# Casting to int will intentionally truncate result
797-
idx = grp_start + <int64_t>(q * <float64_t>(non_na_sz - 1))
798-
799-
val = values[sort_arr[idx]]
800-
# If requested quantile falls evenly on a particular index
801-
# then write that index's value out. Otherwise interpolate
802-
q_idx = q * (non_na_sz - 1)
803-
frac = q_idx % 1
804-
805-
if frac == 0.0 or interp == INTERPOLATION_LOWER:
806-
out[i] = val
807-
else:
808-
next_val = values[sort_arr[idx + 1]]
809-
if interp == INTERPOLATION_LINEAR:
810-
out[i] = val + (next_val - val) * frac
811-
elif interp == INTERPOLATION_HIGHER:
810+
next_val = values[sort_arr[idx + 1]]
811+
if interp == INTERPOLATION_LINEAR:
812+
out[i] = val + (next_val - val) * frac
813+
elif interp == INTERPOLATION_HIGHER:
814+
out[i] = next_val
815+
elif interp == INTERPOLATION_MIDPOINT:
816+
out[i] = (val + next_val) / 2.0
817+
elif interp == INTERPOLATION_NEAREST:
818+
if frac > .5 or (frac == .5 and q > .5): # Always OK?
812819
out[i] = next_val
813-
elif interp == INTERPOLATION_MIDPOINT:
814-
out[i] = (val + next_val) / 2.0
815-
elif interp == INTERPOLATION_NEAREST:
816-
if frac > .5 or (frac == .5 and q > .5): # Always OK?
817-
out[i] = next_val
818-
else:
819-
out[i] = val
820-
821-
# Increment the index reference in sorted_arr for the next group
822-
grp_start += grp_sz
820+
else:
821+
out[i] = val
822+
823+
# Increment the index reference in sorted_arr for the next group
824+
grp_start += grp_sz
823825

824826

825827
# ----------------------------------------------------------------------

0 commit comments

Comments (0)