Skip to content

Commit 5832ba9

Browse files
committed
BUG: Fix quantile calculation. Only move -1 labels if there are any labels #33200
1 parent 8e6af0e commit 5832ba9

File tree

1 file changed

+40
-37
lines changed

1 file changed

+40
-37
lines changed

pandas/_libs/groupby.pyx

+40-37
Original file line number | Diff line number | Diff line change
@@ -779,47 +779,50 @@ def group_quantile(ndarray[float64_t] out,
779779
non_na_counts[lab] += 1
780780

781781
if labels.any():
782-
# Get an index of values sorted by labels and then values
782+
# Put '-1' (NaN) labels as the last group so they do not interfere
783+
# with the calculations.
783784
labels[labels==-1] = np.max(labels) + 1
784-
order = (values, labels)
785-
sort_arr= np.lexsort(order).astype(np.int64, copy=False)
786-
with nogil:
787-
for i in range(ngroups):
788-
# Figure out how many group elements there are
789-
grp_sz = counts[i]
790-
non_na_sz = non_na_counts[i]
785+
# Get an index of values sorted by labels and then values
786+
order = (values, labels)
787+
sort_arr = np.lexsort(order).astype(np.int64, copy=False)
788+
789+
with nogil:
790+
for i in range(ngroups):
791+
# Figure out how many group elements there are
792+
grp_sz = counts[i]
793+
non_na_sz = non_na_counts[i]
791794

792-
if non_na_sz == 0:
793-
out[i] = NaN
795+
if non_na_sz == 0:
796+
out[i] = NaN
797+
else:
798+
# Calculate where to retrieve the desired value
799+
# Casting to int will intentionally truncate result
800+
idx = grp_start + <int64_t>(q * <float64_t>(non_na_sz - 1))
801+
802+
val = values[sort_arr[idx]]
803+
# If requested quantile falls evenly on a particular index
804+
# then write that index's value out. Otherwise interpolate
805+
q_idx = q * (non_na_sz - 1)
806+
frac = q_idx % 1
807+
808+
if frac == 0.0 or interp == INTERPOLATION_LOWER:
809+
out[i] = val
794810
else:
795-
# Calculate where to retrieve the desired value
796-
# Casting to int will intentionally truncate result
797-
idx = grp_start + <int64_t>(q * <float64_t>(non_na_sz - 1))
798-
799-
val = values[sort_arr[idx]]
800-
# If requested quantile falls evenly on a particular index
801-
# then write that index's value out. Otherwise interpolate
802-
q_idx = q * (non_na_sz - 1)
803-
frac = q_idx % 1
804-
805-
if frac == 0.0 or interp == INTERPOLATION_LOWER:
806-
out[i] = val
807-
else:
808-
next_val = values[sort_arr[idx + 1]]
809-
if interp == INTERPOLATION_LINEAR:
810-
out[i] = val + (next_val - val) * frac
811-
elif interp == INTERPOLATION_HIGHER:
811+
next_val = values[sort_arr[idx + 1]]
812+
if interp == INTERPOLATION_LINEAR:
813+
out[i] = val + (next_val - val) * frac
814+
elif interp == INTERPOLATION_HIGHER:
815+
out[i] = next_val
816+
elif interp == INTERPOLATION_MIDPOINT:
817+
out[i] = (val + next_val) / 2.0
818+
elif interp == INTERPOLATION_NEAREST:
819+
if frac > .5 or (frac == .5 and q > .5): # Always OK?
812820
out[i] = next_val
813-
elif interp == INTERPOLATION_MIDPOINT:
814-
out[i] = (val + next_val) / 2.0
815-
elif interp == INTERPOLATION_NEAREST:
816-
if frac > .5 or (frac == .5 and q > .5): # Always OK?
817-
out[i] = next_val
818-
else:
819-
out[i] = val
820-
821-
# Increment the index reference in sorted_arr for the next group
822-
grp_start += grp_sz
821+
else:
822+
out[i] = val
823+
824+
# Increment the index reference in sorted_arr for the next group
825+
grp_start += grp_sz
823826

824827

825828
# ----------------------------------------------------------------------

0 commit comments

Comments
 (0)