Skip to content

Commit 60185e1

Browse files
WillAyd authored and jreback committed
Backports for 0.25.3 (#29313)
* Backport PR #27826 for 0.25.3 release
* BUG: Fix groupby quantile segfault. Validate that q is between 0 and 1. Closes #27470
* prettier
* Backport PR #29173 for 0.25.3 release
* Backport PR #29296 for 0.25.3 release
* Backport PR #29294 for 0.25.3 whatsnew
1 parent 0efc71b commit 60185e1

File tree

6 files changed

+67
-5
lines changed

6 files changed

+67
-5
lines changed

doc/source/whatsnew/index.rst

+1
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ Version 0.25
1616
.. toctree::
1717
:maxdepth: 2
1818

19+
v0.25.3
1920
v0.25.2
2021
v0.25.1
2122
v0.25.0

doc/source/whatsnew/v0.25.3.rst

+22
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
.. _whatsnew_0253:
2+
3+
What's new in 0.25.3 (October 31, 2019)
4+
---------------------------------------
5+
6+
These are the changes in pandas 0.25.3. See :ref:`release` for a full changelog
7+
including other versions of pandas.
8+
9+
.. _whatsnew_0253.bug_fixes:
10+
11+
Bug fixes
12+
~~~~~~~~~
13+
14+
Groupby/resample/rolling
15+
^^^^^^^^^^^^^^^^^^^^^^^^
16+
17+
- Bug in :meth:`DataFrameGroupBy.quantile` where NA values in the grouping could cause segfaults or incorrect results (:issue:`28882`)
18+
19+
Contributors
20+
~~~~~~~~~~~~
21+
22+
.. contributors:: v0.25.2..HEAD

pandas/_libs/groupby.pyx

+8
Original file line numberDiff line numberDiff line change
@@ -719,6 +719,11 @@ def group_quantile(ndarray[float64_t] out,
719719
ndarray[int64_t] counts, non_na_counts, sort_arr
720720

721721
assert values.shape[0] == N
722+
723+
if not (0 <= q <= 1):
724+
raise ValueError("'q' must be between 0 and 1. Got"
725+
" '{}' instead".format(q))
726+
722727
inter_methods = {
723728
'linear': INTERPOLATION_LINEAR,
724729
'lower': INTERPOLATION_LOWER,
@@ -736,6 +741,9 @@ def group_quantile(ndarray[float64_t] out,
736741
with nogil:
737742
for i in range(N):
738743
lab = labels[i]
744+
if lab == -1: # NA group label
745+
continue
746+
739747
counts[lab] += 1
740748
if not mask[i]:
741749
non_na_counts[lab] += 1

pandas/io/parsers.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -2935,7 +2935,7 @@ def _next_iter_line(self, row_num):
29352935
if self.warn_bad_lines or self.error_bad_lines:
29362936
msg = str(e)
29372937

2938-
if "NULL byte" in msg:
2938+
if "NULL byte" in msg or "line contains NUL" in msg:
29392939
msg = (
29402940
"NULL byte detected. This byte "
29412941
"cannot be processed in Python's "

pandas/tests/groupby/test_function.py

+34
Original file line numberDiff line numberDiff line change
@@ -1316,6 +1316,40 @@ def test_quantile_raises():
13161316
df.groupby("key").quantile()
13171317

13181318

1319+
def test_quantile_out_of_bounds_q_raises():
1320+
# https://github.com/pandas-dev/pandas/issues/27470
1321+
df = pd.DataFrame(dict(a=[0, 0, 0, 1, 1, 1], b=range(6)))
1322+
g = df.groupby([0, 0, 0, 1, 1, 1])
1323+
with pytest.raises(ValueError, match="Got '50.0' instead"):
1324+
g.quantile(50)
1325+
1326+
with pytest.raises(ValueError, match="Got '-1.0' instead"):
1327+
g.quantile(-1)
1328+
1329+
1330+
def test_quantile_missing_group_values_no_segfaults():
1331+
# GH 28662
1332+
data = np.array([1.0, np.nan, 1.0])
1333+
df = pd.DataFrame(dict(key=data, val=range(3)))
1334+
1335+
# Random segfaults; would have been guaranteed in loop
1336+
grp = df.groupby("key")
1337+
for _ in range(100):
1338+
grp.quantile()
1339+
1340+
1341+
def test_quantile_missing_group_values_correct_results():
1342+
# GH 28662
1343+
data = np.array([1.0, np.nan, 3.0, np.nan])
1344+
df = pd.DataFrame(dict(key=data, val=range(4)))
1345+
1346+
result = df.groupby("key").quantile()
1347+
expected = pd.DataFrame(
1348+
[1.0, 3.0], index=pd.Index([1.0, 3.0], name="key"), columns=["val"]
1349+
)
1350+
tm.assert_frame_equal(result, expected)
1351+
1352+
13191353
# pipe
13201354
# --------------------------------
13211355

pandas/tests/io/parser/test_common.py

+1-4
Original file line numberDiff line numberDiff line change
@@ -1898,10 +1898,7 @@ def test_null_byte_char(all_parsers):
18981898
out = parser.read_csv(StringIO(data), names=names)
18991899
tm.assert_frame_equal(out, expected)
19001900
else:
1901-
if compat.PY38:
1902-
msg = "line contains NUL"
1903-
else:
1904-
msg = "NULL byte detected"
1901+
msg = "NULL byte detected"
19051902
with pytest.raises(ParserError, match=msg):
19061903
parser.read_csv(StringIO(data), names=names)
19071904

0 commit comments

Comments
 (0)