Skip to content

Commit 697a538

Browse files
authored
BUG: DataFrameGroupBy.quantile raises for non-numeric dtypes rather than dropping columns (#34756)
1 parent 88d81cc commit 697a538

File tree

3 files changed

+23
-3
lines changed

3 files changed

+23
-3
lines changed

doc/source/whatsnew/v1.1.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -1118,6 +1118,7 @@ Groupby/resample/rolling
11181118
- Bug in :meth:`DataFrame.groupby` lost index, when one of the ``agg`` keys referenced an empty list (:issue:`32580`)
11191119
- Bug in :meth:`Rolling.apply` where ``center=True`` was ignored when ``engine='numba'`` was specified (:issue:`34784`)
11201120
- Bug in :meth:`DataFrame.ewm.cov` was throwing ``AssertionError`` for :class:`MultiIndex` inputs (:issue:`34440`)
1121+
- Bug in :meth:`core.groupby.DataFrameGroupBy.quantile` where a ``TypeError`` was raised for non-numeric types rather than dropping the columns (:issue:`27892`)
11211122
- Bug in :meth:`core.groupby.DataFrameGroupBy.transform` when ``func='nunique'`` and columns are of type ``datetime64``, the result would also be of type ``datetime64`` instead of ``int64`` (:issue:`35109`)
11221123
- Bug in :meth:`DataFrameGroupBy.first` and :meth:`DataFrameGroupBy.last` that would raise an unnecessary ``ValueError`` when grouping on multiple ``Categoricals`` (:issue:`34951`)
11231124

pandas/core/groupby/groupby.py

+14-3
Original file line numberDiff line numberDiff line change
@@ -2403,7 +2403,7 @@ def _get_cythonized_result(
24032403
signature
24042404
needs_2d : bool, default False
24052405
Whether the values and result of the Cython call signature
2406-
are at least 2-dimensional.
2406+
are 2-dimensional.
24072407
min_count : int, default None
24082408
When not None, min_count for the Cython call
24092409
needs_mask : bool, default False
@@ -2419,7 +2419,9 @@ def _get_cythonized_result(
24192419
Function should return a tuple where the first element is the
24202420
values to be passed to Cython and the second element is an optional
24212421
type which the values should be converted to after being returned
2422-
by the Cython operation. Raises if `needs_values` is False.
2422+
by the Cython operation. This function is also responsible for
2423+
raising a TypeError if the values have an invalid type. Raises
2424+
if `needs_values` is False.
24232425
post_processing : function, default None
24242426
Function to be applied to result of Cython function. Should accept
24252427
an array of values as the first argument and type inferences as its
@@ -2451,6 +2453,7 @@ def _get_cythonized_result(
24512453
output: Dict[base.OutputKey, np.ndarray] = {}
24522454
base_func = getattr(libgroupby, how)
24532455

2456+
error_msg = ""
24542457
for idx, obj in enumerate(self._iterate_slices()):
24552458
name = obj.name
24562459
values = obj._values
@@ -2477,7 +2480,11 @@ def _get_cythonized_result(
24772480
if needs_values:
24782481
vals = values
24792482
if pre_processing:
2480-
vals, inferences = pre_processing(vals)
2483+
try:
2484+
vals, inferences = pre_processing(vals)
2485+
except TypeError as e:
2486+
error_msg = str(e)
2487+
continue
24812488
if needs_2d:
24822489
vals = vals.reshape((-1, 1))
24832490
vals = vals.astype(cython_dtype, copy=False)
@@ -2509,6 +2516,10 @@ def _get_cythonized_result(
25092516
key = base.OutputKey(label=name, position=idx)
25102517
output[key] = result
25112518

2519+
# error_msg is "" on a frame/series with no rows or columns
2520+
if len(output) == 0 and error_msg != "":
2521+
raise TypeError(error_msg)
2522+
25122523
if aggregate:
25132524
return self._wrap_aggregated_output(output)
25142525
else:

pandas/tests/groupby/test_quantile.py

+8
Original file line numberDiff line numberDiff line change
@@ -232,3 +232,11 @@ def test_groupby_quantile_nullable_array(values, q):
232232

233233
expected = pd.Series(true_quantiles * 2, index=idx, name="b")
234234
tm.assert_series_equal(result, expected)
235+
236+
237+
@pytest.mark.parametrize("q", [0.5, [0.0, 0.5, 1.0]])
238+
def test_groupby_quantile_skips_invalid_dtype(q):
239+
df = pd.DataFrame({"a": [1], "b": [2.0], "c": ["x"]})
240+
result = df.groupby("a").quantile(q)
241+
expected = df.groupby("a")[["b"]].quantile(q)
242+
tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)