diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx index 12fe78a0f8a18..ff5258e37e352 100644 --- a/pandas/_libs/groupby.pyx +++ b/pandas/_libs/groupby.pyx @@ -1238,6 +1238,7 @@ def group_nth( if nobs[i, j] < min_count: if uses_mask: result_mask[i, j] = True + out[i, j] = 0 elif iu_64_floating_obj_t is int64_t: # TODO: only if datetimelike? out[i, j] = NPY_NAT diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 6e6ef14a25941..eadba375c66a3 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -367,6 +367,7 @@ def _reconstruct_ea_result( """ Construct an ExtensionArray result from an ndarray result. """ + dtype: BaseMaskedDtype | StringDtype if isinstance(values.dtype, StringDtype): dtype = values.dtype @@ -375,19 +376,17 @@ def _reconstruct_ea_result( elif isinstance(values.dtype, BaseMaskedDtype): new_dtype = self._get_result_dtype(values.dtype.numpy_dtype) + dtype = BaseMaskedDtype.from_numpy_dtype(new_dtype) # error: Incompatible types in assignment (expression has type - # "BaseMaskedDtype", variable has type "StringDtype") - dtype = BaseMaskedDtype.from_numpy_dtype( # type: ignore[assignment] - new_dtype - ) - cls = dtype.construct_array_type() + # "Type[BaseMaskedArray]", variable has type "Type[BaseStringArray]") + cls = dtype.construct_array_type() # type: ignore[assignment] return cls._from_sequence(res_values, dtype=dtype) - elif needs_i8_conversion(values.dtype): - assert res_values.dtype.kind != "f" # just to be on the safe side - i8values = res_values.view("i8") - # error: Too many arguments for "ExtensionArray" - return type(values)(i8values, dtype=values.dtype) # type: ignore[call-arg] + elif isinstance(values, (DatetimeArray, TimedeltaArray, PeriodArray)): + # In to_cython_values we took a view as M8[ns] + assert res_values.dtype == "M8[ns]" + res_values = res_values.view(values._ndarray.dtype) + return values._from_backing_data(res_values) raise NotImplementedError @@ -425,12 +424,8 @@ def _masked_ea_wrap_cython_operation( **kwargs, ) - new_dtype = self._get_result_dtype(orig_values.dtype.numpy_dtype) - dtype = BaseMaskedDtype.from_numpy_dtype(new_dtype) - # TODO: avoid cast as res_values *should* already have the right - # dtype; last attempt ran into trouble on 32bit linux build - res_values = res_values.astype(dtype.type, copy=False) - + # res_values should already have the correct dtype, we just need to + # wrap in a MaskedArray return orig_values._maybe_mask_result(res_values, result_mask) @final