From 3fa0f6a092a3c7e67be2db48e72a5dfcd2d5be2c Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 4 Mar 2022 18:51:02 -0800 Subject: [PATCH 1/4] troubleshoot 32bit build --- pandas/core/groupby/ops.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index e4e42e7a1178e..b5f9240e4481a 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -375,6 +375,8 @@ def _reconstruct_ea_result( elif isinstance(values.dtype, BaseMaskedDtype): new_dtype = self._get_result_dtype(values.dtype.numpy_dtype) + # Troubleshooting 32bit build + assert new_dtype == res_values.dtype, (new_dtype, res_values.dtype) # error: Incompatible types in assignment (expression has type # "BaseMaskedDtype", variable has type "StringDtype") dtype = BaseMaskedDtype.from_numpy_dtype( # type: ignore[assignment] @@ -426,6 +428,8 @@ def _masked_ea_wrap_cython_operation( ) new_dtype = self._get_result_dtype(orig_values.dtype.numpy_dtype) + # Troubleshooting 32bit build + assert new_dtype == res_values.dtype, (new_dtype, res_values.dtype) dtype = BaseMaskedDtype.from_numpy_dtype(new_dtype) # TODO: avoid cast as res_values *should* already have the right # dtype; last attempt ran into trouble on 32bit linux build From d2247d2e07caadfca690c13485ba8aa9e824812d Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 5 Mar 2022 17:32:42 -0800 Subject: [PATCH 2/4] troubleshoot --- pandas/core/groupby/ops.py | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index b5f9240e4481a..9d322b43a705d 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -367,6 +367,7 @@ def _reconstruct_ea_result( """ Construct an ExtensionArray result from an ndarray result. """ + dtype: BaseMaskedDtype | StringDtype if isinstance(values.dtype, StringDtype): dtype = values.dtype @@ -375,21 +376,17 @@ def _reconstruct_ea_result( elif isinstance(values.dtype, BaseMaskedDtype): new_dtype = self._get_result_dtype(values.dtype.numpy_dtype) - # Troubleshooting 32bit build - assert new_dtype == res_values.dtype, (new_dtype, res_values.dtype) + dtype = BaseMaskedDtype.from_numpy_dtype(new_dtype) # error: Incompatible types in assignment (expression has type - # "BaseMaskedDtype", variable has type "StringDtype") - dtype = BaseMaskedDtype.from_numpy_dtype( # type: ignore[assignment] - new_dtype - ) - cls = dtype.construct_array_type() + # "Type[BaseMaskedArray]", variable has type "Type[BaseStringArray]") + cls = dtype.construct_array_type() # type: ignore[assignment] return cls._from_sequence(res_values, dtype=dtype) - elif needs_i8_conversion(values.dtype): - assert res_values.dtype.kind != "f" # just to be on the safe side - i8values = res_values.view("i8") - # error: Too many arguments for "ExtensionArray" - return type(values)(i8values, dtype=values.dtype) # type: ignore[call-arg] + elif isinstance(values, (DatetimeArray, TimedeltaArray, PeriodArray)): + # In to_cython_values we took a view as M8[ns] + assert res_values.dtype == "M8[ns]" + res_values = res_values.view(values._ndarray.dtype) + return values._from_backing_data(res_values) raise NotImplementedError From 434fe4f907f1b412fcb2808e1713ad24a27a6920 Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 6 Mar 2022 12:15:47 -0800 Subject: [PATCH 3/4] troubleshoot 32bit build --- pandas/_libs/groupby.pyx | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx index 12fe78a0f8a18..ff5258e37e352 100644 --- a/pandas/_libs/groupby.pyx +++ b/pandas/_libs/groupby.pyx @@ -1238,6 +1238,7 @@ def group_nth( if nobs[i, j] < min_count: if uses_mask: result_mask[i, j] = True + out[i, j] = 0 elif iu_64_floating_obj_t is int64_t: # TODO: only if datetimelike? out[i, j] = NPY_NAT From a3357d0dbb5cf80183406355ab62e3d1f9eaeb28 Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 6 Mar 2022 13:33:54 -0800 Subject: [PATCH 4/4] fixed --- pandas/core/groupby/ops.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index fe94a08741688..eadba375c66a3 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -424,14 +424,8 @@ def _masked_ea_wrap_cython_operation( **kwargs, ) - new_dtype = self._get_result_dtype(orig_values.dtype.numpy_dtype) - # Troubleshooting 32bit build - assert new_dtype == res_values.dtype, (new_dtype, res_values.dtype) - dtype = BaseMaskedDtype.from_numpy_dtype(new_dtype) - # TODO: avoid cast as res_values *should* already have the right - # dtype; last attempt ran into trouble on 32bit linux build - res_values = res_values.astype(dtype.type, copy=False) - + # res_values should already have the correct dtype, we just need to + # wrap in a MaskedArray return orig_values._maybe_mask_result(res_values, result_mask) @final