diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 4f0ca97310d85..2c2f0ad4c006f 100755 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -1273,6 +1273,7 @@ Other - Bug in :meth:`DataFrame.append` that raised ``IndexError`` when appending with empty list (:issue:`28769`) - Fix :class:`AbstractHolidayCalendar` to return correct results for years after 2030 (now goes up to 2200) (:issue:`27790`) +- Bug in :meth:`DataFrame.apply` that returned an incorrect result in some cases when the passed function depended on the dtype of its input (:issue:`28773`) - Fixed :class:`~arrays.IntegerArray` returning ``inf`` rather than ``NaN`` for operations dividing by ``0`` (:issue:`27398`) - Fixed ``pow`` operations for :class:`~arrays.IntegerArray` when the other value is ``0`` or ``1`` (:issue:`29997`) - Bug in :meth:`Series.count` raises if use_inf_as_na is enabled (:issue:`29478`) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index a013434491589..90c1f20b7da8a 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -14,7 +14,10 @@ is_list_like, is_sequence, ) -from pandas.core.dtypes.generic import ABCSeries +from pandas.core.dtypes.generic import ( + ABCSeries, + ABCMultiIndex, +) from pandas.core.construction import create_series_with_explicit_dtype @@ -271,16 +274,22 @@ def apply_standard(self): # we cannot reduce using non-numpy dtypes, # as demonstrated in gh-12244 - if ( + can_reduce = ( self.result_type in ["reduce", None] and not self.dtypes.apply(is_extension_array_dtype).any() + # Disallow MultiIndex since libreduction shortcut + # cannot handle it + and not isinstance(self.agg_axis, ABCMultiIndex) # Disallow dtypes where setting _index_data will break # ExtensionArray values, see GH#31182 and not self.dtypes.apply(lambda x: x.kind in ["m", "M"]).any() # Disallow complex_internals since libreduction shortcut raises a TypeError and not self.agg_axis._has_complex_internals - ): + ) + + column_by_column =
(self.axis != 0 and self.axis != "index") or self.obj._is_homogeneous_type + if can_reduce and column_by_column: values = self.values index = self.obj._get_axis(self.axis) labels = self.agg_axis @@ -309,9 +318,17 @@ def apply_standard(self): else: return self.obj._constructor_sliced(result, index=labels) + # compute the result using the series generator results, res_index = self.apply_series_generator() + if can_reduce and not column_by_column: + results = list(results.values()) + results = np.array(results) + return self.obj._constructor_sliced( + results, index=res_index + ) + # wrap results return self.wrap_results(results, res_index) diff --git a/pandas/tests/frame/test_apply.py b/pandas/tests/frame/test_apply.py index e328523253144..34267c413920a 100644 --- a/pandas/tests/frame/test_apply.py +++ b/pandas/tests/frame/test_apply.py @@ -704,6 +704,13 @@ def test_apply_dup_names_multi_agg(self): tm.assert_frame_equal(result, expected) + def test_apply_get_dtype(self): + # GH 28773 + df = DataFrame({"col_1": [1, 2, 3], "col_2": ["hi", "there", "friend"]}) + result = df.apply(lambda x: x.dtype) + expected = Series(data=["int64", "object"], index=["col_1", "col_2"]) + tm.assert_series_equal(result, expected) + def test_apply_nested_result_axis_1(self): # GH 13820 def apply_list(row):