diff --git a/asv_bench/benchmarks/reshape.py b/asv_bench/benchmarks/reshape.py index 9eeaaf97d8ac6..66067d25d133b 100644 --- a/asv_bench/benchmarks/reshape.py +++ b/asv_bench/benchmarks/reshape.py @@ -15,12 +15,17 @@ class Melt: - def setup(self): - self.df = DataFrame(np.random.randn(10000, 3), columns=["A", "B", "C"]) - self.df["id1"] = np.random.randint(0, 10, 10000) - self.df["id2"] = np.random.randint(100, 1000, 10000) + params = ["float64", "Float64"] + param_names = ["dtype"] + + def setup(self, dtype): + self.df = DataFrame( + np.random.randn(100_000, 3), columns=["A", "B", "C"], dtype=dtype + ) + self.df["id1"] = pd.Series(np.random.randint(0, 10, 10000)) + self.df["id2"] = pd.Series(np.random.randint(100, 1000, 10000)) - def time_melt_dataframe(self): + def time_melt_dataframe(self, dtype): melt(self.df, id_vars=["id1", "id2"]) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index cbad169fe4d56..6d851c9aa8570 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -906,6 +906,7 @@ Reshaping ^^^^^^^^^ - Bug in :meth:`DataFrame.pivot_table` raising ``TypeError`` for nullable dtype and ``margins=True`` (:issue:`48681`) - Bug in :meth:`DataFrame.unstack` and :meth:`Series.unstack` unstacking wrong level of :class:`MultiIndex` when :class:`MultiIndex` has mixed names (:issue:`48763`) +- Bug in :meth:`DataFrame.melt` losing extension array dtype (:issue:`41570`) - Bug in :meth:`DataFrame.pivot` not respecting ``None`` as column name (:issue:`48293`) - Bug in :func:`join` when ``left_on`` or ``right_on`` is or includes a :class:`CategoricalIndex` incorrectly raising ``AttributeError`` (:issue:`48464`) - Bug in :meth:`DataFrame.pivot_table` raising ``ValueError`` with parameter ``margins=True`` when result is an empty :class:`DataFrame` (:issue:`49240`) diff --git a/pandas/core/reshape/melt.py b/pandas/core/reshape/melt.py index 300073d893c67..633ef12265245 100644 --- a/pandas/core/reshape/melt.py +++ b/pandas/core/reshape/melt.py @@ -33,6 +33,8 @@ from pandas.core.tools.numeric import to_numeric if TYPE_CHECKING: + from pandas._typing import AnyArrayLike + from pandas import DataFrame @@ -124,7 +126,7 @@ def melt( N, K = frame.shape K -= len(id_vars) - mdata = {} + mdata: dict[Hashable, AnyArrayLike] = {} for col in id_vars: id_data = frame.pop(col) if is_extension_array_dtype(id_data): @@ -141,17 +143,15 @@ def melt( mcolumns = id_vars + var_name + [value_name] - # error: Incompatible types in assignment (expression has type "ndarray", - # target has type "Series") - mdata[value_name] = frame._values.ravel("F") # type: ignore[assignment] + if frame.shape[1] > 0: + mdata[value_name] = concat( + [frame.iloc[:, i] for i in range(frame.shape[1])] + ).values + else: + mdata[value_name] = frame._values.ravel("F") for i, col in enumerate(var_name): # asanyarray will keep the columns as an Index - - # error: Incompatible types in assignment (expression has type "ndarray", target - # has type "Series") - mdata[col] = np.asanyarray( # type: ignore[assignment] - frame.columns._get_level_values(i) - ).repeat(N) + mdata[col] = np.asanyarray(frame.columns._get_level_values(i)).repeat(N) result = frame._constructor(mdata, columns=mcolumns) diff --git a/pandas/tests/reshape/test_melt.py b/pandas/tests/reshape/test_melt.py index fe88b7f9caa02..bc8ce5df6dd9b 100644 --- a/pandas/tests/reshape/test_melt.py +++ b/pandas/tests/reshape/test_melt.py @@ -420,6 +420,24 @@ def test_melt_with_duplicate_columns(self): ) tm.assert_frame_equal(result, expected) + @pytest.mark.parametrize("dtype", ["Int8", "Int64"]) + def test_melt_ea_dtype(self, dtype): + # GH#41570 + df = DataFrame( + { + "a": pd.Series([1, 2], dtype="Int8"), + "b": pd.Series([3, 4], dtype=dtype), + } + ) + result = df.melt() + expected = DataFrame( + { + "variable": ["a", "a", "b", "b"], + "value": pd.Series([1, 2, 3, 4], dtype=dtype), + } + ) + tm.assert_frame_equal(result, expected) + class TestLreshape: def test_pairs(self):