From e33f12c0a8048b27453651237deb0da3dc1d1d46 Mon Sep 17 00:00:00 2001 From: debnathshoham Date: Wed, 1 Sep 2021 13:36:49 +0530 Subject: [PATCH 1/7] BUG: transpose casts mixed dtypes to object --- doc/source/whatsnew/v1.4.0.rst | 2 +- pandas/core/frame.py | 7 ++++++- pandas/tests/frame/methods/test_transpose.py | 7 +++++++ 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 57f36d1599afb..0ca8bdf8bddb5 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -365,7 +365,7 @@ Reshaping - Improved error message when creating a :class:`DataFrame` column from a multi-dimensional :class:`numpy.ndarray` (:issue:`42463`) - :func:`concat` creating :class:`MultiIndex` with duplicate level entries when concatenating a :class:`DataFrame` with duplicates in :class:`Index` and multiple keys (:issue:`42651`) - Bug in :meth:`pandas.cut` on :class:`Series` with duplicate indices (:issue:`42185`) and non-exact :meth:`pandas.CategoricalIndex` (:issue:`42425`) -- +- Bug in :meth:`DataFrame.transpose`, where mixed dtypes were cast to ``object`` (:issue:`43337`) Sparse ^^^^^^ diff --git a/pandas/core/frame.py b/pandas/core/frame.py index db12129a15ef9..639e7dde4cafa 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3381,7 +3381,12 @@ def transpose(self, *args, copy: bool = False) -> DataFrame: new_arr = self.values.T if copy: new_arr = new_arr.copy() - result = self._constructor(new_arr, index=self.columns, columns=self.index) + result = self._constructor( + new_arr, + index=self.columns, + columns=self.index, + dtype=find_common_type(dtypes), + ) return result.__finalize__(self, method="transpose") diff --git a/pandas/tests/frame/methods/test_transpose.py b/pandas/tests/frame/methods/test_transpose.py index 62537d37a8c11..0e7e6885f9dee 100644 --- a/pandas/tests/frame/methods/test_transpose.py +++ b/pandas/tests/frame/methods/test_transpose.py @@ -103,3 +103,10 @@ def test_transpose_get_view_dt64tzget_view(self): rtrip = result._mgr.blocks[0].values assert np.shares_memory(arr._data, rtrip._data) + + def test_transpose_mixed_dtypes(self): + # GH#43337 + df = DataFrame({"a": [1], "b": [2]}).astype({"b": "Int64"}) + result = df.T + expected = DataFrame([1, 2], index=["a", "b"], dtype="Int64") + tm.assert_frame_equal(result, expected) From 9c3ca03cb5af8f31ffcc2d63c2011fe11bc26657 Mon Sep 17 00:00:00 2001 From: debnathshoham Date: Wed, 1 Sep 2021 14:38:37 +0530 Subject: [PATCH 2/7] checked empty list --- pandas/core/frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 639e7dde4cafa..8fb2d08f96ea7 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3385,7 +3385,7 @@ def transpose(self, *args, copy: bool = False) -> DataFrame: new_arr, index=self.columns, columns=self.index, - dtype=find_common_type(dtypes), + dtype=find_common_type(dtypes) if len(dtypes) > 0 else None, ) return result.__finalize__(self, method="transpose") From badd4c18cee0e303bab676f354cd5c2fe4c49af1 Mon Sep 17 00:00:00 2001 From: debnathshoham Date: Thu, 9 Sep 2021 13:09:01 +0530 Subject: [PATCH 3/7] suggested edit --- pandas/core/frame.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index a1cd2258322be..c23e7679f52b6 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3390,13 +3390,12 @@ def transpose(self, *args, copy: bool = False) -> DataFrame: else: new_arr = self.values.T - if copy: - new_arr = new_arr.copy() + common_dtype = find_common_type(dtypes) if len(dtypes) > 0 else None result = self._constructor( new_arr, index=self.columns, columns=self.index, - dtype=find_common_type(dtypes) if len(dtypes) > 0 else None, + dtype=common_dtype, ) return result.__finalize__(self, method="transpose") From 14edad65c359ca6ccba247ba565ff9264595c4f0 Mon Sep 17 00:00:00 2001 From: debnathshoham Date: Sun, 12 Sep 2021 16:10:55 +0530 Subject: [PATCH 4/7] pass None if common_dtype is object --- pandas/core/frame.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index c23e7679f52b6..7aa4afbf30bca 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3390,7 +3390,11 @@ def transpose(self, *args, copy: bool = False) -> DataFrame: else: new_arr = self.values.T - common_dtype = find_common_type(dtypes) if len(dtypes) > 0 else None + common_dtype = None + if len(dtypes) > 0: + common_dtype = find_common_type(dtypes) + if common_dtype is object: + common_dtype = None result = self._constructor( new_arr, index=self.columns, From fca34f9e8dca6af11e737f64f9665523bb7e9cf7 Mon Sep 17 00:00:00 2001 From: debnathshoham Date: Mon, 13 Sep 2021 23:29:46 +0530 Subject: [PATCH 5/7] print test to debug --- pandas/core/frame.py | 6 +----- pandas/tests/frame/methods/test_transpose.py | 2 ++ 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index a6627377c3dff..9e9c1732030b9 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3400,11 +3400,7 @@ def transpose(self, *args, copy: bool = False) -> DataFrame: else: new_arr = self.values.T - common_dtype = None - if len(dtypes) > 0: - common_dtype = find_common_type(dtypes) - if common_dtype is object: - common_dtype = None + common_dtype = find_common_type(dtypes) if len(dtypes) > 0 else None result = self._constructor( new_arr, index=self.columns, diff --git a/pandas/tests/frame/methods/test_transpose.py b/pandas/tests/frame/methods/test_transpose.py index 0e7e6885f9dee..1b906a409743a 100644 --- a/pandas/tests/frame/methods/test_transpose.py +++ b/pandas/tests/frame/methods/test_transpose.py @@ -37,6 +37,7 @@ def test_transpose_tzaware_2col_mixed_tz(self): df4 = DataFrame({"A": dti, "B": dti2}) assert (df4.dtypes == [dti.dtype, dti2.dtype]).all() assert (df4.T.dtypes == object).all() + print(df4._can_fast_transpose, df4.T._can_fast_transpose) tm.assert_frame_equal(df4.T.T, df4) @pytest.mark.parametrize("tz", [None, "America/New_York"]) @@ -57,6 +58,7 @@ def test_transpose_object_to_tzaware_mixed_tz(self): df2 = DataFrame([dti, dti2]) assert (df2.dtypes == object).all() res2 = df2.T + print("\n", res2.dtypes, [dti.dtype, dti2.dtype]) assert (res2.dtypes == [dti.dtype, dti2.dtype]).all() def test_transpose_uint64(self, uint64_frame): From 91521c4f00b0ec45173cf40d934c4262059c57cf Mon Sep 17 00:00:00 2001 From: debnathshoham Date: Mon, 4 Oct 2021 13:13:17 +0530 Subject: [PATCH 6/7] removed print from tests --- pandas/tests/frame/methods/test_transpose.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pandas/tests/frame/methods/test_transpose.py b/pandas/tests/frame/methods/test_transpose.py index 1b906a409743a..0e7e6885f9dee 100644 --- a/pandas/tests/frame/methods/test_transpose.py +++ b/pandas/tests/frame/methods/test_transpose.py @@ -37,7 +37,6 @@ def test_transpose_tzaware_2col_mixed_tz(self): df4 = DataFrame({"A": dti, "B": dti2}) assert (df4.dtypes == [dti.dtype, dti2.dtype]).all() assert (df4.T.dtypes == object).all() - print(df4._can_fast_transpose, df4.T._can_fast_transpose) tm.assert_frame_equal(df4.T.T, df4) @pytest.mark.parametrize("tz", [None, "America/New_York"]) @@ -58,7 +57,6 @@ def test_transpose_object_to_tzaware_mixed_tz(self): df2 = DataFrame([dti, dti2]) assert (df2.dtypes == object).all() res2 = df2.T - print("\n", res2.dtypes, [dti.dtype, dti2.dtype]) assert (res2.dtypes == [dti.dtype, dti2.dtype]).all() def test_transpose_uint64(self, uint64_frame): From 992c8cf499a4a993b9c5c544e573feb0979b09be Mon Sep 17 00:00:00 2001 From: debnathshoham Date: Wed, 15 Dec 2021 22:20:38 +0530 Subject: [PATCH 7/7] added test --- pandas/tests/frame/methods/test_transpose.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pandas/tests/frame/methods/test_transpose.py b/pandas/tests/frame/methods/test_transpose.py index 7fca752f2a21e..f09d2c1be0ac1 100644 --- a/pandas/tests/frame/methods/test_transpose.py +++ b/pandas/tests/frame/methods/test_transpose.py @@ -116,3 +116,10 @@ def test_transpose_get_view_dt64tzget_view(self): rtrip = result._mgr.blocks[0].values assert np.shares_memory(arr._ndarray, rtrip._ndarray) + + def test_transpose_mixed_dtypes(self): + # GH#43337 + df = DataFrame({"a": [1], "b": [2]}).astype({"b": "Int64"}) + result = df.T + expected = DataFrame([1, 2], index=["a", "b"], dtype="Int64") + tm.assert_frame_equal(result, expected)