From 0fed6c70793e2990d189747a6a98428417103848 Mon Sep 17 00:00:00 2001 From: Sven Date: Wed, 14 Sep 2022 15:40:11 +1000 Subject: [PATCH 1/5] Added test case to lock in behaviour * In previous versions, concatenating to empty EA was resetting type information to np.object --- pandas/tests/extension/test_integer.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/pandas/tests/extension/test_integer.py b/pandas/tests/extension/test_integer.py index a6cf820dc7609..afd45dd827a7e 100644 --- a/pandas/tests/extension/test_integer.py +++ b/pandas/tests/extension/test_integer.py @@ -234,3 +234,14 @@ class TestParsing(base.BaseParsingTests): class Test2DCompat(base.Dim2CompatTests): pass + + +def test_concat_to_empty_ea(): + """`concat` to an empty EA should maintain type EA information.""" + df_empty = pd.DataFrame({"a": pd.array([], dtype=pd.Int64Dtype())}) + + df_new = pd.DataFrame({"a": pd.array([1, 2, 3], dtype=pd.Int64Dtype())}) + + result = pd.concat([df_empty, df_new]) + + assert result["a"].dtype == df_empty["a"].dtype == df_new["a"].dtype From c7920bc9122610e5dc8c8258b60ee4a5fc480684 Mon Sep 17 00:00:00 2001 From: Sven Date: Wed, 14 Sep 2022 15:50:30 +1000 Subject: [PATCH 2/5] Update whatsnew --- doc/source/whatsnew/v1.6.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.6.0.rst b/doc/source/whatsnew/v1.6.0.rst index a7c9a7eb88221..4bf6b23d6ab32 100644 --- a/doc/source/whatsnew/v1.6.0.rst +++ b/doc/source/whatsnew/v1.6.0.rst @@ -212,6 +212,7 @@ Sparse ExtensionArray ^^^^^^^^^^^^^^ - Bug in :meth:`Series.mean` overflowing unnecessarily with nullable integers (:issue:`48378`) +- Loss of dtype information when concatenating EA with empty EA of same type (:issue:`48510`) - Styler From fd794768ad86ad934f9971b2233c6b56a91000ae Mon Sep 17 00:00:00 2001 From: Sven Date: Thu, 15 Sep 2022 08:23:38 +1000 Subject: [PATCH 3/5] Addressed code review comments * whatsnew: made "Loss of dtype" more specific and improved wording (EA -> 
ExtensionArray, etc) * testcase: moved test case to test_empty.py and use `tm.assert_frame_equal()` with an expected dataframe --- doc/source/whatsnew/v1.6.0.rst | 2 +- pandas/tests/extension/test_integer.py | 11 ----------- pandas/tests/reshape/concat/test_empty.py | 8 ++++++++ 3 files changed, 9 insertions(+), 12 deletions(-) diff --git a/doc/source/whatsnew/v1.6.0.rst b/doc/source/whatsnew/v1.6.0.rst index 4bf6b23d6ab32..ac34b2be9452d 100644 --- a/doc/source/whatsnew/v1.6.0.rst +++ b/doc/source/whatsnew/v1.6.0.rst @@ -212,7 +212,7 @@ Sparse ExtensionArray ^^^^^^^^^^^^^^ - Bug in :meth:`Series.mean` overflowing unnecessarily with nullable integers (:issue:`48378`) -- Loss of dtype information when concatenating EA with empty EA of same type (:issue:`48510`) +- When concatenating DataFrame containing an empty ExtensionArray Series with another DataFrame with an ExtensionArray of the same type, the resulting dtype turned into object (:issue:`48510`) - Styler diff --git a/pandas/tests/extension/test_integer.py b/pandas/tests/extension/test_integer.py index afd45dd827a7e..a6cf820dc7609 100644 --- a/pandas/tests/extension/test_integer.py +++ b/pandas/tests/extension/test_integer.py @@ -234,14 +234,3 @@ class TestParsing(base.BaseParsingTests): class Test2DCompat(base.Dim2CompatTests): pass - - -def test_concat_to_empty_ea(): - """`concat` to an empty EA should maintain type EA information.""" - df_empty = pd.DataFrame({"a": pd.array([], dtype=pd.Int64Dtype())}) - - df_new = pd.DataFrame({"a": pd.array([1, 2, 3], dtype=pd.Int64Dtype())}) - - result = pd.concat([df_empty, df_new]) - - assert result["a"].dtype == df_empty["a"].dtype == df_new["a"].dtype diff --git a/pandas/tests/reshape/concat/test_empty.py b/pandas/tests/reshape/concat/test_empty.py index d78337131bb97..f7074dd1b1a83 100644 --- a/pandas/tests/reshape/concat/test_empty.py +++ b/pandas/tests/reshape/concat/test_empty.py @@ -284,3 +284,11 @@ def test_concat_empty_dataframe_different_dtypes(self): result = 
concat([df1[:0], df2[:0]]) assert result["a"].dtype == np.int64 assert result["b"].dtype == np.object_ + + def test_concat_to_empty_ea(self): + """48510 `concat` to an empty EA should maintain type EA dtype.""" + df_empty = pd.DataFrame({"a": pd.array([], dtype=pd.Int64Dtype())}) + df_new = pd.DataFrame({"a": pd.array([1, 2, 3], dtype=pd.Int64Dtype())}) + expected = df_new.copy() + result = pd.concat([df_empty, df_new]) + tm.assert_frame_equal(result, expected) From 711906bc48ca29bb30996ffc25f6343d8e78408e Mon Sep 17 00:00:00 2001 From: Sven Date: Thu, 15 Sep 2022 10:43:51 +1000 Subject: [PATCH 4/5] Fixed style issue * Use concat/DataFrame as they were directly imported (instead of `pd...`) --- pandas/tests/reshape/concat/test_empty.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/reshape/concat/test_empty.py b/pandas/tests/reshape/concat/test_empty.py index f7074dd1b1a83..541a34bde8143 100644 --- a/pandas/tests/reshape/concat/test_empty.py +++ b/pandas/tests/reshape/concat/test_empty.py @@ -287,8 +287,8 @@ def test_concat_empty_dataframe_different_dtypes(self): def test_concat_to_empty_ea(self): """48510 `concat` to an empty EA should maintain type EA dtype.""" - df_empty = pd.DataFrame({"a": pd.array([], dtype=pd.Int64Dtype())}) - df_new = pd.DataFrame({"a": pd.array([1, 2, 3], dtype=pd.Int64Dtype())}) + df_empty = DataFrame({"a": pd.array([], dtype=pd.Int64Dtype())}) + df_new = DataFrame({"a": pd.array([1, 2, 3], dtype=pd.Int64Dtype())}) expected = df_new.copy() - result = pd.concat([df_empty, df_new]) + result = concat([df_empty, df_new]) tm.assert_frame_equal(result, expected) From 4675802bdf8ae0a689ebe6957fbf1f5805a243a8 Mon Sep 17 00:00:00 2001 From: Sven Date: Thu, 15 Sep 2022 10:47:14 +1000 Subject: [PATCH 5/5] Improved whatsnew entry as per suggestion --- doc/source/whatsnew/v1.6.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.6.0.rst b/doc/source/whatsnew/v1.6.0.rst 
index ac34b2be9452d..a8baa6782d5a0 100644 --- a/doc/source/whatsnew/v1.6.0.rst +++ b/doc/source/whatsnew/v1.6.0.rst @@ -212,7 +212,7 @@ Sparse ExtensionArray ^^^^^^^^^^^^^^ - Bug in :meth:`Series.mean` overflowing unnecessarily with nullable integers (:issue:`48378`) -- When concatenating DataFrame containing an empty ExtensionArray Series with another DataFrame with an ExtensionArray of the same type, the resulting dtype turned into object (:issue:`48510`) +- Bug in :func:`concat` where concatenating an empty DataFrame with an ExtensionDtype to another DataFrame with the same ExtensionDtype resulted in ``object`` dtype (:issue:`48510`) - Styler