From 9dd4d05d8860d5b75b2cdeb31fa63f43423d2b2a Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 16 Mar 2018 10:23:04 -0500 Subject: [PATCH 1/2] BUG: Handle all-NA blocks in concat Previously we special cased all-na blocks. We should only do that for non-extension dtypes. --- pandas/core/internals.py | 3 +- pandas/tests/extension/base/reshaping.py | 15 ++++++++ .../tests/extension/decimal/test_decimal.py | 35 ++++++++++++++----- 3 files changed, 44 insertions(+), 9 deletions(-) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 240c9b1f3377c..0b97597201d6b 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -5393,7 +5393,8 @@ def is_uniform_join_units(join_units): # all blocks need to have the same type all(type(ju.block) is type(join_units[0].block) for ju in join_units) and # noqa # no blocks that would get missing values (can lead to type upcasts) - all(not ju.is_na for ju in join_units) and + # unless we're an extension dtype. + all(not ju.is_na or ju.block.is_extension for ju in join_units) and # no blocks with indexers (as then the dimensions do not fit) all(not ju.indexers for ju in join_units) and # disregard Panels diff --git a/pandas/tests/extension/base/reshaping.py b/pandas/tests/extension/base/reshaping.py index cfb70f2291555..8e80ddb12b1c3 100644 --- a/pandas/tests/extension/base/reshaping.py +++ b/pandas/tests/extension/base/reshaping.py @@ -25,6 +25,21 @@ def test_concat(self, data, in_frame): assert dtype == data.dtype assert isinstance(result._data.blocks[0], ExtensionBlock) + @pytest.mark.parametrize('in_frame', [True]) + def test_concat_all_na_block(self, data_missing, in_frame): + valid_block = pd.Series(data_missing.take([1, 1]), index=[0, 1]) + na_block = pd.Series(data_missing.take([0, 0]), index=[2, 3]) + if in_frame: + valid_block = pd.DataFrame({"a": valid_block}) + na_block = pd.DataFrame({"a": na_block}) + result = pd.concat([valid_block, na_block]) + if in_frame: + expected = pd.DataFrame({"a": data_missing.take([1, 1, 0, 0])}) + self.assert_frame_equal(result, expected) + else: + expected = pd.DataFrame({"a": data_missing.take([1, 1, 0, 0])}) + self.assert_series_equal(result, expected) + def test_align(self, data, na_value): a = data[:3] b = data[2:5] diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py index 7b4d079ecad87..7c613d04899b7 100644 --- a/pandas/tests/extension/decimal/test_decimal.py +++ b/pandas/tests/extension/decimal/test_decimal.py @@ -35,19 +35,38 @@ def na_value(): return decimal.Decimal("NaN") -class TestDtype(base.BaseDtypeTests): +class BaseDecimal(object): + + def assert_series_equal(self, left, right, *args, **kwargs): + + left_na = left.isna() + right_na = right.isna() + + tm.assert_series_equal(left_na, right_na) + return tm.assert_series_equal(left[~left_na], + right[~right_na], + *args, **kwargs) + + def assert_frame_equal(self, left, right, *args, **kwargs): + self.assert_series_equal(left.dtypes, right.dtypes) + for col in left.columns: + self.assert_series_equal(left[col], right[col], + *args, **kwargs) + + +class TestDtype(BaseDecimal, base.BaseDtypeTests): pass -class TestInterface(base.BaseInterfaceTests): +class TestInterface(BaseDecimal, base.BaseInterfaceTests): pass -class TestConstructors(base.BaseConstructorsTests): +class TestConstructors(BaseDecimal, base.BaseConstructorsTests): pass -class TestReshaping(base.BaseReshapingTests): +class TestReshaping(BaseDecimal, base.BaseReshapingTests): def test_align(self, data, na_value): # Have to override since assert_series_equal doesn't @@ -88,15 +107,15 @@ def test_align_frame(self, data, na_value): assert e2.loc[0, 'A'].is_nan() -class TestGetitem(base.BaseGetitemTests): +class TestGetitem(BaseDecimal, base.BaseGetitemTests): pass -class TestMissing(base.BaseMissingTests): +class TestMissing(BaseDecimal, base.BaseMissingTests): pass -class TestMethods(base.BaseMethodsTests): +class TestMethods(BaseDecimal, base.BaseMethodsTests): @pytest.mark.parametrize('dropna', [True, False]) @pytest.mark.xfail(reason="value_counts not implemented yet.") def test_value_counts(self, all_data, dropna): @@ -112,7 +131,7 @@ def test_value_counts(self, all_data, dropna): tm.assert_series_equal(result, expected) -class TestCasting(base.BaseCastingTests): +class TestCasting(BaseDecimal, base.BaseCastingTests): pass From f16f59fae11c1374cfb721870df694e63d7472e6 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 16 Mar 2018 15:37:01 -0500 Subject: [PATCH 2/2] Fix boxing --- pandas/tests/extension/base/reshaping.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/extension/base/reshaping.py b/pandas/tests/extension/base/reshaping.py index 8e80ddb12b1c3..9b9a614889bef 100644 --- a/pandas/tests/extension/base/reshaping.py +++ b/pandas/tests/extension/base/reshaping.py @@ -25,7 +25,7 @@ def test_concat(self, data, in_frame): assert dtype == data.dtype assert isinstance(result._data.blocks[0], ExtensionBlock) - @pytest.mark.parametrize('in_frame', [True]) + @pytest.mark.parametrize('in_frame', [True, False]) def test_concat_all_na_block(self, data_missing, in_frame): valid_block = pd.Series(data_missing.take([1, 1]), index=[0, 1]) na_block = pd.Series(data_missing.take([0, 0]), index=[2, 3]) @@ -37,7 +37,7 @@ def test_concat_all_na_block(self, data_missing, in_frame): expected = pd.DataFrame({"a": data_missing.take([1, 1, 0, 0])}) self.assert_frame_equal(result, expected) else: - expected = pd.DataFrame({"a": data_missing.take([1, 1, 0, 0])}) + expected = pd.Series(data_missing.take([1, 1, 0, 0])) self.assert_series_equal(result, expected) def test_align(self, data, na_value):