From 82dc4468006474b0022dfe63fc6a40fcba365d18 Mon Sep 17 00:00:00 2001 From: Matsuoka Kota Date: Sun, 19 Apr 2020 21:06:41 +0900 Subject: [PATCH 1/8] BUG: Create empty dataframe with string dtype fails --- pandas/core/internals/construction.py | 5 ++++- pandas/tests/frame/test_constructors.py | 4 ++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index 5c9e4b96047ee..c31c0acfcb003 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -242,7 +242,10 @@ def init_dict(data, index, columns, dtype=None): # no obvious "empty" int column if missing.any() and not is_integer_dtype(dtype): - if dtype is None or np.issubdtype(dtype, np.flexible): + if is_dtype_equal(dtype, "string"): + # GH 33623 + nan_dtype = dtype + elif dtype is None or np.issubdtype(dtype, np.flexible): # GH#1783 nan_dtype = object else: diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index baac87755c6d2..0b4608853d0e7 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -2679,3 +2679,7 @@ def test_construction_from_set_raises(self): msg = "Set type is unordered" with pytest.raises(TypeError, match=msg): pd.DataFrame({"a": {1, 2, 3}}) + + def test_construct_empty_dataframe_with_string_dtype(self): + # GH 33623 + pd.DataFrame(columns=["a"], dtype="string") From d1da8c8cb01f67db4ffab38cecbe6575286cc80e Mon Sep 17 00:00:00 2001 From: Matsuoka Kota Date: Mon, 20 Apr 2020 11:55:49 +0900 Subject: [PATCH 2/8] Changes according to comments --- pandas/core/internals/construction.py | 2 +- pandas/tests/extension/test_common.py | 6 ++++++ pandas/tests/frame/test_constructors.py | 4 ---- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index c31c0acfcb003..045df38a0421b 100644 --- 
a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -242,7 +242,7 @@ def init_dict(data, index, columns, dtype=None): # no obvious "empty" int column if missing.any() and not is_integer_dtype(dtype): - if is_dtype_equal(dtype, "string"): + if is_extension_array_dtype(dtype): # GH 33623 nan_dtype = dtype elif dtype is None or np.issubdtype(dtype, np.flexible): diff --git a/pandas/tests/extension/test_common.py b/pandas/tests/extension/test_common.py index e43650c291200..9e7eced4c3702 100644 --- a/pandas/tests/extension/test_common.py +++ b/pandas/tests/extension/test_common.py @@ -79,3 +79,9 @@ def test_astype_no_copy(): def test_is_extension_array_dtype(dtype): assert isinstance(dtype, dtypes.ExtensionDtype) assert is_extension_array_dtype(dtype) + + +@pytest.mark.parametrize("columns, dtype", [(["a"], "string")]) +def test_construct_empty_dataframe_with_string_dtype(columns, dtype): + # GH 33623 + pd.DataFrame(columns=columns, dtype=dtype) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 0b4608853d0e7..baac87755c6d2 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -2679,7 +2679,3 @@ def test_construction_from_set_raises(self): msg = "Set type is unordered" with pytest.raises(TypeError, match=msg): pd.DataFrame({"a": {1, 2, 3}}) - - def test_construct_empty_dataframe_with_string_dtype(self): - # GH 33623 - pd.DataFrame(columns=["a"], dtype="string") From 66eda6a8f91fbb5d0c20dd0c165d9dd05b0b7fe4 Mon Sep 17 00:00:00 2001 From: Matsuoka Kota Date: Sat, 25 Apr 2020 11:55:03 +0900 Subject: [PATCH 3/8] Add test of empty dataframe in ExtensionDtype --- pandas/tests/extension/arrow/test_bool.py | 11 +++++++++++ pandas/tests/extension/base/constructors.py | 4 ++++ pandas/tests/extension/decimal/test_decimal.py | 5 +++++ pandas/tests/extension/json/test_json.py | 10 ++++++++++ pandas/tests/extension/test_boolean.py | 5 +++++ 
pandas/tests/extension/test_categorical.py | 5 +++++ pandas/tests/extension/test_common.py | 6 ------ pandas/tests/extension/test_datetime.py | 5 +++++ pandas/tests/extension/test_integer.py | 9 ++++++++- pandas/tests/extension/test_interval.py | 5 +++++ pandas/tests/extension/test_numpy.py | 5 +++++ pandas/tests/extension/test_period.py | 5 +++++ pandas/tests/extension/test_sparse.py | 5 +++++ pandas/tests/extension/test_string.py | 7 +++++++ 14 files changed, 80 insertions(+), 7 deletions(-) diff --git a/pandas/tests/extension/arrow/test_bool.py b/pandas/tests/extension/arrow/test_bool.py index 94dd09d3eb053..44ba7ef9dbf86 100644 --- a/pandas/tests/extension/arrow/test_bool.py +++ b/pandas/tests/extension/arrow/test_bool.py @@ -15,6 +15,13 @@ def dtype(): return ArrowBoolDtype() +@pytest.fixture +def columns(): + values = np.random.randint(0, 2, size=100, dtype=bool) + values[1] = ~values[0] + return ArrowBoolArray.from_scalars(values) + + @pytest.fixture def data(): values = np.random.randint(0, 2, size=100, dtype=bool) @@ -55,6 +62,10 @@ def test_from_dtype(self, data): def test_from_sequence_from_cls(self, data): super().test_from_sequence_from_cls(data) + @pytest.mark.xfail(reason="bad is-na for empty data") + def test_construct_empty_dataframe(self, columns, dtype): + super().test_construct_empty_dataframe(columns, dtype) + class TestReduce(base.BaseNoReduceTests): def test_reduce_series_boolean(self): diff --git a/pandas/tests/extension/base/constructors.py b/pandas/tests/extension/base/constructors.py index 1ddc7af0f6268..f153f2d36214f 100644 --- a/pandas/tests/extension/base/constructors.py +++ b/pandas/tests/extension/base/constructors.py @@ -83,3 +83,7 @@ def test_pandas_array_dtype(self, data): result = pd.array(data, dtype=np.dtype(object)) expected = pd.arrays.PandasArray(np.asarray(data, dtype=object)) self.assert_equal(result, expected) + + def test_construct_empty_dataframe(self, columns, dtype): + # GH 33623 + pd.DataFrame(columns=columns, 
dtype=dtype) diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py index f4ffcb8d0f109..a0e694bb670c0 100644 --- a/pandas/tests/extension/decimal/test_decimal.py +++ b/pandas/tests/extension/decimal/test_decimal.py @@ -17,6 +17,11 @@ def dtype(): return DecimalDtype() +@pytest.fixture +def columns(): + return DecimalArray(make_data()) + + @pytest.fixture def data(): return DecimalArray(make_data()) diff --git a/pandas/tests/extension/json/test_json.py b/pandas/tests/extension/json/test_json.py index d086896fb09c3..705759a11d749 100644 --- a/pandas/tests/extension/json/test_json.py +++ b/pandas/tests/extension/json/test_json.py @@ -15,6 +15,16 @@ def dtype(): return JSONDtype() +@pytest.fixture +def columns(): + data = make_data() + + while len(data[0]) == len(data[1]): + data = make_data() + + return JSONArray(data) + + @pytest.fixture def data(): """Length-100 PeriodArray for semantics test.""" diff --git a/pandas/tests/extension/test_boolean.py b/pandas/tests/extension/test_boolean.py index e2331b69916fb..7be55093ee271 100644 --- a/pandas/tests/extension/test_boolean.py +++ b/pandas/tests/extension/test_boolean.py @@ -33,6 +33,11 @@ def dtype(): return BooleanDtype() +@pytest.fixture +def columns(dtype): + return pd.array([True, False], dtype=dtype) + + @pytest.fixture def data(dtype): return pd.array(make_data(), dtype=dtype) diff --git a/pandas/tests/extension/test_categorical.py b/pandas/tests/extension/test_categorical.py index 059d3453995bd..6af1f36b40366 100644 --- a/pandas/tests/extension/test_categorical.py +++ b/pandas/tests/extension/test_categorical.py @@ -41,6 +41,11 @@ def dtype(): return CategoricalDtype() +@pytest.fixture +def columns(): + return Categorical(make_data()) + + @pytest.fixture def data(): """Length-100 array for this type. 
diff --git a/pandas/tests/extension/test_common.py b/pandas/tests/extension/test_common.py index 9e7eced4c3702..e43650c291200 100644 --- a/pandas/tests/extension/test_common.py +++ b/pandas/tests/extension/test_common.py @@ -79,9 +79,3 @@ def test_astype_no_copy(): def test_is_extension_array_dtype(dtype): assert isinstance(dtype, dtypes.ExtensionDtype) assert is_extension_array_dtype(dtype) - - -@pytest.mark.parametrize("columns, dtype", [(["a"], "string")]) -def test_construct_empty_dataframe_with_string_dtype(columns, dtype): - # GH 33623 - pd.DataFrame(columns=columns, dtype=dtype) diff --git a/pandas/tests/extension/test_datetime.py b/pandas/tests/extension/test_datetime.py index 3aa188098620d..86a3e627d3f10 100644 --- a/pandas/tests/extension/test_datetime.py +++ b/pandas/tests/extension/test_datetime.py @@ -13,6 +13,11 @@ def dtype(request): return DatetimeTZDtype(unit="ns", tz=request.param) +@pytest.fixture +def columns(dtype): + return DatetimeArray(pd.date_range("2000", periods=100, tz=dtype.tz), dtype=dtype) + + @pytest.fixture def data(dtype): data = DatetimeArray(pd.date_range("2000", periods=100, tz=dtype.tz), dtype=dtype) diff --git a/pandas/tests/extension/test_integer.py b/pandas/tests/extension/test_integer.py index 725533765ca2c..a239085bb4a1a 100644 --- a/pandas/tests/extension/test_integer.py +++ b/pandas/tests/extension/test_integer.py @@ -54,6 +54,11 @@ def dtype(request): return request.param() +@pytest.fixture +def columns(): + return integer_array(make_data()) + + @pytest.fixture def data(dtype): return integer_array(make_data(), dtype=dtype) @@ -186,7 +191,9 @@ class TestInterface(base.BaseInterfaceTests): class TestConstructors(base.BaseConstructorsTests): - pass + @pytest.mark.xfail(reason="bad is-na for empty data") + def test_construct_empty_dataframe(self, columns, dtyoe): + super().test_construct_empty_dataframe(columns, dtype) class TestReshaping(base.BaseReshapingTests): diff --git a/pandas/tests/extension/test_interval.py 
b/pandas/tests/extension/test_interval.py index 2411f6cfbd936..db620cf42d373 100644 --- a/pandas/tests/extension/test_interval.py +++ b/pandas/tests/extension/test_interval.py @@ -35,6 +35,11 @@ def dtype(): return IntervalDtype() +@pytest.fixture +def columns(): + return IntervalArray(make_data()) + + @pytest.fixture def data(): """Length-100 PeriodArray for semantics test.""" diff --git a/pandas/tests/extension/test_numpy.py b/pandas/tests/extension/test_numpy.py index aa5a99282131a..8e8556cf39f4e 100644 --- a/pandas/tests/extension/test_numpy.py +++ b/pandas/tests/extension/test_numpy.py @@ -37,6 +37,11 @@ def allow_in_pandas(monkeypatch): yield +@pytest.fixture +def columns(): + return PandasArray(np.arange(1, 3)) + + @pytest.fixture def data(allow_in_pandas, dtype): if dtype.numpy_dtype == "object": diff --git a/pandas/tests/extension/test_period.py b/pandas/tests/extension/test_period.py index c439b8b5ed319..9d922d2ffc23c 100644 --- a/pandas/tests/extension/test_period.py +++ b/pandas/tests/extension/test_period.py @@ -15,6 +15,11 @@ def dtype(): return PeriodDtype(freq="D") +@pytest.fixture +def columns(dtype): + return PeriodArray(np.arange(2020, 2021), freq=dtype.freq) + + @pytest.fixture def data(dtype): return PeriodArray(np.arange(1970, 2070), freq=dtype.freq) diff --git a/pandas/tests/extension/test_sparse.py b/pandas/tests/extension/test_sparse.py index 694bbee59606f..00a4a2ff599f2 100644 --- a/pandas/tests/extension/test_sparse.py +++ b/pandas/tests/extension/test_sparse.py @@ -29,6 +29,11 @@ def dtype(): return SparseDtype() +@pytest.fixture +def columns(): + return SparseArray(np.ones(10) * 2) + + @pytest.fixture(params=[0, np.nan]) def data(request): """Length-100 PeriodArray for semantics test.""" diff --git a/pandas/tests/extension/test_string.py b/pandas/tests/extension/test_string.py index 86aed671f1b88..d3daad6357dcd 100644 --- a/pandas/tests/extension/test_string.py +++ b/pandas/tests/extension/test_string.py @@ -13,6 +13,13 @@ def dtype(): 
return StringDtype() +@pytest.fixture +def columns(): + strings = np.random.choice(list(string.ascii_letters), size=1) + + return StringArray._from_sequence(strings) + + @pytest.fixture def data(): strings = np.random.choice(list(string.ascii_letters), size=100) From ff26d9511f42b629446e03339c3f9e69b87d0cc7 Mon Sep 17 00:00:00 2001 From: Matsuoka Kota Date: Sun, 26 Apr 2020 21:41:40 +0900 Subject: [PATCH 4/8] Remove column fixtures --- pandas/core/internals/construction.py | 9 +++++---- pandas/tests/extension/arrow/test_bool.py | 11 ++--------- pandas/tests/extension/base/constructors.py | 6 ++++-- pandas/tests/extension/decimal/test_decimal.py | 5 ----- pandas/tests/extension/json/test_json.py | 10 ---------- pandas/tests/extension/test_boolean.py | 5 ----- pandas/tests/extension/test_categorical.py | 5 ----- pandas/tests/extension/test_datetime.py | 5 ----- pandas/tests/extension/test_integer.py | 9 ++------- pandas/tests/extension/test_interval.py | 9 +++------ pandas/tests/extension/test_numpy.py | 5 ----- pandas/tests/extension/test_period.py | 5 ----- pandas/tests/extension/test_sparse.py | 5 ----- pandas/tests/extension/test_string.py | 7 ------- 14 files changed, 16 insertions(+), 80 deletions(-) diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index 045df38a0421b..86cf25bfb0801 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -242,10 +242,11 @@ def init_dict(data, index, columns, dtype=None): # no obvious "empty" int column if missing.any() and not is_integer_dtype(dtype): - if is_extension_array_dtype(dtype): - # GH 33623 - nan_dtype = dtype - elif dtype is None or np.issubdtype(dtype, np.flexible): + if ( + dtype is None + or is_extension_array_dtype(dtype) + or np.issubdtype(dtype, np.flexible) + ): # GH#1783 nan_dtype = object else: diff --git a/pandas/tests/extension/arrow/test_bool.py b/pandas/tests/extension/arrow/test_bool.py index 
44ba7ef9dbf86..21c27950403bb 100644 --- a/pandas/tests/extension/arrow/test_bool.py +++ b/pandas/tests/extension/arrow/test_bool.py @@ -15,13 +15,6 @@ def dtype(): return ArrowBoolDtype() -@pytest.fixture -def columns(): - values = np.random.randint(0, 2, size=100, dtype=bool) - values[1] = ~values[0] - return ArrowBoolArray.from_scalars(values) - - @pytest.fixture def data(): values = np.random.randint(0, 2, size=100, dtype=bool) @@ -63,8 +56,8 @@ def test_from_sequence_from_cls(self, data): super().test_from_sequence_from_cls(data) @pytest.mark.xfail(reason="bad is-na for empty data") - def test_construct_empty_dataframe(self, columns, dtype): - super().test_construct_empty_dataframe(columns, dtype) + def test_construct_empty_dataframe(self, dtype): + super().test_construct_empty_dataframe(dtype) class TestReduce(base.BaseNoReduceTests): diff --git a/pandas/tests/extension/base/constructors.py b/pandas/tests/extension/base/constructors.py index f153f2d36214f..38c4fefa3bb03 100644 --- a/pandas/tests/extension/base/constructors.py +++ b/pandas/tests/extension/base/constructors.py @@ -84,6 +84,8 @@ def test_pandas_array_dtype(self, data): expected = pd.arrays.PandasArray(np.asarray(data, dtype=object)) self.assert_equal(result, expected) - def test_construct_empty_dataframe(self, columns, dtype): + def test_construct_empty_dataframe(self, dtype): # GH 33623 - pd.DataFrame(columns=columns, dtype=dtype) + result = pd.DataFrame(columns=["a"], dtype=dtype) + expected = pd.DataFrame(data=[], columns=["a"], dtype=dtype) + self.assert_frame_equal(result, expected) diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py index a0e694bb670c0..f4ffcb8d0f109 100644 --- a/pandas/tests/extension/decimal/test_decimal.py +++ b/pandas/tests/extension/decimal/test_decimal.py @@ -17,11 +17,6 @@ def dtype(): return DecimalDtype() -@pytest.fixture -def columns(): - return DecimalArray(make_data()) - - @pytest.fixture def data(): 
return DecimalArray(make_data()) diff --git a/pandas/tests/extension/json/test_json.py b/pandas/tests/extension/json/test_json.py index 705759a11d749..d086896fb09c3 100644 --- a/pandas/tests/extension/json/test_json.py +++ b/pandas/tests/extension/json/test_json.py @@ -15,16 +15,6 @@ def dtype(): return JSONDtype() -@pytest.fixture -def columns(): - data = make_data() - - while len(data[0]) == len(data[1]): - data = make_data() - - return JSONArray(data) - - @pytest.fixture def data(): """Length-100 PeriodArray for semantics test.""" diff --git a/pandas/tests/extension/test_boolean.py b/pandas/tests/extension/test_boolean.py index 7be55093ee271..e2331b69916fb 100644 --- a/pandas/tests/extension/test_boolean.py +++ b/pandas/tests/extension/test_boolean.py @@ -33,11 +33,6 @@ def dtype(): return BooleanDtype() -@pytest.fixture -def columns(dtype): - return pd.array([True, False], dtype=dtype) - - @pytest.fixture def data(dtype): return pd.array(make_data(), dtype=dtype) diff --git a/pandas/tests/extension/test_categorical.py b/pandas/tests/extension/test_categorical.py index 6af1f36b40366..059d3453995bd 100644 --- a/pandas/tests/extension/test_categorical.py +++ b/pandas/tests/extension/test_categorical.py @@ -41,11 +41,6 @@ def dtype(): return CategoricalDtype() -@pytest.fixture -def columns(): - return Categorical(make_data()) - - @pytest.fixture def data(): """Length-100 array for this type. 
diff --git a/pandas/tests/extension/test_datetime.py b/pandas/tests/extension/test_datetime.py index 86a3e627d3f10..3aa188098620d 100644 --- a/pandas/tests/extension/test_datetime.py +++ b/pandas/tests/extension/test_datetime.py @@ -13,11 +13,6 @@ def dtype(request): return DatetimeTZDtype(unit="ns", tz=request.param) -@pytest.fixture -def columns(dtype): - return DatetimeArray(pd.date_range("2000", periods=100, tz=dtype.tz), dtype=dtype) - - @pytest.fixture def data(dtype): data = DatetimeArray(pd.date_range("2000", periods=100, tz=dtype.tz), dtype=dtype) diff --git a/pandas/tests/extension/test_integer.py b/pandas/tests/extension/test_integer.py index a239085bb4a1a..c112f52270bdb 100644 --- a/pandas/tests/extension/test_integer.py +++ b/pandas/tests/extension/test_integer.py @@ -54,11 +54,6 @@ def dtype(request): return request.param() -@pytest.fixture -def columns(): - return integer_array(make_data()) - - @pytest.fixture def data(dtype): return integer_array(make_data(), dtype=dtype) @@ -192,8 +187,8 @@ class TestInterface(base.BaseInterfaceTests): class TestConstructors(base.BaseConstructorsTests): @pytest.mark.xfail(reason="bad is-na for empty data") - def test_construct_empty_dataframe(self, columns, dtyoe): - super().test_construct_empty_dataframe(columns, dtype) + def test_construct_empty_dataframe(self, dtype): + super().test_construct_empty_dataframe(dtype) class TestReshaping(base.BaseReshapingTests): diff --git a/pandas/tests/extension/test_interval.py b/pandas/tests/extension/test_interval.py index db620cf42d373..5378a26e8188b 100644 --- a/pandas/tests/extension/test_interval.py +++ b/pandas/tests/extension/test_interval.py @@ -35,11 +35,6 @@ def dtype(): return IntervalDtype() -@pytest.fixture -def columns(): - return IntervalArray(make_data()) - - @pytest.fixture def data(): """Length-100 PeriodArray for semantics test.""" @@ -88,7 +83,9 @@ class TestCasting(BaseInterval, base.BaseCastingTests): class TestConstructors(BaseInterval, 
base.BaseConstructorsTests): - pass + @pytest.mark.xfail(reason="bad is-na for empty data") + def test_construct_empty_dataframe(self, dtype): + super().test_construct_empty_dataframe(dtype) class TestGetitem(BaseInterval, base.BaseGetitemTests): diff --git a/pandas/tests/extension/test_numpy.py b/pandas/tests/extension/test_numpy.py index 8e8556cf39f4e..aa5a99282131a 100644 --- a/pandas/tests/extension/test_numpy.py +++ b/pandas/tests/extension/test_numpy.py @@ -37,11 +37,6 @@ def allow_in_pandas(monkeypatch): yield -@pytest.fixture -def columns(): - return PandasArray(np.arange(1, 3)) - - @pytest.fixture def data(allow_in_pandas, dtype): if dtype.numpy_dtype == "object": diff --git a/pandas/tests/extension/test_period.py b/pandas/tests/extension/test_period.py index 9d922d2ffc23c..c439b8b5ed319 100644 --- a/pandas/tests/extension/test_period.py +++ b/pandas/tests/extension/test_period.py @@ -15,11 +15,6 @@ def dtype(): return PeriodDtype(freq="D") -@pytest.fixture -def columns(dtype): - return PeriodArray(np.arange(2020, 2021), freq=dtype.freq) - - @pytest.fixture def data(dtype): return PeriodArray(np.arange(1970, 2070), freq=dtype.freq) diff --git a/pandas/tests/extension/test_sparse.py b/pandas/tests/extension/test_sparse.py index 00a4a2ff599f2..694bbee59606f 100644 --- a/pandas/tests/extension/test_sparse.py +++ b/pandas/tests/extension/test_sparse.py @@ -29,11 +29,6 @@ def dtype(): return SparseDtype() -@pytest.fixture -def columns(): - return SparseArray(np.ones(10) * 2) - - @pytest.fixture(params=[0, np.nan]) def data(request): """Length-100 PeriodArray for semantics test.""" diff --git a/pandas/tests/extension/test_string.py b/pandas/tests/extension/test_string.py index d3daad6357dcd..86aed671f1b88 100644 --- a/pandas/tests/extension/test_string.py +++ b/pandas/tests/extension/test_string.py @@ -13,13 +13,6 @@ def dtype(): return StringDtype() -@pytest.fixture -def columns(): - strings = np.random.choice(list(string.ascii_letters), size=1) - - return 
StringArray._from_sequence(strings) - - @pytest.fixture def data(): strings = np.random.choice(list(string.ascii_letters), size=100) From 92d75b7513915de2471da30143b5117c8fc629d9 Mon Sep 17 00:00:00 2001 From: Matsuoka Kota Date: Wed, 29 Apr 2020 09:00:19 +0900 Subject: [PATCH 5/8] Remove xfail in test_integer --- pandas/core/arrays/integer.py | 2 ++ pandas/tests/extension/test_integer.py | 4 +--- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index 5605b3fbc5dfa..00c59e0df75c5 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -184,6 +184,8 @@ def coerce_to_array( ------- tuple of (values, mask) """ + values = [] if values is np.nan else values + # if values is integer numpy array, preserve it's dtype if dtype is None and hasattr(values, "dtype"): if is_integer_dtype(values.dtype): diff --git a/pandas/tests/extension/test_integer.py b/pandas/tests/extension/test_integer.py index c112f52270bdb..725533765ca2c 100644 --- a/pandas/tests/extension/test_integer.py +++ b/pandas/tests/extension/test_integer.py @@ -186,9 +186,7 @@ class TestInterface(base.BaseInterfaceTests): class TestConstructors(base.BaseConstructorsTests): - @pytest.mark.xfail(reason="bad is-na for empty data") - def test_construct_empty_dataframe(self, dtype): - super().test_construct_empty_dataframe(dtype) + pass class TestReshaping(base.BaseReshapingTests): From 5fcfed64a25ed98d44ecedcab8328d332c63131d Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Sun, 3 May 2020 16:38:08 +0100 Subject: [PATCH 6/8] revert changes to IntegerArray, fix IntervalArray, update test per comment --- pandas/core/arrays/integer.py | 2 -- pandas/core/internals/construction.py | 7 +++---- pandas/tests/extension/arrow/test_bool.py | 8 ++++---- pandas/tests/extension/base/constructors.py | 4 +++- pandas/tests/extension/test_interval.py | 4 +--- 5 files changed, 11 insertions(+), 14 deletions(-) diff --git 
a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index 0c5a26c41cd85..743267534bfaa 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -196,8 +196,6 @@ def coerce_to_array( ------- tuple of (values, mask) """ - values = [] if values is np.nan else values - # if values is integer numpy array, preserve it's dtype if dtype is None and hasattr(values, "dtype"): if is_integer_dtype(values.dtype): diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index 999f4701ee54b..b2af149ccf14c 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -257,10 +257,9 @@ def init_dict(data: Dict, index, columns, dtype: Optional[DtypeObj] = None): # no obvious "empty" int column if missing.any() and not is_integer_dtype(dtype): - if ( - dtype is None - or is_extension_array_dtype(dtype) - or np.issubdtype(dtype, np.flexible) + if dtype is None or ( + not is_extension_array_dtype(dtype) + and np.issubdtype(dtype, np.flexible) ): # GH#1783 nan_dtype = np.dtype(object) diff --git a/pandas/tests/extension/arrow/test_bool.py b/pandas/tests/extension/arrow/test_bool.py index 14b62b6ab65bf..48f1c34764313 100644 --- a/pandas/tests/extension/arrow/test_bool.py +++ b/pandas/tests/extension/arrow/test_bool.py @@ -57,10 +57,6 @@ def test_from_dtype(self, data): def test_from_sequence_from_cls(self, data): super().test_from_sequence_from_cls(data) - @pytest.mark.xfail(reason="bad is-na for empty data") - def test_construct_empty_dataframe(self, dtype): - super().test_construct_empty_dataframe(dtype) - @pytest.mark.skipif(not PY37, reason="timeout on Linux py36_locale") @pytest.mark.xfail(reason="pa.NULL is not recognised as scalar, GH-33899") def test_series_constructor_no_data_with_index(self, dtype, na_value): @@ -73,6 +69,10 @@ def test_series_constructor_scalar_na_with_index(self, dtype, na_value): # pyarrow.lib.ArrowInvalid: only handle 1-dimensional arrays 
super().test_series_constructor_scalar_na_with_index(dtype, na_value) + @pytest.mark.xfail(reason="raises AssertionError") + def test_construct_empty_dataframe(self, dtype): + super().test_construct_empty_dataframe(dtype) + class TestReduce(base.BaseNoReduceTests): def test_reduce_series_boolean(self): diff --git a/pandas/tests/extension/base/constructors.py b/pandas/tests/extension/base/constructors.py index 367d29105ca99..5c9e5dcf3ae24 100644 --- a/pandas/tests/extension/base/constructors.py +++ b/pandas/tests/extension/base/constructors.py @@ -112,5 +112,7 @@ def test_pandas_array_dtype(self, data): def test_construct_empty_dataframe(self, dtype): # GH 33623 result = pd.DataFrame(columns=["a"], dtype=dtype) - expected = pd.DataFrame(data=[], columns=["a"], dtype=dtype) + expected = pd.DataFrame( + {"a": pd.array([], dtype=dtype)}, index=pd.Index([], dtype="object") + ) self.assert_frame_equal(result, expected) diff --git a/pandas/tests/extension/test_interval.py b/pandas/tests/extension/test_interval.py index 5378a26e8188b..2411f6cfbd936 100644 --- a/pandas/tests/extension/test_interval.py +++ b/pandas/tests/extension/test_interval.py @@ -83,9 +83,7 @@ class TestCasting(BaseInterval, base.BaseCastingTests): class TestConstructors(BaseInterval, base.BaseConstructorsTests): - @pytest.mark.xfail(reason="bad is-na for empty data") - def test_construct_empty_dataframe(self, dtype): - super().test_construct_empty_dataframe(dtype) + pass class TestGetitem(BaseInterval, base.BaseGetitemTests): From 5ad5f8b4d5e5bb81ff00996ebb3d088264eb8c37 Mon Sep 17 00:00:00 2001 From: Matsuoka Kota Date: Tue, 5 May 2020 22:14:01 +0900 Subject: [PATCH 7/8] add a whatsnew. 
--- doc/source/whatsnew/v1.1.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 47f67e9c2a4b3..96c7f7db03e0d 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -755,7 +755,7 @@ ExtensionArray - Fixed bug where :meth:`Series.value_counts` would raise on empty input of ``Int64`` dtype (:issue:`33317`) - Fixed bug in :class:`Series` construction with EA dtype and index but no data or scalar data fails (:issue:`26469`) - Fixed bug that caused :meth:`Series.__repr__()` to crash for extension types whose elements are multidimensional arrays (:issue:`33770`). - +- Fixed bug where :meth:`init_dict` would raise on empty input (:issue:`27953` and :issue:`33623`) Other ^^^^^ From 9e1530196f3268e137436c2e74b48ec7249307ad Mon Sep 17 00:00:00 2001 From: Matsuoka Kota Date: Tue, 5 May 2020 23:52:50 +0900 Subject: [PATCH 8/8] update the whatsnew --- doc/source/whatsnew/v1.1.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 96c7f7db03e0d..19f59f705a42e 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -755,7 +755,7 @@ ExtensionArray - Fixed bug where :meth:`Series.value_counts` would raise on empty input of ``Int64`` dtype (:issue:`33317`) - Fixed bug in :class:`Series` construction with EA dtype and index but no data or scalar data fails (:issue:`26469`) - Fixed bug that caused :meth:`Series.__repr__()` to crash for extension types whose elements are multidimensional arrays (:issue:`33770`). -- Fixed bug where :meth:`init_dict` would raise on empty input (:issue:`27953` and :issue:`33623`) +- Fixed bug where ``DataFrame(columns=.., dtype='string')`` would fail (:issue:`27953`, :issue:`33623`) Other ^^^^^