Skip to content

BUG: Create empty dataframe with string dtype fails #33651

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
5 changes: 4 additions & 1 deletion pandas/core/internals/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -257,7 +257,10 @@ def init_dict(data: Dict, index, columns, dtype: Optional[DtypeObj] = None):

# no obvious "empty" int column
if missing.any() and not is_integer_dtype(dtype):
if dtype is None or np.issubdtype(dtype, np.flexible):
if dtype is None or (
not is_extension_array_dtype(dtype)
and np.issubdtype(dtype, np.flexible)
):
# GH#1783
nan_dtype = np.dtype(object)
else:
Expand Down
4 changes: 4 additions & 0 deletions pandas/tests/extension/arrow/test_bool.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,10 @@ def test_series_constructor_scalar_na_with_index(self, dtype, na_value):
# pyarrow.lib.ArrowInvalid: only handle 1-dimensional arrays
super().test_series_constructor_scalar_na_with_index(dtype, na_value)

@pytest.mark.xfail(reason="raises AssertionError")
def test_construct_empty_dataframe(self, dtype):
super().test_construct_empty_dataframe(dtype)


class TestReduce(base.BaseNoReduceTests):
def test_reduce_series_boolean(self):
Expand Down
8 changes: 8 additions & 0 deletions pandas/tests/extension/base/constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,3 +108,11 @@ def test_pandas_array_dtype(self, data):
result = pd.array(data, dtype=np.dtype(object))
expected = pd.arrays.PandasArray(np.asarray(data, dtype=object))
self.assert_equal(result, expected)

def test_construct_empty_dataframe(self, dtype):
# GH 33623
result = pd.DataFrame(columns=["a"], dtype=dtype)
expected = pd.DataFrame(
{"a": pd.array([], dtype=dtype)}, index=pd.Index([], dtype="object")
)
self.assert_frame_equal(result, expected)