diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 48eff0543ad4d..e9e87ec202ef5 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -241,7 +241,7 @@ Conversion ^^^^^^^^^^ - Bug in :class:`Series` construction from NumPy array with big-endian ``datetime64`` dtype (:issue:`29684`) - Bug in :class:`Timedelta` construction with large nanoseconds keyword value (:issue:`32402`) -- +- Bug in :class:`DataFrame` construction where sets would be duplicated rather than raising a ``TypeError`` (:issue:`32582`) Strings ^^^^^^^ diff --git a/pandas/core/construction.py b/pandas/core/construction.py index e2d8fba8d4148..c9754ff588896 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -5,6 +5,7 @@ These should not depend on core.internals. """ +from collections import abc from typing import TYPE_CHECKING, Any, Optional, Sequence, Union, cast import numpy as np @@ -446,6 +447,8 @@ def sanitize_array( # GH#16804 arr = np.arange(data.start, data.stop, data.step, dtype="int64") subarr = _try_cast(arr, dtype, copy, raise_cast_failure) + elif isinstance(data, abc.Set): + raise TypeError("Set type is unordered") else: subarr = _try_cast(data, dtype, copy, raise_cast_failure) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index d938c0f6f1066..924952ad334c4 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -2604,3 +2604,9 @@ def test_from_2d_ndarray_with_dtype(self): expected = DataFrame(array_dim2).astype("datetime64[ns, UTC]") tm.assert_frame_equal(df, expected) + + def test_construction_from_set_raises(self): + # https://github.com/pandas-dev/pandas/issues/32582 + msg = "Set type is unordered" + with pytest.raises(TypeError, match=msg): + pd.DataFrame({"a": {1, 2, 3}})