From d31e8c1a41ae2a4425670785480d08e254701492 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Tue, 10 Mar 2020 14:20:04 -0500 Subject: [PATCH 1/9] Test --- pandas/tests/frame/test_constructors.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 071d2409f1be2..092be38d742e1 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -2609,3 +2609,10 @@ def test_from_2d_ndarray_with_dtype(self): expected = pd.DataFrame(array_dim2).astype("datetime64[ns, UTC]") tm.assert_frame_equal(df, expected) + + def test_construction_from_set(self): + # https://github.com/pandas-dev/pandas/issues/32582 + result = pd.DataFrame({"a": {1, 2, 3}}) + expected = pd.DataFrame({"a": [1, 2, 3]}) + + tm.assert_frame_equal(result, expected) From a855d1157c2f3170e3269795c32298b0796dfb7d Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Tue, 10 Mar 2020 14:22:35 -0500 Subject: [PATCH 2/9] Cast set --- pandas/core/construction.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/core/construction.py b/pandas/core/construction.py index e2d8fba8d4148..06928f0058aa0 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -434,7 +434,9 @@ def sanitize_array( subarr = subarr.copy() return subarr - elif isinstance(data, (list, tuple)) and len(data) > 0: + elif isinstance(data, (list, tuple, set)) and len(data) > 0: + if isinstance(data, set): + data = list(data) if dtype is not None: subarr = _try_cast(data, dtype, copy, raise_cast_failure) else: From 8f13e110ffd533634d318778c6c63fea016b7ffa Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Tue, 10 Mar 2020 14:22:44 -0500 Subject: [PATCH 3/9] Doc --- doc/source/whatsnew/v1.1.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index e745bf3f5feed..bed74c7933b19 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -352,6 +352,7 @@ Other instead of ``TypeError: Can only append a Series if ignore_index=True or if the Series has a name`` (:issue:`30871`) - Set operations on an object-dtype :class:`Index` now always return object-dtype results (:issue:`31401`) - Bug in :meth:`AbstractHolidayCalendar.holidays` when no rules were defined (:issue:`31415`) +- Bug in :class:`DataFrame` expanding sets when passed to the constructor (:issue:`32582`) .. --------------------------------------------------------------------------- From 0e473adba646d48970d4a038a55bee1f40b5f02c Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Tue, 10 Mar 2020 19:41:51 -0500 Subject: [PATCH 4/9] Comment --- pandas/core/construction.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/core/construction.py b/pandas/core/construction.py index 06928f0058aa0..e7a20cb6ae8b4 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -435,6 +435,8 @@ def sanitize_array( return subarr elif isinstance(data, (list, tuple, set)) and len(data) > 0: + # We can't use is_list_like since range and array.array inputs + # are handled differently if isinstance(data, set): data = list(data) if dtype is not None: From 76984269a4209ba3b7d4da7d63772729bbfd315f Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Tue, 10 Mar 2020 21:18:08 -0500 Subject: [PATCH 5/9] Revert --- doc/source/whatsnew/v1.1.0.rst | 1 - pandas/core/construction.py | 6 +----- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index bed74c7933b19..e745bf3f5feed 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -352,7 +352,6 @@ Other instead of ``TypeError: Can only append a Series if ignore_index=True or if the Series has a name`` (:issue:`30871`) - Set operations on an object-dtype :class:`Index` now always return object-dtype results (:issue:`31401`) - Bug in :meth:`AbstractHolidayCalendar.holidays` when no rules were defined (:issue:`31415`) -- Bug in :class:`DataFrame` expanding sets when passed to the constructor (:issue:`32582`) .. --------------------------------------------------------------------------- diff --git a/pandas/core/construction.py b/pandas/core/construction.py index e7a20cb6ae8b4..e2d8fba8d4148 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -434,11 +434,7 @@ def sanitize_array( subarr = subarr.copy() return subarr - elif isinstance(data, (list, tuple, set)) and len(data) > 0: - # We can't use is_list_like since range and array.array inputs - # are handled differently - if isinstance(data, set): - data = list(data) + elif isinstance(data, (list, tuple)) and len(data) > 0: if dtype is not None: subarr = _try_cast(data, dtype, copy, raise_cast_failure) else: From f35aca579a6e3b9da5c43372894bffac8bbb04c3 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Tue, 10 Mar 2020 21:21:24 -0500 Subject: [PATCH 6/9] Update test --- pandas/tests/frame/test_constructors.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 092be38d742e1..e9cf08f714ded 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -2610,9 +2610,8 @@ def test_from_2d_ndarray_with_dtype(self): expected = pd.DataFrame(array_dim2).astype("datetime64[ns, UTC]") tm.assert_frame_equal(df, expected) - def test_construction_from_set(self): + def test_construction_from_set_raises(self): # https://github.com/pandas-dev/pandas/issues/32582 - result = pd.DataFrame({"a": {1, 2, 3}}) - expected = pd.DataFrame({"a": [1, 2, 3]}) - - tm.assert_frame_equal(result, expected) + msg = "Set type is unordered" + with pytest.raises(TypeError, match=msg): + pd.DataFrame({"a": {1, 2, 3}}) From 699bcfcf697c9e929feb652db155e7111140e2f3 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Tue, 10 Mar 2020 21:25:54 -0500 Subject: [PATCH 7/9] Raise for sets --- pandas/core/construction.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/core/construction.py b/pandas/core/construction.py index e2d8fba8d4148..8e125ad62c9fb 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -446,6 +446,8 @@ def sanitize_array( # GH#16804 arr = np.arange(data.start, data.stop, data.step, dtype="int64") subarr = _try_cast(arr, dtype, copy, raise_cast_failure) + elif isinstance(data, set): + raise TypeError("Set type is unordered") else: subarr = _try_cast(data, dtype, copy, raise_cast_failure) From 5312ecd1f36e1f794970eb5e241a9fd93b548b44 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Tue, 10 Mar 2020 21:26:06 -0500 Subject: [PATCH 8/9] New whatsnew --- doc/source/whatsnew/v1.1.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index e745bf3f5feed..ac8606a70033c 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -238,7 +238,7 @@ Conversion ^^^^^^^^^^ - Bug in :class:`Series` construction from NumPy array with big-endian ``datetime64`` dtype (:issue:`29684`) - Bug in :class:`Timedelta` construction with large nanoseconds keyword value (:issue:`32402`) -- +- Bug in :class:`DataFrame` construction where sets would be duplicated rather than raising (:issue:`32582`) Strings ^^^^^^^ From 69faf9d67bfbd7886b09a6864be1256d9a96c6f3 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Wed, 11 Mar 2020 09:57:46 -0500 Subject: [PATCH 9/9] Use abc.Set --- pandas/core/construction.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/construction.py b/pandas/core/construction.py index 8e125ad62c9fb..c9754ff588896 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -5,6 +5,7 @@ These should not depend on core.internals. """ +from collections import abc from typing import TYPE_CHECKING, Any, Optional, Sequence, Union, cast import numpy as np @@ -446,7 +447,7 @@ def sanitize_array( # GH#16804 arr = np.arange(data.start, data.stop, data.step, dtype="int64") subarr = _try_cast(arr, dtype, copy, raise_cast_failure) - elif isinstance(data, set): + elif isinstance(data, abc.Set): raise TypeError("Set type is unordered") else: subarr = _try_cast(data, dtype, copy, raise_cast_failure)