Skip to content

Commit 66318df

Browse files
Dr-Irvnoatamir
authored and committed
API: Disallow sets as index and columns argument in DataFrame constructor (pandas-dev#47231)
1 parent d4efd3d commit 66318df

File tree

8 files changed

+27
-20
lines changed

8 files changed

+27
-20
lines changed

doc/source/whatsnew/v1.5.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -491,7 +491,7 @@ Other API changes
491491
- :func:`read_json` now raises ``FileNotFoundError`` (previously ``ValueError``) when input is a string ending in ``.json``, ``.json.gz``, ``.json.bz2``, etc. but no such file exists. (:issue:`29102`)
492492
- Operations with :class:`Timestamp` or :class:`Timedelta` that would previously raise ``OverflowError`` instead raise ``OutOfBoundsDatetime`` or ``OutOfBoundsTimedelta`` where appropriate (:issue:`47268`)
493493
- When :func:`read_sas` previously returned ``None``, it now returns an empty :class:`DataFrame` (:issue:`47410`)
494-
-
494+
- :class:`DataFrame` constructor raises if ``index`` or ``columns`` arguments are sets (:issue:`47215`)
495495

496496
.. ---------------------------------------------------------------------------
497497
.. _whatsnew_150.deprecations:

pandas/_typing.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@
1010
TYPE_CHECKING,
1111
Any,
1212
Callable,
13-
Collection,
1413
Dict,
1514
Hashable,
1615
Iterator,
@@ -115,7 +114,7 @@
115114
Ordered = Optional[bool]
116115
JSONSerializable = Optional[Union[PythonScalar, List, Dict]]
117116
Frequency = Union[str, "DateOffset"]
118-
Axes = Collection[Any]
117+
Axes = Union[AnyArrayLike, List, range]
119118

120119
RandomState = Union[
121120
int,

pandas/core/frame.py

+9-12
Original file line numberDiff line numberDiff line change
@@ -626,6 +626,12 @@ def __init__(
626626

627627
manager = get_option("mode.data_manager")
628628

629+
# GH47215
630+
if index is not None and isinstance(index, set):
631+
raise ValueError("index cannot be a set")
632+
if columns is not None and isinstance(columns, set):
633+
raise ValueError("columns cannot be a set")
634+
629635
if copy is None:
630636
if isinstance(data, dict):
631637
# retain pre-GH#38939 default behavior
@@ -729,10 +735,7 @@ def __init__(
729735
if not isinstance(data, np.ndarray) and treat_as_nested(data):
730736
# exclude ndarray as we may have cast it a few lines above
731737
if columns is not None:
732-
# error: Argument 1 to "ensure_index" has incompatible type
733-
# "Collection[Any]"; expected "Union[Union[Union[ExtensionArray,
734-
# ndarray], Index, Series], Sequence[Any]]"
735-
columns = ensure_index(columns) # type: ignore[arg-type]
738+
columns = ensure_index(columns)
736739
arrays, columns, index = nested_data_to_arrays(
737740
# error: Argument 3 to "nested_data_to_arrays" has incompatible
738741
# type "Optional[Collection[Any]]"; expected "Optional[Index]"
@@ -770,14 +773,8 @@ def __init__(
770773
if index is None or columns is None:
771774
raise ValueError("DataFrame constructor not properly called!")
772775

773-
# Argument 1 to "ensure_index" has incompatible type "Collection[Any]";
774-
# expected "Union[Union[Union[ExtensionArray, ndarray],
775-
# Index, Series], Sequence[Any]]"
776-
index = ensure_index(index) # type: ignore[arg-type]
777-
# Argument 1 to "ensure_index" has incompatible type "Collection[Any]";
778-
# expected "Union[Union[Union[ExtensionArray, ndarray],
779-
# Index, Series], Sequence[Any]]"
780-
columns = ensure_index(columns) # type: ignore[arg-type]
776+
index = ensure_index(index)
777+
columns = ensure_index(columns)
781778

782779
if not dtype:
783780
dtype, _ = infer_dtype_from_scalar(data, pandas_dtype=True)

pandas/core/generic.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -766,7 +766,7 @@ def _set_axis_nocheck(self, labels, axis: Axis, inplace: bool_t):
766766
obj.set_axis(labels, axis=axis, inplace=True)
767767
return obj
768768

769-
def _set_axis(self, axis: int, labels: AnyArrayLike | Sequence) -> None:
769+
def _set_axis(self, axis: int, labels: AnyArrayLike | list) -> None:
770770
labels = ensure_index(labels)
771771
self._mgr.set_axis(axis, labels)
772772
self._clear_item_cache()

pandas/core/indexes/base.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -43,8 +43,8 @@
4343
tz_compare,
4444
)
4545
from pandas._typing import (
46-
AnyArrayLike,
4746
ArrayLike,
47+
Axes,
4848
Dtype,
4949
DtypeObj,
5050
F,
@@ -7281,7 +7281,7 @@ def ensure_index_from_sequences(sequences, names=None) -> Index:
72817281
return MultiIndex.from_arrays(sequences, names=names)
72827282

72837283

7284-
def ensure_index(index_like: AnyArrayLike | Sequence, copy: bool = False) -> Index:
7284+
def ensure_index(index_like: Axes, copy: bool = False) -> Index:
72857285
"""
72867286
Ensure that we have an index from some index-like object.
72877287

pandas/core/series.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -561,7 +561,7 @@ def _constructor_expanddim(self) -> Callable[..., DataFrame]:
561561
def _can_hold_na(self) -> bool:
562562
return self._mgr._can_hold_na
563563

564-
def _set_axis(self, axis: int, labels: AnyArrayLike | Sequence) -> None:
564+
def _set_axis(self, axis: int, labels: AnyArrayLike | list) -> None:
565565
"""
566566
Override generic, we want to set the _typ here.
567567

pandas/io/formats/format.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@
5151
from pandas._libs.tslibs.nattype import NaTType
5252
from pandas._typing import (
5353
ArrayLike,
54+
Axes,
5455
ColspaceArgType,
5556
ColspaceType,
5657
CompressionOptions,
@@ -689,7 +690,9 @@ def _initialize_justify(self, justify: str | None) -> str:
689690

690691
def _initialize_columns(self, columns: Sequence[Hashable] | None) -> Index:
691692
if columns is not None:
692-
cols = ensure_index(columns)
693+
# GH 47231 - columns doesn't have to be `Sequence[str]`
694+
# Will fix in later PR
695+
cols = ensure_index(cast(Axes, columns))
693696
self.frame = self.frame[cols]
694697
return cols
695698
else:

pandas/tests/frame/test_constructors.py

+8
Original file line numberDiff line numberDiff line change
@@ -3001,6 +3001,14 @@ def test_construction_from_ndarray_with_eadtype_mismatched_columns(self):
30013001
with pytest.raises(ValueError, match=msg):
30023002
DataFrame(arr2, columns=["foo", "bar"])
30033003

3004+
def test_columns_indexes_raise_on_sets(self):
3005+
# GH 47215
3006+
data = [[1, 2, 3], [4, 5, 6]]
3007+
with pytest.raises(ValueError, match="index cannot be a set"):
3008+
DataFrame(data, index={"a", "b"})
3009+
with pytest.raises(ValueError, match="columns cannot be a set"):
3010+
DataFrame(data, columns={"a", "b", "c"})
3011+
30043012

30053013
def get1(obj): # TODO: make a helper in tm?
30063014
if isinstance(obj, Series):

0 commit comments

Comments
 (0)