diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 60aa1759958f6..9d151c78b2048 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -1121,6 +1121,7 @@ Other - :class:`IntegerArray` now implements the ``sum`` operation (:issue:`33172`) - Bug in :class:`Tick` comparisons raising ``TypeError`` when comparing against timedelta-like objects (:issue:`34088`) - Bug in :class:`Tick` multiplication raising ``TypeError`` when multiplying by a float (:issue:`34486`) +- Passing a ``set`` as the ``names`` argument to :func:`pandas.read_csv`, :func:`pandas.read_table`, or :func:`pandas.read_fwf` will now raise ``ValueError: Names should be an ordered collection.`` (:issue:`34946`) .. --------------------------------------------------------------------------- diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 679cf4c2d8929..62347f7110d76 100644 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -397,7 +397,8 @@ def _validate_integer(name, val, min_val=0): def _validate_names(names): """ - Raise ValueError if the `names` parameter contains duplicates. + Raise ValueError if the `names` parameter contains duplicates or has an + invalid data type. Parameters ---------- @@ -407,11 +408,13 @@ def _validate_names(names): Raises ------ ValueError - If names are not unique. + If names are not unique or are not ordered (e.g. set). 
""" if names is not None: if len(names) != len(set(names)): raise ValueError("Duplicate names are not allowed.") + if not is_list_like(names, allow_sets=False): + raise ValueError("Names should be an ordered collection.") def _read(filepath_or_buffer: FilePathOrBuffer, kwds): diff --git a/pandas/tests/io/parser/test_common.py b/pandas/tests/io/parser/test_common.py index e38fcf1380220..e6e868689b060 100644 --- a/pandas/tests/io/parser/test_common.py +++ b/pandas/tests/io/parser/test_common.py @@ -2135,3 +2135,13 @@ def test_no_header_two_extra_columns(all_parsers): parser = all_parsers df = parser.read_csv(stream, header=None, names=column_names, index_col=False) tm.assert_frame_equal(df, ref) + + +def test_read_csv_names_not_accepting_sets(all_parsers): + # GH 34946 + data = """\ + 1,2,3 + 4,5,6\n""" + parser = all_parsers + with pytest.raises(ValueError, match="Names should be an ordered collection."): + parser.read_csv(StringIO(data), names=set("QAZ"))