diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index 9af6c36cc4e4d..b9f90bf750482 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -108,8 +108,7 @@ header : int or list of ints, default ``'infer'`` line of data rather than the first line of the file. names : array-like, default ``None`` List of column names to use. If file contains no header row, then you should - explicitly pass ``header=None``. Duplicates in this list will cause - a ``UserWarning`` to be issued. + explicitly pass ``header=None``. Duplicates in this list are not allowed. index_col : int, str, sequence of int / str, or False, default ``None`` Column(s) to use as the row labels of the ``DataFrame``, either given as string name or column index. If a sequence of int / str is given, a diff --git a/doc/source/whatsnew/v0.19.0.rst b/doc/source/whatsnew/v0.19.0.rst index 52ea9e8839e45..1dad8769a6b39 100644 --- a/doc/source/whatsnew/v0.19.0.rst +++ b/doc/source/whatsnew/v0.19.0.rst @@ -218,7 +218,7 @@ contained the values ``[0, 3]``. **New behavior**: .. ipython:: python - :okwarning: + :okexcept: pd.read_csv(StringIO(data), names=names) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 2030bb4d974c3..7495d7d7313fe 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -567,6 +567,7 @@ Other API changes - Using an unsupported version of Beautiful Soup 4 will now raise an ``ImportError`` instead of a ``ValueError`` (:issue:`27063`) - :meth:`Series.to_excel` and :meth:`DataFrame.to_excel` will now raise a ``ValueError`` when saving timezone aware data. (:issue:`27008`, :issue:`7056`) - :meth:`DataFrame.to_hdf` and :meth:`Series.to_hdf` will now raise a ``NotImplementedError`` when saving a :class:`MultiIndex` with extention data types for a ``fixed`` format. (:issue:`7775`) +- Passing duplicate ``names`` in :meth:`read_csv` will now raise a ``ValueError`` (:issue:`17346`) .. _whatsnew_0250.deprecations: diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 73d47af5922f7..8fe0e466e7c0a 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -99,8 +99,8 @@ data rather than the first line of the file. names : array-like, optional List of column names to use. If file contains no header row, then you - should explicitly pass ``header=None``. Duplicates in this list will cause - a ``UserWarning`` to be issued. + should explicitly pass ``header=None``. Duplicates in this list are not + allowed. index_col : int, str, sequence of int / str, or False, default ``None`` Column(s) to use as the row labels of the ``DataFrame``, either given as string name or column index. If a sequence of int / str is given, a @@ -394,10 +394,7 @@ def _validate_names(names): if names is not None: if len(names) != len(set(names)): - msg = ("Duplicate names specified. This " - "will raise an error in the future.") - warnings.warn(msg, UserWarning, stacklevel=3) - + raise ValueError('Duplicate names are not allowed.') return names diff --git a/pandas/tests/io/parser/test_dtypes.py b/pandas/tests/io/parser/test_dtypes.py index 1d3c935e9101b..738b9d9693750 100644 --- a/pandas/tests/io/parser/test_dtypes.py +++ b/pandas/tests/io/parser/test_dtypes.py @@ -424,18 +424,17 @@ def test_empty_with_dup_column_pass_dtype_by_indexes(all_parsers): tm.assert_frame_equal(result, expected) -def test_empty_with_dup_column_pass_dtype_by_indexes_warn(all_parsers): +def test_empty_with_dup_column_pass_dtype_by_indexes_raises(all_parsers): # see gh-9424 parser = all_parsers expected = concat([Series([], name="one", dtype="u1"), Series([], name="one.1", dtype="f")], axis=1) expected.index = expected.index.astype(object) - with tm.assert_produces_warning(UserWarning, check_stacklevel=False): + with pytest.raises(ValueError, match='Duplicate names'): data = "" - result = parser.read_csv(StringIO(data), names=["one", "one"], - dtype={0: "u1", 1: "f"}) - tm.assert_frame_equal(result, expected) + parser.read_csv(StringIO(data), names=["one", "one"], + dtype={0: "u1", 1: "f"}) def test_raise_on_passed_int_dtype_with_nas(all_parsers): diff --git a/pandas/tests/io/parser/test_mangle_dupes.py b/pandas/tests/io/parser/test_mangle_dupes.py index 3b00acd8598fa..6ab761398631b 100644 --- a/pandas/tests/io/parser/test_mangle_dupes.py +++ b/pandas/tests/io/parser/test_mangle_dupes.py @@ -37,17 +37,13 @@ def test_basic_names(all_parsers): tm.assert_frame_equal(result, expected) -def test_basic_names_warn(all_parsers): +def test_basic_names_raise(all_parsers): # See gh-7160 parser = all_parsers data = "0,1,2\n3,4,5" - expected = DataFrame([[0, 1, 2], [3, 4, 5]], - columns=["a", "b", "a.1"]) - - with tm.assert_produces_warning(UserWarning, check_stacklevel=False): - result = parser.read_csv(StringIO(data), names=["a", "b", "a"]) - tm.assert_frame_equal(result, expected) + with pytest.raises(ValueError, match='Duplicate names'): + parser.read_csv(StringIO(data), names=["a", "b", "a"]) @pytest.mark.parametrize("data,expected", [ @@ -90,9 +86,8 @@ def test_thorough_mangle_names(all_parsers, data, names, expected): # see gh-17095 parser = all_parsers - with tm.assert_produces_warning(UserWarning, check_stacklevel=False): - result = parser.read_csv(StringIO(data), names=names) - tm.assert_frame_equal(result, expected) + with pytest.raises(ValueError, match='Duplicate names'): + parser.read_csv(StringIO(data), names=names) def test_mangled_unnamed_placeholders(all_parsers):