Skip to content

Commit e145443

Browse files
mroeschke authored and jreback committed
ERR: Raise on duplicates names in read_csv (#27175)
1 parent 7ec7c9e commit e145443

File tree

6 files changed

+15
-24
lines changed

6 files changed

+15
-24
lines changed

doc/source/user_guide/io.rst

+1-2
Original file line numberDiff line numberDiff line change
@@ -108,8 +108,7 @@ header : int or list of ints, default ``'infer'``
108108
line of data rather than the first line of the file.
109109
names : array-like, default ``None``
110110
List of column names to use. If file contains no header row, then you should
111-
explicitly pass ``header=None``. Duplicates in this list will cause
112-
a ``UserWarning`` to be issued.
111+
explicitly pass ``header=None``. Duplicates in this list are not allowed.
113112
index_col : int, str, sequence of int / str, or False, default ``None``
114113
Column(s) to use as the row labels of the ``DataFrame``, either given as
115114
string name or column index. If a sequence of int / str is given, a

doc/source/whatsnew/v0.19.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -218,7 +218,7 @@ contained the values ``[0, 3]``.
218218
**New behavior**:
219219

220220
.. ipython:: python
221-
:okwarning:
221+
:okexcept:
222222
223223
pd.read_csv(StringIO(data), names=names)
224224

doc/source/whatsnew/v0.25.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -567,6 +567,7 @@ Other API changes
567567
- Using an unsupported version of Beautiful Soup 4 will now raise an ``ImportError`` instead of a ``ValueError`` (:issue:`27063`)
568568
- :meth:`Series.to_excel` and :meth:`DataFrame.to_excel` will now raise a ``ValueError`` when saving timezone aware data. (:issue:`27008`, :issue:`7056`)
569569
- :meth:`DataFrame.to_hdf` and :meth:`Series.to_hdf` will now raise a ``NotImplementedError`` when saving a :class:`MultiIndex` with extension data types for a ``fixed`` format. (:issue:`7775`)
570+
- Passing duplicate ``names`` in :meth:`read_csv` will now raise a ``ValueError`` (:issue:`17346`)
570571

571572
.. _whatsnew_0250.deprecations:
572573

pandas/io/parsers.py

+3-6
Original file line numberDiff line numberDiff line change
@@ -99,8 +99,8 @@
9999
data rather than the first line of the file.
100100
names : array-like, optional
101101
List of column names to use. If file contains no header row, then you
102-
should explicitly pass ``header=None``. Duplicates in this list will cause
103-
a ``UserWarning`` to be issued.
102+
should explicitly pass ``header=None``. Duplicates in this list are not
103+
allowed.
104104
index_col : int, str, sequence of int / str, or False, default ``None``
105105
Column(s) to use as the row labels of the ``DataFrame``, either given as
106106
string name or column index. If a sequence of int / str is given, a
@@ -394,10 +394,7 @@ def _validate_names(names):
394394

395395
if names is not None:
396396
if len(names) != len(set(names)):
397-
msg = ("Duplicate names specified. This "
398-
"will raise an error in the future.")
399-
warnings.warn(msg, UserWarning, stacklevel=3)
400-
397+
raise ValueError('Duplicate names are not allowed.')
401398
return names
402399

403400

pandas/tests/io/parser/test_dtypes.py

+4-5
Original file line numberDiff line numberDiff line change
@@ -424,18 +424,17 @@ def test_empty_with_dup_column_pass_dtype_by_indexes(all_parsers):
424424
tm.assert_frame_equal(result, expected)
425425

426426

427-
def test_empty_with_dup_column_pass_dtype_by_indexes_warn(all_parsers):
427+
def test_empty_with_dup_column_pass_dtype_by_indexes_raises(all_parsers):
428428
# see gh-9424
429429
parser = all_parsers
430430
expected = concat([Series([], name="one", dtype="u1"),
431431
Series([], name="one.1", dtype="f")], axis=1)
432432
expected.index = expected.index.astype(object)
433433

434-
with tm.assert_produces_warning(UserWarning, check_stacklevel=False):
434+
with pytest.raises(ValueError, match='Duplicate names'):
435435
data = ""
436-
result = parser.read_csv(StringIO(data), names=["one", "one"],
437-
dtype={0: "u1", 1: "f"})
438-
tm.assert_frame_equal(result, expected)
436+
parser.read_csv(StringIO(data), names=["one", "one"],
437+
dtype={0: "u1", 1: "f"})
439438

440439

441440
def test_raise_on_passed_int_dtype_with_nas(all_parsers):

pandas/tests/io/parser/test_mangle_dupes.py

+5-10
Original file line numberDiff line numberDiff line change
@@ -37,17 +37,13 @@ def test_basic_names(all_parsers):
3737
tm.assert_frame_equal(result, expected)
3838

3939

40-
def test_basic_names_warn(all_parsers):
40+
def test_basic_names_raise(all_parsers):
4141
# See gh-7160
4242
parser = all_parsers
4343

4444
data = "0,1,2\n3,4,5"
45-
expected = DataFrame([[0, 1, 2], [3, 4, 5]],
46-
columns=["a", "b", "a.1"])
47-
48-
with tm.assert_produces_warning(UserWarning, check_stacklevel=False):
49-
result = parser.read_csv(StringIO(data), names=["a", "b", "a"])
50-
tm.assert_frame_equal(result, expected)
45+
with pytest.raises(ValueError, match='Duplicate names'):
46+
parser.read_csv(StringIO(data), names=["a", "b", "a"])
5147

5248

5349
@pytest.mark.parametrize("data,expected", [
@@ -90,9 +86,8 @@ def test_thorough_mangle_names(all_parsers, data, names, expected):
9086
# see gh-17095
9187
parser = all_parsers
9288

93-
with tm.assert_produces_warning(UserWarning, check_stacklevel=False):
94-
result = parser.read_csv(StringIO(data), names=names)
95-
tm.assert_frame_equal(result, expected)
89+
with pytest.raises(ValueError, match='Duplicate names'):
90+
parser.read_csv(StringIO(data), names=names)
9691

9792

9893
def test_mangled_unnamed_placeholders(all_parsers):

0 commit comments

Comments
 (0)