Skip to content

Commit 3854972

Browse files
phofl and JulianWgs
authored and committed
BUG: Raise ValueError if names and prefix are both defined (pandas-dev#41446)
1 parent 812fcbd commit 3854972

File tree

3 files changed

+48
-6
lines changed

3 files changed

+48
-6
lines changed

doc/source/whatsnew/v1.3.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -842,6 +842,7 @@ I/O
842842
- Bug in :func:`read_excel` raising ``AttributeError`` with ``MultiIndex`` header followed by two empty rows and no index, and bug affecting :func:`read_excel`, :func:`read_csv`, :func:`read_table`, :func:`read_fwf`, and :func:`read_clipboard` where one blank row after a ``MultiIndex`` header with no index would be dropped (:issue:`40442`)
843843
- Bug in :meth:`DataFrame.to_string` misplacing the truncation column when ``index=False`` (:issue:`40907`)
844844
- Bug in :func:`read_orc` always raising ``AttributeError`` (:issue:`40918`)
845+
- Bug in :func:`read_csv` and :func:`read_table` silently ignoring ``prefix`` when both ``names`` and ``prefix`` are specified; a ``ValueError`` is now raised instead (:issue:`39123`)
845846
- Bug in :func:`read_csv` and :func:`read_excel` not respecting dtype for duplicated column name when ``mangle_dupe_cols`` is set to ``True`` (:issue:`35211`)
846847
- Bug in :func:`read_csv` and :func:`read_table` misinterpreting arguments when ``sys.setprofile`` had been previously called (:issue:`41069`)
847848
- Bug in the conversion from pyarrow to pandas (e.g. for reading Parquet) with nullable dtypes and a pyarrow array whose data buffer size is not a multiple of dtype size (:issue:`40896`)

pandas/io/parsers/readers.py

+35-6
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
import pandas._libs.lib as lib
2121
from pandas._libs.parsers import STR_NA_VALUES
2222
from pandas._typing import (
23+
ArrayLike,
2324
DtypeArg,
2425
FilePathOrBuffer,
2526
StorageOptions,
@@ -485,11 +486,11 @@ def read_csv(
485486
delimiter=None,
486487
# Column and Index Locations and Names
487488
header="infer",
488-
names=None,
489+
names=lib.no_default,
489490
index_col=None,
490491
usecols=None,
491492
squeeze=False,
492-
prefix=None,
493+
prefix=lib.no_default,
493494
mangle_dupe_cols=True,
494495
# General Parsing Configuration
495496
dtype: Optional[DtypeArg] = None,
@@ -546,7 +547,14 @@ def read_csv(
546547
del kwds["sep"]
547548

548549
kwds_defaults = _refine_defaults_read(
549-
dialect, delimiter, delim_whitespace, engine, sep, defaults={"delimiter": ","}
550+
dialect,
551+
delimiter,
552+
delim_whitespace,
553+
engine,
554+
sep,
555+
names,
556+
prefix,
557+
defaults={"delimiter": ","},
550558
)
551559
kwds.update(kwds_defaults)
552560

@@ -567,11 +575,11 @@ def read_table(
567575
delimiter=None,
568576
# Column and Index Locations and Names
569577
header="infer",
570-
names=None,
578+
names=lib.no_default,
571579
index_col=None,
572580
usecols=None,
573581
squeeze=False,
574-
prefix=None,
582+
prefix=lib.no_default,
575583
mangle_dupe_cols=True,
576584
# General Parsing Configuration
577585
dtype: Optional[DtypeArg] = None,
@@ -627,7 +635,14 @@ def read_table(
627635
del kwds["sep"]
628636

629637
kwds_defaults = _refine_defaults_read(
630-
dialect, delimiter, delim_whitespace, engine, sep, defaults={"delimiter": "\t"}
638+
dialect,
639+
delimiter,
640+
delim_whitespace,
641+
engine,
642+
sep,
643+
names,
644+
prefix,
645+
defaults={"delimiter": "\t"},
631646
)
632647
kwds.update(kwds_defaults)
633648

@@ -1174,6 +1189,8 @@ def _refine_defaults_read(
11741189
delim_whitespace: bool,
11751190
engine: str,
11761191
sep: Union[str, object],
1192+
names: Union[Optional[ArrayLike], object],
1193+
prefix: Union[Optional[str], object],
11771194
defaults: Dict[str, Any],
11781195
):
11791196
"""Validate/refine default values of input parameters of read_csv, read_table.
@@ -1199,6 +1216,12 @@ def _refine_defaults_read(
11991216
sep : str or object
12001217
A delimiter provided by the user (str) or a sentinel value, i.e.
12011218
pandas._libs.lib.no_default.
1219+
names : array-like, optional
1220+
List of column names to use. If the file contains a header row,
1221+
then you should explicitly pass ``header=0`` to override the column names.
1222+
Duplicates in this list are not allowed.
1223+
prefix : str, optional
1224+
Prefix to add to column numbers when no header, e.g. 'X' for X0, X1, ...
12021225
defaults: dict
12031226
Default values of input parameters.
12041227
@@ -1232,6 +1255,12 @@ def _refine_defaults_read(
12321255
sep is lib.no_default or sep == delim_default
12331256
)
12341257

1258+
if names is not lib.no_default and prefix is not lib.no_default:
1259+
raise ValueError("Specified named and prefix; you can only specify one.")
1260+
1261+
kwds["names"] = None if names is lib.no_default else names
1262+
kwds["prefix"] = None if prefix is lib.no_default else prefix
1263+
12351264
# Alias sep -> delimiter.
12361265
if delimiter is None:
12371266
delimiter = sep

pandas/tests/io/parser/common/test_common_basic.py

+12
Original file line numberDiff line numberDiff line change
@@ -740,6 +740,18 @@ def test_read_table_delim_whitespace_non_default_sep(all_parsers, delimiter):
740740
parser.read_table(f, delim_whitespace=True, delimiter=delimiter)
741741

742742

743+
@pytest.mark.parametrize("func", ["read_csv", "read_table"])
744+
@pytest.mark.parametrize("prefix", [None, "x"])
745+
@pytest.mark.parametrize("names", [None, ["a"]])
746+
def test_names_and_prefix_not_lib_no_default(all_parsers, names, prefix, func):
747+
# GH#39123
748+
f = StringIO("a,b\n1,2")
749+
parser = all_parsers
750+
msg = "Specified named and prefix; you can only specify one."
751+
with pytest.raises(ValueError, match=msg):
752+
getattr(parser, func)(f, names=names, prefix=prefix)
753+
754+
743755
def test_dict_keys_as_names(all_parsers):
744756
# GH: 36928
745757
data = "1,2"

0 commit comments

Comments
 (0)