From 5623030846cf7c26d28113c544a941cce901e65b Mon Sep 17 00:00:00 2001 From: phofl Date: Thu, 13 May 2021 00:48:31 +0200 Subject: [PATCH 1/4] BUG: Raise ValueError if names and prefix are both defined --- pandas/tests/io/parser/common/test_common_basic.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/pandas/tests/io/parser/common/test_common_basic.py b/pandas/tests/io/parser/common/test_common_basic.py index fe68597d11f0b..ed395df53432e 100644 --- a/pandas/tests/io/parser/common/test_common_basic.py +++ b/pandas/tests/io/parser/common/test_common_basic.py @@ -740,6 +740,18 @@ def test_read_table_delim_whitespace_non_default_sep(all_parsers, delimiter): parser.read_table(f, delim_whitespace=True, delimiter=delimiter) +@pytest.mark.parametrize("func", ["read_csv", "read_table"]) +@pytest.mark.parametrize("prefix", [None, "x"]) +@pytest.mark.parametrize("names", [None, ["a"]]) +def test_names_and_prefix_not_lib_no_default(all_parsers, names, prefix, func): + # GH#39123 + f = StringIO("a,b\n1,2") + parser = all_parsers + msg = "Specified names and prefix; you can only specify one." 
+ with pytest.raises(ValueError, match=msg): + getattr(parser, func)(f, names=names, prefix=prefix) + + def test_dict_keys_as_names(all_parsers): # GH: 36928 data = "1,2" From 5893eeda617f9245ad0f4f3d557f35908c39fd9d Mon Sep 17 00:00:00 2001 From: phofl Date: Thu, 13 May 2021 00:51:03 +0200 Subject: [PATCH 2/4] BUG: Raise ValueError if names and prefix are both defined --- pandas/io/parsers/readers.py | 41 ++++++++++++++++++++++++++++++------ 1 file changed, 35 insertions(+), 6 deletions(-) diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py index 55e3e14a0969d..209be4295a1b3 100644 --- a/pandas/io/parsers/readers.py +++ b/pandas/io/parsers/readers.py @@ -20,6 +20,7 @@ import pandas._libs.lib as lib from pandas._libs.parsers import STR_NA_VALUES from pandas._typing import ( + ArrayLike, DtypeArg, FilePathOrBuffer, StorageOptions, @@ -485,11 +486,11 @@ def read_csv( delimiter=None, # Column and Index Locations and Names header="infer", - names=None, + names=lib.no_default, index_col=None, usecols=None, squeeze=False, - prefix=None, + prefix=lib.no_default, mangle_dupe_cols=True, # General Parsing Configuration dtype: Optional[DtypeArg] = None, @@ -546,7 +547,14 @@ def read_csv( del kwds["sep"] kwds_defaults = _refine_defaults_read( - dialect, delimiter, delim_whitespace, engine, sep, defaults={"delimiter": ","} + dialect, + delimiter, + delim_whitespace, + engine, + sep, + names, + prefix, + defaults={"delimiter": ","}, ) kwds.update(kwds_defaults) @@ -567,11 +575,11 @@ def read_table( delimiter=None, # Column and Index Locations and Names header="infer", - names=None, + names=lib.no_default, index_col=None, usecols=None, squeeze=False, - prefix=None, + prefix=lib.no_default, mangle_dupe_cols=True, # General Parsing Configuration dtype: Optional[DtypeArg] = None, @@ -627,7 +635,14 @@ def read_table( del kwds["sep"] kwds_defaults = _refine_defaults_read( - dialect, delimiter, delim_whitespace, engine, sep, defaults={"delimiter": "\t"} + dialect, 
+ delimiter, + delim_whitespace, + engine, + sep, + names, + prefix, + defaults={"delimiter": "\t"}, ) kwds.update(kwds_defaults) @@ -1174,6 +1189,8 @@ def _refine_defaults_read( delim_whitespace: bool, engine: str, sep: Union[str, object], + names: Optional[ArrayLike], + prefix: Optional[str], defaults: Dict[str, Any], ): """Validate/refine default values of input parameters of read_csv, read_table. @@ -1199,6 +1216,12 @@ def _refine_defaults_read( sep : str or object A delimiter provided by the user (str) or a sentinel value, i.e. pandas._libs.lib.no_default. + names : array-like, optional + List of column names to use. If the file contains a header row, + then you should explicitly pass ``header=0`` to override the column names. + Duplicates in this list are not allowed. + prefix : str, optional + Prefix to add to column numbers when no header, e.g. 'X' for X0, X1, ... defaults: dict Default values of input parameters. @@ -1232,6 +1255,12 @@ def _refine_defaults_read( sep is lib.no_default or sep == delim_default ) + if names is not lib.no_default and prefix is not lib.no_default: + raise ValueError("Specified names and prefix; you can only specify one.") + + kwds["names"] = None if names is lib.no_default else names + kwds["prefix"] = None if prefix is lib.no_default else prefix + # Alias sep -> delimiter. 
if delimiter is None: delimiter = sep From f93deab6eb51cae2429e5fdddfa6966b30f1f0a2 Mon Sep 17 00:00:00 2001 From: phofl Date: Thu, 13 May 2021 00:51:13 +0200 Subject: [PATCH 3/4] BUG: Raise ValueError if names and prefix are both defined --- doc/source/whatsnew/v1.3.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 84f9dae8a0850..6fbaa85c7adcd 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -840,6 +840,7 @@ I/O - Bug in :func:`read_excel` raising ``AttributeError`` with ``MultiIndex`` header followed by two empty rows and no index, and bug affecting :func:`read_excel`, :func:`read_csv`, :func:`read_table`, :func:`read_fwf`, and :func:`read_clipboard` where one blank row after a ``MultiIndex`` header with no index would be dropped (:issue:`40442`) - Bug in :meth:`DataFrame.to_string` misplacing the truncation column when ``index=False`` (:issue:`40907`) - Bug in :func:`read_orc` always raising ``AttributeError`` (:issue:`40918`) +- Bug in :func:`read_csv` and :func:`read_table` silently ignoring ``prefix`` if ``names`` and ``prefix`` are defined, now raising ``ValueError`` (:issue:`39123`) - Bug in :func:`read_csv` and :func:`read_excel` not respecting dtype for duplicated column name when ``mangle_dupe_cols`` is set to ``True`` (:issue:`35211`) - Bug in :func:`read_csv` and :func:`read_table` misinterpreting arguments when ``sys.setprofile`` had been previously called (:issue:`41069`) - Bug in the conversion from pyarrow to pandas (e.g. 
for reading Parquet) with nullable dtypes and a pyarrow array whose data buffer size is not a multiple of dtype size (:issue:`40896`) From 1c537971f0df96e44e9f1cfd775f0d0b999eb3dc Mon Sep 17 00:00:00 2001 From: phofl Date: Thu, 13 May 2021 16:51:46 +0200 Subject: [PATCH 4/4] Fix typing bugs --- pandas/io/parsers/readers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py index 209be4295a1b3..9f7539f575308 100644 --- a/pandas/io/parsers/readers.py +++ b/pandas/io/parsers/readers.py @@ -1189,8 +1189,8 @@ def _refine_defaults_read( delim_whitespace: bool, engine: str, sep: Union[str, object], - names: Optional[ArrayLike], - prefix: Optional[str], + names: Union[Optional[ArrayLike], object], + prefix: Union[Optional[str], object], defaults: Dict[str, Any], ): """Validate/refine default values of input parameters of read_csv, read_table.