Skip to content

Commit 57823e6

Browse files
phofl authored and noatamir committed
DEP: Enforce deprecations of read_csv keywords (pandas-dev#48849)
1 parent 6cb366b commit 57823e6

File tree

12 files changed

+45
-523
lines changed

12 files changed

+45
-523
lines changed

doc/source/user_guide/io.rst

-69
Original file line numberDiff line numberDiff line change
@@ -154,25 +154,6 @@ usecols : list-like or callable, default ``None``
154154
Using this parameter results in much faster parsing time and lower memory usage
155155
when using the c engine. The Python engine loads the data first before deciding
156156
which columns to drop.
157-
squeeze : boolean, default ``False``
158-
If the parsed data only contains one column then return a ``Series``.
159-
160-
.. deprecated:: 1.4.0
161-
Append ``.squeeze("columns")`` to the call to ``{func_name}`` to squeeze
162-
the data.
163-
prefix : str, default ``None``
164-
Prefix to add to column numbers when no header, e.g. 'X' for X0, X1, ...
165-
166-
.. deprecated:: 1.4.0
167-
Use a list comprehension on the DataFrame's columns after calling ``read_csv``.
168-
169-
.. ipython:: python
170-
171-
data = "col1,col2,col3\na,b,1"
172-
173-
df = pd.read_csv(StringIO(data))
174-
df.columns = [f"pre_{col}" for col in df.columns]
175-
df
176157

177158
mangle_dupe_cols : boolean, default ``True``
178159
Duplicate columns will be specified as 'X', 'X.1'...'X.N', rather than 'X'...'X'.
@@ -395,23 +376,6 @@ dialect : str or :class:`python:csv.Dialect` instance, default ``None``
395376
Error handling
396377
++++++++++++++
397378

398-
error_bad_lines : boolean, optional, default ``None``
399-
Lines with too many fields (e.g. a csv line with too many commas) will by
400-
default cause an exception to be raised, and no ``DataFrame`` will be
401-
returned. If ``False``, then these "bad lines" will be dropped from the
402-
``DataFrame`` that is returned. See :ref:`bad lines <io.bad_lines>`
403-
below.
404-
405-
.. deprecated:: 1.3.0
406-
The ``on_bad_lines`` parameter should be used instead to specify behavior upon
407-
encountering a bad line.
408-
warn_bad_lines : boolean, optional, default ``None``
409-
If error_bad_lines is ``False``, and warn_bad_lines is ``True``, a warning for
410-
each "bad line" will be output.
411-
412-
.. deprecated:: 1.3.0
413-
The ``on_bad_lines`` parameter should be used instead to specify behavior upon
414-
encountering a bad line.
415379
on_bad_lines : {{'error', 'warn', 'skip'}}, default 'error'
416380
Specifies what to do upon encountering a bad line (a line with too many fields).
417381
Allowed values are:
@@ -1221,37 +1185,6 @@ Infinity
12211185
``inf`` like values will be parsed as ``np.inf`` (positive infinity), and ``-inf`` as ``-np.inf`` (negative infinity).
12221186
These will ignore the case of the value, meaning ``Inf`` will also be parsed as ``np.inf``.
12231187

1224-
1225-
Returning Series
1226-
''''''''''''''''
1227-
1228-
Using the ``squeeze`` keyword, the parser will return output with a single column
1229-
as a ``Series``:
1230-
1231-
.. deprecated:: 1.4.0
1232-
Users should append ``.squeeze("columns")`` to the DataFrame returned by
1233-
``read_csv`` instead.
1234-
1235-
.. ipython:: python
1236-
:okwarning:
1237-
1238-
data = "level\nPatient1,123000\nPatient2,23000\nPatient3,1234018"
1239-
1240-
with open("tmp.csv", "w") as fh:
1241-
fh.write(data)
1242-
1243-
print(open("tmp.csv").read())
1244-
1245-
output = pd.read_csv("tmp.csv", squeeze=True)
1246-
output
1247-
1248-
type(output)
1249-
1250-
.. ipython:: python
1251-
:suppress:
1252-
1253-
os.remove("tmp.csv")
1254-
12551188
.. _io.boolean:
12561189

12571190
Boolean values
@@ -1708,8 +1641,6 @@ Options that are unsupported by the pyarrow engine which are not covered by the
17081641
* ``thousands``
17091642
* ``memory_map``
17101643
* ``dialect``
1711-
* ``warn_bad_lines``
1712-
* ``error_bad_lines``
17131644
* ``on_bad_lines``
17141645
* ``delim_whitespace``
17151646
* ``quoting``

doc/source/whatsnew/v2.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -213,6 +213,7 @@ Removal of prior version deprecations/changes
213213
- Removed argument ``sort_columns`` in :meth:`DataFrame.plot` and :meth:`Series.plot` (:issue:`47563`)
214214
- Removed argument ``is_copy`` from :meth:`DataFrame.take` and :meth:`Series.take` (:issue:`30615`)
215215
- Removed argument ``kind`` from :meth:`Index.get_slice_bound`, :meth:`Index.slice_indexer` and :meth:`Index.slice_locs` (:issue:`41378`)
216+
- Removed arguments ``prefix``, ``squeeze``, ``error_bad_lines`` and ``warn_bad_lines`` from :func:`read_csv` (:issue:`40413`, :issue:`43427`)
216217
- Removed argument ``datetime_is_numeric`` from :meth:`DataFrame.describe` and :meth:`Series.describe` as datetime data will always be summarized as numeric data (:issue:`34798`)
217218
- Disallow subclass-specific keywords (e.g. "freq", "tz", "names", "closed") in the :class:`Index` constructor (:issue:`38597`)
218219
- Removed argument ``inplace`` from :meth:`Categorical.remove_unused_categories` (:issue:`37918`)

pandas/io/parsers/arrow_parser_wrapper.py

+1-3
Original file line numberDiff line numberDiff line change
@@ -95,9 +95,7 @@ def _finalize_output(self, frame: DataFrame) -> DataFrame:
9595
multi_index_named = True
9696
if self.header is None:
9797
if self.names is None:
98-
if self.prefix is not None:
99-
self.names = [f"{self.prefix}{i}" for i in range(num_cols)]
100-
elif self.header is None:
98+
if self.header is None:
10199
self.names = range(num_cols)
102100
if len(self.names) != num_cols:
103101
# usecols is passed through to pyarrow, we only handle index col here

pandas/io/parsers/base_parser.py

-10
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,6 @@ def __init__(self, kwds) -> None:
9797

9898
self.names = kwds.get("names")
9999
self.orig_names: list | None = None
100-
self.prefix = kwds.pop("prefix", None)
101100

102101
self.index_col = kwds.get("index_col", None)
103102
self.unnamed_cols: set = set()
@@ -155,11 +154,6 @@ def __init__(self, kwds) -> None:
155154
"index_col must only contain row numbers "
156155
"when specifying a multi-index header"
157156
)
158-
elif self.header is not None and self.prefix is not None:
159-
# GH 27394
160-
raise ValueError(
161-
"Argument prefix must be None if argument header is not None"
162-
)
163157

164158
self._name_processed = False
165159

@@ -1161,7 +1155,6 @@ def converter(*date_cols):
11611155
"header": "infer",
11621156
"index_col": None,
11631157
"names": None,
1164-
"prefix": None,
11651158
"skiprows": None,
11661159
"skipfooter": 0,
11671160
"nrows": None,
@@ -1185,15 +1178,12 @@ def converter(*date_cols):
11851178
"chunksize": None,
11861179
"verbose": False,
11871180
"encoding": None,
1188-
"squeeze": None,
11891181
"compression": None,
11901182
"mangle_dupe_cols": True,
11911183
"infer_datetime_format": False,
11921184
"skip_blank_lines": True,
11931185
"encoding_errors": "strict",
11941186
"on_bad_lines": ParserBase.BadLineHandleMethod.ERROR,
1195-
"error_bad_lines": None,
1196-
"warn_bad_lines": None,
11971187
"use_nullable_dtypes": False,
11981188
}
11991189

pandas/io/parsers/c_parser_wrapper.py

+2-12
Original file line numberDiff line numberDiff line change
@@ -71,8 +71,6 @@ def __init__(self, src: ReadCsvBuffer[str], **kwds) -> None:
7171
"encoding",
7272
"memory_map",
7373
"compression",
74-
"error_bad_lines",
75-
"warn_bad_lines",
7674
):
7775
kwds.pop(key, None)
7876

@@ -102,16 +100,8 @@ def __init__(self, src: ReadCsvBuffer[str], **kwds) -> None:
102100

103101
# error: Cannot determine type of 'names'
104102
if self.names is None: # type: ignore[has-type]
105-
if self.prefix:
106-
# error: Cannot determine type of 'names'
107-
self.names = [ # type: ignore[has-type]
108-
f"{self.prefix}{i}" for i in range(self._reader.table_width)
109-
]
110-
else:
111-
# error: Cannot determine type of 'names'
112-
self.names = list( # type: ignore[has-type]
113-
range(self._reader.table_width)
114-
)
103+
# error: Cannot determine type of 'names'
104+
self.names = list(range(self._reader.table_width)) # type: ignore[has-type]
115105

116106
# gh-9755
117107
#

pandas/io/parsers/python_parser.py

+1-4
Original file line numberDiff line numberDiff line change
@@ -536,10 +536,7 @@ def _infer_columns(
536536
num_original_columns = ncols
537537

538538
if not names:
539-
if self.prefix:
540-
columns = [[f"{self.prefix}{i}" for i in range(ncols)]]
541-
else:
542-
columns = [list(range(ncols))]
539+
columns = [list(range(ncols))]
543540
columns = self._handle_usecols(
544541
columns, columns[0], num_original_columns
545542
)

0 commit comments

Comments
 (0)