Skip to content

Commit 14e1c5a

Browse files
authored
CLN: Remove tupleize_cols keyword in io methods; from_csv method (#27126)
* Remove tupleize_cols keyword in read_html, read_csv, to_csv * Remove from_csv * Remove warning check and type
1 parent cb5b75b commit 14e1c5a

File tree

14 files changed

+16
-273
lines changed

14 files changed

+16
-273
lines changed

doc/source/reference/frame.rst

-1
Original file line numberDiff line numberDiff line change
@@ -336,7 +336,6 @@ Serialization / IO / conversion
336336
.. autosummary::
337337
:toctree: api/
338338

339-
DataFrame.from_csv
340339
DataFrame.from_dict
341340
DataFrame.from_items
342341
DataFrame.from_records

doc/source/user_guide/io.rst

-9
Original file line numberDiff line numberDiff line change
@@ -340,13 +340,6 @@ dialect : str or :class:`python:csv.Dialect` instance, default ``None``
340340
`skipinitialspace`, `quotechar`, and `quoting`. If it is necessary to
341341
override values, a ParserWarning will be issued. See :class:`python:csv.Dialect`
342342
documentation for more details.
343-
tupleize_cols : boolean, default ``False``
344-
.. deprecated:: 0.21.0
345-
346-
This argument will be removed and will always convert to MultiIndex
347-
348-
Leave a list of tuples on columns as is (default is to convert to a MultiIndex
349-
on the columns).
350343

351344
Error handling
352345
++++++++++++++
@@ -1718,8 +1711,6 @@ function takes a number of arguments. Only the first is required.
17181711
* ``escapechar``: Character used to escape ``sep`` and ``quotechar`` when
17191712
appropriate (default None)
17201713
* ``chunksize``: Number of rows to write at a time
1721-
* ``tupleize_cols``: If False (default), write as a list of tuples, otherwise
1722-
write in an expanded line format suitable for ``read_csv``
17231714
* ``date_format``: Format string for datetime objects
17241715

17251716
Writing a formatted string

doc/source/whatsnew/v0.25.0.rst

+2
Original file line numberDiff line numberDiff line change
@@ -630,6 +630,8 @@ Removal of prior version deprecations/changes
630630
- Removed the previously deprecated behavior of :class:`Series` treated as list-like in :meth:`~Series.cat.rename_categories` (:issue:`17982`)
631631
- Removed the previously deprecated ``DataFrame.reindex_axis`` and ``Series.reindex_axis`` (:issue:`17842`)
632632
- Removed the previously deprecated behavior of altering column or index labels with :meth:`Series.rename_axis` or :meth:`DataFrame.rename_axis` (:issue:`17842`)
633+
- Removed the previously deprecated ``tupleize_cols`` keyword argument in :meth:`read_html`, :meth:`read_csv`, and :meth:`DataFrame.to_csv` (:issue:`17877`, :issue:`17820`)
634+
- Removed the previously deprecated ``DataFrame.from_csv`` and ``Series.from_csv`` (:issue:`17812`)
633635

634636
.. _whatsnew_0250.performance:
635637

pandas/_libs/parsers.pyx

-3
Original file line numberDiff line numberDiff line change
@@ -297,7 +297,6 @@ cdef class TextReader:
297297
object encoding
298298
object compression
299299
object mangle_dupe_cols
300-
object tupleize_cols
301300
object usecols
302301
list dtype_cast_order
303302
set unnamed_cols
@@ -351,7 +350,6 @@ cdef class TextReader:
351350
skipfooter=0,
352351
verbose=False,
353352
mangle_dupe_cols=True,
354-
tupleize_cols=False,
355353
float_precision=None,
356354
skip_blank_lines=True):
357355

@@ -370,7 +368,6 @@ cdef class TextReader:
370368
self.parser.chunksize = tokenize_chunksize
371369

372370
self.mangle_dupe_cols = mangle_dupe_cols
373-
self.tupleize_cols = tupleize_cols
374371

375372
# For timekeeping
376373
self.clocks = []

pandas/core/frame.py

+1-68
Original file line numberDiff line numberDiff line change
@@ -331,7 +331,7 @@ def _constructor(self):
331331

332332
_constructor_sliced = Series # type: Type[Series]
333333
_deprecations = NDFrame._deprecations | frozenset([
334-
'get_value', 'set_value', 'from_csv', 'from_items'
334+
'get_value', 'set_value', 'from_items'
335335
]) # type: FrozenSet[str]
336336
_accessors = set() # type: Set[str]
337337

@@ -1786,73 +1786,6 @@ def _from_arrays(cls, arrays, columns, index, dtype=None):
17861786
mgr = arrays_to_mgr(arrays, columns, index, columns, dtype=dtype)
17871787
return cls(mgr)
17881788

1789-
@classmethod
1790-
def from_csv(cls, path, header=0, sep=',', index_col=0, parse_dates=True,
1791-
encoding=None, tupleize_cols=None,
1792-
infer_datetime_format=False):
1793-
"""
1794-
Read CSV file.
1795-
1796-
.. deprecated:: 0.21.0
1797-
Use :func:`read_csv` instead.
1798-
1799-
It is preferable to use the more powerful :func:`read_csv`
1800-
for most general purposes, but ``from_csv`` makes for an easy
1801-
roundtrip to and from a file (the exact counterpart of
1802-
``to_csv``), especially with a DataFrame of time series data.
1803-
1804-
This method only differs from the preferred :func:`read_csv`
1805-
in some defaults:
1806-
1807-
- `index_col` is ``0`` instead of ``None`` (take first column as index
1808-
by default)
1809-
- `parse_dates` is ``True`` instead of ``False`` (try parsing the index
1810-
as datetime by default)
1811-
1812-
So a ``pd.DataFrame.from_csv(path)`` can be replaced by
1813-
``pd.read_csv(path, index_col=0, parse_dates=True)``.
1814-
1815-
Parameters
1816-
----------
1817-
path : string file path or file handle / StringIO
1818-
header : int, default 0
1819-
Row to use as header (skip prior rows)
1820-
sep : string, default ','
1821-
Field delimiter
1822-
index_col : int or sequence, default 0
1823-
Column to use for index. If a sequence is given, a MultiIndex
1824-
is used. Different default from read_table
1825-
parse_dates : boolean, default True
1826-
Parse dates. Different default from read_table
1827-
tupleize_cols : boolean, default False
1828-
write multi_index columns as a list of tuples (if True)
1829-
or new (expanded format) if False)
1830-
infer_datetime_format : boolean, default False
1831-
If True and `parse_dates` is True for a column, try to infer the
1832-
datetime format based on the first datetime string. If the format
1833-
can be inferred, there often will be a large parsing speed-up.
1834-
1835-
Returns
1836-
-------
1837-
DataFrame
1838-
1839-
See Also
1840-
--------
1841-
read_csv
1842-
"""
1843-
1844-
warnings.warn("from_csv is deprecated. Please use read_csv(...) "
1845-
"instead. Note that some of the default arguments are "
1846-
"different, so please refer to the documentation "
1847-
"for from_csv when changing your function calls",
1848-
FutureWarning, stacklevel=2)
1849-
1850-
from pandas.io.parsers import read_csv
1851-
return read_csv(path, header=header, sep=sep,
1852-
parse_dates=parse_dates, index_col=index_col,
1853-
encoding=encoding, tupleize_cols=tupleize_cols,
1854-
infer_datetime_format=infer_datetime_format)
1855-
18561789
def to_sparse(self, fill_value=None, kind='block'):
18571790
"""
18581791
Convert to SparseDataFrame.

pandas/core/generic.py

+1-17
Original file line numberDiff line numberDiff line change
@@ -2903,7 +2903,7 @@ def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None,
29032903
columns=None, header=True, index=True, index_label=None,
29042904
mode='w', encoding=None, compression='infer', quoting=None,
29052905
quotechar='"', line_terminator=None, chunksize=None,
2906-
tupleize_cols=None, date_format=None, doublequote=True,
2906+
date_format=None, doublequote=True,
29072907
escapechar=None, decimal='.'):
29082908
r"""
29092909
Write object to a comma-separated values (csv) file.
@@ -2976,14 +2976,6 @@ def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None,
29762976
.. versionchanged:: 0.24.0
29772977
chunksize : int or None
29782978
Rows to write at a time.
2979-
tupleize_cols : bool, default False
2980-
Write MultiIndex columns as a list of tuples (if True) or in
2981-
the new, expanded format, where each MultiIndex column is a row
2982-
in the CSV (if False).
2983-
2984-
.. deprecated:: 0.21.0
2985-
This argument will be removed and will always write each row
2986-
of the multi-index as a separate row in the CSV file.
29872979
date_format : str, default None
29882980
Format string for datetime objects.
29892981
doublequote : bool, default True
@@ -3017,13 +3009,6 @@ def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None,
30173009

30183010
df = self if isinstance(self, ABCDataFrame) else self.to_frame()
30193011

3020-
if tupleize_cols is not None:
3021-
warnings.warn("The 'tupleize_cols' parameter is deprecated and "
3022-
"will be removed in a future version",
3023-
FutureWarning, stacklevel=2)
3024-
else:
3025-
tupleize_cols = False
3026-
30273012
from pandas.io.formats.csvs import CSVFormatter
30283013
formatter = CSVFormatter(df, path_or_buf,
30293014
line_terminator=line_terminator, sep=sep,
@@ -3033,7 +3018,6 @@ def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None,
30333018
cols=columns, header=header, index=index,
30343019
index_label=index_label, mode=mode,
30353020
chunksize=chunksize, quotechar=quotechar,
3036-
tupleize_cols=tupleize_cols,
30373021
date_format=date_format,
30383022
doublequote=doublequote,
30393023
escapechar=escapechar, decimal=decimal)

pandas/core/series.py

+2-70
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,7 @@ class Series(base.IndexOpsMixin, generic.NDFrame):
137137
# tolist is not actually deprecated, just suppressed in the __dir__
138138
_deprecations = generic.NDFrame._deprecations | frozenset(
139139
['asobject', 'reshape', 'get_value', 'set_value',
140-
'from_csv', 'valid', 'tolist'])
140+
'valid', 'tolist'])
141141

142142
# Override cache_readonly bc Series is mutable
143143
hasnans = property(base.IndexOpsMixin.hasnans.func,
@@ -4212,81 +4212,13 @@ def between(self, left, right, inclusive=True):
42124212

42134213
return lmask & rmask
42144214

4215-
@classmethod
4216-
def from_csv(cls, path, sep=',', parse_dates=True, header=None,
4217-
index_col=0, encoding=None, infer_datetime_format=False):
4218-
"""
4219-
Read CSV file.
4220-
4221-
.. deprecated:: 0.21.0
4222-
Use :func:`pandas.read_csv` instead.
4223-
4224-
It is preferable to use the more powerful :func:`pandas.read_csv`
4225-
for most general purposes, but ``from_csv`` makes for an easy
4226-
roundtrip to and from a file (the exact counterpart of
4227-
``to_csv``), especially with a time Series.
4228-
4229-
This method only differs from :func:`pandas.read_csv` in some defaults:
4230-
4231-
- `index_col` is ``0`` instead of ``None`` (take first column as index
4232-
by default)
4233-
- `header` is ``None`` instead of ``0`` (the first row is not used as
4234-
the column names)
4235-
- `parse_dates` is ``True`` instead of ``False`` (try parsing the index
4236-
as datetime by default)
4237-
4238-
With :func:`pandas.read_csv`, the option ``squeeze=True`` can be used
4239-
to return a Series like ``from_csv``.
4240-
4241-
Parameters
4242-
----------
4243-
path : str, file path, or file handle / StringIO
4244-
sep : str, default ','
4245-
Field delimiter.
4246-
parse_dates : bool, default True
4247-
Parse dates. Different default from read_table.
4248-
header : int, default None
4249-
Row to use as header (skip prior rows).
4250-
index_col : int or sequence, default 0
4251-
Column to use for index. If a sequence is given, a MultiIndex
4252-
is used. Different default from read_table.
4253-
encoding : str, optional
4254-
A string representing the encoding to use if the contents are
4255-
non-ascii, for python versions prior to 3.
4256-
infer_datetime_format : bool, default False
4257-
If True and `parse_dates` is True for a column, try to infer the
4258-
datetime format based on the first datetime string. If the format
4259-
can be inferred, there often will be a large parsing speed-up.
4260-
4261-
Returns
4262-
-------
4263-
Series
4264-
4265-
See Also
4266-
--------
4267-
read_csv
4268-
"""
4269-
4270-
# We're calling `DataFrame.from_csv` in the implementation,
4271-
# which will propagate a warning regarding `from_csv` deprecation.
4272-
from pandas.core.frame import DataFrame
4273-
df = DataFrame.from_csv(path, header=header, index_col=index_col,
4274-
sep=sep, parse_dates=parse_dates,
4275-
encoding=encoding,
4276-
infer_datetime_format=infer_datetime_format)
4277-
result = df.iloc[:, 0]
4278-
if header is None:
4279-
result.index.name = result.name = None
4280-
4281-
return result
4282-
42834215
@Appender(generic.NDFrame.to_csv.__doc__)
42844216
def to_csv(self, *args, **kwargs):
42854217

42864218
names = ["path_or_buf", "sep", "na_rep", "float_format", "columns",
42874219
"header", "index", "index_label", "mode", "encoding",
42884220
"compression", "quoting", "quotechar", "line_terminator",
4289-
"chunksize", "tupleize_cols", "date_format", "doublequote",
4221+
"chunksize", "date_format", "doublequote",
42904222
"escapechar", "decimal"]
42914223

42924224
old_names = ["path_or_buf", "index", "sep", "na_rep", "float_format",

pandas/io/formats/csvs.py

+3-5
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,9 @@ class CSVFormatter:
2424

2525
def __init__(self, obj, path_or_buf=None, sep=",", na_rep='',
2626
float_format=None, cols=None, header=True, index=True,
27-
index_label=None, mode='w', nanRep=None, encoding=None,
27+
index_label=None, mode='w', encoding=None,
2828
compression='infer', quoting=None, line_terminator='\n',
29-
chunksize=None, tupleize_cols=False, quotechar='"',
29+
chunksize=None, quotechar='"',
3030
date_format=None, doublequote=True, escapechar=None,
3131
decimal='.'):
3232

@@ -68,9 +68,7 @@ def __init__(self, obj, path_or_buf=None, sep=",", na_rep='',
6868

6969
self.date_format = date_format
7070

71-
self.tupleize_cols = tupleize_cols
72-
self.has_mi_columns = (isinstance(obj.columns, ABCMultiIndex) and
73-
not self.tupleize_cols)
71+
self.has_mi_columns = isinstance(obj.columns, ABCMultiIndex)
7472

7573
# validate mi options
7674
if self.has_mi_columns:

pandas/io/html.py

+2-10
Original file line numberDiff line numberDiff line change
@@ -912,7 +912,7 @@ def _parse(flavor, io, match, attrs, encoding, displayed_only, **kwargs):
912912

913913
def read_html(io, match='.+', flavor=None, header=None, index_col=None,
914914
skiprows=None, attrs=None, parse_dates=False,
915-
tupleize_cols=None, thousands=',', encoding=None,
915+
thousands=',', encoding=None,
916916
decimal='.', converters=None, na_values=None,
917917
keep_default_na=True, displayed_only=True):
918918
r"""Read HTML tables into a ``list`` of ``DataFrame`` objects.
@@ -976,14 +976,6 @@ def read_html(io, match='.+', flavor=None, header=None, index_col=None,
976976
parse_dates : bool, optional
977977
See :func:`~read_csv` for more details.
978978
979-
tupleize_cols : bool, optional
980-
If ``False`` try to parse multiple header rows into a
981-
:class:`~pandas.MultiIndex`, otherwise return raw tuples. Defaults to
982-
``False``.
983-
984-
.. deprecated:: 0.21.0
985-
This argument will be removed and will always convert to MultiIndex
986-
987979
thousands : str, optional
988980
Separator to use to parse thousands. Defaults to ``','``.
989981
@@ -1073,7 +1065,7 @@ def read_html(io, match='.+', flavor=None, header=None, index_col=None,
10731065
_validate_header_arg(header)
10741066
return _parse(flavor=flavor, io=io, match=match, header=header,
10751067
index_col=index_col, skiprows=skiprows,
1076-
parse_dates=parse_dates, tupleize_cols=tupleize_cols,
1068+
parse_dates=parse_dates,
10771069
thousands=thousands, attrs=attrs, encoding=encoding,
10781070
decimal=decimal, converters=converters, na_values=na_values,
10791071
keep_default_na=keep_default_na,

0 commit comments

Comments
 (0)