Skip to content

CLN: Remove tupleize_cols keyword in io methods; from_csv method #27126

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Jun 29, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion doc/source/reference/frame.rst
Original file line number Diff line number Diff line change
Expand Up @@ -336,7 +336,6 @@ Serialization / IO / conversion
.. autosummary::
:toctree: api/

DataFrame.from_csv
DataFrame.from_dict
DataFrame.from_items
DataFrame.from_records
Expand Down
9 changes: 0 additions & 9 deletions doc/source/user_guide/io.rst
Original file line number Diff line number Diff line change
Expand Up @@ -340,13 +340,6 @@ dialect : str or :class:`python:csv.Dialect` instance, default ``None``
`skipinitialspace`, `quotechar`, and `quoting`. If it is necessary to
override values, a ParserWarning will be issued. See :class:`python:csv.Dialect`
documentation for more details.
tupleize_cols : boolean, default ``False``
.. deprecated:: 0.21.0

This argument will be removed and will always convert to MultiIndex

Leave a list of tuples on columns as is (default is to convert to a MultiIndex
on the columns).

Error handling
++++++++++++++
Expand Down Expand Up @@ -1718,8 +1711,6 @@ function takes a number of arguments. Only the first is required.
* ``escapechar``: Character used to escape ``sep`` and ``quotechar`` when
appropriate (default None)
* ``chunksize``: Number of rows to write at a time
* ``tupleize_cols``: If False (default), write MultiIndex columns in an expanded
line format suitable for ``read_csv``, otherwise write them as a list of tuples
* ``date_format``: Format string for datetime objects

Writing a formatted string
Expand Down
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v0.25.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -630,6 +630,8 @@ Removal of prior version deprecations/changes
- Removed the previously deprecated behavior of :class:`Series` treated as list-like in :meth:`~Series.cat.rename_categories` (:issue:`17982`)
- Removed the previously deprecated ``DataFrame.reindex_axis`` and ``Series.reindex_axis`` (:issue:`17842`)
- Removed the previously deprecated behavior of altering column or index labels with :meth:`Series.rename_axis` or :meth:`DataFrame.rename_axis` (:issue:`17842`)
- Removed the previously deprecated ``tupleize_cols`` keyword argument in :meth:`read_html`, :meth:`read_csv`, and :meth:`DataFrame.to_csv` (:issue:`17877`, :issue:`17820`)
- Removed the previously deprecated ``DataFrame.from_csv`` and ``Series.from_csv`` (:issue:`17812`)

.. _whatsnew_0250.performance:

Expand Down
3 changes: 0 additions & 3 deletions pandas/_libs/parsers.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -297,7 +297,6 @@ cdef class TextReader:
object encoding
object compression
object mangle_dupe_cols
object tupleize_cols
object usecols
list dtype_cast_order
set unnamed_cols
Expand Down Expand Up @@ -351,7 +350,6 @@ cdef class TextReader:
skipfooter=0,
verbose=False,
mangle_dupe_cols=True,
tupleize_cols=False,
float_precision=None,
skip_blank_lines=True):

Expand All @@ -370,7 +368,6 @@ cdef class TextReader:
self.parser.chunksize = tokenize_chunksize

self.mangle_dupe_cols = mangle_dupe_cols
self.tupleize_cols = tupleize_cols

# For timekeeping
self.clocks = []
Expand Down
69 changes: 1 addition & 68 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -331,7 +331,7 @@ def _constructor(self):

_constructor_sliced = Series # type: Type[Series]
_deprecations = NDFrame._deprecations | frozenset([
'get_value', 'set_value', 'from_csv', 'from_items'
'get_value', 'set_value', 'from_items'
]) # type: FrozenSet[str]
_accessors = set() # type: Set[str]

Expand Down Expand Up @@ -1786,73 +1786,6 @@ def _from_arrays(cls, arrays, columns, index, dtype=None):
mgr = arrays_to_mgr(arrays, columns, index, columns, dtype=dtype)
return cls(mgr)

@classmethod
def from_csv(cls, path, header=0, sep=',', index_col=0, parse_dates=True,
encoding=None, tupleize_cols=None,
infer_datetime_format=False):
"""
Read CSV file.

.. deprecated:: 0.21.0
Use :func:`read_csv` instead.

It is preferable to use the more powerful :func:`read_csv`
for most general purposes, but ``from_csv`` makes for an easy
roundtrip to and from a file (the exact counterpart of
``to_csv``), especially with a DataFrame of time series data.

This method only differs from the preferred :func:`read_csv`
in some defaults:

- `index_col` is ``0`` instead of ``None`` (take first column as index
by default)
- `parse_dates` is ``True`` instead of ``False`` (try parsing the index
as datetime by default)

So a ``pd.DataFrame.from_csv(path)`` can be replaced by
``pd.read_csv(path, index_col=0, parse_dates=True)``.

Parameters
----------
path : string file path or file handle / StringIO
header : int, default 0
Row to use as header (skip prior rows)
sep : string, default ','
Field delimiter
index_col : int or sequence, default 0
Column to use for index. If a sequence is given, a MultiIndex
is used. Different default from read_table
parse_dates : boolean, default True
Parse dates. Different default from read_table
tupleize_cols : boolean, default False
write multi_index columns as a list of tuples (if True)
or new (expanded format) if False)
infer_datetime_format : boolean, default False
If True and `parse_dates` is True for a column, try to infer the
datetime format based on the first datetime string. If the format
can be inferred, there often will be a large parsing speed-up.

Returns
-------
DataFrame

See Also
--------
read_csv
"""

warnings.warn("from_csv is deprecated. Please use read_csv(...) "
"instead. Note that some of the default arguments are "
"different, so please refer to the documentation "
"for from_csv when changing your function calls",
FutureWarning, stacklevel=2)

from pandas.io.parsers import read_csv
return read_csv(path, header=header, sep=sep,
parse_dates=parse_dates, index_col=index_col,
encoding=encoding, tupleize_cols=tupleize_cols,
infer_datetime_format=infer_datetime_format)

def to_sparse(self, fill_value=None, kind='block'):
"""
Convert to SparseDataFrame.
Expand Down
18 changes: 1 addition & 17 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -2903,7 +2903,7 @@ def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None,
columns=None, header=True, index=True, index_label=None,
mode='w', encoding=None, compression='infer', quoting=None,
quotechar='"', line_terminator=None, chunksize=None,
tupleize_cols=None, date_format=None, doublequote=True,
date_format=None, doublequote=True,
escapechar=None, decimal='.'):
r"""
Write object to a comma-separated values (csv) file.
Expand Down Expand Up @@ -2976,14 +2976,6 @@ def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None,
.. versionchanged:: 0.24.0
chunksize : int or None
Rows to write at a time.
tupleize_cols : bool, default False
Write MultiIndex columns as a list of tuples (if True) or in
the new, expanded format, where each MultiIndex column is a row
in the CSV (if False).

.. deprecated:: 0.21.0
This argument will be removed and will always write each row
of the multi-index as a separate row in the CSV file.
date_format : str, default None
Format string for datetime objects.
doublequote : bool, default True
Expand Down Expand Up @@ -3017,13 +3009,6 @@ def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None,

df = self if isinstance(self, ABCDataFrame) else self.to_frame()

if tupleize_cols is not None:
warnings.warn("The 'tupleize_cols' parameter is deprecated and "
"will be removed in a future version",
FutureWarning, stacklevel=2)
else:
tupleize_cols = False

from pandas.io.formats.csvs import CSVFormatter
formatter = CSVFormatter(df, path_or_buf,
line_terminator=line_terminator, sep=sep,
Expand All @@ -3033,7 +3018,6 @@ def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None,
cols=columns, header=header, index=index,
index_label=index_label, mode=mode,
chunksize=chunksize, quotechar=quotechar,
tupleize_cols=tupleize_cols,
date_format=date_format,
doublequote=doublequote,
escapechar=escapechar, decimal=decimal)
Expand Down
72 changes: 2 additions & 70 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ class Series(base.IndexOpsMixin, generic.NDFrame):
# tolist is not actually deprecated, just suppressed in the __dir__
_deprecations = generic.NDFrame._deprecations | frozenset(
['asobject', 'reshape', 'get_value', 'set_value',
'from_csv', 'valid', 'tolist'])
'valid', 'tolist'])

# Override cache_readonly bc Series is mutable
hasnans = property(base.IndexOpsMixin.hasnans.func,
Expand Down Expand Up @@ -4212,81 +4212,13 @@ def between(self, left, right, inclusive=True):

return lmask & rmask

@classmethod
def from_csv(cls, path, sep=',', parse_dates=True, header=None,
index_col=0, encoding=None, infer_datetime_format=False):
"""
Read CSV file.

.. deprecated:: 0.21.0
Use :func:`pandas.read_csv` instead.

It is preferable to use the more powerful :func:`pandas.read_csv`
for most general purposes, but ``from_csv`` makes for an easy
roundtrip to and from a file (the exact counterpart of
``to_csv``), especially with a time Series.

This method only differs from :func:`pandas.read_csv` in some defaults:

- `index_col` is ``0`` instead of ``None`` (take first column as index
by default)
- `header` is ``None`` instead of ``0`` (the first row is not used as
the column names)
- `parse_dates` is ``True`` instead of ``False`` (try parsing the index
as datetime by default)

With :func:`pandas.read_csv`, the option ``squeeze=True`` can be used
to return a Series like ``from_csv``.

Parameters
----------
path : str, file path, or file handle / StringIO
sep : str, default ','
Field delimiter.
parse_dates : bool, default True
Parse dates. Different default from read_table.
header : int, default None
Row to use as header (skip prior rows).
index_col : int or sequence, default 0
Column to use for index. If a sequence is given, a MultiIndex
is used. Different default from read_table.
encoding : str, optional
A string representing the encoding to use if the contents are
non-ascii, for python versions prior to 3.
infer_datetime_format : bool, default False
If True and `parse_dates` is True for a column, try to infer the
datetime format based on the first datetime string. If the format
can be inferred, there often will be a large parsing speed-up.

Returns
-------
Series

See Also
--------
read_csv
"""

# We're calling `DataFrame.from_csv` in the implementation,
# which will propagate a warning regarding `from_csv` deprecation.
from pandas.core.frame import DataFrame
df = DataFrame.from_csv(path, header=header, index_col=index_col,
sep=sep, parse_dates=parse_dates,
encoding=encoding,
infer_datetime_format=infer_datetime_format)
result = df.iloc[:, 0]
if header is None:
result.index.name = result.name = None

return result

@Appender(generic.NDFrame.to_csv.__doc__)
def to_csv(self, *args, **kwargs):

names = ["path_or_buf", "sep", "na_rep", "float_format", "columns",
"header", "index", "index_label", "mode", "encoding",
"compression", "quoting", "quotechar", "line_terminator",
"chunksize", "tupleize_cols", "date_format", "doublequote",
"chunksize", "date_format", "doublequote",
"escapechar", "decimal"]

old_names = ["path_or_buf", "index", "sep", "na_rep", "float_format",
Expand Down
8 changes: 3 additions & 5 deletions pandas/io/formats/csvs.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,9 @@ class CSVFormatter:

def __init__(self, obj, path_or_buf=None, sep=",", na_rep='',
float_format=None, cols=None, header=True, index=True,
index_label=None, mode='w', nanRep=None, encoding=None,
index_label=None, mode='w', encoding=None,
compression='infer', quoting=None, line_terminator='\n',
chunksize=None, tupleize_cols=False, quotechar='"',
chunksize=None, quotechar='"',
date_format=None, doublequote=True, escapechar=None,
decimal='.'):

Expand Down Expand Up @@ -68,9 +68,7 @@ def __init__(self, obj, path_or_buf=None, sep=",", na_rep='',

self.date_format = date_format

self.tupleize_cols = tupleize_cols
self.has_mi_columns = (isinstance(obj.columns, ABCMultiIndex) and
not self.tupleize_cols)
self.has_mi_columns = isinstance(obj.columns, ABCMultiIndex)

# validate mi options
if self.has_mi_columns:
Expand Down
12 changes: 2 additions & 10 deletions pandas/io/html.py
Original file line number Diff line number Diff line change
Expand Up @@ -912,7 +912,7 @@ def _parse(flavor, io, match, attrs, encoding, displayed_only, **kwargs):

def read_html(io, match='.+', flavor=None, header=None, index_col=None,
skiprows=None, attrs=None, parse_dates=False,
tupleize_cols=None, thousands=',', encoding=None,
thousands=',', encoding=None,
decimal='.', converters=None, na_values=None,
keep_default_na=True, displayed_only=True):
r"""Read HTML tables into a ``list`` of ``DataFrame`` objects.
Expand Down Expand Up @@ -976,14 +976,6 @@ def read_html(io, match='.+', flavor=None, header=None, index_col=None,
parse_dates : bool, optional
See :func:`~read_csv` for more details.

tupleize_cols : bool, optional
If ``False`` try to parse multiple header rows into a
:class:`~pandas.MultiIndex`, otherwise return raw tuples. Defaults to
``False``.

.. deprecated:: 0.21.0
This argument will be removed and will always convert to MultiIndex

thousands : str, optional
Separator to use to parse thousands. Defaults to ``','``.

Expand Down Expand Up @@ -1073,7 +1065,7 @@ def read_html(io, match='.+', flavor=None, header=None, index_col=None,
_validate_header_arg(header)
return _parse(flavor=flavor, io=io, match=match, header=header,
index_col=index_col, skiprows=skiprows,
parse_dates=parse_dates, tupleize_cols=tupleize_cols,
parse_dates=parse_dates,
thousands=thousands, attrs=attrs, encoding=encoding,
decimal=decimal, converters=converters, na_values=na_values,
keep_default_na=keep_default_na,
Expand Down
Loading