Skip to content

DEPR: Warn about Series.to_csv signature alignment #21868

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v0.24.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,7 @@ Deprecations
- :meth:`DataFrame.to_stata`, :meth:`read_stata`, :class:`StataReader` and :class:`StataWriter` have deprecated the ``encoding`` argument. The encoding of a Stata dta file is determined by the file type and cannot be changed (:issue:`21244`).
- :meth:`MultiIndex.to_hierarchical` is deprecated and will be removed in a future version (:issue:`21613`)
- :meth:`Series.ptp` is deprecated. Use ``numpy.ptp`` instead (:issue:`21614`)
-
- The signature of :meth:`Series.to_csv` has been deprecated. Please use the signature of :meth:`DataFrame.to_csv` instead (:issue:`19745`)

.. _whatsnew_0240.prior_deprecations:

Expand Down
97 changes: 0 additions & 97 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1710,103 +1710,6 @@ def to_panel(self):

return self._constructor_expanddim(new_mgr)

def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None,
columns=None, header=True, index=True, index_label=None,
mode='w', encoding=None, compression=None, quoting=None,
quotechar='"', line_terminator='\n', chunksize=None,
tupleize_cols=None, date_format=None, doublequote=True,
escapechar=None, decimal='.'):
r"""Write DataFrame to a comma-separated values (csv) file.

Parameters
----------
path_or_buf : string or file handle, default None
File path or object, if None is provided the result is returned as
a string.
sep : character, default ','
Field delimiter for the output file.
na_rep : string, default ''
Missing data representation.
float_format : string, default None
Format string for floating point numbers.
columns : sequence, optional
Columns to write.
header : boolean or list of string, default True
Write out the column names. If a list of strings is given it is
assumed to be aliases for the column names.
index : boolean, default True
Write row names (index).
index_label : string or sequence, or False, default None
Column label for index column(s) if desired. If None is given, and
`header` and `index` are True, then the index names are used. A
sequence should be given if the DataFrame uses MultiIndex. If
False do not print fields for index names. Use index_label=False
for easier importing in R.
mode : str, default 'w'
Python write mode.
encoding : string, optional
A string representing the encoding to use in the output file,
defaults to 'ascii' on Python 2 and 'utf-8' on Python 3.
compression : {'infer', 'gzip', 'bz2', 'xz', None}, default None
If 'infer' and `path_or_buf` is path-like, then detect compression
from the following extensions: '.gz', '.bz2' or '.xz'
(otherwise no compression).
line_terminator : string, default ``'\n'``
The newline character or character sequence to use in the output
file.
quoting : optional constant from csv module
Defaults to csv.QUOTE_MINIMAL. If you have set a `float_format`
then floats are converted to strings and thus csv.QUOTE_NONNUMERIC
will treat them as non-numeric.
quotechar : string (length 1), default '\"'
Character used to quote fields.
doublequote : boolean, default True
Control quoting of `quotechar` inside a field.
escapechar : string (length 1), default None
Character used to escape `sep` and `quotechar` when appropriate.
chunksize : int or None, default None
Rows to write at a time.
tupleize_cols : boolean, default False
.. deprecated:: 0.21.0
This argument will be removed and will always write each row
of the multi-index as a separate row in the CSV file.

Write MultiIndex columns as a list of tuples (if True) or in
the new, expanded format, where each MultiIndex column is a row
in the CSV (if False).
date_format : string, default None
Format string for datetime objects.
decimal : string, default '.'
Character recognized as decimal separator. E.g. use ',' for
European data.

Returns
-------
None or str
If `path_or_buf` is None, the content of the formatter's output
buffer is returned; otherwise nothing is returned.
"""

# The signature default is None so that an explicitly passed value can be
# told apart from "not supplied"; only an explicit value triggers the
# deprecation warning, and the unset case falls back to False.
if tupleize_cols is not None:
warnings.warn("The 'tupleize_cols' parameter is deprecated and "
"will be removed in a future version",
FutureWarning, stacklevel=2)
else:
tupleize_cols = False

from pandas.io.formats.csvs import CSVFormatter
# Note the public `columns` argument is forwarded under the name `cols`.
formatter = CSVFormatter(self, path_or_buf,
line_terminator=line_terminator, sep=sep,
encoding=encoding,
compression=compression, quoting=quoting,
na_rep=na_rep, float_format=float_format,
cols=columns, header=header, index=index,
index_label=index_label, mode=mode,
chunksize=chunksize, quotechar=quotechar,
tupleize_cols=tupleize_cols,
date_format=date_format,
doublequote=doublequote,
escapechar=escapechar, decimal=decimal)
formatter.save()

# With no target given, hand back whatever the formatter accumulated in
# its own buffer; otherwise fall through and return None implicitly.
if path_or_buf is None:
return formatter.path_or_buf.getvalue()

@Appender(_shared_docs['to_excel'] % _shared_doc_kwargs)
def to_excel(self, excel_writer, sheet_name='Sheet1', na_rep='',
float_format=None, columns=None, header=True, index=True,
Expand Down
101 changes: 101 additions & 0 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -9161,6 +9161,107 @@ def first_valid_index(self):
def last_valid_index(self):
return self._find_valid_index('last')

def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None,
columns=None, header=True, index=True, index_label=None,
mode='w', encoding=None, compression=None, quoting=None,
quotechar='"', line_terminator='\n', chunksize=None,
tupleize_cols=None, date_format=None, doublequote=True,
escapechar=None, decimal='.'):
r"""Export to a comma-separated values (CSV) file.

If the object is not a DataFrame it is first wrapped in one, and the
resulting DataFrame is what gets written.

Parameters
----------
path_or_buf : string or file handle, default None
File path or object, if None is provided the result is returned as
a string.
sep : character, default ','
Field delimiter for the output file.
na_rep : string, default ''
Missing data representation.
float_format : string, default None
Format string for floating point numbers.
columns : sequence, optional
Columns to write.
header : boolean or list of string, default True
Write out the column names. If a list of strings is given it is
assumed to be aliases for the column names.
index : boolean, default True
Write row names (index).
index_label : string or sequence, or False, default None
Column label for index column(s) if desired. If None is given, and
`header` and `index` are True, then the index names are used. A
sequence should be given if the DataFrame uses MultiIndex. If
False do not print fields for index names. Use index_label=False
for easier importing in R.
mode : str, default 'w'
Python write mode.
encoding : string, optional
A string representing the encoding to use in the output file,
defaults to 'ascii' on Python 2 and 'utf-8' on Python 3.
compression : {'infer', 'gzip', 'bz2', 'xz', None}, default None
If 'infer' and `path_or_buf` is path-like, then detect compression
from the following extensions: '.gz', '.bz2' or '.xz'
(otherwise no compression).
line_terminator : string, default ``'\n'``
The newline character or character sequence to use in the output
file.
quoting : optional constant from csv module
Defaults to csv.QUOTE_MINIMAL. If you have set a `float_format`
then floats are converted to strings and thus csv.QUOTE_NONNUMERIC
will treat them as non-numeric.
quotechar : string (length 1), default '\"'
Character used to quote fields.
doublequote : boolean, default True
Control quoting of `quotechar` inside a field.
escapechar : string (length 1), default None
Character used to escape `sep` and `quotechar` when appropriate.
chunksize : int or None, default None
Rows to write at a time.
tupleize_cols : boolean, default False
.. deprecated:: 0.21.0
This argument will be removed and will always write each row
of the multi-index as a separate row in the CSV file.

Write MultiIndex columns as a list of tuples (if True) or in
the new, expanded format, where each MultiIndex column is a row
in the CSV (if False).
date_format : string, default None
Format string for datetime objects.
decimal : string, default '.'
Character recognized as decimal separator. E.g. use ',' for
European data.

Returns
-------
None or str
If `path_or_buf` is None, the content of the formatter's output
buffer is returned; otherwise nothing is returned.
"""

# NOTE(review): function-scope imports, presumably to avoid a circular
# import between generic.py and frame.py -- confirm.
from pandas.core.frame import DataFrame
from pandas.io.formats.csvs import CSVFormatter

# The formatter is always handed a DataFrame: a DataFrame is used as-is,
# anything else is wrapped via the DataFrame constructor.
df = self if isinstance(self, DataFrame) else DataFrame(self)

# The signature default is None so that an explicitly passed value can be
# told apart from "not supplied"; only an explicit value triggers the
# deprecation warning, and the unset case falls back to False.
if tupleize_cols is not None:
warnings.warn("The 'tupleize_cols' parameter is deprecated and "
"will be removed in a future version",
FutureWarning, stacklevel=2)
else:
tupleize_cols = False

# Note the public `columns` argument is forwarded under the name `cols`.
formatter = CSVFormatter(df, path_or_buf,
line_terminator=line_terminator, sep=sep,
encoding=encoding,
compression=compression, quoting=quoting,
na_rep=na_rep, float_format=float_format,
cols=columns, header=header, index=index,
index_label=index_label, mode=mode,
chunksize=chunksize, quotechar=quotechar,
tupleize_cols=tupleize_cols,
date_format=date_format,
doublequote=doublequote,
escapechar=escapechar, decimal=decimal)
formatter.save()

# With no target given, hand back whatever the formatter accumulated in
# its own buffer; otherwise fall through and return None implicitly.
if path_or_buf is None:
return formatter.path_or_buf.getvalue()


def _doc_parms(cls):
"""Return a tuple of the doc parms."""
Expand Down
68 changes: 53 additions & 15 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -3760,24 +3760,30 @@ def from_csv(cls, path, sep=',', parse_dates=True, header=None,

return result

def to_csv(self, path=None, index=True, sep=",", na_rep='',
float_format=None, header=False, index_label=None,
def to_csv(self, path_or_buf=None, index=True, sep=",", na_rep='',
float_format=None, header=None, index_label=None,
mode='w', encoding=None, compression=None, date_format=None,
decimal='.'):
"""
Write Series to a comma-separated values (csv) file
decimal='.', **kwargs):
"""Export to a comma-separated values (CSV) file

.. deprecated:: 0.24.0
The signature will be aligned with that of :func:`DataFrame.to_csv`.

:func:`Series.to_csv` will align its signature with that of
`DataFrame.to_csv`. Please pass in keyword arguments in accordance
with that signature instead.

Parameters
----------
path : string or file handle, default None
path_or_buf : string or file handle, default None
File path or object, if None is provided the result is returned as
a string.
na_rep : string, default ''
Missing data representation
float_format : string, default None
Format string for floating point numbers
header : boolean, default False
Write out series name
header : boolean, default None
Write out Series name. By default, the name will be omitted.
index : boolean, default True
Write row names (index)
index_label : string or sequence, default None
Expand All @@ -3800,15 +3806,47 @@ def to_csv(self, path=None, index=True, sep=",", na_rep='',
Character recognized as decimal separator. E.g. use ',' for
European data
"""

from pandas.core.frame import DataFrame
df = DataFrame(self)
# result is only a string if no path provided, otherwise None
result = df.to_csv(path, index=index, sep=sep, na_rep=na_rep,
float_format=float_format, header=header,
index_label=index_label, mode=mode,
encoding=encoding, compression=compression,
date_format=date_format, decimal=decimal)
if path is None:

new_path_key = "path_or_buf"
old_path_key = "path"
emit_warning = False

# For backwards compatibility, override the `path_or_buf`
# argument if a `path` keyword argument is provided.
if kwargs.get(old_path_key, None) is not None:
kwargs[new_path_key] = kwargs.pop(old_path_key)
emit_warning = True

if header is None:
emit_warning = True
header = False

if emit_warning:
warnings.warn("The signature of `Series.to_csv` will be "
"aligned to that of `DataFrame.to_csv` in the "
"future. Note that some of the default arguments "
"and argument names are different, so please refer "
"to the documentation for `DataFrame.to_csv` when "
"changing your function calls.",
FutureWarning, stacklevel=2)
header = False

to_csv_kwargs = dict(path_or_buf=path_or_buf, index=index, sep=sep,
na_rep=na_rep, float_format=float_format,
header=header, index_label=index_label,
mode=mode, encoding=encoding,
compression=compression,
date_format=date_format,
decimal=decimal)
to_csv_kwargs.update(**kwargs)

# Result is only a string if no path provided, otherwise None.
result = df.to_csv(**to_csv_kwargs)

if to_csv_kwargs[new_path_key] is None:
return result

@Appender(generic._shared_docs['to_excel'] % _shared_doc_kwargs)
Expand Down
10 changes: 6 additions & 4 deletions pandas/tests/frame/test_to_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -893,14 +893,16 @@ def test_to_csv_line_terminators(self):

def test_to_csv_from_csv_categorical(self):

# CSV with categoricals should result in the same output as when one
# would add a "normal" Series/DataFrame.
# CSV with Categoricals should result in the same output
# as when one would add a "normal" Series/DataFrame.
s = Series(pd.Categorical(['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c']))
s2 = Series(['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c'])
res = StringIO()
s.to_csv(res)

s.to_csv(res, header=False)
exp = StringIO()
s2.to_csv(exp)

s2.to_csv(exp, header=False)
assert res.getvalue() == exp.getvalue()

df = DataFrame({"s": s})
Expand Down
Loading