Skip to content

Commit e69c5ca

Browse files
committed
API: Deprecate old Series.to_csv signature
closes pandas-dev#19715
1 parent 6e1e1e4 commit e69c5ca

File tree

7 files changed

+230
-182
lines changed

7 files changed

+230
-182
lines changed

doc/source/whatsnew/v0.24.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -477,6 +477,7 @@ Deprecations
477477
- :meth:`MultiIndex.to_hierarchical` is deprecated and will be removed in a future version (:issue:`21613`)
478478
- :meth:`Series.ptp` is deprecated. Use ``numpy.ptp`` instead (:issue:`21614`)
479479
- :meth:`Series.compress` is deprecated. Use ``Series[condition]`` instead (:issue:`18262`)
480+
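- The signature of :meth:`Series.to_csv` has been aligned with that of :meth:`DataFrame.to_csv`: the first argument is now named ``path_or_buf`` (previously ``path``), the order of the subsequent arguments has changed, and the default value of the ``header`` argument will change from ``False`` to ``True``. Calls using the old signature still work but emit a ``FutureWarning``. (:issue:`19715`)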
480481
- :meth:`Categorical.from_codes` has deprecated providing float values for the ``codes`` argument. (:issue:`21767`)
481482
- :func:`pandas.read_table` is deprecated. Instead, use :func:`pandas.read_csv` passing ``sep='\t'`` if necessary (:issue:`21948`)
482483

pandas/core/frame.py

-101
Original file line numberDiff line numberDiff line change
@@ -1714,107 +1714,6 @@ def to_panel(self):
17141714

17151715
return self._constructor_expanddim(new_mgr)
17161716

1717-
def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None,
1718-
columns=None, header=True, index=True, index_label=None,
1719-
mode='w', encoding=None, compression='infer', quoting=None,
1720-
quotechar='"', line_terminator='\n', chunksize=None,
1721-
tupleize_cols=None, date_format=None, doublequote=True,
1722-
escapechar=None, decimal='.'):
1723-
r"""Write DataFrame to a comma-separated values (csv) file
1724-
1725-
Parameters
1726-
----------
1727-
path_or_buf : string or file handle, default None
1728-
File path or object, if None is provided the result is returned as
1729-
a string.
1730-
sep : character, default ','
1731-
Field delimiter for the output file.
1732-
na_rep : string, default ''
1733-
Missing data representation
1734-
float_format : string, default None
1735-
Format string for floating point numbers
1736-
columns : sequence, optional
1737-
Columns to write
1738-
header : boolean or list of string, default True
1739-
Write out the column names. If a list of strings is given it is
1740-
assumed to be aliases for the column names
1741-
index : boolean, default True
1742-
Write row names (index)
1743-
index_label : string or sequence, or False, default None
1744-
Column label for index column(s) if desired. If None is given, and
1745-
`header` and `index` are True, then the index names are used. A
1746-
sequence should be given if the DataFrame uses MultiIndex. If
1747-
False do not print fields for index names. Use index_label=False
1748-
for easier importing in R
1749-
mode : str
1750-
Python write mode, default 'w'
1751-
encoding : string, optional
1752-
A string representing the encoding to use in the output file,
1753-
defaults to 'ascii' on Python 2 and 'utf-8' on Python 3.
1754-
compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None},
1755-
default 'infer'
1756-
If 'infer' and `path_or_buf` is path-like, then detect compression
1757-
from the following extensions: '.gz', '.bz2', '.zip' or '.xz'
1758-
(otherwise no compression).
1759-
1760-
.. versionchanged:: 0.24.0
1761-
'infer' option added and set to default
1762-
line_terminator : string, default ``'\n'``
1763-
The newline character or character sequence to use in the output
1764-
file
1765-
quoting : optional constant from csv module
1766-
defaults to csv.QUOTE_MINIMAL. If you have set a `float_format`
1767-
then floats are converted to strings and thus csv.QUOTE_NONNUMERIC
1768-
will treat them as non-numeric
1769-
quotechar : string (length 1), default '\"'
1770-
character used to quote fields
1771-
doublequote : boolean, default True
1772-
Control quoting of `quotechar` inside a field
1773-
escapechar : string (length 1), default None
1774-
character used to escape `sep` and `quotechar` when appropriate
1775-
chunksize : int or None
1776-
rows to write at a time
1777-
tupleize_cols : boolean, default False
1778-
.. deprecated:: 0.21.0
1779-
This argument will be removed and will always write each row
1780-
of the multi-index as a separate row in the CSV file.
1781-
1782-
Write MultiIndex columns as a list of tuples (if True) or in
1783-
the new, expanded format, where each MultiIndex column is a row
1784-
in the CSV (if False).
1785-
date_format : string, default None
1786-
Format string for datetime objects
1787-
decimal: string, default '.'
1788-
Character recognized as decimal separator. E.g. use ',' for
1789-
European data
1790-
1791-
"""
1792-
1793-
if tupleize_cols is not None:
1794-
warnings.warn("The 'tupleize_cols' parameter is deprecated and "
1795-
"will be removed in a future version",
1796-
FutureWarning, stacklevel=2)
1797-
else:
1798-
tupleize_cols = False
1799-
1800-
from pandas.io.formats.csvs import CSVFormatter
1801-
formatter = CSVFormatter(self, path_or_buf,
1802-
line_terminator=line_terminator, sep=sep,
1803-
encoding=encoding,
1804-
compression=compression, quoting=quoting,
1805-
na_rep=na_rep, float_format=float_format,
1806-
cols=columns, header=header, index=index,
1807-
index_label=index_label, mode=mode,
1808-
chunksize=chunksize, quotechar=quotechar,
1809-
tupleize_cols=tupleize_cols,
1810-
date_format=date_format,
1811-
doublequote=doublequote,
1812-
escapechar=escapechar, decimal=decimal)
1813-
formatter.save()
1814-
1815-
if path_or_buf is None:
1816-
return formatter.path_or_buf.getvalue()
1817-
18181717
@Appender(_shared_docs['to_excel'] % _shared_doc_kwargs)
18191718
def to_excel(self, excel_writer, sheet_name='Sheet1', na_rep='',
18201719
float_format=None, columns=None, header=True, index=True,

pandas/core/generic.py

+109
Original file line numberDiff line numberDiff line change
@@ -9270,6 +9270,115 @@ def first_valid_index(self):
92709270
def last_valid_index(self):
92719271
return self._find_valid_index('last')
92729272

9273+
def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None,
9274+
columns=None, header=True, index=True, index_label=None,
9275+
mode='w', encoding=None, compression='infer', quoting=None,
9276+
quotechar='"', line_terminator='\n', chunksize=None,
9277+
tupleize_cols=None, date_format=None, doublequote=True,
9278+
escapechar=None, decimal='.'):
9279+
r"""Write object to a comma-separated values (csv) file
9280+
9281+
Parameters
9282+
----------
9283+
path_or_buf : string or file handle, default None
9284+
File path or object, if None is provided the result is returned as
9285+
a string.
9286+
.. versionchanged:: 0.24.0
9287+
Was previously named "path" for Series.
9288+
sep : character, default ','
9289+
Field delimiter for the output file.
9290+
na_rep : string, default ''
9291+
Missing data representation
9292+
float_format : string, default None
9293+
Format string for floating point numbers
9294+
columns : sequence, optional
9295+
Columns to write
9296+
header : boolean or list of string, default True
9297+
Write out the column names. If a list of strings is given it is
9298+
assumed to be aliases for the column names
9299+
.. versionchanged:: 0.24.0
9300+
Previously defaulted to False for Series.
9301+
index : boolean, default True
9302+
Write row names (index)
9303+
index_label : string or sequence, or False, default None
9304+
Column label for index column(s) if desired. If None is given, and
9305+
`header` and `index` are True, then the index names are used. A
9306+
sequence should be given if the object uses MultiIndex. If
9307+
False do not print fields for index names. Use index_label=False
9308+
for easier importing in R
9309+
mode : str
9310+
Python write mode, default 'w'
9311+
encoding : string, optional
9312+
A string representing the encoding to use in the output file,
9313+
defaults to 'ascii' on Python 2 and 'utf-8' on Python 3.
9314+
compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None},
9315+
default 'infer'
9316+
If 'infer' and `path_or_buf` is path-like, then detect compression
9317+
from the following extensions: '.gz', '.bz2', '.zip' or '.xz'
9318+
(otherwise no compression).
9319+
9320+
.. versionchanged:: 0.24.0
9321+
'infer' option added and set to default
9322+
line_terminator : string, default ``'\n'``
9323+
The newline character or character sequence to use in the output
9324+
file
9325+
quoting : optional constant from csv module
9326+
defaults to csv.QUOTE_MINIMAL. If you have set a `float_format`
9327+
then floats are converted to strings and thus csv.QUOTE_NONNUMERIC
9328+
will treat them as non-numeric
9329+
quotechar : string (length 1), default '\"'
9330+
character used to quote fields
9331+
doublequote : boolean, default True
9332+
Control quoting of `quotechar` inside a field
9333+
escapechar : string (length 1), default None
9334+
character used to escape `sep` and `quotechar` when appropriate
9335+
chunksize : int or None
9336+
rows to write at a time
9337+
tupleize_cols : boolean, default False
9338+
.. deprecated:: 0.21.0
9339+
This argument will be removed and will always write each row
9340+
of the multi-index as a separate row in the CSV file.
9341+
9342+
Write MultiIndex columns as a list of tuples (if True) or in
9343+
the new, expanded format, where each MultiIndex column is a row
9344+
in the CSV (if False).
9345+
date_format : string, default None
9346+
Format string for datetime objects
9347+
decimal: string, default '.'
9348+
Character recognized as decimal separator. E.g. use ',' for
9349+
European data
9350+
9351+
.. versionchanged:: 0.24.0
9352+
The order of arguments for Series was changed.
9353+
"""
9354+
9355+
df = self if isinstance(self, ABCDataFrame) else self.to_frame()
9356+
9357+
if tupleize_cols is not None:
9358+
warnings.warn("The 'tupleize_cols' parameter is deprecated and "
9359+
"will be removed in a future version",
9360+
FutureWarning, stacklevel=2)
9361+
else:
9362+
tupleize_cols = False
9363+
9364+
from pandas.io.formats.csvs import CSVFormatter
9365+
formatter = CSVFormatter(df, path_or_buf,
9366+
line_terminator=line_terminator, sep=sep,
9367+
encoding=encoding,
9368+
compression=compression, quoting=quoting,
9369+
na_rep=na_rep, float_format=float_format,
9370+
cols=columns, header=header, index=index,
9371+
index_label=index_label, mode=mode,
9372+
chunksize=chunksize, quotechar=quotechar,
9373+
tupleize_cols=tupleize_cols,
9374+
date_format=date_format,
9375+
doublequote=doublequote,
9376+
escapechar=escapechar, decimal=decimal)
9377+
formatter.save()
9378+
9379+
if path_or_buf is None:
9380+
return formatter.path_or_buf.getvalue()
9381+
92739382

92749383
def _doc_parms(cls):
92759384
"""Return a tuple of the doc parms."""

pandas/core/series.py

+57-53
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
from pandas.core.arrays import ExtensionArray
1818
from pandas.core.dtypes.common import (
1919
is_categorical_dtype,
20+
is_string_like,
2021
is_bool,
2122
is_integer, is_integer_dtype,
2223
is_float_dtype,
@@ -3765,59 +3766,62 @@ def from_csv(cls, path, sep=',', parse_dates=True, header=None,
37653766

37663767
return result
37673768

3768-
def to_csv(self, path=None, index=True, sep=",", na_rep='',
3769-
float_format=None, header=False, index_label=None,
3770-
mode='w', encoding=None, compression='infer', date_format=None,
3771-
decimal='.'):
3772-
"""
3773-
Write Series to a comma-separated values (csv) file
3774-
3775-
Parameters
3776-
----------
3777-
path : string or file handle, default None
3778-
File path or object, if None is provided the result is returned as
3779-
a string.
3780-
na_rep : string, default ''
3781-
Missing data representation
3782-
float_format : string, default None
3783-
Format string for floating point numbers
3784-
header : boolean, default False
3785-
Write out series name
3786-
index : boolean, default True
3787-
Write row names (index)
3788-
index_label : string or sequence, default None
3789-
Column label for index column(s) if desired. If None is given, and
3790-
`header` and `index` are True, then the index names are used. A
3791-
sequence should be given if the DataFrame uses MultiIndex.
3792-
mode : Python write mode, default 'w'
3793-
sep : character, default ","
3794-
Field delimiter for the output file.
3795-
encoding : string, optional
3796-
a string representing the encoding to use if the contents are
3797-
non-ascii, for python versions prior to 3
3798-
compression : None or string, default 'infer'
3799-
A string representing the compression to use in the output file.
3800-
Allowed values are None, 'gzip', 'bz2', 'zip', 'xz', and 'infer'.
3801-
This input is only used when the first argument is a filename.
3802-
3803-
.. versionchanged:: 0.24.0
3804-
'infer' option added and set to default
3805-
date_format: string, default None
3806-
Format string for datetime objects.
3807-
decimal: string, default '.'
3808-
Character recognized as decimal separator. E.g. use ',' for
3809-
European data
3810-
"""
3811-
from pandas.core.frame import DataFrame
3812-
df = DataFrame(self)
3813-
# result is only a string if no path provided, otherwise None
3814-
result = df.to_csv(path, index=index, sep=sep, na_rep=na_rep,
3815-
float_format=float_format, header=header,
3816-
index_label=index_label, mode=mode,
3817-
encoding=encoding, compression=compression,
3818-
date_format=date_format, decimal=decimal)
3819-
if path is None:
3820-
return result
3769+
@Appender(generic.NDFrame.to_csv.__doc__)
3770+
def to_csv(self, *args, **kwargs):
3771+
3772+
names = ["path_or_buf", "sep", "na_rep", "float_format", "columns",
3773+
"header", "index", "index_label", "mode", "encoding",
3774+
"compression", "quoting", "quotechar", "line_terminator",
3775+
"chunksize", "tupleize_cols", "date_format", "doublequote",
3776+
"escapechar", "decimal"]
3777+
3778+
old_names = ["path_or_buf", "index", "sep", "na_rep", "float_format",
3779+
"header", "index_label", "mode", "encoding",
3780+
"compression", "date_format", "decimal"]
3781+
3782+
if "path" in kwargs:
3783+
warnings.warn("The signature of `Series.to_csv` was aligned "
3784+
"to that of `DataFrame.to_csv`, and argument "
3785+
"'path' will be renamed to 'path_or_buf'.",
3786+
FutureWarning, stacklevel=2)
3787+
kwargs["path_or_buf"] = kwargs.pop("path")
3788+
3789+
if len(args) > 1:
3790+
# Either "index" (old signature) or "sep" (new signature) is being
3791+
# passed as second argument (while the first is the same)
3792+
maybe_sep = args[1]
3793+
3794+
if not (is_string_like(maybe_sep) and len(maybe_sep) == 1):
3795+
# old signature
3796+
warnings.warn("The signature of `Series.to_csv` was aligned "
3797+
"to that of `DataFrame.to_csv`. Note that the "
3798+
"order of arguments changed, and the new one "
3799+
"has 'sep' in first place, for which \"{}\" is "
3800+
"not a valid value. The old order will cease to "
3801+
"be supported in a future version. Please refer "
3802+
"to the documentation for `DataFrame.to_csv` "
3803+
"when updating your function "
3804+
"calls.".format(maybe_sep),
3805+
FutureWarning, stacklevel=2)
3806+
names = old_names
3807+
3808+
pos_args = dict(zip(names[:len(args)], args))
3809+
3810+
for key in pos_args:
3811+
if key in kwargs:
3812+
raise ValueError("Argument given by name ('{}') and position "
3813+
"({})".format(key, names.index(key)))
3814+
kwargs[key] = pos_args[key]
3815+
3816+
if kwargs.get("header", None) is None:
3817+
warnings.warn("The signature of `Series.to_csv` was aligned "
3818+
"to that of `DataFrame.to_csv`, and argument "
3819+
"'header' will change its default value from False "
3820+
"to True: please pass an explicit value to suppress "
3821+
"this warning.", FutureWarning,
3822+
stacklevel=2)
3823+
kwargs["header"] = False # Backwards compatibility.
3824+
return self.to_frame().to_csv(**kwargs)
38213825

38223826
@Appender(generic._shared_docs['to_excel'] % _shared_doc_kwargs)
38233827
def to_excel(self, excel_writer, sheet_name='Sheet1', na_rep='',

pandas/tests/frame/test_to_csv.py

+11-6
Original file line numberDiff line numberDiff line change
@@ -893,22 +893,27 @@ def test_to_csv_line_terminators(self):
893893

894894
def test_to_csv_from_csv_categorical(self):
895895

896-
# CSV with categoricals should result in the same output as when one
897-
# would add a "normal" Series/DataFrame.
898-
s = Series(pd.Categorical(['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c']))
899-
s2 = Series(['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c'])
896+
# CSV with categoricals should result in the same output
897+
# as when one would add a "normal" Series/DataFrame.
898+
s = Series(pd.Categorical(["a", "b", "b", "a", "a", "c", "c", "c"]))
899+
s2 = Series(["a", "b", "b", "a", "a", "c", "c", "c"])
900900
res = StringIO()
901-
s.to_csv(res)
901+
902+
s.to_csv(res, header=False)
902903
exp = StringIO()
903-
s2.to_csv(exp)
904+
905+
s2.to_csv(exp, header=False)
904906
assert res.getvalue() == exp.getvalue()
905907

906908
df = DataFrame({"s": s})
907909
df2 = DataFrame({"s": s2})
910+
908911
res = StringIO()
909912
df.to_csv(res)
913+
910914
exp = StringIO()
911915
df2.to_csv(exp)
916+
912917
assert res.getvalue() == exp.getvalue()
913918

914919
def test_to_csv_path_is_none(self):

0 commit comments

Comments
 (0)