Skip to content

Commit 7d655b2

Browse files
committed
API: Proof of concept for pandas-dev#19715 based on pandas-dev#21868
closes pandas-dev#19715
1 parent 365eac4 commit 7d655b2

File tree

4 files changed

+164
-156
lines changed

4 files changed

+164
-156
lines changed

pandas/core/frame.py

-97
Original file line numberDiff line numberDiff line change
@@ -1710,103 +1710,6 @@ def to_panel(self):
17101710

17111711
return self._constructor_expanddim(new_mgr)
17121712

1713-
def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None,
           columns=None, header=True, index=True, index_label=None,
           mode='w', encoding=None, compression=None, quoting=None,
           quotechar='"', line_terminator='\n', chunksize=None,
           tupleize_cols=None, date_format=None, doublequote=True,
           escapechar=None, decimal='.'):
    r"""
    Serialize the DataFrame as comma-separated values (csv).

    Parameters
    ----------
    path_or_buf : string or file handle, default None
        Destination path or object. When None, the csv text is returned
        as a string instead.
    sep : character, default ','
        Delimiter placed between fields.
    na_rep : string, default ''
        Text written in place of missing data.
    float_format : string, default None
        Format string applied to floating point numbers.
    columns : sequence, optional
        Columns to write.
    header : boolean or list of string, default True
        Whether to write the column names. A list of strings is taken
        as aliases for the column names.
    index : boolean, default True
        Whether to write the row names (index).
    index_label : string or sequence, or False, default None
        Label for the index column(s). With None, and `header` and
        `index` both True, the index names are used. Pass a sequence
        when the DataFrame uses a MultiIndex. With False, no fields are
        printed for the index names; index_label=False makes importing
        in R easier.
    mode : str, default 'w'
        Python write mode.
    encoding : string, optional
        Encoding of the output file; defaults to 'ascii' on Python 2
        and 'utf-8' on Python 3.
    compression : {'infer', 'gzip', 'bz2', 'xz', None}, default None
        With 'infer' and a path-like `path_or_buf`, the compression is
        detected from the extensions '.gz', '.bz2' or '.xz' (no
        compression otherwise).
    quoting : optional constant from csv module
        Defaults to csv.QUOTE_MINIMAL. Once a `float_format` is set,
        floats are converted to strings, so csv.QUOTE_NONNUMERIC treats
        them as non-numeric.
    quotechar : string (length 1), default '\"'
        Character used to quote fields.
    line_terminator : string, default ``'\n'``
        Newline character or character sequence used in the output
        file.
    chunksize : int or None
        Number of rows written at a time.
    tupleize_cols : boolean, default False
        .. deprecated:: 0.21.0
           This argument will be removed and will always write each row
           of the multi-index as a separate row in the CSV file.

        Write MultiIndex columns as a list of tuples (if True) or in
        the new, expanded format, where each MultiIndex column is a row
        in the CSV (if False).
    date_format : string, default None
        Format string applied to datetime objects.
    doublequote : boolean, default True
        Controls quoting of `quotechar` inside a field.
    escapechar : string (length 1), default None
        Character used to escape `sep` and `quotechar` when appropriate.
    decimal : string, default '.'
        Character recognized as decimal separator, e.g. ',' for
        European data.

    Returns
    -------
    string or None
        The csv text when `path_or_buf` is None, otherwise None.
    """
    # An explicit value (True or False) means the caller still relies on
    # the deprecated switch; only the untouched default stays silent.
    if tupleize_cols is None:
        tupleize_cols = False
    else:
        warnings.warn("The 'tupleize_cols' parameter is deprecated and "
                      "will be removed in a future version",
                      FutureWarning, stacklevel=2)

    from pandas.io.formats.csvs import CSVFormatter

    settings = dict(
        line_terminator=line_terminator,
        sep=sep,
        encoding=encoding,
        compression=compression,
        quoting=quoting,
        na_rep=na_rep,
        float_format=float_format,
        cols=columns,
        header=header,
        index=index,
        index_label=index_label,
        mode=mode,
        chunksize=chunksize,
        quotechar=quotechar,
        tupleize_cols=tupleize_cols,
        date_format=date_format,
        doublequote=doublequote,
        escapechar=escapechar,
        decimal=decimal,
    )
    writer = CSVFormatter(self, path_or_buf, **settings)
    writer.save()

    if path_or_buf is not None:
        return None
    # No destination was supplied: read the text back from the
    # formatter's own path_or_buf object.
    return writer.path_or_buf.getvalue()
1809-
18101713
@Appender(_shared_docs['to_excel'] % _shared_doc_kwargs)
18111714
def to_excel(self, excel_writer, sheet_name='Sheet1', na_rep='',
18121715
float_format=None, columns=None, header=True, index=True,

pandas/core/generic.py

+101
Original file line numberDiff line numberDiff line change
@@ -9161,6 +9161,107 @@ def first_valid_index(self):
91619161
def last_valid_index(self):
91629162
return self._find_valid_index('last')
91639163

9164+
def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None,
           columns=None, header=True, index=True, index_label=None,
           mode='w', encoding=None, compression=None, quoting=None,
           quotechar='"', line_terminator='\n', chunksize=None,
           tupleize_cols=None, date_format=None, doublequote=True,
           escapechar=None, decimal='.'):
    r"""
    Write the object to a comma-separated values (csv) file.

    A DataFrame is written as is; any other object is first converted
    with its ``to_frame()`` method.

    Parameters
    ----------
    path_or_buf : string or file handle, default None
        Destination path or object. When None, the csv text is returned
        as a string instead.
    sep : character, default ','
        Delimiter placed between fields.
    na_rep : string, default ''
        Text written in place of missing data.
    float_format : string, default None
        Format string applied to floating point numbers.
    columns : sequence, optional
        Columns to write.
    header : boolean or list of string, default True
        Whether to write the column names. A list of strings is taken
        as aliases for the column names.
    index : boolean, default True
        Whether to write the row names (index).
    index_label : string or sequence, or False, default None
        Label for the index column(s). With None, and `header` and
        `index` both True, the index names are used. Pass a sequence
        when the DataFrame uses a MultiIndex. With False, no fields are
        printed for the index names; index_label=False makes importing
        in R easier.
    mode : str, default 'w'
        Python write mode.
    encoding : string, optional
        Encoding of the output file; defaults to 'ascii' on Python 2
        and 'utf-8' on Python 3.
    compression : {'infer', 'gzip', 'bz2', 'xz', None}, default None
        With 'infer' and a path-like `path_or_buf`, the compression is
        detected from the extensions '.gz', '.bz2' or '.xz' (no
        compression otherwise).
    quoting : optional constant from csv module
        Defaults to csv.QUOTE_MINIMAL. Once a `float_format` is set,
        floats are converted to strings, so csv.QUOTE_NONNUMERIC treats
        them as non-numeric.
    quotechar : string (length 1), default '\"'
        Character used to quote fields.
    line_terminator : string, default ``'\n'``
        Newline character or character sequence used in the output
        file.
    chunksize : int or None
        Number of rows written at a time.
    tupleize_cols : boolean, default False
        .. deprecated:: 0.21.0
           This argument will be removed and will always write each row
           of the multi-index as a separate row in the CSV file.

        Write MultiIndex columns as a list of tuples (if True) or in
        the new, expanded format, where each MultiIndex column is a row
        in the CSV (if False).
    date_format : string, default None
        Format string applied to datetime objects.
    doublequote : boolean, default True
        Controls quoting of `quotechar` inside a field.
    escapechar : string (length 1), default None
        Character used to escape `sep` and `quotechar` when appropriate.
    decimal : string, default '.'
        Character recognized as decimal separator, e.g. ',' for
        European data.

    Returns
    -------
    string or None
        The csv text when `path_or_buf` is None, otherwise None.
    """
    from pandas.core.frame import DataFrame

    # The formatter is always handed a DataFrame, so anything else is
    # converted first.
    if isinstance(self, DataFrame):
        frame = self
    else:
        frame = self.to_frame()

    # An explicit value (True or False) means the caller still relies on
    # the deprecated switch; only the untouched default stays silent.
    if tupleize_cols is None:
        tupleize_cols = False
    else:
        warnings.warn("The 'tupleize_cols' parameter is deprecated and "
                      "will be removed in a future version",
                      FutureWarning, stacklevel=2)

    from pandas.io.formats.csvs import CSVFormatter

    settings = dict(
        line_terminator=line_terminator,
        sep=sep,
        encoding=encoding,
        compression=compression,
        quoting=quoting,
        na_rep=na_rep,
        float_format=float_format,
        cols=columns,
        header=header,
        index=index,
        index_label=index_label,
        mode=mode,
        chunksize=chunksize,
        quotechar=quotechar,
        tupleize_cols=tupleize_cols,
        date_format=date_format,
        doublequote=doublequote,
        escapechar=escapechar,
        decimal=decimal,
    )
    writer = CSVFormatter(frame, path_or_buf, **settings)
    writer.save()

    if path_or_buf is not None:
        return None
    # No destination was supplied: read the text back from the
    # formatter's own path_or_buf object.
    return writer.path_or_buf.getvalue()
9264+
91649265

91659266
def _doc_parms(cls):
91669267
"""Return a tuple of the doc parms."""

pandas/core/series.py

+54-50
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
from pandas.core.arrays import ExtensionArray
1818
from pandas.core.dtypes.common import (
1919
is_categorical_dtype,
20+
is_string_like,
2021
is_bool,
2122
is_integer, is_integer_dtype,
2223
is_float_dtype,
@@ -3760,56 +3761,59 @@ def from_csv(cls, path, sep=',', parse_dates=True, header=None,
37603761

37613762
return result
37623763

3763-
def to_csv(self, path=None, index=True, sep=",", na_rep='',
           float_format=None, header=False, index_label=None,
           mode='w', encoding=None, compression=None, date_format=None,
           decimal='.'):
    """
    Serialize the Series as comma-separated values (csv).

    The Series is wrapped in a DataFrame and written through that
    frame's ``to_csv``.

    Parameters
    ----------
    path : string or file handle, default None
        Destination path or object. When None, the csv text is returned
        as a string instead.
    na_rep : string, default ''
        Text written in place of missing data.
    float_format : string, default None
        Format string applied to floating point numbers.
    header : boolean, default False
        Whether to write out the series name.
    index : boolean, default True
        Whether to write the row names (index).
    index_label : string or sequence, default None
        Label for the index column(s). With None, and `header` and
        `index` both True, the index names are used. Pass a sequence
        when a MultiIndex is used.
    mode : Python write mode, default 'w'
    sep : character, default ","
        Delimiter placed between fields.
    encoding : string, optional
        Encoding to use if the contents are non-ascii, for python
        versions prior to 3.
    compression : string, optional
        Compression to use in the output file. Allowed values are
        'gzip', 'bz2', 'zip', 'xz'. Only used when the first argument
        is a filename.
    date_format : string, default None
        Format string applied to datetime objects.
    decimal : string, default '.'
        Character recognized as decimal separator, e.g. ',' for
        European data.

    Returns
    -------
    string or None
        The csv text when `path` is None, otherwise None.
    """
    from pandas.core.frame import DataFrame

    forwarded = {
        'index': index,
        'sep': sep,
        'na_rep': na_rep,
        'float_format': float_format,
        'header': header,
        'index_label': index_label,
        'mode': mode,
        'encoding': encoding,
        'compression': compression,
        'date_format': date_format,
        'decimal': decimal,
    }
    # The frame-level writer only produces text when no destination is
    # given; with a destination there is nothing to hand back.
    text = DataFrame(self).to_csv(path, **forwarded)
    return text if path is None else None
3764+
def to_csv(self, *args, **kwargs):
    """
    Write Series to a comma-separated values (csv) file.

    The Series is converted with ``to_frame()`` and every argument is
    forwarded by keyword to ``to_csv`` of the resulting frame; see
    ``DataFrame.to_csv`` for the meaning of the individual options.

    Parameters
    ----------
    *args
        Positional arguments, matched against the new argument order:
        path_or_buf, sep, na_rep, float_format, columns, header, index,
        index_label, mode, encoding, compression, quoting, quotechar,
        line_terminator, chunksize, tupleize_cols, date_format,
        doublequote, escapechar, decimal.

        For backward compatibility, when the second positional argument
        is not a one-character string (so it cannot be `sep`), the
        legacy order is assumed instead and a FutureWarning is issued:
        path_or_buf, index, sep, na_rep, float_format, header,
        index_label, mode, encoding, compression, date_format, decimal.
    **kwargs
        Keyword arguments. The legacy keyword ``path`` is accepted as
        an alias of ``path_or_buf`` and triggers a FutureWarning.

    Returns
    -------
    The value returned by ``to_csv`` of the converted frame.

    Raises
    ------
    ValueError
        If an argument is given both by position and by name, or if
        both ``path`` and ``path_or_buf`` are given.
    TypeError
        If more positional arguments are given than the selected
        argument order has names for.

    Warns
    -----
    FutureWarning
        If ``path`` is used, if the legacy positional order is
        detected, or if ``header`` is not passed at all (its default
        changed to True).
    """
    names = ['path_or_buf', 'sep', 'na_rep', 'float_format', 'columns',
             'header', 'index', 'index_label', 'mode', 'encoding',
             'compression', 'quoting', 'quotechar', 'line_terminator',
             'chunksize', 'tupleize_cols', 'date_format', 'doublequote',
             'escapechar', 'decimal']

    old_names = ['path_or_buf', 'index', 'sep', 'na_rep', 'float_format',
                 'header', 'index_label', 'mode', 'encoding',
                 'compression', 'date_format', 'decimal']

    if 'path' in kwargs:
        # Two destinations would be ambiguous; refuse instead of letting
        # one silently overwrite the other.
        if 'path_or_buf' in kwargs:
            raise ValueError("Argument given by both names ('path' and "
                             "'path_or_buf')")
        warnings.warn("Argument 'path' is now named 'path_or_buf'",
                      FutureWarning, stacklevel=2)
        kwargs['path_or_buf'] = kwargs.pop('path')

    if len(args) > 1:
        # Either "index" (old signature) or "sep" (new signature) is being
        # passed as second argument (while the first is the same)
        maybe_sep = args[1]

        if not (is_string_like(maybe_sep) and len(maybe_sep) == 1):
            # old signature
            warnings.warn("The signature of `Series.to_csv` was aligned "
                          "to that of `DataFrame.to_csv`. Note that the "
                          "order of arguments changed, and the new one "
                          "has 'sep' in first place, for which \"{}\" is "
                          "not a valid value. The old order will cease to "
                          "be supported in a future version. Please refer "
                          "to the documentation for `DataFrame.to_csv` "
                          "when updating your function "
                          "calls.".format(maybe_sep),
                          FutureWarning, stacklevel=2)
            names = old_names

    # zip() below would silently drop surplus positionals; reject them.
    if len(args) > len(names):
        raise TypeError("to_csv() takes at most {} positional arguments "
                        "({} given)".format(len(names), len(args)))

    for position, (key, value) in enumerate(zip(names, args)):
        if key in kwargs:
            raise ValueError("Argument given by name ('{}') and position "
                             "({})".format(key, position))
        kwargs[key] = value

    # Warn only when the caller left 'header' out entirely: an explicit
    # value, including None, is exactly what the message asks for.
    if 'header' not in kwargs:
        warnings.warn("Argument 'header' has changed default value to "
                      "True: please pass an explicit value to suppress "
                      "this warning",
                      FutureWarning, stacklevel=2)

    return self.to_frame().to_csv(**kwargs)
38133817

38143818
@Appender(generic._shared_docs['to_excel'] % _shared_doc_kwargs)
38153819
def to_excel(self, excel_writer, sheet_name='Sheet1', na_rep='',

0 commit comments

Comments
 (0)