diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index ed4022d422b4d..a0633a2be085a 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -245,7 +245,7 @@ Deprecations - :meth:`DataFrame.to_stata`, :meth:`read_stata`, :class:`StataReader` and :class:`StataWriter` have deprecated the ``encoding`` argument. The encoding of a Stata dta file is determined by the file type and cannot be changed (:issue:`21244`). - :meth:`MultiIndex.to_hierarchical` is deprecated and will be removed in a future version (:issue:`21613`) - :meth:`Series.ptp` is deprecated. Use ``numpy.ptp`` instead (:issue:`21614`) -- +- The signature in :meth:`Series.to_csv` has been deprecated. Please follow the signature in :meth:`DataFrame.to_csv` instead (:issue:`19745`) .. _whatsnew_0240.prior_deprecations: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 6380944338010..f0aa00163c902 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1710,103 +1710,6 @@ def to_panel(self): return self._constructor_expanddim(new_mgr) - def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None, - columns=None, header=True, index=True, index_label=None, - mode='w', encoding=None, compression=None, quoting=None, - quotechar='"', line_terminator='\n', chunksize=None, - tupleize_cols=None, date_format=None, doublequote=True, - escapechar=None, decimal='.'): - r"""Write DataFrame to a comma-separated values (csv) file - - Parameters - ---------- - path_or_buf : string or file handle, default None - File path or object, if None is provided the result is returned as - a string. - sep : character, default ',' - Field delimiter for the output file. - na_rep : string, default '' - Missing data representation - float_format : string, default None - Format string for floating point numbers - columns : sequence, optional - Columns to write - header : boolean or list of string, default True - Write out the column names. 
If a list of strings is given it is - assumed to be aliases for the column names - index : boolean, default True - Write row names (index) - index_label : string or sequence, or False, default None - Column label for index column(s) if desired. If None is given, and - `header` and `index` are True, then the index names are used. A - sequence should be given if the DataFrame uses MultiIndex. If - False do not print fields for index names. Use index_label=False - for easier importing in R - mode : str - Python write mode, default 'w' - encoding : string, optional - A string representing the encoding to use in the output file, - defaults to 'ascii' on Python 2 and 'utf-8' on Python 3. - compression : {'infer', 'gzip', 'bz2', 'xz', None}, default None - If 'infer' and `path_or_buf` is path-like, then detect compression - from the following extensions: '.gz', '.bz2' or '.xz' - (otherwise no compression). - line_terminator : string, default ``'\n'`` - The newline character or character sequence to use in the output - file - quoting : optional constant from csv module - defaults to csv.QUOTE_MINIMAL. If you have set a `float_format` - then floats are converted to strings and thus csv.QUOTE_NONNUMERIC - will treat them as non-numeric - quotechar : string (length 1), default '\"' - character used to quote fields - doublequote : boolean, default True - Control quoting of `quotechar` inside a field - escapechar : string (length 1), default None - character used to escape `sep` and `quotechar` when appropriate - chunksize : int or None - rows to write at a time - tupleize_cols : boolean, default False - .. deprecated:: 0.21.0 - This argument will be removed and will always write each row - of the multi-index as a separate row in the CSV file. - - Write MultiIndex columns as a list of tuples (if True) or in - the new, expanded format, where each MultiIndex column is a row - in the CSV (if False). 
- date_format : string, default None - Format string for datetime objects - decimal: string, default '.' - Character recognized as decimal separator. E.g. use ',' for - European data - - """ - - if tupleize_cols is not None: - warnings.warn("The 'tupleize_cols' parameter is deprecated and " - "will be removed in a future version", - FutureWarning, stacklevel=2) - else: - tupleize_cols = False - - from pandas.io.formats.csvs import CSVFormatter - formatter = CSVFormatter(self, path_or_buf, - line_terminator=line_terminator, sep=sep, - encoding=encoding, - compression=compression, quoting=quoting, - na_rep=na_rep, float_format=float_format, - cols=columns, header=header, index=index, - index_label=index_label, mode=mode, - chunksize=chunksize, quotechar=quotechar, - tupleize_cols=tupleize_cols, - date_format=date_format, - doublequote=doublequote, - escapechar=escapechar, decimal=decimal) - formatter.save() - - if path_or_buf is None: - return formatter.path_or_buf.getvalue() - @Appender(_shared_docs['to_excel'] % _shared_doc_kwargs) def to_excel(self, excel_writer, sheet_name='Sheet1', na_rep='', float_format=None, columns=None, header=True, index=True, diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 8da678e0adec0..3dfce88e7e8ed 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -9161,6 +9161,107 @@ def first_valid_index(self): def last_valid_index(self): return self._find_valid_index('last') + def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None, + columns=None, header=True, index=True, index_label=None, + mode='w', encoding=None, compression=None, quoting=None, + quotechar='"', line_terminator='\n', chunksize=None, + tupleize_cols=None, date_format=None, doublequote=True, + escapechar=None, decimal='.'): + r"""Export to a comma-separated values (CSV) file + + Parameters + ---------- + path_or_buf : string or file handle, default None + File path or object, if None is provided the result is returned as + a 
string. + sep : character, default ',' + Field delimiter for the output file. + na_rep : string, default '' + Missing data representation + float_format : string, default None + Format string for floating point numbers + columns : sequence, optional + Columns to write + header : boolean or list of string, default True + Write out the column names. If a list of strings is given it is + assumed to be aliases for the column names + index : boolean, default True + Write row names (index) + index_label : string or sequence, or False, default None + Column label for index column(s) if desired. If None is given, and + `header` and `index` are True, then the index names are used. A + sequence should be given if the DataFrame uses MultiIndex. If + False do not print fields for index names. Use index_label=False + for easier importing in R + mode : str + Python write mode, default 'w' + encoding : string, optional + A string representing the encoding to use in the output file, + defaults to 'ascii' on Python 2 and 'utf-8' on Python 3. + compression : {'infer', 'gzip', 'bz2', 'xz', None}, default None + If 'infer' and `path_or_buf` is path-like, then detect compression + from the following extensions: '.gz', '.bz2' or '.xz' + (otherwise no compression). + line_terminator : string, default ``'\n'`` + The newline character or character sequence to use in the output + file + quoting : optional constant from csv module + defaults to csv.QUOTE_MINIMAL. If you have set a `float_format` + then floats are converted to strings and thus csv.QUOTE_NONNUMERIC + will treat them as non-numeric + quotechar : string (length 1), default '\"' + character used to quote fields + doublequote : boolean, default True + Control quoting of `quotechar` inside a field + escapechar : string (length 1), default None + character used to escape `sep` and `quotechar` when appropriate + chunksize : int or None + rows to write at a time + tupleize_cols : boolean, default False + .. 
deprecated:: 0.21.0 + This argument will be removed and will always write each row + of the multi-index as a separate row in the CSV file. + + Write MultiIndex columns as a list of tuples (if True) or in + the new, expanded format, where each MultiIndex column is a row + in the CSV (if False). + date_format : string, default None + Format string for datetime objects + decimal: string, default '.' + Character recognized as decimal separator. E.g. use ',' for + European data + + """ + + from pandas.core.frame import DataFrame + from pandas.io.formats.csvs import CSVFormatter + + df = self if isinstance(self, DataFrame) else DataFrame(self) + + if tupleize_cols is not None: + warnings.warn("The 'tupleize_cols' parameter is deprecated and " + "will be removed in a future version", + FutureWarning, stacklevel=2) + else: + tupleize_cols = False + + formatter = CSVFormatter(df, path_or_buf, + line_terminator=line_terminator, sep=sep, + encoding=encoding, + compression=compression, quoting=quoting, + na_rep=na_rep, float_format=float_format, + cols=columns, header=header, index=index, + index_label=index_label, mode=mode, + chunksize=chunksize, quotechar=quotechar, + tupleize_cols=tupleize_cols, + date_format=date_format, + doublequote=doublequote, + escapechar=escapechar, decimal=decimal) + formatter.save() + + if path_or_buf is None: + return formatter.path_or_buf.getvalue() + def _doc_parms(cls): """Return a tuple of the doc parms.""" diff --git a/pandas/core/series.py b/pandas/core/series.py index 0bdb9d9cc23a6..df2528b00df4a 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -3760,24 +3760,30 @@ def from_csv(cls, path, sep=',', parse_dates=True, header=None, return result - def to_csv(self, path=None, index=True, sep=",", na_rep='', - float_format=None, header=False, index_label=None, + def to_csv(self, path_or_buf=None, index=True, sep=",", na_rep='', + float_format=None, header=None, index_label=None, mode='w', encoding=None, compression=None, 
date_format=None, -                 decimal='.'): -        """ -        Write Series to a comma-separated values (csv) file +                 decimal='.', **kwargs): +        """Export to a comma-separated values (CSV) file + +        .. deprecated:: 0.24.0 +            The signature will be aligned to that of :func:`DataFrame.to_csv`. + +            :func:`Series.to_csv` will align its signature with that of +            `DataFrame.to_csv`. Please pass in keyword arguments in accordance +            with that signature instead.          Parameters         ---------- -        path : string or file handle, default None +        path_or_buf : string or file handle, default None             File path or object, if None is provided the result is returned as             a string.         na_rep : string, default ''             Missing data representation         float_format : string, default None             Format string for floating point numbers -        header : boolean, default False -            Write out series name +        header : boolean, default None +            Write out Series name. By default, the name will be omitted.         index : boolean, default True             Write row names (index)         index_label : string or sequence, default None @@ -3800,15 +3806,47 @@ def to_csv(self, path=None, index=True, sep=",", na_rep='',             Character recognized as decimal separator. E.g. use ',' for             European data         """ +        from pandas.core.frame import DataFrame         df = DataFrame(self) -        # result is only a string if no path provided, otherwise None -        result = df.to_csv(path, index=index, sep=sep, na_rep=na_rep, -                           float_format=float_format, header=header, -                           index_label=index_label, mode=mode, -                           encoding=encoding, compression=compression, -                           date_format=date_format, decimal=decimal) -        if path is None: + +        new_path_key = "path_or_buf" +        old_path_key = "path" +        emit_warning = False + +        # For backwards compatibility, override the `path_or_buf` +        # argument if a `path` keyword argument is provided. 
+        if kwargs.get(old_path_key, None) is not None: +            kwargs[new_path_key] = kwargs.pop(old_path_key) +            emit_warning = True + +        if header is None: +            emit_warning = True +            header = False + +        if emit_warning: +            warnings.warn("The signature of `Series.to_csv` will be " +                          "aligned to that of `DataFrame.to_csv` in the " +                          "future. Note that some of the default arguments " +                          "and argument names are different, so please refer " +                          "to the documentation for `DataFrame.to_csv` when " +                          "changing your function calls.", +                          FutureWarning, stacklevel=2) +            # Do not overwrite `header` here: it was already normalized +            # above, and an explicitly passed header=True must be honored. + +        to_csv_kwargs = dict(path_or_buf=path_or_buf, index=index, sep=sep, +                             na_rep=na_rep, float_format=float_format, +                             header=header, index_label=index_label, +                             mode=mode, encoding=encoding, +                             compression=compression, +                             date_format=date_format, +                             decimal=decimal) +        to_csv_kwargs.update(**kwargs) + +        # Result is only a string if no path provided, otherwise None. +        result = df.to_csv(**to_csv_kwargs) + +        if to_csv_kwargs[new_path_key] is None:             return result      @Appender(generic._shared_docs['to_excel'] % _shared_doc_kwargs) diff --git a/pandas/tests/frame/test_to_csv.py b/pandas/tests/frame/test_to_csv.py index 3ad25ae73109e..45ed424f006cd 100644 --- a/pandas/tests/frame/test_to_csv.py +++ b/pandas/tests/frame/test_to_csv.py @@ -893,14 +893,16 @@ def test_to_csv_line_terminators(self):      def test_to_csv_from_csv_categorical(self): -        # CSV with categoricals should result in the same output as when one -        # would add a "normal" Series/DataFrame. +        # CSV with Categoricals should result in the same output +        # as when one would add a "normal" Series/DataFrame. 
s = Series(pd.Categorical(['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c'])) s2 = Series(['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c']) res = StringIO() - s.to_csv(res) + + s.to_csv(res, header=False) exp = StringIO() - s2.to_csv(exp) + + s2.to_csv(exp, header=False) assert res.getvalue() == exp.getvalue() df = DataFrame({"s": s}) diff --git a/pandas/tests/series/test_io.py b/pandas/tests/series/test_io.py index 814d794d45c18..6d653445b324b 100644 --- a/pandas/tests/series/test_io.py +++ b/pandas/tests/series/test_io.py @@ -37,7 +37,7 @@ def read_csv(self, path, **kwargs): def test_from_csv_deprecation(self): # see gh-17812 with ensure_clean() as path: - self.ts.to_csv(path) + self.ts.to_csv(path, header=False) with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): @@ -45,10 +45,28 @@ def test_from_csv_deprecation(self): depr_ts = Series.from_csv(path) assert_series_equal(depr_ts, ts) + @pytest.mark.parametrize("arg", ["path", "header", "both"]) + def test_to_csv_deprecation(self, arg): + # see gh-19745 + with ensure_clean() as path: + if arg == "path": + kwargs = dict(path=path, header=False) + elif arg == "header": + kwargs = dict(path_or_buf=path) + else: # Both discrepancies match. + kwargs = dict(path=path) + + with tm.assert_produces_warning(FutureWarning): + self.ts.to_csv(**kwargs) + + # Make sure roundtrip still works. 
+ ts = self.read_csv(path) + assert_series_equal(self.ts, ts, check_names=False) + def test_from_csv(self): with ensure_clean() as path: - self.ts.to_csv(path) + self.ts.to_csv(path, header=False) ts = self.read_csv(path) assert_series_equal(self.ts, ts, check_names=False) @@ -65,7 +83,7 @@ def test_from_csv(self): ts_h = self.read_csv(path, header=0) assert ts_h.name == "ts" - self.series.to_csv(path) + self.series.to_csv(path, header=False) series = self.read_csv(path) assert_series_equal(self.series, series, check_names=False) @@ -92,13 +110,13 @@ def test_to_csv(self): import io with ensure_clean() as path: - self.ts.to_csv(path) + self.ts.to_csv(path, header=False) with io.open(path, newline=None) as f: lines = f.readlines() assert (lines[1] != '\n') - self.ts.to_csv(path, index=False) + self.ts.to_csv(path, index=False, header=False) arr = np.loadtxt(path) assert_almost_equal(arr, self.ts.values) @@ -106,7 +124,7 @@ def test_to_csv_unicode_index(self): buf = StringIO() s = Series([u("\u05d0"), "d2"], index=[u("\u05d0"), u("\u05d1")]) - s.to_csv(buf, encoding="UTF-8") + s.to_csv(buf, encoding="UTF-8", header=False) buf.seek(0) s2 = self.read_csv(buf, index_col=0, encoding="UTF-8") @@ -116,7 +134,7 @@ def test_to_csv_float_format(self): with ensure_clean() as filename: ser = Series([0.123456, 0.234567, 0.567567]) - ser.to_csv(filename, float_format="%.2f") + ser.to_csv(filename, float_format="%.2f", header=False) rs = self.read_csv(filename) xp = Series([0.12, 0.23, 0.57]) @@ -128,14 +146,15 @@ def test_to_csv_list_entries(self): split = s.str.split(r'\s+and\s+') buf = StringIO() - split.to_csv(buf) + split.to_csv(buf, header=False) def test_to_csv_path_is_none(self): - # GH 8215 + # see gh-8215 + # # Series.to_csv() was returning None, inconsistent with # DataFrame.to_csv() which returned string s = Series([1, 2, 3]) - csv_str = s.to_csv(path=None) + csv_str = s.to_csv(None, header=False) assert isinstance(csv_str, str) @pytest.mark.parametrize('s,encoding', [ 
@@ -150,8 +169,8 @@ def test_to_csv_compression(self, s, encoding, compression): with ensure_clean() as filename: - s.to_csv(filename, compression=compression, encoding=encoding, - header=True) + s.to_csv(filename, compression=compression, + encoding=encoding, header=True) # test the round trip - to_csv -> read_csv result = pd.read_csv(filename, compression=compression, encoding=encoding, index_col=0, squeeze=True) diff --git a/pandas/tests/test_common.py b/pandas/tests/test_common.py index 61f838eeeeb30..21f34aa3995d7 100644 --- a/pandas/tests/test_common.py +++ b/pandas/tests/test_common.py @@ -2,6 +2,7 @@ import pytest import os +import warnings import collections from functools import partial @@ -15,6 +16,14 @@ import pandas.util.testing as tm +def catch_to_csv_depr(): + # Catching warnings because Series.to_csv has + # been deprecated. Remove this context when + # Series.to_csv has been aligned. + + return warnings.catch_warnings(record=True) + + def test_mut_exclusive(): msg = "mutually exclusive arguments: '[ab]' and '[ab]'" with tm.assert_raises_regex(TypeError, msg): @@ -219,11 +228,12 @@ def test_standardize_mapping(): def test_compression_size(obj, method, compression_only): with tm.ensure_clean() as filename: - getattr(obj, method)(filename, compression=compression_only) - compressed = os.path.getsize(filename) - getattr(obj, method)(filename, compression=None) - uncompressed = os.path.getsize(filename) - assert uncompressed > compressed + with catch_to_csv_depr(): + getattr(obj, method)(filename, compression=compression_only) + compressed = os.path.getsize(filename) + getattr(obj, method)(filename, compression=None) + uncompressed = os.path.getsize(filename) + assert uncompressed > compressed @pytest.mark.parametrize('obj', [ @@ -236,16 +246,22 @@ def test_compression_size_fh(obj, method, compression_only): with tm.ensure_clean() as filename: f, _handles = _get_handle(filename, 'w', compression=compression_only) - with f: - getattr(obj, method)(f) - 
assert not f.closed + + with catch_to_csv_depr(): + with f: + getattr(obj, method)(f) + assert not f.closed assert f.closed compressed = os.path.getsize(filename) + with tm.ensure_clean() as filename: f, _handles = _get_handle(filename, 'w', compression=None) - with f: - getattr(obj, method)(f) - assert not f.closed + + with catch_to_csv_depr(): + with f: + getattr(obj, method)(f) + assert not f.closed + assert f.closed uncompressed = os.path.getsize(filename) assert uncompressed > compressed