Review notes (text before the first "diff --git" header is ignored by
"git apply"):

* read_csv() accepts ``lineterminator`` as a csv-module-style alias of
  ``line_terminator``.  Every other unknown keyword still raises TypeError,
  so pandas/tests/io/parser/test_common.py is left untouched.
* The to_csv() hunks below only change the signature and the docstring.
  NOTE(review): the to_csv() body is not part of this patch -- confirm that
  ``lineterminator`` is actually mapped onto ``line_terminator`` there,
  otherwise the new to_csv tests cannot pass.
* NOTE(review): the parser docstring edited below is shared with
  read_table(), whose signature is not changed here -- confirm the rename
  of the documented parameter is intended for read_table() as well.
* The new tests compare the written bytes/strings with ``==``; the outputs
  are data, not regular expressions.

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index fe412bc0ce937..2238c3b01e48e 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -3065,6 +3065,7 @@ def to_csv(
         decimal: Optional[str] = ".",
         errors: str = "strict",
         storage_options: StorageOptions = None,
+        **kwargs,
     ) -> Optional[str]:
         r"""
         Write object to a comma-separated values (csv) file.
@@ -3190,6 +3191,13 @@ def to_csv(
             a file-like buffer. See the fsspec and backend storage implementation
             docs for the set of allowed keys and values
 
+            .. versionadded:: 1.2.0
+
+        **kwargs
+            Additional keyword arguments accepted for compatibility with the
+            :mod:`csv` module, e.g. ``lineterminator`` (an alias of
+            ``line_terminator``).
+
             .. versionadded:: 1.2.0
 
         Returns
diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
index 5d49757ce7d58..0f312e4f6b431 100644
--- a/pandas/io/parsers.py
+++ b/pandas/io/parsers.py
@@ -285,7 +285,7 @@
     Thousands separator.
 decimal : str, default '.'
     Character to recognize as decimal point (e.g. use ',' for European data).
-lineterminator : str (length 1), optional
+line_terminator : str (length 1), optional
     Character to break file into lines. Only valid with C parser.
 quotechar : str (length 1), optional
     The character used to denote the start and end of a quoted item. Quoted
@@ -346,6 +346,12 @@
     values. The options are `None` for the ordinary converter,
     `high` for the high-precision converter, and `round_trip` for the
     round-trip converter.
+**kwargs
+    Additional keyword arguments accepted by ``read_csv`` for compatibility
+    with the :mod:`csv` module. Only ``lineterminator`` (an alias of
+    ``line_terminator``) is recognised; any other keyword raises ``TypeError``.
+
+    .. versionadded:: 1.2.0
 
 Returns
 -------
@@ -580,7 +586,7 @@ def read_csv(
     compression="infer",
     thousands=None,
     decimal: str = ".",
-    lineterminator=None,
+    line_terminator=None,
     quotechar='"',
     quoting=csv.QUOTE_MINIMAL,
     doublequote=True,
@@ -597,6 +603,7 @@ def read_csv(
     memory_map=False,
     float_precision=None,
     storage_options=None,
+    **kwargs,
 ):
     # gh-23761
     #
@@ -634,6 +641,18 @@ def read_csv(
         engine = "c"
         engine_specified = False
 
+    # ``lineterminator`` (the spelling used by the ``csv`` module) is the only
+    # extra keyword accepted; anything else is rejected so typos still raise.
+    lineterminator = kwargs.pop("lineterminator", None)
+    if kwargs:
+        unexpected = next(iter(kwargs))
+        msg = f"read_csv() got an unexpected keyword argument '{unexpected}'"
+        raise TypeError(msg)
+    if lineterminator is None:
+        lineterminator = line_terminator
+    elif line_terminator is not None:
+        raise TypeError("Specify only one of 'line_terminator' and 'lineterminator'")
+
     kwds.update(
         delimiter=delimiter,
         engine=engine,
diff --git a/pandas/tests/frame/test_to_csv.py b/pandas/tests/frame/test_to_csv.py
index db7347bb863a5..da8c5a1ffa4bd 100644
--- a/pandas/tests/frame/test_to_csv.py
+++ b/pandas/tests/frame/test_to_csv.py
@@ -998,6 +998,45 @@ def test_to_csv_line_terminators(self):
             with open(path, mode="rb") as f:
                 assert f.read() == expected
 
+    def test_to_csv_lineterminator_alternative_args(self):
+        # GH 9568
+        # examples from test_to_csv_line_terminators
+        # test equivalence of line_terminator vs. lineterminator keyword args
+
+        df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}, index=["one", "two", "three"])
+
+        # case 1: CRLF as line terminator
+
+        with tm.ensure_clean() as path:
+            df.to_csv(path, line_terminator="\r\n")
+
+            with open(path, mode="rb") as f:
+                res_line_terminator = f.read()
+
+        with tm.ensure_clean() as path:
+            df.to_csv(path, lineterminator="\r\n")
+
+            with open(path, mode="rb") as f:
+                res_lineterminator = f.read()
+
+        assert res_line_terminator == res_lineterminator
+
+        # case 2: LF as line terminator
+
+        with tm.ensure_clean() as path:
+            df.to_csv(path, line_terminator="\n")
+
+            with open(path, mode="rb") as f:
+                res_line_terminator = f.read()
+
+        with tm.ensure_clean() as path:
+            df.to_csv(path, lineterminator="\n")
+
+            with open(path, mode="rb") as f:
+                res_lineterminator = f.read()
+
+        assert res_line_terminator == res_lineterminator
+
     def test_to_csv_from_csv_categorical(self):
 
         # CSV with categoricals should result in the same output
diff --git a/pandas/tests/io/formats/test_to_csv.py b/pandas/tests/io/formats/test_to_csv.py
index 753b8b6eda9c5..7104b6de4dd6c 100644
--- a/pandas/tests/io/formats/test_to_csv.py
+++ b/pandas/tests/io/formats/test_to_csv.py
@@ -330,10 +330,15 @@ def test_to_csv_multi_index(self):
     @pytest.mark.parametrize("klass", [pd.DataFrame, pd.Series])
     def test_to_csv_single_level_multi_index(self, ind, expected, klass):
         # see gh-19589
-        result = klass(pd.Series([1], ind, name="data")).to_csv(
+        # GH9568 test for equivalence between line_terminator and lineterminator
+        result_line_terminator = klass(pd.Series([1], ind, name="data")).to_csv(
             line_terminator="\n", header=True
         )
-        assert result == expected
+        result_lineterminator = klass(pd.Series([1], ind, name="data")).to_csv(
+            lineterminator="\n", header=True
+        )
+        assert result_lineterminator == result_line_terminator
+        assert result_line_terminator == expected
 
     def test_to_csv_string_array_ascii(self):
         # GH 10813
@@ -436,6 +441,25 @@ def test_to_csv_string_with_crlf(self):
             with open(path, "rb") as f:
                 assert f.read() == expected_crlf
 
+    def test_to_csv_string_line_terminator_alternative_args(self):
+        # GH 9568
+        # test equivalence of line_terminator vs. lineterminator keyword args
+
+        data = {"int": [1, 2, 3], "str_lf": ["abc", "d\nef", "g\nh\n\ni"]}
+        df = pd.DataFrame(data)
+
+        with tm.ensure_clean("crlf_test.csv") as path:
+            df.to_csv(path, line_terminator="\n", index=False)
+            with open(path, "rb") as f:
+                res_line_terminator = f.read()
+
+        with tm.ensure_clean("crlf_test.csv") as path:
+            df.to_csv(path, lineterminator="\n", index=False)
+            with open(path, "rb") as f:
+                res_lineterminator = f.read()
+
+        assert res_line_terminator == res_lineterminator
+
     def test_to_csv_stdout_file(self, capsys):
         # GH 21561
         df = pd.DataFrame(