DEPR: line_terminator->lineterminator GH#9568 (#45302)

jbrockmendel · web-flow · commit a1ed4b203c6a · 2022-01-12T08:46:15.000-05:00
diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst
@@ -1851,7 +1851,7 @@ function takes a number of arguments. Only the first is required.
 * ``mode`` : Python write mode, default 'w'
 * ``encoding``: a string representing the encoding to use if the contents are
   non-ASCII, for Python versions prior to 3
-* ``line_terminator``: Character sequence denoting line end (default ``os.linesep``)
+* ``lineterminator``: Character sequence denoting line end (default ``os.linesep``)
 * ``quoting``: Set quoting rules as in csv module (default csv.QUOTE_MINIMAL). Note that if you have set a ``float_format`` then floats are converted to strings and csv.QUOTE_NONNUMERIC will treat them as non-numeric
 * ``quotechar``: Character used to quote fields (default '"')
 * ``doublequote``: Control quoting of ``quotechar`` in fields (default True)
diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst
@@ -94,7 +94,7 @@ Other API changes
 
 Deprecations
 ~~~~~~~~~~~~
--
+- Deprecated the keyword ``line_terminator`` in :meth:`DataFrame.to_csv` and :meth:`Series.to_csv`, use ``lineterminator`` instead; this is for consistency with :func:`read_csv` and the standard library 'csv' module (:issue:`9568`)
 -
 
 .. ---------------------------------------------------------------------------
diff --git a/pandas/_libs/src/parser/tokenizer.c b/pandas/_libs/src/parser/tokenizer.c
@@ -649,7 +649,7 @@ static int parser_buffer_bytes(parser_t *self, size_t nbytes,
 #define END_LINE() END_LINE_STATE(START_RECORD)
 
 #define IS_TERMINATOR(c)                            \
-    (c == line_terminator)
+    (c == lineterminator)
 
 #define IS_QUOTE(c) ((c == self->quotechar && self->quoting != QUOTE_NONE))
 
@@ -718,7 +718,7 @@ int tokenize_bytes(parser_t *self,
     char *stream;
     char *buf = self->data + self->datapos;
 
-    const char line_terminator = (self->lineterminator == '\0') ?
+    const char lineterminator = (self->lineterminator == '\0') ?
             '\n' : self->lineterminator;
 
     // 1000 is something that couldn't fit in "char"
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -67,6 +67,7 @@
     InvalidIndexError,
 )
 from pandas.util._decorators import (
+    deprecate_kwarg,
     doc,
     rewrite_axis_style_signature,
 )
@@ -3355,6 +3356,7 @@ def to_latex(
         storage_options=_shared_docs["storage_options"],
         compression_options=_shared_docs["compression_options"],
     )
+    @deprecate_kwarg(old_arg_name="line_terminator", new_arg_name="lineterminator")
     def to_csv(
         self,
         path_or_buf: FilePath | WriteBuffer[bytes] | WriteBuffer[str] | None = None,
@@ -3370,7 +3372,7 @@ def to_csv(
         compression: CompressionOptions = "infer",
         quoting: int | None = None,
         quotechar: str = '"',
-        line_terminator: str | None = None,
+        lineterminator: str | None = None,
         chunksize: int | None = None,
         date_format: str | None = None,
         doublequote: bool_t = True,
@@ -3449,10 +3451,16 @@ def to_csv(
             will treat them as non-numeric.
         quotechar : str, default '\"'
             String of length 1. Character used to quote fields.
-        line_terminator : str, optional
+        lineterminator : str, optional
             The newline character or character sequence to use in the output
             file. Defaults to `os.linesep`, which depends on the OS in which
             this method is called ('\\n' for linux, '\\r\\n' for Windows, i.e.).
+
+            .. versionchanged:: 1.5.0
+
+                Previously was line_terminator, changed for consistency with
+                read_csv and the standard library 'csv' module.
+
         chunksize : int or None
             Rows to write at a time.
         date_format : str, default None
@@ -3527,7 +3535,7 @@ def to_csv(
 
         return DataFrameRenderer(formatter).to_csv(
             path_or_buf,
-            line_terminator=line_terminator,
+            lineterminator=lineterminator,
             sep=sep,
             encoding=encoding,
             errors=errors,
diff --git a/pandas/io/formats/csvs.py b/pandas/io/formats/csvs.py
@@ -59,7 +59,7 @@ def __init__(
         errors: str = "strict",
         compression: CompressionOptions = "infer",
         quoting: int | None = None,
-        line_terminator: str | None = "\n",
+        lineterminator: str | None = "\n",
         chunksize: int | None = None,
         quotechar: str | None = '"',
         date_format: str | None = None,
@@ -84,7 +84,7 @@ def __init__(
         self.quotechar = self._initialize_quotechar(quotechar)
         self.doublequote = doublequote
         self.escapechar = escapechar
-        self.line_terminator = line_terminator or os.linesep
+        self.lineterminator = lineterminator or os.linesep
         self.date_format = date_format
         self.cols = self._initialize_columns(cols)
         self.chunksize = self._initialize_chunksize(chunksize)
@@ -250,7 +250,7 @@ def save(self) -> None:
             # Note: self.encoding is irrelevant here
             self.writer = csvlib.writer(
                 handles.handle,
-                lineterminator=self.line_terminator,
+                lineterminator=self.lineterminator,
                 delimiter=self.sep,
                 quoting=self.quoting,
                 doublequote=self.doublequote,
diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py
@@ -57,6 +57,7 @@
     StorageOptions,
     WriteBuffer,
 )
+from pandas.util._decorators import deprecate_kwarg
 
 from pandas.core.dtypes.common import (
     is_categorical_dtype,
@@ -1128,6 +1129,7 @@ def to_string(
         string = string_formatter.to_string()
         return save_to_buffer(string, buf=buf, encoding=encoding)
 
+    @deprecate_kwarg(old_arg_name="line_terminator", new_arg_name="lineterminator")
     def to_csv(
         self,
         path_or_buf: FilePath | WriteBuffer[bytes] | WriteBuffer[str] | None = None,
@@ -1139,7 +1141,7 @@ def to_csv(
         compression: CompressionOptions = "infer",
         quoting: int | None = None,
         quotechar: str = '"',
-        line_terminator: str | None = None,
+        lineterminator: str | None = None,
         chunksize: int | None = None,
         date_format: str | None = None,
         doublequote: bool = True,
@@ -1160,7 +1162,7 @@ def to_csv(
 
         csv_formatter = CSVFormatter(
             path_or_buf=path_or_buf,
-            line_terminator=line_terminator,
+            lineterminator=lineterminator,
             sep=sep,
             encoding=encoding,
             errors=errors,
diff --git a/pandas/tests/frame/methods/test_to_csv.py b/pandas/tests/frame/methods/test_to_csv.py
@@ -866,21 +866,21 @@ def test_to_csv_index_no_leading_comma(self):
         expected = tm.convert_rows_list_to_csv_str(expected_rows)
         assert buf.getvalue() == expected
 
-    def test_to_csv_line_terminators(self):
+    def test_to_csv_lineterminators(self):
         # see gh-20353
         df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}, index=["one", "two", "three"])
 
         with tm.ensure_clean() as path:
             # case 1: CRLF as line terminator
-            df.to_csv(path, line_terminator="\r\n")
+            df.to_csv(path, lineterminator="\r\n")
             expected = b",A,B\r\none,1,4\r\ntwo,2,5\r\nthree,3,6\r\n"
 
             with open(path, mode="rb") as f:
                 assert f.read() == expected
 
         with tm.ensure_clean() as path:
             # case 2: LF as line terminator
-            df.to_csv(path, line_terminator="\n")
+            df.to_csv(path, lineterminator="\n")
             expected = b",A,B\none,1,4\ntwo,2,5\nthree,3,6\n"
 
             with open(path, mode="rb") as f:
@@ -1251,7 +1251,7 @@ def test_to_csv_single_level_multi_index(self):
         df = DataFrame([[1, 2, 3]], columns=index)
         df = df.reindex(columns=[(1,), (3,)])
         expected = ",1,3\n0,1,3\n"
-        result = df.to_csv(line_terminator="\n")
+        result = df.to_csv(lineterminator="\n")
         tm.assert_almost_equal(result, expected)
 
     def test_gz_lineend(self):
diff --git a/pandas/tests/io/formats/test_to_csv.py b/pandas/tests/io/formats/test_to_csv.py
@@ -369,9 +369,11 @@ def test_to_csv_multi_index(self):
     @pytest.mark.parametrize("klass", [DataFrame, pd.Series])
     def test_to_csv_single_level_multi_index(self, ind, expected, klass):
         # see gh-19589
-        result = klass(pd.Series([1], ind, name="data")).to_csv(
-            line_terminator="\n", header=True
-        )
+        obj = klass(pd.Series([1], ind, name="data"))
+
+        with tm.assert_produces_warning(FutureWarning, match="lineterminator"):
+            # GH#9568 standardize on lineterminator matching stdlib
+            result = obj.to_csv(line_terminator="\n", header=True)
         assert result == expected
 
     def test_to_csv_string_array_ascii(self):
@@ -425,14 +427,14 @@ def test_to_csv_string_with_lf(self):
         with tm.ensure_clean("lf_test.csv") as path:
             # case 2: LF as line terminator
             expected_lf = b'int,str_lf\n1,abc\n2,"d\nef"\n3,"g\nh\n\ni"\n'
-            df.to_csv(path, line_terminator="\n", index=False)
+            df.to_csv(path, lineterminator="\n", index=False)
             with open(path, "rb") as f:
                 assert f.read() == expected_lf
         with tm.ensure_clean("lf_test.csv") as path:
             # case 3: CRLF as line terminator
-            # 'line_terminator' should not change inner element
+            # 'lineterminator' should not change inner element
             expected_crlf = b'int,str_lf\r\n1,abc\r\n2,"d\nef"\r\n3,"g\nh\n\ni"\r\n'
-            df.to_csv(path, line_terminator="\r\n", index=False)
+            df.to_csv(path, lineterminator="\r\n", index=False)
             with open(path, "rb") as f:
                 assert f.read() == expected_crlf
 
@@ -459,19 +461,19 @@ def test_to_csv_string_with_crlf(self):
         with tm.ensure_clean("crlf_test.csv") as path:
             # case 2: LF as line terminator
             expected_lf = b'int,str_crlf\n1,abc\n2,"d\r\nef"\n3,"g\r\nh\r\n\r\ni"\n'
-            df.to_csv(path, line_terminator="\n", index=False)
+            df.to_csv(path, lineterminator="\n", index=False)
             with open(path, "rb") as f:
                 assert f.read() == expected_lf
         with tm.ensure_clean("crlf_test.csv") as path:
             # case 3: CRLF as line terminator
-            # 'line_terminator' should not change inner element
+            # 'lineterminator' should not change inner element
             expected_crlf = (
                 b"int,str_crlf\r\n"
                 b"1,abc\r\n"
                 b'2,"d\r\nef"\r\n'
                 b'3,"g\r\nh\r\n\r\ni"\r\n'
             )
-            df.to_csv(path, line_terminator="\r\n", index=False)
+            df.to_csv(path, lineterminator="\r\n", index=False)
             with open(path, "rb") as f:
                 assert f.read() == expected_crlf
 
diff --git a/pandas/tests/io/parser/test_skiprows.py b/pandas/tests/io/parser/test_skiprows.py
@@ -180,9 +180,9 @@ def test_skip_row_with_newline_and_quote(all_parsers, data, exp_data):
 
 
 @pytest.mark.parametrize(
-    "line_terminator", ["\n", "\r\n", "\r"]  # "LF"  # "CRLF"  # "CR"
+    "lineterminator", ["\n", "\r\n", "\r"]  # "LF"  # "CRLF"  # "CR"
 )
-def test_skiprows_lineterminator(all_parsers, line_terminator, request):
+def test_skiprows_lineterminator(all_parsers, lineterminator, request):
     # see gh-9079
     parser = all_parsers
     data = "\n".join(
@@ -202,11 +202,11 @@ def test_skiprows_lineterminator(all_parsers, line_terminator, request):
         columns=["date", "time", "var", "flag", "oflag"],
     )
 
-    if parser.engine == "python" and line_terminator == "\r":
+    if parser.engine == "python" and lineterminator == "\r":
         mark = pytest.mark.xfail(reason="'CR' not respect with the Python parser yet")
         request.node.add_marker(mark)
 
-    data = data.replace("\n", line_terminator)
+    data = data.replace("\n", lineterminator)
     result = parser.read_csv(
         StringIO(data),
         skiprows=1,
diff --git a/pandas/tests/io/xml/test_to_xml.py b/pandas/tests/io/xml/test_to_xml.py
@@ -1160,7 +1160,7 @@ def test_style_to_csv():
     </xsl:template>
 </xsl:stylesheet>"""
 
-    out_csv = geom_df.to_csv(line_terminator="\n")
+    out_csv = geom_df.to_csv(lineterminator="\n")
 
     if out_csv is not None:
         out_csv = out_csv.strip()

Original file line number	Diff line number	Diff line change
`@@ -94,7 +94,7 @@ Other API changes`
`94`	`94`
`95`	`95`	`Deprecations`
`96`	`96`	`~~~~~~~~~~~~`
`97`		`--`
	`97`	+- Deprecated the keyword ``line_terminator`` in :meth:`DataFrame.to_csv` and :meth:`Series.to_csv`, use ``lineterminator`` instead; this is for consistency with :func:`read_csv` and the standard library 'csv' module (:issue:`9568`)
`98`	`98`	`-`
`99`	`99`
`100`	`100`	`.. ---------------------------------------------------------------------------`