Skip to content

Commit 959eee3

Browse files
committed
ENH: Add encoding_errors option in pandas.DataFrame.to_csv (#27750)
encoding_errors : str, default 'strict' Behavior when the input string can’t be converted according to the encoding’s rules (strict, ignore, replace, etc.) See: https://docs.python.org/3/library/codecs.html#codec-base-classes
1 parent 9d7a282 commit 959eee3

File tree

6 files changed

+499
-4
lines changed

6 files changed

+499
-4
lines changed

doc/source/user_guide/io.rst

+7
Original file line numberDiff line numberDiff line change
@@ -332,6 +332,11 @@ encoding : str, default ``None``
332332
Encoding to use for UTF when reading/writing (e.g. ``'utf-8'``). `List of
333333
Python standard encodings
334334
<https://docs.python.org/3/library/codecs.html#standard-encodings>`_.
335+
encoding_errors : str, default 'strict'
336+
Behavior when the input string can’t be converted according to
337+
the encoding’s rules (strict, ignore, replace, etc.)
338+
See: https://docs.python.org/3/library/codecs.html#codec-base-classes
339+
.. versionadded:: 1.0.0
335340
dialect : str or :class:`python:csv.Dialect` instance, default ``None``
336341
If provided, this parameter will override values (default or not) for the
337342
following parameters: `delimiter`, `doublequote`, `escapechar`,
@@ -1708,6 +1713,8 @@ function takes a number of arguments. Only the first is required.
17081713
appropriate (default None)
17091714
* ``chunksize``: Number of rows to write at a time
17101715
* ``date_format``: Format string for datetime objects
1716+
* ``encoding_errors``: Behavior when the input string can’t be converted according to the encoding’s rules (strict, ignore, replace, etc.)
1717+
.. versionadded:: 1.0.0
17111718

17121719
Writing a formatted string
17131720
++++++++++++++++++++++++++

doc/source/whatsnew/v1.0.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ including other versions of pandas.
2121
Enhancements
2222
~~~~~~~~~~~~
2323

24-
-
24+
- :meth:`DataFrame.to_csv` now accepts an ``encoding_errors`` option (:issue:`27750`).
2525
-
2626

2727
.. _whatsnew_1000.enhancements.other:

pandas/core/generic.py

+7
Original file line numberDiff line numberDiff line change
@@ -3069,6 +3069,7 @@ def to_csv(
30693069
doublequote=True,
30703070
escapechar=None,
30713071
decimal=".",
3072+
encoding_errors="strict",
30723073
):
30733074
r"""
30743075
Write object to a comma-separated values (csv) file.
@@ -3151,6 +3152,11 @@ def to_csv(
31513152
decimal : str, default '.'
31523153
Character recognized as decimal separator. E.g. use ',' for
31533154
European data.
3155+
encoding_errors : str, default 'strict'
3156+
Behavior when the input string can’t be converted according to
3157+
the encoding’s rules (strict, ignore, replace, etc.)
3158+
See: https://docs.python.org/3/library/codecs.html#codec-base-classes
3159+
.. versionadded:: 1.0.0
31543160
31553161
Returns
31563162
-------
@@ -3197,6 +3203,7 @@ def to_csv(
31973203
doublequote=doublequote,
31983204
escapechar=escapechar,
31993205
decimal=decimal,
3206+
encoding_errors=encoding_errors,
32003207
)
32013208
formatter.save()
32023209

pandas/io/common.py

+16-3
Original file line numberDiff line numberDiff line change
@@ -310,7 +310,13 @@ def _infer_compression(
310310

311311

312312
def _get_handle(
313-
path_or_buf, mode, encoding=None, compression=None, memory_map=False, is_text=True
313+
path_or_buf,
314+
mode,
315+
encoding=None,
316+
compression=None,
317+
memory_map=False,
318+
is_text=True,
319+
encoding_errors="strict",
314320
):
315321
"""
316322
Get file handle for given path/buffer and mode.
@@ -331,6 +337,11 @@ def _get_handle(
331337
is_text : boolean, default True
332338
whether file/buffer is in text format (csv, json, etc.), or in binary
333339
mode (pickle, etc.)
340+
encoding_errors : str, default 'strict'
341+
Behavior when the input string can’t be converted according to
342+
the encoding’s rules (strict, ignore, replace, etc.)
343+
See: https://docs.python.org/3/library/codecs.html#codec-base-classes
344+
.. versionadded:: 1.0.0
334345
335346
Returns
336347
-------
@@ -407,10 +418,12 @@ def _get_handle(
407418
elif is_path:
408419
if encoding:
409420
# Encoding
410-
f = open(path_or_buf, mode, encoding=encoding, newline="")
421+
f = open(
422+
path_or_buf, mode, errors=encoding_errors, encoding=encoding, newline=""
423+
)
411424
elif is_text:
412425
# No explicit encoding
413-
f = open(path_or_buf, mode, errors="replace", newline="")
426+
f = open(path_or_buf, mode, errors=encoding_errors, newline="")
414427
else:
415428
# Binary mode
416429
f = open(path_or_buf, mode)

pandas/io/formats/csvs.py

+4
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ def __init__(
5151
doublequote=True,
5252
escapechar=None,
5353
decimal=".",
54+
encoding_errors="strict",
5455
):
5556

5657
self.obj = obj
@@ -93,6 +94,8 @@ def __init__(
9394

9495
self.has_mi_columns = isinstance(obj.columns, ABCMultiIndex)
9596

97+
self.encoding_errors = encoding_errors
98+
9699
# validate mi options
97100
if self.has_mi_columns:
98101
if cols is not None:
@@ -179,6 +182,7 @@ def save(self):
179182
self.mode,
180183
encoding=self.encoding,
181184
compression=self.compression,
185+
encoding_errors=self.encoding_errors,
182186
)
183187
close = True
184188

0 commit comments

Comments
 (0)