Skip to content

Commit ff42832

Browse files
committed
ENH: Add encoding_errors option in pandas.DataFrame.to_csv (#27750)
encoding_errors : str, default 'strict'. Behavior when the input string can’t be converted according to the encoding’s rules (strict, ignore, replace, etc.). See: https://docs.python.org/3/library/codecs.html#codec-base-classes
1 parent 9d7a282 commit ff42832

File tree

5 files changed

+484
-4
lines changed

5 files changed

+484
-4
lines changed

doc/source/whatsnew/v1.0.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ including other versions of pandas.
2121
Enhancements
2222
~~~~~~~~~~~~
2323

24-
-
24+
- :meth:`DataFrame.to_csv` now accepts an ``encoding_errors`` argument to control how encoding errors are handled (:issue:`27750`).
2525
-
2626

2727
.. _whatsnew_1000.enhancements.other:

pandas/core/generic.py

+6
Original file line numberDiff line numberDiff line change
@@ -3069,6 +3069,7 @@ def to_csv(
30693069
doublequote=True,
30703070
escapechar=None,
30713071
decimal=".",
3072+
encoding_errors="strict",
30723073
):
30733074
r"""
30743075
Write object to a comma-separated values (csv) file.
@@ -3151,6 +3152,10 @@ def to_csv(
31513152
decimal : str, default '.'
31523153
Character recognized as decimal separator. E.g. use ',' for
31533154
European data.
3155+
encoding_errors : str, default 'strict'
3156+
Behavior when the input string can’t be converted according to
3157+
the encoding’s rules (strict, ignore, replace, etc.)
3158+
See: https://docs.python.org/3/library/codecs.html#codec-base-classes
31543159
31553160
Returns
31563161
-------
@@ -3197,6 +3202,7 @@ def to_csv(
31973202
doublequote=doublequote,
31983203
escapechar=escapechar,
31993204
decimal=decimal,
3205+
encoding_errors=encoding_errors,
32003206
)
32013207
formatter.save()
32023208

pandas/io/common.py

+13-3
Original file line numberDiff line numberDiff line change
@@ -310,7 +310,13 @@ def _infer_compression(
310310

311311

312312
def _get_handle(
313-
path_or_buf, mode, encoding=None, compression=None, memory_map=False, is_text=True
313+
path_or_buf,
314+
mode,
315+
encoding=None,
316+
compression=None,
317+
memory_map=False,
318+
is_text=True,
319+
encoding_errors="strict",
314320
):
315321
"""
316322
Get file handle for given path/buffer and mode.
@@ -331,6 +337,10 @@ def _get_handle(
331337
is_text : boolean, default True
332338
whether file/buffer is in text format (csv, json, etc.), or in binary
333339
mode (pickle, etc.)
340+
encoding_errors : str, default 'strict'
341+
Behavior when the input string can’t be converted according to
342+
the encoding’s rules (strict, ignore, replace, etc.)
343+
See: https://docs.python.org/3/library/codecs.html#codec-base-classes
334344
335345
Returns
336346
-------
@@ -407,10 +417,10 @@ def _get_handle(
407417
elif is_path:
408418
if encoding:
409419
# Encoding
410-
f = open(path_or_buf, mode, encoding=encoding, newline="")
420+
f = open(path_or_buf, mode, errors=encoding_errors, encoding=encoding, newline="")
411421
elif is_text:
412422
# No explicit encoding
413-
f = open(path_or_buf, mode, errors="replace", newline="")
423+
f = open(path_or_buf, mode, errors=encoding_errors, newline="")
414424
else:
415425
# Binary mode
416426
f = open(path_or_buf, mode)

pandas/io/formats/csvs.py

+4
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ def __init__(
5151
doublequote=True,
5252
escapechar=None,
5353
decimal=".",
54+
encoding_errors="strict",
5455
):
5556

5657
self.obj = obj
@@ -93,6 +94,8 @@ def __init__(
9394

9495
self.has_mi_columns = isinstance(obj.columns, ABCMultiIndex)
9596

97+
self.encoding_errors = encoding_errors
98+
9699
# validate mi options
97100
if self.has_mi_columns:
98101
if cols is not None:
@@ -179,6 +182,7 @@ def save(self):
179182
self.mode,
180183
encoding=self.encoding,
181184
compression=self.compression,
185+
encoding_errors=self.encoding_errors,
182186
)
183187
close = True
184188

0 commit comments

Comments
 (0)