Skip to content

Commit 43a463d

Browse files
BUG: Add errors argument to to_csv() call to enable error handling for encoders (#32702)
1 parent 9a2e821 commit 43a463d

File tree

5 files changed

+32
-2
lines changed

5 files changed

+32
-2
lines changed

doc/source/whatsnew/v1.1.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -288,6 +288,7 @@ Other enhancements
288288
- :meth:`HDFStore.put` now accepts `track_times` parameter. Parameter is passed to ``create_table`` method of ``PyTables`` (:issue:`32682`).
289289
- Make :class:`pandas.core.window.Rolling` and :class:`pandas.core.window.Expanding` iterable (:issue:`11704`)
290290
- Make ``option_context`` a :class:`contextlib.ContextDecorator`, which allows it to be used as a decorator over an entire function (:issue:`34253`).
291+
- :meth:`DataFrame.to_csv` and :meth:`Series.to_csv` now accept an ``errors`` argument that specifies how encoding errors are handled (:issue:`22610`).
291292
- :meth:`groupby.transform` now allows ``func`` to be ``pad``, ``backfill`` and ``cumcount`` (:issue:`31269`).
292293
- :meth:`~pandas.io.json.read_json` now accepts `nrows` parameter. (:issue:`33916`).
293294
- :meth:`~pandas.io.gbq.read_gbq` now allows disabling the progress bar (:issue:`33360`).

pandas/core/generic.py

+8
Original file line numberDiff line numberDiff line change
@@ -3049,6 +3049,7 @@ def to_csv(
30493049
doublequote: bool_t = True,
30503050
escapechar: Optional[str] = None,
30513051
decimal: Optional[str] = ".",
3052+
errors: str = "strict",
30523053
) -> Optional[str]:
30533054
r"""
30543055
Write object to a comma-separated values (csv) file.
@@ -3143,6 +3144,12 @@ def to_csv(
31433144
decimal : str, default '.'
31443145
Character recognized as decimal separator. E.g. use ',' for
31453146
European data.
3147+
errors : str, default 'strict'
3148+
Specifies how encoding and decoding errors are to be handled.
3149+
See the errors argument for :func:`open` for a full list
3150+
of options.
3151+
3152+
.. versionadded:: 1.1.0
31463153
31473154
Returns
31483155
-------
@@ -3180,6 +3187,7 @@ def to_csv(
31803187
line_terminator=line_terminator,
31813188
sep=sep,
31823189
encoding=encoding,
3190+
errors=errors,
31833191
compression=compression,
31843192
quoting=quoting,
31853193
na_rep=na_rep,

pandas/io/common.py

+9-2
Original file line numberDiff line numberDiff line change
@@ -352,6 +352,7 @@ def get_handle(
352352
compression: Optional[Union[str, Mapping[str, Any]]] = None,
353353
memory_map: bool = False,
354354
is_text: bool = True,
355+
errors=None,
355356
):
356357
"""
357358
Get file handle for given path/buffer and mode.
@@ -390,6 +391,12 @@ def get_handle(
390391
is_text : boolean, default True
391392
whether file/buffer is in text format (csv, json, etc.), or in binary
392393
mode (pickle, etc.).
394+
errors : str, optional, default None
395+
Specifies how encoding and decoding errors are to be handled. ``None`` has the same effect as ``'strict'``.
396+
See the errors argument for :func:`open` for a full list
397+
of options.
398+
399+
.. versionadded:: 1.1.0
393400
394401
Returns
395402
-------
@@ -475,7 +482,7 @@ def get_handle(
475482
elif is_path:
476483
if encoding:
477484
# Encoding
478-
f = open(path_or_buf, mode, encoding=encoding, newline="")
485+
f = open(path_or_buf, mode, encoding=encoding, errors=errors, newline="")
479486
elif is_text:
480487
# No explicit encoding
481488
f = open(path_or_buf, mode, errors="replace", newline="")
@@ -488,7 +495,7 @@ def get_handle(
488495
if is_text and (compression or isinstance(f, need_text_wrapping)):
489496
from io import TextIOWrapper
490497

491-
g = TextIOWrapper(f, encoding=encoding, newline="")
498+
g = TextIOWrapper(f, encoding=encoding, errors=errors, newline="")
492499
if not isinstance(f, (BufferedIOBase, RawIOBase)):
493500
handles.append(g)
494501
f = g

pandas/io/formats/csvs.py

+4
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ def __init__(
4444
index_label: Optional[Union[bool, Hashable, Sequence[Hashable]]] = None,
4545
mode: str = "w",
4646
encoding: Optional[str] = None,
47+
errors: str = "strict",
4748
compression: Union[str, Mapping[str, str], None] = "infer",
4849
quoting: Optional[int] = None,
4950
line_terminator="\n",
@@ -77,6 +78,7 @@ def __init__(
7778
if encoding is None:
7879
encoding = "utf-8"
7980
self.encoding = encoding
81+
self.errors = errors
8082
self.compression = infer_compression(self.path_or_buf, compression)
8183

8284
if quoting is None:
@@ -184,6 +186,7 @@ def save(self) -> None:
184186
self.path_or_buf,
185187
self.mode,
186188
encoding=self.encoding,
189+
errors=self.errors,
187190
compression=dict(self.compression_args, method=self.compression),
188191
)
189192
close = True
@@ -215,6 +218,7 @@ def save(self) -> None:
215218
self.path_or_buf,
216219
self.mode,
217220
encoding=self.encoding,
221+
errors=self.errors,
218222
compression=compression,
219223
)
220224
f.write(buf)

pandas/tests/io/formats/test_to_csv.py

+10
Original file line numberDiff line numberDiff line change
@@ -597,3 +597,13 @@ def test_na_rep_truncated(self):
597597
result = pd.Series([1.1, 2.2]).to_csv(na_rep=".")
598598
expected = tm.convert_rows_list_to_csv_str([",0", "0,1.1", "1,2.2"])
599599
assert result == expected
600+
601+
@pytest.mark.parametrize("errors", ["surrogatepass", "ignore", "replace"])
602+
def test_to_csv_errors(self, errors):
603+
# GH 22610
604+
data = ["\ud800foo"]
605+
ser = pd.Series(data, index=pd.Index(data))
606+
with tm.ensure_clean("test.csv") as path:
607+
ser.to_csv(path, errors=errors)
608+
# No use in reading back the data as it is not the same anymore
609+
# due to the error handling

0 commit comments

Comments
 (0)