Skip to content

Commit 2a42c1c

Browse files
Backport PR #38997: REGR: errors='replace' when encoding/errors are not specified (#39021)
Co-authored-by: Torsten Wörtwein <[email protected]>
1 parent 3dd9561 commit 2a42c1c

File tree

3 files changed

+13
-2
lines changed

3 files changed

+13
-2
lines changed

doc/source/whatsnew/v1.2.1.rst

+1
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ Fixed regressions
2424
- Fixed regression in :func:`read_excel` with non-rawbyte file handles (:issue:`38788`)
2525
- Bug in :meth:`read_csv` with ``float_precision="high"`` caused segfault or wrong parsing of long exponent strings. This resulted in a regression in some cases as the default for ``float_precision`` was changed in pandas 1.2.0 (:issue:`38753`)
2626
- Fixed regression in :meth:`Rolling.skew` and :meth:`Rolling.kurt` modifying the object inplace (:issue:`38908`)
27+
- Fixed regression in :meth:`read_csv` and other read functions where the encoding error policy (``errors``) did not default to ``"replace"`` when no encoding was specified (:issue:`38989`)
2728

2829
.. ---------------------------------------------------------------------------
2930

pandas/io/common.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -547,8 +547,7 @@ def get_handle(
547547
Returns the dataclass IOHandles
548548
"""
549549
# Windows does not default to utf-8. Set to utf-8 for a consistent behavior
550-
if encoding is None:
551-
encoding = "utf-8"
550+
encoding_passed, encoding = encoding, encoding or "utf-8"
552551

553552
# read_csv does not know whether the buffer is opened in binary/text mode
554553
if _is_binary_mode(path_or_buf, mode) and "b" not in mode:
@@ -635,6 +634,9 @@ def get_handle(
635634
# Check whether the filename is to be opened in binary mode.
636635
# Binary mode does not support 'encoding' and 'newline'.
637636
if ioargs.encoding and "b" not in ioargs.mode:
637+
if errors is None and encoding_passed is None:
638+
# replace undecodable bytes (rather than raising) when neither errors nor encoding is specified
639+
errors = "replace"
638640
# Encoding
639641
handle = open(
640642
handle,

pandas/tests/io/test_common.py

+8
Original file line numberDiff line numberDiff line change
@@ -418,3 +418,11 @@ def test_is_fsspec_url():
418418
assert not icom.is_fsspec_url("random:pandas/somethingelse.com")
419419
assert not icom.is_fsspec_url("/local/path")
420420
assert not icom.is_fsspec_url("relative/local/path")
421+
422+
423+
def test_default_errors():
424+
# GH 38989
425+
with tm.ensure_clean() as path:
426+
file = Path(path)
427+
file.write_bytes(b"\xe4\na\n1")
428+
tm.assert_frame_equal(pd.read_csv(file, skiprows=[0]), pd.DataFrame({"a": [1]}))

0 commit comments

Comments
 (0)