Skip to content

Commit 2244869

Browse files
Add fix for pandas-dev#59242
1 parent 68d9dca commit 2244869

File tree

3 files changed

+26
-2
lines changed

3 files changed

+26
-2
lines changed

doc/source/whatsnew/v2.3.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,7 @@ MultiIndex
130130

131131
I/O
132132
^^^
133-
-
133+
- Bug in :func:`read_sql` raising ``UnicodeDecodeError`` when a column contained bytes that could not be decoded to a string and ``dtype_backend="pyarrow"`` was used (:issue:`59242`)
134134
-
135135

136136
Period

pandas/core/internals/construction.py

+11-1
Original file line numberDiff line numberDiff line change
@@ -970,7 +970,17 @@ def convert(arr):
970970
if dtype_backend != "numpy" and arr.dtype == np.dtype("O"):
971971
new_dtype = StringDtype()
972972
arr_cls = new_dtype.construct_array_type()
973-
arr = arr_cls._from_sequence(arr, dtype=new_dtype)
973+
try:
974+
# Addressing (#59242)
975+
# Byte data that could not be decoded into
976+
# a string would raise a UnicodeDecodeError
977+
978+
# Try to greedily convert to string
979+
# Will fail if the object is bytes that cannot be decoded;
# in that case arr is left unconverted
980+
arr = arr_cls._from_sequence(arr, dtype=new_dtype)
981+
except UnicodeDecodeError:
982+
pass
983+
974984
elif dtype_backend != "numpy" and isinstance(arr, np.ndarray):
975985
if arr.dtype.kind in "iufb":
976986
arr = pd_array(arr, copy=False)

pandas/tests/io/test_sql.py

+14
Original file line numberDiff line numberDiff line change
@@ -4352,3 +4352,17 @@ def test_xsqlite_if_exists(sqlite_buildin):
43524352
(5, "E"),
43534353
]
43544354
drop_table(table_name, sqlite_buildin)
4355+
4356+
4357+
def test_bytes_column(sqlite_buildin):
4358+
"""
4359+
Regression test for (#59242)
4360+
Bytes being returned in a column that could not be converted
4361+
to a string would raise a UnicodeDecodeError
4362+
when using dtype_backend='pyarrow'
4363+
"""
4364+
query = """
4365+
select cast(x'0123456789abcdef0123456789abcdef' as blob) a
4366+
"""
4367+
df = pd.read_sql(query, sqlite_buildin, dtype_backend="pyarrow")
4368+
assert df.a.values[0] == b"\x01#Eg\x89\xab\xcd\xef\x01#Eg\x89\xab\xcd\xef"

0 commit comments

Comments
 (0)