Skip to content

Commit c2364bb

Browse files
oseemannOliver Seemann
and
Oliver Seemann
authored
Avoid UnicodeDecodeError for non-utf8 QueryEvents (julien-duponchelle#465)
Query strings in QueryEvents that appear in the binlog stream are not necessarily utf-8 encoded, but the current implementation handles only utf-8. This commit adds the `errors="backslashreplace"` kwarg to decode(), to avoid a runtime error and insert \xNN escape sequences for byte sequences that are not valid utf-8. It includes a test that generates a QueryEvent with latin-1 encoding, which fails without the fix. Co-authored-by: Oliver Seemann <[email protected]>
1 parent 73e2eeb commit c2364bb

File tree

3 files changed

+23
-4
lines changed

3 files changed

+23
-4
lines changed

Diff for: pymysqlreplication/event.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -325,8 +325,9 @@ def __init__(self, from_packet, event_size, table_map, ctl_connection, **kwargs)
325325
self.schema = self.packet.read(self.schema_length)
326326
self.packet.advance(1)
327327

328-
self.query = self.packet.read(event_size - 13 - self.status_vars_length
329-
- self.schema_length - 1).decode("utf-8")
328+
query = self.packet.read(event_size - 13 - self.status_vars_length
329+
- self.schema_length - 1)
330+
self.query = query.decode("utf-8", errors='backslashreplace')
330331
#string[EOF] query
331332

332333
def _dump(self):

Diff for: pymysqlreplication/tests/base.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -18,15 +18,15 @@ class PyMySQLReplicationTestCase(base):
1818
def ignoredEvents(self):
1919
return []
2020

21-
def setUp(self):
21+
def setUp(self, charset="utf8"):
2222
# default
2323
self.database = {
2424
"host": os.environ.get("MYSQL_5_7") or "localhost",
2525
"user": "root",
2626
"passwd": "",
2727
"port": 3306,
2828
"use_unicode": True,
29-
"charset": "utf8",
29+
"charset": charset,
3030
"db": "pymysqlreplication_test"
3131
}
3232

Diff for: pymysqlreplication/tests/test_basic.py

+18
Original file line numberDiff line numberDiff line change
@@ -1371,6 +1371,24 @@ def test_rows_query_log_event(self):
13711371
event = self.stream.fetchone()
13721372
self.assertIsInstance(event, RowsQueryLogEvent)
13731373

1374+
class TestLatin1(base.PyMySQLReplicationTestCase):
1375+
1376+
def setUp(self):
1377+
super().setUp(charset='latin1')
1378+
1379+
def test_query_event_latin1(self):
1380+
"""
1381+
Ensure query events with a non-utf8 encoded query are parsed without errors.
1382+
"""
1383+
self.stream = BinLogStreamReader(self.database, server_id=1024, only_events=[QueryEvent])
1384+
self.execute("CREATE TABLE test_latin1_ÖÆÛ (a INT)")
1385+
self.execute("COMMIT")
1386+
assert "ÖÆÛ".encode('latin-1') == b'\xd6\xc6\xdb'
1387+
1388+
event = self.stream.fetchone()
1389+
assert event.query.startswith("CREATE TABLE test")
1390+
assert event.query == r"CREATE TABLE test_latin1_\xd6\xc6\xdb (a INT)"
1391+
13741392

13751393
if __name__ == "__main__":
13761394
import unittest

0 commit comments

Comments
 (0)