Skip to content

Commit 6806971

Browse files
author
Oliver Seemann
committed
Avoid UnicodeDecodeError for non-utf8 QueryEvents
Query strings in QueryEvents that appear in the binlog stream are not necessarily utf-8 encoded, but the current implementation handles only utf-8. This commit adds the `errors="backslashreplace"` kwarg to decode(), which avoids a runtime error by inserting \xNN escape sequences for byte sequences that are not valid utf-8. It includes a test that generates a QueryEvent with latin-1 encoding, which fails without the fix.
1 parent a19a5a5 commit 6806971

File tree

3 files changed

+23
-4
lines changed

3 files changed

+23
-4
lines changed

pymysqlreplication/event.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -324,8 +324,9 @@ def __init__(self, from_packet, event_size, table_map, ctl_connection, **kwargs)
324324
self.schema = self.packet.read(self.schema_length)
325325
self.packet.advance(1)
326326

327-
self.query = self.packet.read(event_size - 13 - self.status_vars_length
328-
- self.schema_length - 1).decode("utf-8")
327+
query = self.packet.read(event_size - 13 - self.status_vars_length
328+
- self.schema_length - 1)
329+
self.query = query.decode("utf-8", errors='backslashreplace')
329330
#string[EOF] query
330331

331332
def _dump(self):

pymysqlreplication/tests/base.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -18,15 +18,15 @@ class PyMySQLReplicationTestCase(base):
1818
def ignoredEvents(self):
1919
return []
2020

21-
def setUp(self):
21+
def setUp(self, charset="utf8"):
2222
# default
2323
self.database = {
2424
"host": os.environ.get("MYSQL_5_7") or "localhost",
2525
"user": "root",
2626
"passwd": "",
2727
"port": 3306,
2828
"use_unicode": True,
29-
"charset": "utf8",
29+
"charset": charset,
3030
"db": "pymysqlreplication_test"
3131
}
3232

pymysqlreplication/tests/test_basic.py

+18
Original file line numberDiff line numberDiff line change
@@ -1190,6 +1190,24 @@ def test_rows_query_log_event(self):
11901190
event = self.stream.fetchone()
11911191
self.assertIsInstance(event, RowsQueryLogEvent)
11921192

1193+
class TestLatin1(base.PyMySQLReplicationTestCase):
1194+
1195+
def setUp(self):
1196+
super().setUp(charset='latin1')
1197+
1198+
def test_query_event_latin1(self):
1199+
"""
1200+
Ensure query events with a non-utf8 encoded query are parsed without errors.
1201+
"""
1202+
self.stream = BinLogStreamReader(self.database, server_id=1024, only_events=[QueryEvent])
1203+
self.execute("CREATE TABLE test_latin1_ÖÆÛ (a INT)")
1204+
self.execute("COMMIT")
1205+
assert "ÖÆÛ".encode('latin-1') == b'\xd6\xc6\xdb'
1206+
1207+
event = self.stream.fetchone()
1208+
assert event.query.startswith("CREATE TABLE test")
1209+
assert event.query == r"CREATE TABLE test_latin1_\xd6\xc6\xdb (a INT)"
1210+
11931211

11941212
if __name__ == "__main__":
11951213
import unittest

0 commit comments

Comments
 (0)