From e7578384b65c07f1d08f9a991071a0bda35809fb Mon Sep 17 00:00:00 2001 From: Ben Hoyt Date: Thu, 19 Aug 2021 12:33:15 +1200 Subject: [PATCH 1/3] Speed up Python apply_mask 20x by using int.from_bytes/to_bytes This speeds up the Python version of utils.apply_mask about 20 times, using int.from_bytes so that the XOR is done in a single Python operation -- in other words, the loop over the bytes is in C rather than in Python. Note that it is a trade-off as it uses more memory: this version allocates roughly len(data) bytes for each of the intermediate values (e.g., data_int, mask_repeated, mask_int, the XOR result); whereas I believe the original version only allocates for the return value. Still, most websocket packets aren't huge, and I believe the massive speed gain here makes it worth it. (And people that use the speedups.c version won't be affected.) Obviously the speedups.c version is still significantly faster again, but this change makes the library more usable in environments where it's not feasible to use the C extension. Data Size ForLoop IntXor Speedups ------------------------------------ 1KB 78.6us 3.79us 151ns 1MB 79.7ms 4.38ms 55.4us I got these timings by using commands like the following (with the function call adjusted, and 1024 replaced with 1024*1024 as needed). python3 -m timeit \ -s 'from websockets.utils import apply_mask' \ -s 'data=b"x"*1024; mask=b"abcd"' \ 'apply_mask(data, mask)' This idea came from Will McGugan's blog post "Speeding up Websockets 60X": https://www.willmcgugan.com/blog/tech/post/speeding-up-websockets-60x/ That post contains an even faster (about 50% faster) way to solve it using a pre-calculated XOR lookup table, but that pre-allocates a 64K-entry table at import time, which didn't seem ideal. 
Still, that is how aiohttp does it, so maybe it's worth considering: https://github.com/aio-libs/aiohttp/blob/6ec33c5d841c8e845c27ebdd9384bbf72651cbb8/aiohttp/http_websocket.py#L115-L140 The int.from_bytes approach is also the approach used by the websocket-client library: https://github.com/websocket-client/websocket-client/blob/5f32b3c0cfb836c016ad2a5f6caeff2978a6a16f/websocket/_abnf.py#L46-L50 --- src/websockets/utils.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/websockets/utils.py b/src/websockets/utils.py index ffb706963..f259099fa 100644 --- a/src/websockets/utils.py +++ b/src/websockets/utils.py @@ -4,6 +4,7 @@ import hashlib import itertools import secrets +import sys __all__ = ["accept_key", "apply_mask"] @@ -43,4 +44,7 @@ def apply_mask(data: bytes, mask: bytes) -> bytes: if len(mask) != 4: raise ValueError("mask must contain 4 bytes") - return bytes(b ^ m for b, m in zip(data, itertools.cycle(mask))) + data_int = int.from_bytes(data, sys.byteorder) + mask_repeated = mask * (len(data) // 4) + mask[:len(data) % 4] + mask_int = int.from_bytes(mask_repeated, sys.byteorder) + return (data_int ^ mask_int).to_bytes(len(data), sys.byteorder) From 0a0905be5379dfb3a12d569b83e6f4d223da14ae Mon Sep 17 00:00:00 2001 From: Aymeric Augustin Date: Thu, 19 Aug 2021 15:48:03 +0200 Subject: [PATCH 2/3] Run black --- src/websockets/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/websockets/utils.py b/src/websockets/utils.py index f259099fa..44fb6c251 100644 --- a/src/websockets/utils.py +++ b/src/websockets/utils.py @@ -45,6 +45,6 @@ def apply_mask(data: bytes, mask: bytes) -> bytes: raise ValueError("mask must contain 4 bytes") data_int = int.from_bytes(data, sys.byteorder) - mask_repeated = mask * (len(data) // 4) + mask[:len(data) % 4] + mask_repeated = mask * (len(data) // 4) + mask[: len(data) % 4] mask_int = int.from_bytes(mask_repeated, sys.byteorder) return (data_int ^ mask_int).to_bytes(len(data), 
sys.byteorder) From 364a5800f23db7b972cd5d295b4695fe2c311da5 Mon Sep 17 00:00:00 2001 From: Aymeric Augustin Date: Thu, 19 Aug 2021 15:49:55 +0200 Subject: [PATCH 3/3] Run flake8 --- src/websockets/utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/websockets/utils.py b/src/websockets/utils.py index 44fb6c251..c6e4b788c 100644 --- a/src/websockets/utils.py +++ b/src/websockets/utils.py @@ -2,7 +2,6 @@ import base64 import hashlib -import itertools import secrets import sys