Skip to content

Commit ee19ea7

Browse files
authored
[mypyc] Add primitives and specialization for ord() (#18240)
This makes a microbenchmark, adapted from an internal production codebase that heavily uses `ord()`, over 10x faster. This is part of the work on mypyc/mypyc#644 and mypyc/mypyc#880.
1 parent cc45bec commit ee19ea7

File tree

10 files changed

+147
-10
lines changed

10 files changed

+147
-10
lines changed

mypyc/doc/str_operations.rst

+6
Original file line numberDiff line numberDiff line change
@@ -33,3 +33,9 @@ Methods
3333
* ``s.split(sep: str)``
3434
* ``s.split(sep: str, maxsplit: int)``
3535
* ``s1.startswith(s2: str)``
36+
37+
Functions
38+
---------
39+
40+
* ``len(s: str)``
41+
* ``ord(s: str)``

mypyc/irbuild/specialize.py

+11
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
from mypy.nodes import (
2020
ARG_NAMED,
2121
ARG_POS,
22+
BytesExpr,
2223
CallExpr,
2324
DictExpr,
2425
Expression,
@@ -877,3 +878,13 @@ def translate_float(builder: IRBuilder, expr: CallExpr, callee: RefExpr) -> Valu
877878
# No-op float conversion.
878879
return builder.accept(arg)
879880
return None
881+
882+
883+
@specialize_function("builtins.ord")
def translate_ord(builder: IRBuilder, expr: CallExpr, callee: RefExpr) -> Value | None:
    """Fold ``ord(<literal>)`` into an integer constant.

    The fold applies only when the call has exactly one positional argument
    and that argument is a str or bytes literal whose ``value`` has length 1.
    The result is then ``Integer(ord(value))``.

    Returns:
        The constant ``Integer``, or ``None`` when the call does not match
        the shape above (no specialization is produced).
    """
    if len(expr.args) == 1 and expr.arg_kinds[0] == ARG_POS:
        literal = expr.args[0]
        # Only single-character literals can be evaluated here; anything
        # else is left alone.
        if isinstance(literal, (StrExpr, BytesExpr)) and len(literal.value) == 1:
            return Integer(ord(literal.value))
    return None

mypyc/lib-rt/CPy.h

+2
Original file line numberDiff line numberDiff line change
@@ -730,6 +730,7 @@ bool CPyStr_IsTrue(PyObject *obj);
730730
Py_ssize_t CPyStr_Size_size_t(PyObject *str);
731731
PyObject *CPy_Decode(PyObject *obj, PyObject *encoding, PyObject *errors);
732732
PyObject *CPy_Encode(PyObject *obj, PyObject *encoding, PyObject *errors);
733+
CPyTagged CPyStr_Ord(PyObject *obj);
733734

734735

735736
// Bytes operations
@@ -740,6 +741,7 @@ PyObject *CPyBytes_GetSlice(PyObject *obj, CPyTagged start, CPyTagged end);
740741
CPyTagged CPyBytes_GetItem(PyObject *o, CPyTagged index);
741742
PyObject *CPyBytes_Concat(PyObject *a, PyObject *b);
742743
PyObject *CPyBytes_Join(PyObject *sep, PyObject *iter);
744+
CPyTagged CPyBytes_Ord(PyObject *obj);
743745

744746

745747
int CPyBytes_Compare(PyObject *left, PyObject *right);

mypyc/lib-rt/bytes_ops.c

+17
Original file line numberDiff line numberDiff line change
@@ -141,3 +141,20 @@ PyObject *CPyBytes_Build(Py_ssize_t len, ...) {
141141

142142
return (PyObject *)ret;
143143
}
144+
145+
146+
// ord() for a bytes or bytearray object.
//
// For an object of length 1, returns its single byte (read as unsigned)
// shifted left by one bit as a CPyTagged value. For any other input, sets
// TypeError and returns CPY_INT_TAG.
CPyTagged CPyBytes_Ord(PyObject *obj) {
    const char *data = NULL;
    Py_ssize_t size = -1;  // Stays -1 when obj is neither bytes nor bytearray.

    if (PyBytes_Check(obj)) {
        size = PyBytes_GET_SIZE(obj);
        data = PyBytes_AS_STRING(obj);
    } else if (PyByteArray_Check(obj)) {
        size = PyByteArray_GET_SIZE(obj);
        data = PyByteArray_AS_STRING(obj);
    }

    if (size == 1) {
        return (unsigned char)data[0] << 1;
    }

    PyErr_SetString(PyExc_TypeError, "ord() expects a character");
    return CPY_INT_TAG;
}

mypyc/lib-rt/str_ops.c

+12
Original file line numberDiff line numberDiff line change
@@ -243,3 +243,15 @@ PyObject *CPy_Encode(PyObject *obj, PyObject *encoding, PyObject *errors) {
243243
return NULL;
244244
}
245245
}
246+
247+
248+
// ord() for a str object.
//
// For a string of length 1, returns its code point shifted left by one bit
// as a CPyTagged value. Otherwise sets TypeError (the message includes the
// actual length) and returns CPY_INT_TAG.
CPyTagged CPyStr_Ord(PyObject *obj) {
    Py_ssize_t length = PyUnicode_GET_LENGTH(obj);
    if (length != 1) {
        PyErr_Format(
            PyExc_TypeError, "ord() expected a character, but a string of length %zd found", length);
        return CPY_INT_TAG;
    }

    int kind = PyUnicode_KIND(obj);
    return PyUnicode_READ(kind, PyUnicode_DATA(obj), 0) << 1;
}

mypyc/primitives/bytes_ops.py

+8
Original file line numberDiff line numberDiff line change
@@ -99,3 +99,11 @@
9999
error_kind=ERR_MAGIC,
100100
var_arg_type=bytes_rprimitive,
101101
)
102+
103+
# ord(bytes): calls the C helper CPyBytes_Ord and yields an int.
function_op(
    name="builtins.ord",
    c_function_name="CPyBytes_Ord",
    arg_types=[bytes_rprimitive],
    return_type=int_rprimitive,
    error_kind=ERR_MAGIC,
)

mypyc/primitives/str_ops.py

+8
Original file line numberDiff line numberDiff line change
@@ -251,3 +251,11 @@
251251
c_function_name="CPy_Encode",
252252
error_kind=ERR_MAGIC,
253253
)
254+
255+
# ord(str): calls the C helper CPyStr_Ord and yields an int.
function_op(
    name="builtins.ord",
    c_function_name="CPyStr_Ord",
    arg_types=[str_rprimitive],
    return_type=int_rprimitive,
    error_kind=ERR_MAGIC,
)

mypyc/test-data/irbuild-str.test

+43
Original file line numberDiff line numberDiff line change
@@ -383,3 +383,46 @@ L0:
383383
r37 = 'latin2'
384384
r38 = CPy_Encode(s, r37, 0)
385385
return 1
386+
387+
[case testOrd]
388+
def str_ord(x: str) -> int:
389+
return ord(x)
390+
def str_ord_literal() -> int:
391+
return ord("a")
392+
def bytes_ord(x: bytes) -> int:
393+
return ord(x)
394+
def bytes_ord_literal() -> int:
395+
return ord(b"a")
396+
def any_ord(x) -> int:
397+
return ord(x)
398+
[out]
399+
def str_ord(x):
400+
x :: str
401+
r0 :: int
402+
L0:
403+
r0 = CPyStr_Ord(x)
404+
return r0
405+
def str_ord_literal():
406+
L0:
407+
return 194
408+
def bytes_ord(x):
409+
x :: bytes
410+
r0 :: int
411+
L0:
412+
r0 = CPyBytes_Ord(x)
413+
return r0
414+
def bytes_ord_literal():
415+
L0:
416+
return 194
417+
def any_ord(x):
418+
x, r0 :: object
419+
r1 :: str
420+
r2, r3 :: object
421+
r4 :: int
422+
L0:
423+
r0 = builtins :: module
424+
r1 = 'ord'
425+
r2 = CPyObject_GetAttr(r0, r1)
426+
r3 = PyObject_CallFunctionObjArgs(r2, x, 0)
427+
r4 = unbox(int, r3)
428+
return r4

mypyc/test-data/run-bytes.test

+23
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,29 @@ def test_len() -> None:
111111
assert len(b) == 3
112112
assert len(bytes()) == 0
113113

114+
# Checks ord() on bytes values of length 1 (0x61, 0x00 and 0xfe), each once
# as a bare literal and once as `literal + bytes()`, then checks that
# lengths 2 and 0 raise TypeError.
# NOTE(review): the `+ bytes()` variants presumably keep the argument from
# being a plain literal so the non-literal code path is exercised as well --
# confirm.
def test_ord() -> None:
115+
assert ord(b'a') == ord('a')
116+
assert ord(b'a' + bytes()) == ord('a')
117+
assert ord(b'\x00') == 0
118+
assert ord(b'\x00' + bytes()) == 0
119+
assert ord(b'\xfe') == 254
120+
assert ord(b'\xfe' + bytes()) == 254
121+
122+
with assertRaises(TypeError):
123+
ord(b'aa')
124+
with assertRaises(TypeError):
125+
ord(b'')
126+
127+
# Checks ord() on bytearray values of length 1 (0x61, 0x00 and 0xfe), then
# checks that bytearrays of length 2 and 0 raise TypeError.
def test_ord_bytesarray() -> None:
128+
assert ord(bytearray(b'a')) == ord('a')
129+
assert ord(bytearray(b'\x00')) == 0
130+
assert ord(bytearray(b'\xfe')) == 254
131+
132+
with assertRaises(TypeError):
133+
ord(bytearray(b'aa'))
134+
with assertRaises(TypeError):
135+
ord(bytearray(b''))
136+
114137
[case testBytesSlicing]
115138
def test_bytes_slicing() -> None:
116139
b = b'abcdefg'

mypyc/test-data/run-strings.test

+17-10
Original file line numberDiff line numberDiff line change
@@ -565,25 +565,32 @@ def test_chr() -> None:
565565
assert try_invalid(1114112)
566566

567567
[case testOrd]
568+
from testutil import assertRaises
569+
568570
def test_ord() -> None:
571+
assert ord(' ') == 32
572+
assert ord(' ' + str()) == 32
573+
assert ord('\x00') == 0
574+
assert ord('\x00' + str()) == 0
569575
assert ord('\ue000') == 57344
570-
s = "a\xac\u1234\u20ac\U00008000"
571-
# ^^^^ two-digit hex escape
572-
# ^^^^^^ four-digit Unicode escape
573-
# ^^^^^^^^^^ eight-digit Unicode escape
576+
assert ord('\ue000' + str()) == 57344
577+
s = "a\xac\u1234\u20ac\U00010000"
578+
# ^^^^ two-digit hex escape
579+
# ^^^^^^ four-digit Unicode escape
580+
# ^^^^^^^^^^ eight-digit Unicode escape
574581
l1 = [ord(c) for c in s]
575-
assert l1 == [97, 172, 4660, 8364, 32768]
582+
assert l1 == [97, 172, 4660, 8364, 65536]
576583
u = 'abcdé'
577584
assert ord(u[-1]) == 233
578585
assert ord(b'a') == 97
579586
assert ord(b'a' + bytes()) == 97
580-
u2 = '\U0010ffff'
587+
u2 = '\U0010ffff' + str()
581588
assert ord(u2) == 1114111
582-
try:
589+
assert ord('\U0010ffff') == 1114111
590+
with assertRaises(TypeError, "ord() expected a character, but a string of length 2 found"):
583591
ord('aa')
584-
assert False
585-
except TypeError:
586-
pass
592+
with assertRaises(TypeError):
593+
ord('')
587594

588595
[case testDecode]
589596
def test_decode() -> None:

0 commit comments

Comments
 (0)