Skip to content

Commit ce4f73a

Browse files
committed
auto merge of #5945 : graydon/rust/fix-unicode-tables, r=pcwalton
This switches the Unicode functions in core to use static character-range tables and a binary-search helper rather than open-coded switch statements. It adds about 50k of read-only data to the libcore binary but cuts out a similar amount of compiled IR. I would have done it this way in the first place, but we didn't have structured statics for a long time.
2 parents e67f1c0 + 5a3d26f commit ce4f73a

File tree

4 files changed

+2664
-4583
lines changed

4 files changed

+2664
-4583
lines changed

src/etc/unicode.py

Lines changed: 44 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,49 @@ def escape_char(c):
112112
return "'\\u%4.4x'" % c
113113
return "'\\U%8.8x'" % c
114114

115+
def ch_prefix(ix):
    """Return the separator text to write before table entry number `ix`.

    The very first entry (ix == 0) is preceded by " ".  After that, every
    even-numbered entry is preceded by a comma and a line break, and every
    odd-numbered entry by ", " so that it stays on the current line.
    """
    if ix == 0:
        return " "
    starts_new_row = ix % 2 == 0
    return ",\n " if starts_new_row else ", "
122+
123+
def emit_bsearch_range_table(f):
    """Write the Rust source of the `bsearch_range_table` helper to `f`.

    The emitted Rust function takes a char `c` and a slice `r` of
    (lo, hi) char pairs and reports whether `bsearch` finds a pair with
    lo <= c <= hi.  Only `f.write` is used, and it is called exactly once.
    """
    rust_lines = (
        "pure fn bsearch_range_table(c: char, r: &[(char,char)]) -> bool {",
        "use cmp::{EQ, LT, GT};",
        "use vec::bsearch;",
        "use option::None;",
        "(do bsearch(r) |&(lo,hi)| {",
        "if lo <= c && c <= hi { EQ }",
        "else if hi < c { LT }",
        "else { GT }",
        "}) != None",
        "}",
    )
    # One leading line break, then the function text, then three trailing
    # line breaks, matching the layout of the emitted block.
    f.write("\n" + "\n".join(rust_lines) + "\n\n\n")
136+
115137
def emit_property_module(f, mod, tbl):
    """Write a Rust module named `mod` of range-table predicates to `f`.

    The module starts with the `bsearch_range_table` helper.  Then, for
    every key of `tbl` (visited in sorted order), it emits a constant
    `<key>_table` holding that key's character pairs and a public function
    `<key>(c: char) -> bool` that passes `c` and the table to
    `bsearch_range_table`.

    f   -- writable object; only its `write` method is used.
    mod -- name of the Rust module to emit.
    tbl -- mapping from property name to a sequence of pairs; items 0 and 1
           of each pair are rendered with `escape_char`.
    """
    f.write("pub mod %s {\n" % mod)
    # sorted() accepts any mapping, whereas calling .sort() on the result
    # of dict.keys() only works when keys() returns a list.
    cats = sorted(tbl)
    emit_bsearch_range_table(f)
    for cat in cats:
        f.write(" const %s_table : &[(char,char)] = &[\n" % cat)
        for ix, pair in enumerate(tbl[cat]):
            # ch_prefix supplies the separator/line break before each entry.
            f.write(ch_prefix(ix))
            f.write("(%s, %s)" % (escape_char(pair[0]), escape_char(pair[1])))
        f.write("\n ];\n\n")

        f.write(" pub pure fn %s(c: char) -> bool {\n" % cat)
        f.write(" bsearch_range_table(c, %s_table)\n" % cat)
        f.write(" }\n\n")
    f.write("}\n")
155+
156+
157+
def emit_property_module_old(f, mod, tbl):
116158
f.write("mod %s {\n" % mod)
117159
keys = tbl.keys()
118160
keys.sort()
@@ -193,8 +235,9 @@ def emit_decomp_module(f, canon, compat):
193235
rf = open(r, "w")
194236

195237
(canon_decomp, compat_decomp, gencats) = load_unicode_data("UnicodeData.txt")
196-
emit_decomp_module(rf, canon_decomp, compat_decomp)
197238
emit_property_module(rf, "general_category", gencats)
198239

240+
#emit_decomp_module(rf, canon_decomp, compat_decomp)
241+
199242
derived = load_derived_core_properties("DerivedCoreProperties.txt")
200243
emit_property_module(rf, "derived_property", derived)

0 commit comments

Comments
 (0)