Skip to content

Commit 1de24b9

Browse files
committed
Python script to fetch declarations of Clang's intrinsics
Thus far, we constructed most of the declarations using the get-gcc-builtins.sh script. We may need to continue to do so for GCC. Clang's declarations, however, seem easier to parse. This new script now takes care of this. In future, we may decide to completely remove the GCC-specific script, and build upon Clang's declarations with selective tweaks instead.
1 parent 6022a46 commit 1de24b9

File tree

1 file changed

+297
-0
lines changed

1 file changed

+297
-0
lines changed
Lines changed: 297 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,297 @@
1+
#!/usr/bin/env python3
2+
#
3+
# Download Clang builtin declarations from the llvm-project git repository and
4+
# parse them to generate declarations to use from within our C front-end.
5+
6+
import re
7+
import requests
8+
import sys
9+
10+
11+
# Prefix characters of a builtin type string and the C specifier word each
# one contributes; consumed by parse_prefix(). 'I' and 'N' map to the empty
# string, so parse_prefix() skips over them without emitting a word. 'W' and
# 'Z' are also matched by parse_prefix() together with a following 'i'
# ('Wi', 'Zi'), in which case both characters are consumed.
prefix_map = {
    'I': '',
    'N': '',
    'O': 'long long',
    'S': 'signed',
    'U': 'unsigned',
    'W': 'int64_t',
    'Z': 'int32_t'
}
20+
21+
# Single-character base type codes of a builtin type string and the C type
# name emitted for each; looked up by build_type_inner().
#
# we don't support:
# G -> id (Objective-C)
# H -> SEL (Objective-C)
# M -> struct objc_super (Objective-C)
# q -> Scalable vector, followed by the number of elements and base type
# E -> ext_vector, followed by the number of elements and base type
# A -> "reference" to __builtin_va_list
typespec_map = {
    'F': 'const CFString',
    'J': 'jmp_buf',
    'K': 'ucontext_t',
    'P': 'FILE',
    'Y': 'ptrdiff_t',
    'a': '__builtin_va_list',
    'b': '_Bool',
    'c': 'char',
    'd': 'double',
    'f': 'float',
    'h': '__fp16',
    'i': 'int',
    'p': 'pid_t',
    's': 'short',
    'v': 'void',
    'w': 'wchar_t',
    'x': '_Float16',
    'y': '__bf16',
    'z': '__CPROVER_size_t'
}
49+
50+
# Suffix characters that add a C type qualifier; build_type() inserts the
# mapped keyword at the front of the specifier list.
#
# we don't support:
# & -> reference (optionally followed by an address space number)
modifier_map = {'C': 'const', 'D': 'volatile', 'R': 'restrict'}
53+
54+
# Vector typedef names, indexed first by the element type (the space-joined
# specifier words produced by build_type_inner()) and then by the number of
# elements.
#
# declarations as found in ansi-c/gcc_builtin_headers_types.h
vector_map = {
    'char': {
        8: '__gcc_v8qi',
        16: '__gcc_v16qi',
        32: '__gcc_v32qi',
        64: '__gcc_v64qi'
    },
    'unsigned char': {
        1024: '__tile'
    },
    'short': {
        4: '__gcc_v4hi',
        8: '__gcc_v8hi',
        16: '__gcc_v16hi',
        32: '__gcc_v32hi'
    },
    # new
    'unsigned short': {
        8: '__gcc_v8uhi',
        16: '__gcc_v16uhi',
        32: '__gcc_v32uhi',
    },
    'int': {
        2: '__gcc_v2si',
        4: '__gcc_v4si',
        8: '__gcc_v8si',
        16: '__gcc_v16si',
        256: '__gcc_v256si'
    },
    # new
    'unsigned int': {
        4: '__gcc_v4usi',
        8: '__gcc_v8usi',
        16: '__gcc_v16usi',
    },
    'long long int': {
        1: '__gcc_v1di',
        2: '__gcc_v2di',
        4: '__gcc_v4di',
        8: '__gcc_v8di'
    },
    # new
    'unsigned long long int': {
        2: '__gcc_v2udi',
        4: '__gcc_v4udi',
        8: '__gcc_v8udi',
    },
    # new
    '_Float16': {
        8: '__gcc_v8hf',
        16: '__gcc_v16hf',
        32: '__gcc_v32hf'
    },
    'float': {
        2: '__gcc_v2sf',
        4: '__gcc_v4sf',
        8: '__gcc_v8sf',
        16: '__gcc_v16sf'
    },
    'double': {
        2: '__gcc_v2df',
        4: '__gcc_v4df',
        8: '__gcc_v8df'
    }
}
120+
121+
122+
def parse_prefix(types, i):
    """
    Collect the prefix specifiers of the type starting at index "i" of the
    type string "types".

    Returns a tuple (words, position): the list of C specifier words gathered
    and the index of the first character that is not part of the prefix.
    """
    words = []
    pos = i
    end = len(types)

    while pos < end:
        # longest spellings first: 'LLLi' must win over 'LL', and 'LL' over 'L'
        if types.startswith('LLLi', pos):
            words.append('__int128_t')
            pos += 4
            continue
        if types.startswith('LL', pos):
            words += ['long', 'long']
            pos += 2
            continue

        ch = types[pos]
        if ch == 'L':
            words.append('long')
            pos += 1
            continue

        # 'SJ' is a base type handled by the caller, not 'signed' followed by
        # 'J', so the prefix ends here
        if types.startswith('SJ', pos):
            break

        # fixed-width integers: the trailing 'i' is consumed with the prefix
        if types.startswith(('Wi', 'Zi'), pos):
            words.append(prefix_map[ch])
            pos += 2
            continue

        word = prefix_map.get(ch)
        if word is None:
            break
        # prefixes mapped to the empty string are skipped without output
        if word:
            words.append(word)
        pos += 1

    return words, pos
150+
151+
152+
def build_type_inner(types, i):
    """
    Parse one base type (without trailing '*' or qualifier suffixes) from the
    type string "types", starting at index "i".

    Returns a tuple (typespec, i): the list of C specifier words collected,
    and the index of the first character that was not consumed. When the
    base type is not recognised, the returned index only accounts for the
    prefix characters consumed by parse_prefix(); callers detect a parse
    failure by the index making no progress.
    """
    (typespec, i) = parse_prefix(types, i)

    if i >= len(types):
        return typespec, i

    t = types[i]
    if i + 2 < len(types) and t == 'V':
        # vector: 'V', the number of elements, then the element type
        m = re.match(r'\d+', types[i + 1:])
        if m and i + 1 + len(m[0]) < len(types):
            (elem_type_list, next_i) = build_type_inner(
                types, i + 1 + len(m[0]))
            elem_type = ' '.join(elem_type_list)
            # An unknown element type or element count leaves "i" untouched
            # (instead of raising a bare KeyError) so that the caller can
            # report which builtin could not be parsed.
            vector_type = vector_map.get(elem_type, {}).get(int(m[0]))
            if vector_type:
                typespec.append(vector_type)
                i = next_i
    elif i + 1 < len(types) and t == 'X' and (
            typespec_map.get(types[i + 1])):
        # complex: 'X' followed by the base type, e.g. 'Xd' yields
        # 'double _Complex'
        typespec.append(typespec_map[types[i + 1]])
        typespec.append('_Complex')
        i += 2
    elif i + 1 < len(types) and types[i:i+2] == 'SJ':
        typespec.append('sigjmp_buf')
        i += 2
    elif t == '.' and i + 1 == len(types):
        # an ellipsis is only accepted as the very last character
        typespec.append('...')
        i += 1
    elif typespec_map.get(t):
        typespec.append(typespec_map[t])
        i += 1

    return typespec, i
182+
183+
184+
def build_type(types, i):
    """
    Parse one complete type from the type string "types", starting at index
    "i": a base type followed by any number of '*' and qualifier suffixes.

    Returns a tuple (type, position): the C type as a single space-separated
    string, and the index of the first character that was not consumed.
    Qualifiers are always placed at the front of the type, wherever they
    appear relative to a '*'.
    """
    words, pos = build_type_inner(types, i)

    for suffix in types[pos:]:
        if suffix == '*':
            words.append('*')
        else:
            qualifier = modifier_map.get(suffix)
            if not qualifier:
                # neither a pointer nor a qualifier: the next type begins here
                break
            words.insert(0, qualifier)
        pos += 1

    return ' '.join(words), pos
199+
200+
201+
def process_line(name, types, attributes):
    """
    Process the macro declaring "name" as specified at the top of
    https://github.com/llvm/llvm-project/blob/main/clang/include/clang/Basic/Builtins.def
    We don't yet parse attributes.

    "types" is the encoded type string: the return type followed by the
    parameter types. Returns the C declaration of "name" as a string; a
    builtin without parameters is declared with a "void" parameter list.

    Raises ValueError when "types" cannot be parsed or holds no return type.
    """

    type_specs = []
    i = 0
    while i < len(types):
        (t, i_updated) = build_type(types, i)
        # An explicit check rather than an assert: with assertions disabled
        # (python -O) a type spec that makes no progress would loop forever.
        if i_updated <= i:
            raise ValueError(
                f'failed to parse type spec of {name}: {types[i:]}')
        i = i_updated
        type_specs.append(t)

    if not type_specs:
        raise ValueError(f'missing return type in {name}: {types!r}')
    if len(type_specs) == 1:
        type_specs.append('void')
    return type_specs[0] + ' ' + name + '(' + ', '.join(type_specs[1:]) + ');'
221+
222+
223+
def process(input_lines):
    """
    Turn the BUILTIN(...) and TARGET_BUILTIN(...) lines found in
    "input_lines" into C declarations.

    Returns a dictionary mapping a group name to a dictionary of
    {builtin name: declaration}. BUILTIN entries, and TARGET_BUILTIN entries
    with an empty fourth argument, are filed under the group 'clang'; other
    TARGET_BUILTIN entries use the fourth argument as the group. Lines
    matching neither macro are ignored.
    """
    by_group = {}

    for line in input_lines:
        plain = re.match(r'BUILTIN\((\w+),\s*"(.+)",\s*"(.*)"\)', line)
        if plain:
            name, types, attributes = plain.groups()
            by_group.setdefault('clang', {})[name] = process_line(
                name, types, attributes)
            continue

        targeted = re.match(
            r'TARGET_BUILTIN\((\w+),\s*"(.+)",\s*"(.*)",\s*"(.*)"\)', line)
        if not targeted:
            continue
        name, types, attributes, feature = targeted.groups()
        group = feature or 'clang'
        by_group.setdefault(group, {})[name] = process_line(
            name, types, attributes)

    return by_group
245+
246+
247+
def print_declarations(declaration_map, known_declarations):
    """
    Print every declaration in "declaration_map" that is missing from, or
    differs from, "known_declarations".

    "declaration_map" maps a group name to {builtin name: declaration};
    "known_declarations" maps a builtin name to its existing declaration.
    Groups and names are visited in sorted order. For each group with
    anything to report, a '// <group>' header is printed, then the new
    declarations, then the conflicting ones annotated with the old
    declaration. Declarations are compared with all spaces removed.
    """
    for group in sorted(declaration_map):
        members = declaration_map[group]
        fresh = []
        changed = []

        for name in sorted(members):
            decl = members[name]
            existing = known_declarations.get(name)
            if not existing:
                fresh.append(decl)
                continue
            if existing.replace(' ', '') != decl.replace(' ', ''):
                changed.append(decl + ' // old decl: ' + existing)

        report = fresh + changed
        if not report:
            continue
        print('// ' + group)
        for text in report:
            print(text)
263+
264+
265+
def read_declarations():
    """
    Collect the function declarations found in the files named on the
    command line (sys.argv[1:]).

    Returns a dictionary mapping each declared name to the matched
    declaration text, which ends at the last ');' on the line. When a name
    is declared more than once, the last occurrence wins.
    """
    declaration = re.compile(r'.* (\w+)\(.*\);')
    found = {}

    for path in sys.argv[1:]:
        with open(path) as handle:
            for text in handle:
                hit = declaration.match(text)
                if hit is not None:
                    found[hit.group(1)] = hit.group(0)

    return found
276+
277+
278+
def main():
    """
    Download the X86 builtin definition files from the llvm-project
    repository, convert them to C declarations, and print those that are new
    or that differ from the declarations in the files named on the command
    line.

    Raises requests.HTTPError when a download does not succeed.
    """
    known_declarations = read_declarations()
    base_url = ('https://raw.githubusercontent.com/llvm/llvm-project/' +
                'main/clang/include/clang/Basic/')
    files = ['BuiltinsX86.def', 'BuiltinsX86_64.def']
    declaration_map = {}
    for f in files:
        url = base_url + f
        # Bound the wait for the server, and fail loudly on an HTTP error:
        # otherwise the body of an error page would be parsed and silently
        # yield no declarations.
        response = requests.get(url, timeout=60)
        response.raise_for_status()
        lines = response.text.split('\n')
        for k, v in process(lines).items():
            # merge groups that occur in more than one file
            declaration_map.setdefault(k, {}).update(v)

    print_declarations(declaration_map, known_declarations)
294+
295+
296+
# Only run the download and comparison when executed as a script, not when
# imported as a module.
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)