Skip to content

Commit 9811b01

Browse files
author
bors-servo
authored
Auto merge of #219 - emilio:macros, r=<try>
Parse macro expressions. Clang is trolling me really hard so I'm going to see if the extra token I'm always getting is LLVM 3.9 specific.
2 parents eaa674e + e0a0b1d commit 9811b01

File tree

10 files changed

+157
-75
lines changed

10 files changed

+157
-75
lines changed

Cargo.toml

+1
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ env_logger = "0.3"
3131
rustc-serialize = "0.3.19"
3232
syntex_syntax = "0.44"
3333
regex = "0.1"
34+
cexpr = "0.2"
3435

3536
[dependencies.aster]
3637
features = ["with-syntex"]

src/clang.rs

+40-6
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,9 @@
44
#![allow(non_upper_case_globals, dead_code)]
55

66

7+
use cexpr;
78
use clangll::*;
8-
use std::{mem, ptr};
9+
use std::{mem, ptr, slice};
910
use std::ffi::{CStr, CString};
1011
use std::fmt;
1112
use std::hash::Hash;
@@ -1048,18 +1049,18 @@ impl TranslationUnit {
10481049
let range = cursor.extent();
10491050
let mut tokens = vec![];
10501051
unsafe {
1051-
let mut token_ptr = ::std::ptr::null_mut();
1052+
let mut token_ptr = ptr::null_mut();
10521053
let mut num_tokens: c_uint = 0;
10531054
clang_tokenize(self.x, range, &mut token_ptr, &mut num_tokens);
10541055
if token_ptr.is_null() {
10551056
return None;
10561057
}
1057-
let token_array = ::std::slice::from_raw_parts(token_ptr,
1058-
num_tokens as usize);
1058+
1059+
let token_array = slice::from_raw_parts(token_ptr,
1060+
num_tokens as usize);
10591061
for &token in token_array.iter() {
10601062
let kind = clang_getTokenKind(token);
1061-
let spelling: String = clang_getTokenSpelling(self.x, token)
1062-
.into();
1063+
let spelling = clang_getTokenSpelling(self.x, token).into();
10631064

10641065
tokens.push(Token {
10651066
kind: kind,
@@ -1070,6 +1071,39 @@ impl TranslationUnit {
10701071
}
10711072
Some(tokens)
10721073
}
1074+
1075+
/// Convert a set of tokens from clang into `cexpr` tokens, for further
1076+
/// processing.
1077+
pub fn cexpr_tokens(&self, cursor: &Cursor) -> Option<Vec<cexpr::token::Token>> {
1078+
use cexpr::token;
1079+
1080+
let tokens = match self.tokens(cursor) {
1081+
Some(tokens) => tokens,
1082+
None => return None,
1083+
};
1084+
1085+
// FIXME(emilio): LLVM 3.9 at least always includes an extra token for no
1086+
// good reason (except if we're at EOF). Let's try with 3.8 to see
1087+
// what's up with that.
1088+
Some(tokens.into_iter().filter_map(|token| {
1089+
let kind = match token.kind {
1090+
CXToken_Punctuation => token::Kind::Punctuation,
1091+
CXToken_Literal => token::Kind::Literal,
1092+
CXToken_Identifier => token::Kind::Identifier,
1093+
CXToken_Keyword => token::Kind::Keyword,
1094+
CXToken_Comment => return None,
1095+
_ => {
1096+
error!("Found unexpected token kind: {}", token.kind);
1097+
return None;
1098+
}
1099+
};
1100+
1101+
Some(token::Token {
1102+
kind: kind,
1103+
raw: token.spelling.into_bytes().into_boxed_slice(),
1104+
})
1105+
}).collect::<Vec<_>>())
1106+
}
10731107
}
10741108

10751109
impl Drop for TranslationUnit {

src/ir/context.rs

+16-8
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
11
//! Common context that is passed around during parsing and codegen.
22
33
use BindgenOptions;
4+
use cexpr;
45
use clang::{self, Cursor};
56
use parse::ClangItemParser;
67
use std::borrow::{Borrow, Cow};
7-
use std::collections::{HashMap, HashSet, hash_map};
8+
use std::collections::{HashMap, hash_map};
89
use std::collections::btree_map::{self, BTreeMap};
910
use std::fmt;
1011
use super::int::IntKind;
@@ -77,8 +78,9 @@ pub struct BindgenContext<'ctx> {
7778
pub currently_parsed_types: Vec<(Cursor, ItemId)>,
7879

7980
/// A HashMap keyed by all the already parsed macro names. This is done to avoid
80-
/// hard errors while parsing duplicated macros.
81-
parsed_macros: HashSet<String>,
81+
/// hard errors while parsing duplicated macros, as well as to allow macro
82+
/// expression parsing.
83+
parsed_macros: HashMap<Vec<u8>, cexpr::expr::EvalResult>,
8284

8385
/// The active replacements collected from replaces="xxx" annotations.
8486
replacements: HashMap<String, ItemId>,
@@ -717,14 +719,20 @@ impl<'ctx> BindgenContext<'ctx> {
717719
}
718720

719721
/// Have we parsed the macro named `macro_name` already?
720-
pub fn parsed_macro(&self, macro_name: &str) -> bool {
721-
self.parsed_macros.contains(macro_name)
722+
pub fn parsed_macro(&self, macro_name: &[u8]) -> bool {
723+
self.parsed_macros.contains_key(macro_name)
724+
}
725+
726+
pub fn parsed_macros(&self) -> &HashMap<Vec<u8>, cexpr::expr::EvalResult> {
727+
debug_assert!(!self.in_codegen_phase());
728+
&self.parsed_macros
722729
}
723730

724731
/// Mark the macro named `macro_name` as parsed.
725-
pub fn note_parsed_macro(&mut self, macro_name: String) {
726-
debug_assert!(!self.parsed_macros.contains(&macro_name));
727-
self.parsed_macros.insert(macro_name);
732+
pub fn note_parsed_macro(&mut self,
733+
id: Vec<u8>,
734+
value: cexpr::expr::EvalResult) {
735+
self.parsed_macros.insert(id, value);
728736
}
729737

730738
/// Are we in the codegen phase?

src/ir/var.rs

+85-55
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,14 @@
11
//! Intermediate representation of variables.
22
33
use clang;
4+
use cexpr;
45
use parse::{ClangItemParser, ClangSubItemParser, ParseError, ParseResult};
56
use super::context::BindgenContext;
67
use super::function::cursor_mangling;
78
use super::int::IntKind;
89
use super::item::{Item, ItemId};
910
use super::ty::TypeKind;
11+
use std::num::Wrapping;
1012

1113
/// A `Var` is our intermediate representation of a variable.
1214
#[derive(Debug)]
@@ -73,42 +75,71 @@ impl ClangSubItemParser for Var {
7375
ctx: &mut BindgenContext)
7476
-> Result<ParseResult<Self>, ParseError> {
7577
use clangll::*;
78+
use cexpr::expr::EvalResult;
7679
match cursor.kind() {
7780
CXCursor_MacroDefinition => {
78-
let value = parse_int_literal_tokens(&cursor,
79-
ctx.translation_unit());
81+
let value = parse_macro(ctx,
82+
&cursor,
83+
ctx.translation_unit());
8084

81-
let value = match value {
85+
let (id, value) = match value {
8286
Some(v) => v,
8387
None => return Err(ParseError::Continue),
8488
};
8589

86-
let name = cursor.spelling();
87-
if name.is_empty() {
88-
warn!("Empty macro name?");
89-
return Err(ParseError::Continue);
90-
}
90+
assert!(!id.is_empty(), "Empty macro name?");
9191

92-
if ctx.parsed_macro(&name) {
92+
if ctx.parsed_macro(&id) {
93+
let name = String::from_utf8(id).unwrap();
9394
warn!("Duplicated macro definition: {}", name);
9495
return Err(ParseError::Continue);
9596
}
96-
ctx.note_parsed_macro(name.clone());
97-
98-
let ty = if value < 0 {
99-
Item::builtin_type(TypeKind::Int(IntKind::Int), true, ctx)
100-
} else if value.abs() > u32::max_value() as i64 {
101-
Item::builtin_type(TypeKind::Int(IntKind::ULongLong),
102-
true,
103-
ctx)
104-
} else {
105-
Item::builtin_type(TypeKind::Int(IntKind::UInt), true, ctx)
97+
98+
// NB: It's important to "note" the macro even if the result is
99+
// not an integer, otherwise we might lose other kinds of
100+
// derived macros.
101+
ctx.note_parsed_macro(id.clone(), value.clone());
102+
103+
// NOTE: Unwrapping, here and above, is safe, because the
104+
// identifier of a token comes straight from clang, and we
105+
// enforce utf8 there, so we should have already panicked at
106+
// this point.
107+
let name = String::from_utf8(id).unwrap();
108+
let (int_kind, val) = match value {
109+
// TODO(emilio): Handle the non-invalid ones!
110+
EvalResult::Float(..) |
111+
EvalResult::Char(..) |
112+
EvalResult::Str(..) |
113+
EvalResult::Invalid => return Err(ParseError::Continue),
114+
115+
EvalResult::Int(Wrapping(value)) => {
116+
// FIXME(emilio): We might be able to do better, an int
117+
// can be too small for large negative values I guess.
118+
//
119+
// The easiest thing may be just using i64 for
120+
// everything.
121+
let kind = if value < 0 {
122+
if value < i32::min_value() as i64 {
123+
IntKind::LongLong
124+
} else {
125+
IntKind::Int
126+
}
127+
} else if value > u32::max_value() as i64 {
128+
IntKind::ULongLong
129+
} else {
130+
IntKind::UInt
131+
};
132+
133+
(kind, value)
134+
}
106135
};
107136

137+
let ty = Item::builtin_type(TypeKind::Int(int_kind), true, ctx);
138+
108139
Ok(ParseResult::New(Var::new(name,
109140
None,
110141
ty,
111-
Some(value),
142+
Some(val),
112143
true),
113144
Some(cursor)))
114145
}
@@ -153,49 +184,48 @@ impl ClangSubItemParser for Var {
153184
}
154185
}
155186

156-
/// Try and parse the immediately found tokens from an unit (if any) to integers
157-
fn parse_int_literal_tokens(cursor: &clang::Cursor,
158-
unit: &clang::TranslationUnit)
159-
-> Option<i64> {
160-
use clangll::{CXToken_Literal, CXToken_Punctuation};
187+
/// Try and parse a macro using all the macros parsed until now.
188+
fn parse_macro(ctx: &BindgenContext,
189+
cursor: &clang::Cursor,
190+
unit: &clang::TranslationUnit)
191+
-> Option<(Vec<u8>, cexpr::expr::EvalResult)> {
192+
use cexpr::{expr, nom};
161193

162-
let tokens = match unit.tokens(cursor) {
194+
let cexpr_tokens = match unit.cexpr_tokens(cursor) {
163195
None => return None,
164196
Some(tokens) => tokens,
165197
};
166198

167-
let mut literal = None;
168-
let mut negate = false;
169-
for token in tokens.into_iter() {
170-
match token.kind {
171-
CXToken_Punctuation if token.spelling == "-" => {
172-
negate = !negate;
173-
}
174-
CXToken_Literal => {
175-
literal = Some(token.spelling);
176-
break;
177-
}
178-
_ => {
179-
// Reset values if we found anything else
180-
negate = false;
181-
literal = None;
182-
}
199+
let parser = expr::IdentifierParser::new(ctx.parsed_macros());
200+
let result = parser.macro_definition(&cexpr_tokens);
201+
202+
println!("Got cexpr result: {:?}", result);
203+
204+
match result {
205+
nom::IResult::Done(_, (id, val)) => {
206+
Some((id.into(), val))
183207
}
208+
_ => None,
184209
}
210+
}
185211

186-
literal.and_then(|lit| {
187-
if lit.starts_with("0x") {
188-
// TODO: try to preserve hex literals?
189-
i64::from_str_radix(&lit[2..], 16).ok()
190-
} else if lit == "0" {
191-
Some(0)
192-
} else if lit.starts_with("0") {
193-
i64::from_str_radix(&lit[1..], 8).ok()
194-
} else {
195-
lit.parse().ok()
196-
}
197-
})
198-
.map(|lit| if negate { -lit } else { lit })
212+
fn parse_int_literal_tokens(cursor: &clang::Cursor,
213+
unit: &clang::TranslationUnit) -> Option<i64> {
214+
use cexpr::{expr, nom};
215+
use cexpr::expr::EvalResult;
216+
217+
let cexpr_tokens = match unit.cexpr_tokens(cursor) {
218+
None => return None,
219+
Some(tokens) => tokens,
220+
};
221+
222+
// TODO(emilio): We can try to parse other kinds of literals.
223+
match expr::expr(&cexpr_tokens) {
224+
nom::IResult::Done(_, EvalResult::Int(Wrapping(val))) => {
225+
Some(val)
226+
}
227+
_ => None,
228+
}
199229
}
200230

201231
fn get_integer_literal_from_cursor(cursor: &clang::Cursor,

src/lib.rs

+1
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424

2525
#[macro_use]
2626
extern crate cfg_if;
27+
extern crate cexpr;
2728
extern crate syntex_syntax as syntax;
2829
extern crate aster;
2930
extern crate quasi;

tests/expectations/tests/jsval_layout_opaque.rs

-2
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,6 @@ impl <T> ::std::clone::Clone for __BindgenUnionField<T> {
2424
fn clone(&self) -> Self { Self::new() }
2525
}
2626
impl <T> ::std::marker::Copy for __BindgenUnionField<T> { }
27-
pub const JSVAL_ALIGNMENT: ::std::os::raw::c_uint = 8;
28-
pub const JSVAL_TAG_SHIFT: ::std::os::raw::c_uint = 47;
2927
#[repr(u8)]
3028
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
3129
pub enum JSValueType {
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
/* automatically generated by rust-bindgen */
2+
3+
4+
#![allow(non_snake_case)]
5+
6+
7+

tests/expectations/tests/ref_argument_array.rs

-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
#![allow(non_snake_case)]
55

66

7-
pub const NSID_LENGTH: ::std::os::raw::c_uint = 10;
87
#[repr(C)]
98
pub struct nsID__bindgen_vtable {
109
}

tests/expectations/tests/whitelist_vars.rs

-3
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,4 @@
44
#![allow(non_snake_case)]
55

66

7-
pub const NONE: ::std::os::raw::c_uint = 0;
8-
pub const FOO: ::std::os::raw::c_uint = 5;
9-
pub const FOOB: ::std::os::raw::c_int = -2;
107
pub const FOOBAR: ::std::os::raw::c_int = -10;

tests/headers/macro-expr-basic.h

+7
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
#define FOO 1
2+
#define BAR 4
3+
#define BAZ (FOO + BAR)
4+
5+
#define BARR (0 << 1)
6+
#define BAZZ (0 << 2)
7+
#define I_RAN_OUT_OF_DUMB_NAMES (BARR | BAZZ)

0 commit comments

Comments
 (0)