Skip to content

Commit 2759b05

Browse files
committed
Parse macros with cexpr.
1 parent eaa674e commit 2759b05

File tree

8 files changed

+178
-73
lines changed

8 files changed

+178
-73
lines changed

Cargo.toml

+1
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ env_logger = "0.3"
3131
rustc-serialize = "0.3.19"
3232
syntex_syntax = "0.44"
3333
regex = "0.1"
34+
cexpr = "0.2"
3435

3536
[dependencies.aster]
3637
features = ["with-syntex"]

src/clang.rs

+61-6
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,9 @@
44
#![allow(non_upper_case_globals, dead_code)]
55

66

7+
use cexpr;
78
use clangll::*;
8-
use std::{mem, ptr};
9+
use std::{mem, ptr, slice};
910
use std::ffi::{CStr, CString};
1011
use std::fmt;
1112
use std::hash::Hash;
@@ -1048,18 +1049,18 @@ impl TranslationUnit {
10481049
let range = cursor.extent();
10491050
let mut tokens = vec![];
10501051
unsafe {
1051-
let mut token_ptr = ::std::ptr::null_mut();
1052+
let mut token_ptr = ptr::null_mut();
10521053
let mut num_tokens: c_uint = 0;
10531054
clang_tokenize(self.x, range, &mut token_ptr, &mut num_tokens);
10541055
if token_ptr.is_null() {
10551056
return None;
10561057
}
1057-
let token_array = ::std::slice::from_raw_parts(token_ptr,
1058-
num_tokens as usize);
1058+
1059+
let token_array = slice::from_raw_parts(token_ptr,
1060+
num_tokens as usize);
10591061
for &token in token_array.iter() {
10601062
let kind = clang_getTokenKind(token);
1061-
let spelling: String = clang_getTokenSpelling(self.x, token)
1062-
.into();
1063+
let spelling = clang_getTokenSpelling(self.x, token).into();
10631064

10641065
tokens.push(Token {
10651066
kind: kind,
@@ -1070,6 +1071,60 @@ impl TranslationUnit {
10701071
}
10711072
Some(tokens)
10721073
}
1074+
1075+
/// Convert a set of tokens from clang into `cexpr` tokens, for further
1076+
/// processing.
1077+
pub fn cexpr_tokens(&self,
1078+
cursor: &Cursor)
1079+
-> Option<Vec<cexpr::token::Token>> {
1080+
use cexpr::token;
1081+
1082+
let mut tokens = match self.tokens(cursor) {
1083+
Some(tokens) => tokens,
1084+
None => return None,
1085+
};
1086+
1087+
// FIXME(emilio): LLVM 3.9 at least always includes an extra token for no
1088+
// good reason (except if we're at EOF). So we do this kind of hack,
1089+
// where we skip known-to-cause problems trailing punctuation and
1090+
// trailing keywords.
1091+
//
1092+
// This is sort of unfortunate, though :(.
1093+
//
1094+
// I'll try to get it fixed in LLVM if I have the time to submit a
1095+
// patch.
1096+
let mut trim_last_token = false;
1097+
if let Some(token) = tokens.last() {
1098+
// The start of the next macro.
1099+
trim_last_token |= token.spelling == "#" &&
1100+
token.kind == CXToken_Punctuation;
1101+
1102+
// A following keyword of any kind, like a following declaration.
1103+
trim_last_token |= token.kind == CXToken_Keyword;
1104+
}
1105+
1106+
if trim_last_token {
1107+
tokens.pop().unwrap();
1108+
}
1109+
1110+
Some(tokens.into_iter()
1111+
.map(|token| {
1112+
let kind = match token.kind {
1113+
CXToken_Punctuation => token::Kind::Punctuation,
1114+
CXToken_Literal => token::Kind::Literal,
1115+
CXToken_Identifier => token::Kind::Identifier,
1116+
CXToken_Keyword => token::Kind::Keyword,
1117+
CXToken_Comment => token::Kind::Comment,
1118+
_ => panic!("Found unexpected token kind: {}", token.kind),
1119+
};
1120+
1121+
token::Token {
1122+
kind: kind,
1123+
raw: token.spelling.into_bytes().into_boxed_slice(),
1124+
}
1125+
})
1126+
.collect::<Vec<_>>())
1127+
}
10731128
}
10741129

10751130
impl Drop for TranslationUnit {

src/ir/context.rs

+17-8
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
11
//! Common context that is passed around during parsing and codegen.
22
33
use BindgenOptions;
4+
use cexpr;
45
use clang::{self, Cursor};
56
use parse::ClangItemParser;
67
use std::borrow::{Borrow, Cow};
7-
use std::collections::{HashMap, HashSet, hash_map};
8+
use std::collections::{HashMap, hash_map};
89
use std::collections::btree_map::{self, BTreeMap};
910
use std::fmt;
1011
use super::int::IntKind;
@@ -77,8 +78,9 @@ pub struct BindgenContext<'ctx> {
7778
pub currently_parsed_types: Vec<(Cursor, ItemId)>,
7879

7980
/// A HashMap with all the already parsed macro names. This is done to avoid
80-
/// hard errors while parsing duplicated macros.
81-
parsed_macros: HashSet<String>,
81+
/// hard errors while parsing duplicated macros, as well as to allow macro
82+
/// expression parsing.
83+
parsed_macros: HashMap<Vec<u8>, cexpr::expr::EvalResult>,
8284

8385
/// The active replacements collected from replaces="xxx" annotations.
8486
replacements: HashMap<String, ItemId>,
@@ -717,14 +719,21 @@ impl<'ctx> BindgenContext<'ctx> {
717719
}
718720

719721
/// Have we parsed the macro named `macro_name` already?
720-
pub fn parsed_macro(&self, macro_name: &str) -> bool {
721-
self.parsed_macros.contains(macro_name)
722+
pub fn parsed_macro(&self, macro_name: &[u8]) -> bool {
723+
self.parsed_macros.contains_key(macro_name)
724+
}
725+
726+
/// Get the currently parsed macros.
727+
pub fn parsed_macros(&self) -> &HashMap<Vec<u8>, cexpr::expr::EvalResult> {
728+
debug_assert!(!self.in_codegen_phase());
729+
&self.parsed_macros
722730
}
723731

724732
/// Mark the macro named `macro_name` as parsed.
725-
pub fn note_parsed_macro(&mut self, macro_name: String) {
726-
debug_assert!(!self.parsed_macros.contains(&macro_name));
727-
self.parsed_macros.insert(macro_name);
733+
pub fn note_parsed_macro(&mut self,
734+
id: Vec<u8>,
735+
value: cexpr::expr::EvalResult) {
736+
self.parsed_macros.insert(id, value);
728737
}
729738

730739
/// Are we in the codegen phase?

src/ir/var.rs

+77-58
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
//! Intermediate representation of variables.
22
3+
use cexpr;
34
use clang;
45
use parse::{ClangItemParser, ClangSubItemParser, ParseError, ParseResult};
6+
use std::num::Wrapping;
57
use super::context::BindgenContext;
68
use super::function::cursor_mangling;
79
use super::int::IntKind;
@@ -73,43 +75,66 @@ impl ClangSubItemParser for Var {
7375
ctx: &mut BindgenContext)
7476
-> Result<ParseResult<Self>, ParseError> {
7577
use clangll::*;
78+
use cexpr::expr::EvalResult;
7679
match cursor.kind() {
7780
CXCursor_MacroDefinition => {
78-
let value = parse_int_literal_tokens(&cursor,
79-
ctx.translation_unit());
81+
let value = parse_macro(ctx, &cursor, ctx.translation_unit());
8082

81-
let value = match value {
83+
let (id, value) = match value {
8284
Some(v) => v,
8385
None => return Err(ParseError::Continue),
8486
};
8587

86-
let name = cursor.spelling();
87-
if name.is_empty() {
88-
warn!("Empty macro name?");
89-
return Err(ParseError::Continue);
90-
}
88+
assert!(!id.is_empty(), "Empty macro name?");
9189

92-
if ctx.parsed_macro(&name) {
90+
if ctx.parsed_macro(&id) {
91+
let name = String::from_utf8(id).unwrap();
9392
warn!("Duplicated macro definition: {}", name);
9493
return Err(ParseError::Continue);
9594
}
96-
ctx.note_parsed_macro(name.clone());
97-
98-
let ty = if value < 0 {
99-
Item::builtin_type(TypeKind::Int(IntKind::Int), true, ctx)
100-
} else if value.abs() > u32::max_value() as i64 {
101-
Item::builtin_type(TypeKind::Int(IntKind::ULongLong),
102-
true,
103-
ctx)
104-
} else {
105-
Item::builtin_type(TypeKind::Int(IntKind::UInt), true, ctx)
95+
96+
// NB: It's important to "note" the macro even if the result is
97+
// not an integer, otherwise we might lose other kinds of
98+
// derived macros.
99+
ctx.note_parsed_macro(id.clone(), value.clone());
100+
101+
// NOTE: Unwrapping, here and above, is safe, because the
102+
// identifier of a token comes straight from clang, and we
103+
// enforce utf8 there, so we should have already panicked at
104+
// this point.
105+
let name = String::from_utf8(id).unwrap();
106+
let (int_kind, val) = match value {
107+
// TODO(emilio): Handle the non-invalid ones!
108+
EvalResult::Float(..) |
109+
EvalResult::Char(..) |
110+
EvalResult::Str(..) |
111+
EvalResult::Invalid => return Err(ParseError::Continue),
112+
113+
EvalResult::Int(Wrapping(value)) => {
114+
// FIXME(emilio): We might be able to do better, an int
115+
// can be too small for large negative values I guess.
116+
//
117+
// The easiest thing may be just using i64 for
118+
// everything.
119+
let kind = if value < 0 {
120+
if value < i32::min_value() as i64 {
121+
IntKind::LongLong
122+
} else {
123+
IntKind::Int
124+
}
125+
} else if value > u32::max_value() as i64 {
126+
IntKind::ULongLong
127+
} else {
128+
IntKind::UInt
129+
};
130+
131+
(kind, value)
132+
}
106133
};
107134

108-
Ok(ParseResult::New(Var::new(name,
109-
None,
110-
ty,
111-
Some(value),
112-
true),
135+
let ty = Item::builtin_type(TypeKind::Int(int_kind), true, ctx);
136+
137+
Ok(ParseResult::New(Var::new(name, None, ty, Some(val), true),
113138
Some(cursor)))
114139
}
115140
CXCursor_VarDecl => {
@@ -153,49 +178,43 @@ impl ClangSubItemParser for Var {
153178
}
154179
}
155180

156-
/// Try and parse the immediately found tokens from an unit (if any) to integers
181+
/// Try and parse a macro using all the macros parsed until now.
182+
fn parse_macro(ctx: &BindgenContext,
183+
cursor: &clang::Cursor,
184+
unit: &clang::TranslationUnit)
185+
-> Option<(Vec<u8>, cexpr::expr::EvalResult)> {
186+
use cexpr::{expr, nom};
187+
188+
let cexpr_tokens = match unit.cexpr_tokens(cursor) {
189+
None => return None,
190+
Some(tokens) => tokens,
191+
};
192+
193+
let parser = expr::IdentifierParser::new(ctx.parsed_macros());
194+
let result = parser.macro_definition(&cexpr_tokens);
195+
196+
match result {
197+
nom::IResult::Done(_, (id, val)) => Some((id.into(), val)),
198+
_ => None,
199+
}
200+
}
201+
157202
fn parse_int_literal_tokens(cursor: &clang::Cursor,
158203
unit: &clang::TranslationUnit)
159204
-> Option<i64> {
160-
use clangll::{CXToken_Literal, CXToken_Punctuation};
205+
use cexpr::{expr, nom};
206+
use cexpr::expr::EvalResult;
161207

162-
let tokens = match unit.tokens(cursor) {
208+
let cexpr_tokens = match unit.cexpr_tokens(cursor) {
163209
None => return None,
164210
Some(tokens) => tokens,
165211
};
166212

167-
let mut literal = None;
168-
let mut negate = false;
169-
for token in tokens.into_iter() {
170-
match token.kind {
171-
CXToken_Punctuation if token.spelling == "-" => {
172-
negate = !negate;
173-
}
174-
CXToken_Literal => {
175-
literal = Some(token.spelling);
176-
break;
177-
}
178-
_ => {
179-
// Reset values if we found anything else
180-
negate = false;
181-
literal = None;
182-
}
183-
}
213+
// TODO(emilio): We can try to parse other kinds of literals.
214+
match expr::expr(&cexpr_tokens) {
215+
nom::IResult::Done(_, EvalResult::Int(Wrapping(val))) => Some(val),
216+
_ => None,
184217
}
185-
186-
literal.and_then(|lit| {
187-
if lit.starts_with("0x") {
188-
// TODO: try to preserve hex literals?
189-
i64::from_str_radix(&lit[2..], 16).ok()
190-
} else if lit == "0" {
191-
Some(0)
192-
} else if lit.starts_with("0") {
193-
i64::from_str_radix(&lit[1..], 8).ok()
194-
} else {
195-
lit.parse().ok()
196-
}
197-
})
198-
.map(|lit| if negate { -lit } else { lit })
199218
}
200219

201220
fn get_integer_literal_from_cursor(cursor: &clang::Cursor,

src/lib.rs

+1
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424

2525
#[macro_use]
2626
extern crate cfg_if;
27+
extern crate cexpr;
2728
extern crate syntex_syntax as syntax;
2829
extern crate aster;
2930
extern crate quasi;

tests/expectations/tests/jsval_layout_opaque.rs

+2-1
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,9 @@ impl <T> ::std::clone::Clone for __BindgenUnionField<T> {
2424
fn clone(&self) -> Self { Self::new() }
2525
}
2626
impl <T> ::std::marker::Copy for __BindgenUnionField<T> { }
27-
pub const JSVAL_ALIGNMENT: ::std::os::raw::c_uint = 8;
2827
pub const JSVAL_TAG_SHIFT: ::std::os::raw::c_uint = 47;
28+
pub const JSVAL_PAYLOAD_MASK: ::std::os::raw::c_ulonglong = 140737488355327;
29+
pub const JSVAL_TAG_MASK: ::std::os::raw::c_longlong = -140737488355328;
2930
#[repr(u8)]
3031
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
3132
pub enum JSValueType {
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
/* automatically generated by rust-bindgen */
2+
3+
4+
#![allow(non_snake_case)]
5+
6+
7+
pub const FOO: ::std::os::raw::c_uint = 1;
8+
pub const BAR: ::std::os::raw::c_uint = 4;
9+
pub const BAZ: ::std::os::raw::c_uint = 5;
10+
pub const BARR: ::std::os::raw::c_uint = 1;
11+
pub const BAZZ: ::std::os::raw::c_uint = 7;
12+
pub const I_RAN_OUT_OF_DUMB_NAMES: ::std::os::raw::c_uint = 7;

tests/headers/macro-expr-basic.h

+7
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
#define FOO 1
2+
#define BAR 4
3+
#define BAZ (FOO + BAR)
4+
5+
#define BARR (1 << 0)
6+
#define BAZZ ((1 << 1) + BAZ)
7+
#define I_RAN_OUT_OF_DUMB_NAMES (BARR | BAZZ)

0 commit comments

Comments
 (0)