Skip to content

Commit 8d83bdf

Browse files
author
bors-servo
authored
Auto merge of #219 - emilio:macros, r=fitzgen
Parse macro expressions. Clang is trolling me really hard so I'm going to see if the extra token I'm always getting is LLVM 3.9 specific.
2 parents c9eccea + 4ee31ce commit 8d83bdf

File tree

11 files changed

+262
-81
lines changed

11 files changed

+262
-81
lines changed

Cargo.toml

+1
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ env_logger = "0.3"
3131
rustc-serialize = "0.3.19"
3232
syntex_syntax = "0.44"
3333
regex = "0.1"
34+
cexpr = "0.2"
3435

3536
[dependencies.aster]
3637
features = ["with-syntex"]

src/chooser.rs

+14
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
//! A public API for more fine-grained customization of bindgen behavior.
2+
3+
pub use ir::int::IntKind;
4+
use std::fmt;
5+
6+
/// A trait to allow configuring different kinds of types in different
7+
/// situations.
8+
pub trait TypeChooser: fmt::Debug {
9+
/// The integer kind an integer macro should have, given a name and the
10+
/// value of that macro, or `None` if you want the default to be chosen.
11+
fn int_macro(&self, _name: &str, _value: i64) -> Option<IntKind> {
12+
None
13+
}
14+
}

src/clang.rs

+63-6
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,9 @@
44
#![allow(non_upper_case_globals, dead_code)]
55

66

7+
use cexpr;
78
use clangll::*;
8-
use std::{mem, ptr};
9+
use std::{mem, ptr, slice};
910
use std::ffi::{CStr, CString};
1011
use std::fmt;
1112
use std::hash::Hash;
@@ -1050,18 +1051,18 @@ impl TranslationUnit {
10501051
let range = cursor.extent();
10511052
let mut tokens = vec![];
10521053
unsafe {
1053-
let mut token_ptr = ::std::ptr::null_mut();
1054+
let mut token_ptr = ptr::null_mut();
10541055
let mut num_tokens: c_uint = 0;
10551056
clang_tokenize(self.x, range, &mut token_ptr, &mut num_tokens);
10561057
if token_ptr.is_null() {
10571058
return None;
10581059
}
1059-
let token_array = ::std::slice::from_raw_parts(token_ptr,
1060-
num_tokens as usize);
1060+
1061+
let token_array = slice::from_raw_parts(token_ptr,
1062+
num_tokens as usize);
10611063
for &token in token_array.iter() {
10621064
let kind = clang_getTokenKind(token);
1063-
let spelling: String = clang_getTokenSpelling(self.x, token)
1064-
.into();
1065+
let spelling = clang_getTokenSpelling(self.x, token).into();
10651066

10661067
tokens.push(Token {
10671068
kind: kind,
@@ -1072,6 +1073,62 @@ impl TranslationUnit {
10721073
}
10731074
Some(tokens)
10741075
}
1076+
1077+
/// Convert a set of tokens from clang into `cexpr` tokens, for further
1078+
/// processing.
1079+
pub fn cexpr_tokens(&self,
1080+
cursor: &Cursor)
1081+
-> Option<Vec<cexpr::token::Token>> {
1082+
use cexpr::token;
1083+
1084+
let mut tokens = match self.tokens(cursor) {
1085+
Some(tokens) => tokens,
1086+
None => return None,
1087+
};
1088+
1089+
// FIXME(emilio): LLVM 3.9 at least always includes an extra token for no
1090+
// good reason (except if we're at EOF). So we do this kind of hack,
1091+
// where we skip known-to-cause-problems trailing punctuation and
1092+
// trailing keywords.
1093+
//
1094+
// This is sort of unfortunate, though :(.
1095+
//
1096+
// I'll try to get it fixed in LLVM if I have the time to submit a
1097+
// patch.
1098+
let mut trim_last_token = false;
1099+
if let Some(token) = tokens.last() {
1100+
// The start of the next macro.
1101+
trim_last_token |= token.spelling == "#" &&
1102+
token.kind == CXToken_Punctuation;
1103+
1104+
// A following keyword of any kind, like a following declaration.
1105+
trim_last_token |= token.kind == CXToken_Keyword;
1106+
}
1107+
1108+
if trim_last_token {
1109+
tokens.pop().unwrap();
1110+
}
1111+
1112+
Some(tokens.into_iter()
1113+
.filter_map(|token| {
1114+
let kind = match token.kind {
1115+
CXToken_Punctuation => token::Kind::Punctuation,
1116+
CXToken_Literal => token::Kind::Literal,
1117+
CXToken_Identifier => token::Kind::Identifier,
1118+
CXToken_Keyword => token::Kind::Keyword,
1119+
// NB: cexpr is not too happy about comments inside
1120+
// expressions, so we strip them down here.
1121+
CXToken_Comment => return None,
1122+
_ => panic!("Found unexpected token kind: {}", token.kind),
1123+
};
1124+
1125+
Some(token::Token {
1126+
kind: kind,
1127+
raw: token.spelling.into_bytes().into_boxed_slice(),
1128+
})
1129+
})
1130+
.collect::<Vec<_>>())
1131+
}
10751132
}
10761133

10771134
impl Drop for TranslationUnit {

src/codegen/mod.rs

+11
Original file line numberDiff line numberDiff line change
@@ -1587,8 +1587,19 @@ impl ToRustTy for Type {
15871587
IntKind::ULong => raw!(c_ulong),
15881588
IntKind::LongLong => raw!(c_longlong),
15891589
IntKind::ULongLong => raw!(c_ulonglong),
1590+
1591+
IntKind::I8 => aster::ty::TyBuilder::new().i8(),
1592+
IntKind::U8 => aster::ty::TyBuilder::new().u8(),
1593+
IntKind::I16 => aster::ty::TyBuilder::new().i16(),
15901594
IntKind::U16 => aster::ty::TyBuilder::new().u16(),
1595+
IntKind::I32 => aster::ty::TyBuilder::new().i32(),
15911596
IntKind::U32 => aster::ty::TyBuilder::new().u32(),
1597+
IntKind::I64 => aster::ty::TyBuilder::new().i64(),
1598+
IntKind::U64 => aster::ty::TyBuilder::new().u64(),
1599+
IntKind::Custom { name, .. } => {
1600+
let ident = ctx.rust_ident_raw(name);
1601+
quote_ty!(ctx.ext_cx(), $ident)
1602+
}
15921603
// FIXME: This doesn't generate the proper alignment, but we
15931604
// can't do better right now. We should be able to use
15941605
// i128/u128 when they're available.

src/ir/context.rs

+19-10
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
11
//! Common context that is passed around during parsing and codegen.
22
33
use BindgenOptions;
4+
use cexpr;
45
use clang::{self, Cursor};
56
use parse::ClangItemParser;
6-
use std::borrow::{Borrow, Cow};
7-
use std::collections::{HashMap, HashSet, hash_map};
7+
use std::borrow::Cow;
8+
use std::collections::{HashMap, hash_map};
89
use std::collections::btree_map::{self, BTreeMap};
910
use std::fmt;
1011
use super::int::IntKind;
@@ -77,8 +78,9 @@ pub struct BindgenContext<'ctx> {
7778
pub currently_parsed_types: Vec<(Cursor, ItemId)>,
7879

7980
/// A HashMap with all the already parsed macros, keyed by name. This is done to avoid
80-
/// hard errors while parsing duplicated macros.
81-
parsed_macros: HashSet<String>,
81+
/// hard errors while parsing duplicated macros, as well as to allow macro
82+
/// expression parsing.
83+
parsed_macros: HashMap<Vec<u8>, cexpr::expr::EvalResult>,
8284

8385
/// The active replacements collected from replaces="xxx" annotations.
8486
replacements: HashMap<String, ItemId>,
@@ -243,7 +245,7 @@ impl<'ctx> BindgenContext<'ctx> {
243245

244246
/// Returns a mangled name as a rust identifier.
245247
pub fn rust_ident_raw(&self, name: &str) -> Ident {
246-
self.ext_cx().ident_of(name.borrow())
248+
self.ext_cx().ident_of(name)
247249
}
248250

249251
/// Iterate over all items that have been defined.
@@ -715,14 +717,21 @@ impl<'ctx> BindgenContext<'ctx> {
715717
}
716718

717719
/// Have we parsed the macro named `macro_name` already?
718-
pub fn parsed_macro(&self, macro_name: &str) -> bool {
719-
self.parsed_macros.contains(macro_name)
720+
pub fn parsed_macro(&self, macro_name: &[u8]) -> bool {
721+
self.parsed_macros.contains_key(macro_name)
722+
}
723+
724+
/// Get the currently parsed macros.
725+
pub fn parsed_macros(&self) -> &HashMap<Vec<u8>, cexpr::expr::EvalResult> {
726+
debug_assert!(!self.in_codegen_phase());
727+
&self.parsed_macros
720728
}
721729

722730
/// Mark the macro identified by `id` as parsed, storing its evaluated `value`.
723-
pub fn note_parsed_macro(&mut self, macro_name: String) {
724-
debug_assert!(!self.parsed_macros.contains(&macro_name));
725-
self.parsed_macros.insert(macro_name);
731+
pub fn note_parsed_macro(&mut self,
732+
id: Vec<u8>,
733+
value: cexpr::expr::EvalResult) {
734+
self.parsed_macros.insert(id, value);
726735
}
727736

728737
/// Are we in the codegen phase?

src/ir/int.rs

+35-6
Original file line numberDiff line numberDiff line change
@@ -36,29 +36,58 @@ pub enum IntKind {
3636
/// An `unsigned long long`.
3737
ULongLong,
3838

39+
/// An 8-bit signed integer.
40+
I8,
41+
42+
/// An 8-bit unsigned integer.
43+
U8,
44+
45+
/// A 16-bit signed integer.
46+
I16,
47+
3948
/// Either a `char16_t` or a `wchar_t`.
4049
U16,
4150

42-
/// A `char32_t`.
51+
/// A 32-bit signed integer.
52+
I32,
53+
54+
/// A 32-bit unsigned integer.
4355
U32,
4456

57+
/// A 64-bit signed integer.
58+
I64,
59+
60+
/// A 64-bit unsigned integer.
61+
U64,
62+
4563
/// An `int128_t`.
4664
I128,
4765

4866
/// A `uint128_t`.
49-
U128, /* Though now we're at it we could add equivalents for the rust
50-
* types... */
67+
U128,
68+
69+
/// A custom integer type, used to allow custom macro types depending on
70+
/// range.
71+
Custom {
72+
/// The name of the type, which would be used without modification.
73+
name: &'static str,
74+
/// Whether the type is signed or not.
75+
is_signed: bool,
76+
},
5177
}
5278

5379
impl IntKind {
5480
/// Is this integral type signed?
5581
pub fn is_signed(&self) -> bool {
5682
use self::IntKind::*;
5783
match *self {
58-
Bool | UChar | UShort | UInt | ULong | ULongLong | U16 | U32 |
59-
U128 => false,
84+
Bool | UChar | UShort | UInt | ULong | ULongLong | U8 | U16 |
85+
U32 | U64 | U128 => false,
86+
87+
Char | Short | Int | Long | LongLong | I8 | I16 | I32 | I64 |
88+
I128 => true,
6089

61-
Char | Short | Int | Long | LongLong | I128 => true,
90+
Custom { is_signed, .. } => is_signed,
6291
}
6392
}
6493
}

0 commit comments

Comments
 (0)