diff --git a/Cargo.toml b/Cargo.toml index 4e455fa9a8..97bcb82f93 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -31,6 +31,7 @@ env_logger = "0.3" rustc-serialize = "0.3.19" syntex_syntax = "0.44" regex = "0.1" +cexpr = "0.2" [dependencies.aster] features = ["with-syntex"] diff --git a/src/chooser.rs b/src/chooser.rs new file mode 100644 index 0000000000..10a77dc9c4 --- /dev/null +++ b/src/chooser.rs @@ -0,0 +1,14 @@ +//! A public API for more fine-grained customization of bindgen behavior. + +pub use ir::int::IntKind; +use std::fmt; + +/// A trait to allow configuring different kinds of types in different +/// situations. +pub trait TypeChooser: fmt::Debug { + /// The integer kind an integer macro should have, given a name and the + /// value of that macro, or `None` if you want the default to be chosen. + fn int_macro(&self, _name: &str, _value: i64) -> Option<IntKind> { + None + } +} diff --git a/src/clang.rs b/src/clang.rs index e6d781234e..177d7dabd8 100755 --- a/src/clang.rs +++ b/src/clang.rs @@ -4,8 +4,9 @@ #![allow(non_upper_case_globals, dead_code)] +use cexpr; use clangll::*; -use std::{mem, ptr}; +use std::{mem, ptr, slice}; use std::ffi::{CStr, CString}; use std::fmt; use std::hash::Hash; @@ -1051,18 +1052,18 @@ impl TranslationUnit { let range = cursor.extent(); let mut tokens = vec![]; unsafe { - let mut token_ptr = ::std::ptr::null_mut(); + let mut token_ptr = ptr::null_mut(); let mut num_tokens: c_uint = 0; clang_tokenize(self.x, range, &mut token_ptr, &mut num_tokens); if token_ptr.is_null() { return None; } - let token_array = ::std::slice::from_raw_parts(token_ptr, - num_tokens as usize); + + let token_array = slice::from_raw_parts(token_ptr, + num_tokens as usize); for &token in token_array.iter() { let kind = clang_getTokenKind(token); - let spelling: String = clang_getTokenSpelling(self.x, token) - .into(); + let spelling = clang_getTokenSpelling(self.x, token).into(); tokens.push(Token { kind: kind, @@ -1073,6 +1074,62 @@ impl TranslationUnit { } 
Some(tokens) } + + /// Convert a set of tokens from clang into `cexpr` tokens, for further + /// processing. + pub fn cexpr_tokens(&self, + cursor: &Cursor) + -> Option<Vec<cexpr::token::Token>> { + use cexpr::token; + + let mut tokens = match self.tokens(cursor) { + Some(tokens) => tokens, + None => return None, + }; + + // FIXME(emilio): LLVM 3.9 at least always includes an extra token for no + // good reason (except if we're at EOF). So we do this kind of hack, + // where we skip known-to-cause problems trailing punctuation and + // trailing keywords. + // + // This is sort of unfortunate, though :(. + // + // I'll try to get it fixed in LLVM if I have the time to submit a + // patch. + let mut trim_last_token = false; + if let Some(token) = tokens.last() { + // The starting of the next macro. + trim_last_token |= token.spelling == "#" && + token.kind == CXToken_Punctuation; + + // A following keyword of any kind, like a following declaration. + trim_last_token |= token.kind == CXToken_Keyword; + } + + if trim_last_token { + tokens.pop().unwrap(); + } + + Some(tokens.into_iter() + .filter_map(|token| { + let kind = match token.kind { + CXToken_Punctuation => token::Kind::Punctuation, + CXToken_Literal => token::Kind::Literal, + CXToken_Identifier => token::Kind::Identifier, + CXToken_Keyword => token::Kind::Keyword, + // NB: cexpr is not too happy about comments inside + // expressions, so we strip them down here. 
+ CXToken_Comment => return None, + _ => panic!("Found unexpected token kind: {}", token.kind), + }; + + Some(token::Token { + kind: kind, + raw: token.spelling.into_bytes().into_boxed_slice(), + }) + }) + .collect::<Vec<_>>()) + } } impl Drop for TranslationUnit { diff --git a/src/codegen/mod.rs b/src/codegen/mod.rs index 7fa8b19c1e..0c1c31da7e 100755 --- a/src/codegen/mod.rs +++ b/src/codegen/mod.rs @@ -1453,8 +1453,19 @@ impl ToRustTy for Type { IntKind::ULong => raw!(c_ulong), IntKind::LongLong => raw!(c_longlong), IntKind::ULongLong => raw!(c_ulonglong), + + IntKind::I8 => aster::ty::TyBuilder::new().i8(), + IntKind::U8 => aster::ty::TyBuilder::new().u8(), + IntKind::I16 => aster::ty::TyBuilder::new().i16(), IntKind::U16 => aster::ty::TyBuilder::new().u16(), + IntKind::I32 => aster::ty::TyBuilder::new().i32(), IntKind::U32 => aster::ty::TyBuilder::new().u32(), + IntKind::I64 => aster::ty::TyBuilder::new().i64(), + IntKind::U64 => aster::ty::TyBuilder::new().u64(), + IntKind::Custom { name, .. } => { + let ident = ctx.rust_ident_raw(name); + quote_ty!(ctx.ext_cx(), $ident) + } // FIXME: This doesn't generate the proper alignment, but we // can't do better right now. We should be able to use // i128/u128 when they're available. diff --git a/src/ir/context.rs b/src/ir/context.rs index f11b387a09..00ece52975 100644 --- a/src/ir/context.rs +++ b/src/ir/context.rs @@ -1,10 +1,11 @@ //! Common context that is passed around during parsing and codegen. use BindgenOptions; +use cexpr; use clang::{self, Cursor}; use parse::ClangItemParser; -use std::borrow::{Borrow, Cow}; -use std::collections::{HashMap, HashSet, hash_map}; +use std::borrow::Cow; +use std::collections::{HashMap, hash_map}; use std::collections::btree_map::{self, BTreeMap}; use std::fmt; use super::int::IntKind; @@ -77,8 +78,9 @@ pub struct BindgenContext<'ctx> { pub currently_parsed_types: Vec<(Cursor, ItemId)>, /// A HashSet with all the already parsed macro names. 
This is done to avoid - /// hard errors while parsing duplicated macros. - parsed_macros: HashSet<String>, + /// hard errors while parsing duplicated macros, as well as to allow macro + /// expression parsing. + parsed_macros: HashMap<Vec<u8>, cexpr::expr::EvalResult>, /// The active replacements collected from replaces="xxx" annotations. replacements: HashMap, @@ -243,7 +245,7 @@ impl<'ctx> BindgenContext<'ctx> { /// Returns a mangled name as a rust identifier. pub fn rust_ident_raw(&self, name: &str) -> Ident { - self.ext_cx().ident_of(name.borrow()) + self.ext_cx().ident_of(name) } /// Iterate over all items that have been defined. @@ -715,14 +717,21 @@ impl<'ctx> BindgenContext<'ctx> { } /// Have we parsed the macro named `macro_name` already? - pub fn parsed_macro(&self, macro_name: &str) -> bool { - self.parsed_macros.contains(macro_name) + pub fn parsed_macro(&self, macro_name: &[u8]) -> bool { + self.parsed_macros.contains_key(macro_name) + } + + /// Get the currently parsed macros. + pub fn parsed_macros(&self) -> &HashMap<Vec<u8>, cexpr::expr::EvalResult> { + debug_assert!(!self.in_codegen_phase()); + &self.parsed_macros } /// Mark the macro named `macro_name` as parsed. - pub fn note_parsed_macro(&mut self, macro_name: String) { - debug_assert!(!self.parsed_macros.contains(&macro_name)); - self.parsed_macros.insert(macro_name); + pub fn note_parsed_macro(&mut self, + id: Vec<u8>, + value: cexpr::expr::EvalResult) { + self.parsed_macros.insert(id, value); } /// Are we in the codegen phase? diff --git a/src/ir/int.rs b/src/ir/int.rs index a18e4c588a..2d85db8385 100644 --- a/src/ir/int.rs +++ b/src/ir/int.rs @@ -36,18 +36,44 @@ pub enum IntKind { /// An `unsigned long long`. ULongLong, + /// An 8-bit signed integer. + I8, + + /// An 8-bit unsigned integer. + U8, + + /// A 16-bit signed integer. + I16, + /// Either a `char16_t` or a `wchar_t`. U16, - /// A `char32_t`. + /// A 32-bit signed integer. + I32, + + /// A 32-bit unsigned integer. U32, + /// A 64-bit signed integer. 
+ I64, + + /// A 64-bit unsigned integer. + U64, + /// An `int128_t` I128, /// A `uint128_t`. - U128, /* Though now we're at it we could add equivalents for the rust - * types... */ + U128, + + /// A custom integer type, used to allow custom macro types depending on + /// range. + Custom { + /// The name of the type, which would be used without modification. + name: &'static str, + /// Whether the type is signed or not. + is_signed: bool, + }, } impl IntKind { @@ -55,10 +81,13 @@ impl IntKind { pub fn is_signed(&self) -> bool { use self::IntKind::*; match *self { - Bool | UChar | UShort | UInt | ULong | ULongLong | U16 | U32 | - U128 => false, + Bool | UChar | UShort | UInt | ULong | ULongLong | U8 | U16 | + U32 | U64 | U128 => false, + + Char | Short | Int | Long | LongLong | I8 | I16 | I32 | I64 | + I128 => true, - Char | Short | Int | Long | LongLong | I128 => true, + Custom { is_signed, .. } => is_signed, } } } diff --git a/src/ir/var.rs b/src/ir/var.rs index 33e5624267..047e8642cf 100644 --- a/src/ir/var.rs +++ b/src/ir/var.rs @@ -1,7 +1,9 @@ //! Intermediate representation of variables. 
+use cexpr; use clang; use parse::{ClangItemParser, ClangSubItemParser, ParseError, ParseResult}; +use std::num::Wrapping; use super::context::BindgenContext; use super::function::cursor_mangling; use super::int::IntKind; @@ -73,43 +75,65 @@ impl ClangSubItemParser for Var { ctx: &mut BindgenContext) -> Result<ParseResult<Self>, ParseError> { use clangll::*; + use cexpr::expr::EvalResult; match cursor.kind() { CXCursor_MacroDefinition => { - let value = parse_int_literal_tokens(&cursor, - ctx.translation_unit()); + let value = parse_macro(ctx, &cursor, ctx.translation_unit()); - let value = match value { + let (id, value) = match value { Some(v) => v, None => return Err(ParseError::Continue), }; - let name = cursor.spelling(); - if name.is_empty() { - warn!("Empty macro name?"); - return Err(ParseError::Continue); - } + assert!(!id.is_empty(), "Empty macro name?"); - if ctx.parsed_macro(&name) { + if ctx.parsed_macro(&id) { + let name = String::from_utf8(id).unwrap(); warn!("Duplicated macro definition: {}", name); return Err(ParseError::Continue); } - ctx.note_parsed_macro(name.clone()); - - let ty = if value < 0 { - Item::builtin_type(TypeKind::Int(IntKind::Int), true, ctx) - } else if value.abs() > u32::max_value() as i64 { - Item::builtin_type(TypeKind::Int(IntKind::ULongLong), - true, - ctx) - } else { - Item::builtin_type(TypeKind::Int(IntKind::UInt), true, ctx) + + // NB: It's important to "note" the macro even if the result is + // not an integer, otherwise we might lose other kinds of + // derived macros. + ctx.note_parsed_macro(id.clone(), value.clone()); + + // NOTE: Unwrapping, here and above, is safe, because the + // identifier of a token comes straight from clang, and we + // enforce utf8 there, so we should have already panicked at + // this point. + let name = String::from_utf8(id).unwrap(); + let (int_kind, val) = match value { + // TODO(emilio): Handle the non-invalid ones! + EvalResult::Float(..) | + EvalResult::Char(..) | + EvalResult::Str(..) 
| + EvalResult::Invalid => return Err(ParseError::Continue), + + EvalResult::Int(Wrapping(value)) => { + let kind = ctx.options().type_chooser.as_ref() + .and_then(|c| c.int_macro(&name, value)) + .unwrap_or_else(|| { + if value < 0 { + if value < i32::min_value() as i64 { + IntKind::LongLong + } else { + IntKind::Int + } + } else if value > u32::max_value() as i64 { + IntKind::ULongLong + } else { + IntKind::UInt + } + }); + + (kind, value) + } }; - Ok(ParseResult::New(Var::new(name, - None, - ty, - Some(value), - true), + let ty = Item::builtin_type(TypeKind::Int(int_kind), true, ctx); + + Ok(ParseResult::New(Var::new(name, None, ty, Some(val), true), Some(cursor))) } CXCursor_VarDecl => { @@ -153,49 +177,43 @@ impl ClangSubItemParser for Var { } } -/// Try and parse the immediately found tokens from an unit (if any) to integers +/// Try and parse a macro using all the macros parsed until now. +fn parse_macro(ctx: &BindgenContext, + cursor: &clang::Cursor, + unit: &clang::TranslationUnit) + -> Option<(Vec<u8>, cexpr::expr::EvalResult)> { + use cexpr::{expr, nom}; + + let cexpr_tokens = match unit.cexpr_tokens(cursor) { + None => return None, + Some(tokens) => tokens, + }; + + let parser = expr::IdentifierParser::new(ctx.parsed_macros()); + let result = parser.macro_definition(&cexpr_tokens); + + match result { + nom::IResult::Done(_, (id, val)) => Some((id.into(), val)), + _ => None, + } +} + fn parse_int_literal_tokens(cursor: &clang::Cursor, unit: &clang::TranslationUnit) -> Option<i64> { - use clangll::{CXToken_Literal, CXToken_Punctuation}; + use cexpr::{expr, nom}; + use cexpr::expr::EvalResult; - let tokens = match unit.tokens(cursor) { + let cexpr_tokens = match unit.cexpr_tokens(cursor) { None => return None, Some(tokens) => tokens, }; - let mut literal = None; - let mut negate = false; - for token in tokens.into_iter() { - match token.kind { - CXToken_Punctuation if token.spelling == "-" => { - negate = !negate; - } - CXToken_Literal => { - literal = 
Some(token.spelling); - break; - } - _ => { - // Reset values if we found anything else - negate = false; - literal = None; - } - } + // TODO(emilio): We can try to parse other kinds of literals. + match expr::expr(&cexpr_tokens) { + nom::IResult::Done(_, EvalResult::Int(Wrapping(val))) => Some(val), + _ => None, } - - literal.and_then(|lit| { - if lit.starts_with("0x") { - // TODO: try to preserve hex literals? - i64::from_str_radix(&lit[2..], 16).ok() - } else if lit == "0" { - Some(0) - } else if lit.starts_with("0") { - i64::from_str_radix(&lit[1..], 8).ok() - } else { - lit.parse().ok() - } - }) - .map(|lit| if negate { -lit } else { lit }) } fn get_integer_literal_from_cursor(cursor: &clang::Cursor, diff --git a/src/lib.rs b/src/lib.rs index 03dac3cc88..2c6eebbe71 100755 --- a/src/lib.rs +++ b/src/lib.rs @@ -24,6 +24,7 @@ #[macro_use] extern crate cfg_if; +extern crate cexpr; extern crate syntex_syntax as syntax; extern crate aster; extern crate quasi; @@ -62,6 +63,8 @@ mod parse; mod regex_set; mod uses; +pub mod chooser; + #[cfg(rustfmt)] mod codegen; @@ -228,6 +231,13 @@ impl Builder { self } + /// Allows configuring types in different situations, see the `TypeChooser` + /// documentation. + pub fn type_chooser(mut self, cb: Box<chooser::TypeChooser>) -> Self { + self.options.type_chooser = Some(cb); + self + } + /// Generate the Rust bindings using the options built up thus far. pub fn generate<'ctx>(self) -> Result<Bindings<'ctx>, ()> { Bindings::generate(self.options, None) @@ -319,6 +329,10 @@ pub struct BindgenOptions { /// Generate a dummy C/C++ file that includes the header and has dummy uses /// of all types defined therein. See the `uses` module for more. pub dummy_uses: Option, + + /// A user-provided type chooser to allow customizing different kinds of + /// situations. 
+ pub type_chooser: Option<Box<chooser::TypeChooser>>, } impl Default for BindgenOptions { @@ -346,6 +360,7 @@ impl Default for BindgenOptions { clang_args: vec![], input_header: None, dummy_uses: None, + type_chooser: None, } } } diff --git a/tests/expectations/tests/jsval_layout_opaque.rs b/tests/expectations/tests/jsval_layout_opaque.rs index f3c1014e45..fa611f2045 100644 --- a/tests/expectations/tests/jsval_layout_opaque.rs +++ b/tests/expectations/tests/jsval_layout_opaque.rs @@ -24,8 +24,9 @@ impl ::std::clone::Clone for __BindgenUnionField { fn clone(&self) -> Self { Self::new() } } impl ::std::marker::Copy for __BindgenUnionField { } -pub const JSVAL_ALIGNMENT: ::std::os::raw::c_uint = 8; pub const JSVAL_TAG_SHIFT: ::std::os::raw::c_uint = 47; +pub const JSVAL_PAYLOAD_MASK: ::std::os::raw::c_ulonglong = 140737488355327; +pub const JSVAL_TAG_MASK: ::std::os::raw::c_longlong = -140737488355328; #[repr(u8)] #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] pub enum JSValueType { diff --git a/tests/expectations/tests/macro-expr-basic.rs b/tests/expectations/tests/macro-expr-basic.rs new file mode 100644 index 0000000000..7a5c71e041 --- /dev/null +++ b/tests/expectations/tests/macro-expr-basic.rs @@ -0,0 +1,14 @@ +/* automatically generated by rust-bindgen */ + + +#![allow(non_snake_case)] + + +pub const FOO: ::std::os::raw::c_uint = 1; +pub const BAR: ::std::os::raw::c_uint = 4; +pub const BAZ: ::std::os::raw::c_uint = 5; +pub const BARR: ::std::os::raw::c_uint = 1; +pub const BAZZ: ::std::os::raw::c_uint = 7; +pub const I_RAN_OUT_OF_DUMB_NAMES: ::std::os::raw::c_uint = 7; +pub const HAZ_A_COMMENT: ::std::os::raw::c_uint = 1; +pub const HAZ_A_COMMENT_INSIDE: ::std::os::raw::c_uint = 2; diff --git a/tests/headers/macro-expr-basic.h b/tests/headers/macro-expr-basic.h new file mode 100644 index 0000000000..55b11367d1 --- /dev/null +++ b/tests/headers/macro-expr-basic.h @@ -0,0 +1,12 @@ +#define FOO 1 +#define BAR 4 +#define BAZ (FOO + BAR) + +#define BARR (1 << 0) +#define BAZZ ((1 << 1) 
+ BAZ) +#define I_RAN_OUT_OF_DUMB_NAMES (BARR | BAZZ) + +/* I haz a comment */ +#define HAZ_A_COMMENT BARR + +#define HAZ_A_COMMENT_INSIDE (/* comment for real */ BARR + FOO)