diff --git a/bindgen-integration/build.rs b/bindgen-integration/build.rs index f042fe9380..0a28e8f56b 100644 --- a/bindgen-integration/build.rs +++ b/bindgen-integration/build.rs @@ -12,6 +12,7 @@ use std::sync::{Arc, Mutex, RwLock}; struct MacroCallback { macros: Arc<RwLock<HashSet<String>>>, seen_hellos: Mutex<u32>, + seen_funcs: Mutex<u32>, } impl ParseCallbacks for MacroCallback { @@ -45,6 +46,10 @@ impl ParseCallbacks for MacroCallback { fn str_macro(&self, name: &str, value: &[u8]) { match name { + "TESTMACRO_STRING_EXPR" => { + assert_eq!(value, b"string"); + *self.seen_hellos.lock().unwrap() += 1; + } "TESTMACRO_STRING_EXPANDED" | "TESTMACRO_STRING" | "TESTMACRO_INTEGER" => { @@ -70,15 +75,64 @@ impl ParseCallbacks for MacroCallback { _ => None, } } + + fn func_macro(&self, name: &str, value: &[&[u8]]) { + match name { + "TESTMACRO_NONFUNCTIONAL" => { + panic!("func_macro was called for a non-functional macro"); + } + "TESTMACRO_FUNCTIONAL_NONEMPTY(TESTMACRO_INTEGER)" => { + // Spaces are inserted into the right-hand side of a functional + // macro during reconstruction from the tokenization. This might + // change in the future, but it is safe by the definition of a + // token in C, whereas leaving the spaces out could change + // tokenization. 
+ assert_eq!(value, &[b"-" as &[u8], b"TESTMACRO_INTEGER"]); + *self.seen_funcs.lock().unwrap() += 1; + } + "TESTMACRO_FUNCTIONAL_EMPTY(TESTMACRO_INTEGER)" => { + assert_eq!(value, &[] as &[&[u8]]); + *self.seen_funcs.lock().unwrap() += 1; + } + "TESTMACRO_FUNCTIONAL_TOKENIZED(a,b,c,d,e)" => { + assert_eq!( + value, + &[b"a" as &[u8], b"/", b"b", b"c", b"d", b"##", b"e"] + ); + *self.seen_funcs.lock().unwrap() += 1; + } + "TESTMACRO_FUNCTIONAL_SPLIT(a,b)" => { + assert_eq!(value, &[b"b", b",", b"a"]); + *self.seen_funcs.lock().unwrap() += 1; + } + "TESTMACRO_STRING_FUNC_NON_UTF8(x)" => { + assert_eq!( + value, + &[b"(" as &[u8], b"x", b"\"\xff\xff\"", b")"] + ); + *self.seen_funcs.lock().unwrap() += 1; + } + _ => { + // The system might provide lots of functional macros. + // Ensure we did not miss handling one that we meant to handle. + assert!(!name.starts_with("TESTMACRO_"), "name = {}", name); + } + } + } } impl Drop for MacroCallback { fn drop(&mut self) { assert_eq!( *self.seen_hellos.lock().unwrap(), - 2, + 3, "str_macro handle was not called once for all relevant macros" - ) + ); + assert_eq!( + *self.seen_funcs.lock().unwrap(), + 5, + "func_macro handle was not called once for all relevant macros" + ); } } @@ -102,6 +156,7 @@ fn main() { .parse_callbacks(Box::new(MacroCallback { macros: macros.clone(), seen_hellos: Mutex::new(0), + seen_funcs: Mutex::new(0), })) .blacklist_function("my_prefixed_function_to_remove") .generate() diff --git a/bindgen-integration/cpp/Test.h b/bindgen-integration/cpp/Test.h index a20cf4b7ad..f8b2263f6d 100644 --- a/bindgen-integration/cpp/Test.h +++ b/bindgen-integration/cpp/Test.h @@ -7,6 +7,19 @@ #define TESTMACRO_STRING_EXPANDED TESTMACRO_STRING #define TESTMACRO_CUSTOMINTKIND_PATH 123 +// The following two macros are parsed the same by cexpr, but are semantically +// different. 
+#define TESTMACRO_NONFUNCTIONAL (TESTMACRO_INTEGER) +#define TESTMACRO_FUNCTIONAL_EMPTY(TESTMACRO_INTEGER) +#define TESTMACRO_FUNCTIONAL_NONEMPTY(TESTMACRO_INTEGER)-TESTMACRO_INTEGER +#define TESTMACRO_FUNCTIONAL_TOKENIZED( a, b ,c,d,e ) a/b c d ## e +#define TESTMACRO_FUNCTIONAL_SPLIT( a, \ + b) b,\ + a +//#define TESTMACRO_INVALID("string") // A conforming preprocessor rejects this +#define TESTMACRO_STRING_EXPR ("string") +#define TESTMACRO_STRING_FUNC_NON_UTF8(x) (x "ÿÿ") /* invalid UTF-8 on purpose */ + +#include <stdarg.h> enum { diff --git a/src/callbacks.rs b/src/callbacks.rs index 1b5445e642..1cd7f37b07 100644 --- a/src/callbacks.rs +++ b/src/callbacks.rs @@ -35,10 +35,19 @@ pub trait ParseCallbacks: fmt::Debug + UnwindSafe { None } - /// This will be run on every string macro. The callback can not influence the further + /// This will be run on every string macro. The callback cannot influence the further /// treatment of the macro, but may use the value to generate additional code or configuration. fn str_macro(&self, _name: &str, _value: &[u8]) {} + /// This will be run on every function-like macro. The callback cannot + /// influence the further treatment of the macro, but may use the value to + /// generate additional code or configuration. + /// + /// The first parameter represents the name and argument list (including the + /// parentheses) of the function-like macro. The second parameter represents + /// the expansion of the macro as a sequence of tokens. + fn func_macro(&self, _name: &str, _value: &[&[u8]]) {} + /// This function should return whether, given an enum variant /// name, and value, this enum variant will forcibly be a constant. fn enum_variant_behavior( diff --git a/src/clang.rs b/src/clang.rs index 12ac46cebd..3ddf99dc8b 100644 --- a/src/clang.rs +++ b/src/clang.rs @@ -239,6 +239,17 @@ impl Cursor { } } + /// Is this Cursor pointing to a function-like macro definition? 
+ /// Returns None if this cannot be determined with the available libclang + /// (it requires 3.9 or greater). + pub fn is_macro_function_like(&self) -> Option<bool> { + if clang_Cursor_isMacroFunctionLike::is_loaded() { + Some(unsafe { clang_Cursor_isMacroFunctionLike(self.x) != 0 }) + } else { + None + } + } + /// Get the kind of referent this cursor is pointing to. pub fn kind(&self) -> CXCursorKind { self.x.kind @@ -698,30 +709,9 @@ impl Cursor { /// Gets the tokens that correspond to that cursor as `cexpr` tokens. pub fn cexpr_tokens(self) -> Vec<cexpr::token::Token> { - use cexpr::token; - self.tokens() .iter() - .filter_map(|token| { - let kind = match token.kind { - CXToken_Punctuation => token::Kind::Punctuation, - CXToken_Literal => token::Kind::Literal, - CXToken_Identifier => token::Kind::Identifier, - CXToken_Keyword => token::Kind::Keyword, - // NB: cexpr is not too happy about comments inside - // expressions, so we strip them down here. - CXToken_Comment => return None, - _ => { - error!("Found unexpected token kind: {:?}", token); - return None; - } - }; - - Some(token::Token { - kind, - raw: token.spelling().to_vec().into_boxed_slice(), - }) - }) + .filter_map(|token| token.as_cexpr_token()) .collect() } @@ -793,13 +783,16 @@ impl<'a> Drop for RawTokens<'a> { } } -/// A raw clang token, that exposes only the kind and spelling. This is a +/// A raw clang token, that exposes only kind, spelling, and extent. This is a /// slightly more convenient version of `CXToken` which owns the spelling -/// string. +/// string and extent. #[derive(Debug)] pub struct ClangToken { spelling: CXString, - /// The kind of token, this is the same as the relevant member from + /// The extent of the token. This is the same as the relevant member from + /// `CXToken`. + pub extent: CXSourceRange, + /// The kind of the token. This is the same as the relevant member from + /// `CXToken`. 
pub kind: CXTokenKind, } @@ -812,6 +805,30 @@ impl ClangToken { }; c_str.to_bytes() } + + /// Converts a ClangToken to a `cexpr` token if possible. + pub fn as_cexpr_token(&self) -> Option<cexpr::token::Token> { + use cexpr::token; + + let kind = match self.kind { + CXToken_Punctuation => token::Kind::Punctuation, + CXToken_Literal => token::Kind::Literal, + CXToken_Identifier => token::Kind::Identifier, + CXToken_Keyword => token::Kind::Keyword, + // NB: cexpr is not too happy about comments inside + // expressions, so we strip them down here. + CXToken_Comment => return None, + _ => { + error!("Found unexpected token kind: {:?}", self); + return None; + } + }; + + Some(token::Token { + kind, + raw: self.spelling().to_vec().into_boxed_slice(), + }) + } } impl Drop for ClangToken { @@ -834,7 +851,12 @@ impl<'a> Iterator for ClangTokenIterator<'a> { unsafe { let kind = clang_getTokenKind(*raw); let spelling = clang_getTokenSpelling(self.tu, *raw); - Some(ClangToken { kind, spelling }) + let extent = clang_getTokenExtent(self.tu, *raw); + Some(ClangToken { + kind, + extent, + spelling, + }) } } } diff --git a/src/ir/var.rs b/src/ir/var.rs index 0f05a3eecf..67b2e348ce 100644 --- a/src/ir/var.rs +++ b/src/ir/var.rs @@ -8,6 +8,7 @@ use super::item::Item; use super::ty::{FloatKind, TypeKind}; use crate::callbacks::MacroParsingBehavior; use crate::clang; +use crate::clang::ClangToken; use crate::parse::{ ClangItemParser, ClangSubItemParser, ParseError, ParseResult, }; @@ -130,6 +131,75 @@ fn default_macro_constant_type(value: i64) -> IntKind { } } +/// Determines whether a set of tokens from a CXCursor_MacroDefinition +/// represent a function-like macro. If so, calls the func_macro callback +/// and returns `Err(ParseError::Continue)` to signal to skip further +/// processing. If conversion to UTF-8 fails (it is performed only where it +/// should be infallible), then `Err(ParseError::Continue)` is returned as well. 
+fn handle_function_macro( + cursor: &clang::Cursor, + tokens: &[ClangToken], + callbacks: &dyn crate::callbacks::ParseCallbacks, +) -> Result<(), ParseError> { + fn is_abutting(a: &ClangToken, b: &ClangToken) -> bool { + unsafe { + clang_sys::clang_equalLocations( + clang_sys::clang_getRangeEnd(a.extent), + clang_sys::clang_getRangeStart(b.extent), + ) != 0 + } + } + + let is_functional_macro = + // If we have libclang >= 3.9, we can use `is_macro_function_like()` and + // avoid checking for abutting tokens ourselves. + cursor.is_macro_function_like().unwrap_or_else(|| { + // If we cannot get a definitive answer from clang, we instead check + // for a parenthesis token immediately adjacent to (that is, + // abutting) the first token in the macro definition. + // TODO: Once we don't need the fallback check here, we can hoist + // the `is_macro_function_like` check into this function's caller, + // and thus avoid allocating the `tokens` vector for non-functional + // macros. + match tokens.get(0..2) { + Some([a, b]) => is_abutting(&a, &b) && b.spelling() == b"(", + _ => false, + } + }); + + if !is_functional_macro { + return Ok(()); + } + + let is_closing_paren = |t: &ClangToken| { + // Test cheap token kind before comparing exact spellings. + t.kind == clang_sys::CXToken_Punctuation && t.spelling() == b")" + }; + let boundary = tokens.iter().position(is_closing_paren); + + let mut spelled = tokens.iter().map(ClangToken::spelling); + // Add 1, to convert index to length. + let left = spelled + .by_ref() + .take(boundary.ok_or(ParseError::Continue)? + 1); + let left = left.collect::<Vec<_>>().concat(); + let left = String::from_utf8(left).map_err(|_| ParseError::Continue)?; + let right = spelled; + // Drop last token with LLVM < 4.0, due to an LLVM bug. 
+ // + // See: + // https://bugs.llvm.org//show_bug.cgi?id=9069 + let len = match (right.len(), crate::clang_version().parsed) { + (len, Some((v, _))) if len > 0 && v < 4 => len - 1, + (len, _) => len, + }; + let right: Vec<_> = right.take(len).collect(); + callbacks.func_macro(&left, &right); + + // We handled the macro, skip future macro processing. + Err(ParseError::Continue) +} + impl ClangSubItemParser for Var { fn parse( cursor: clang::Cursor, @@ -140,6 +210,8 @@ impl ClangSubItemParser for Var { use clang_sys::*; match cursor.kind() { CXCursor_MacroDefinition => { + let tokens: Vec<_> = cursor.tokens().iter().collect(); + if let Some(callbacks) = ctx.parse_callbacks() { match callbacks.will_parse_macro(&cursor.spelling()) { MacroParsingBehavior::Ignore => { @@ -147,9 +219,11 @@ } MacroParsingBehavior::Default => {} } + + handle_function_macro(&cursor, &tokens, callbacks)?; } - let value = parse_macro(ctx, &cursor); + let value = parse_macro(ctx, &tokens); let (id, value) = match value { Some(v) => v, @@ -316,11 +390,14 @@ impl ClangSubItemParser for Var { /// Try and parse a macro using all the macros parsed until now. fn parse_macro( ctx: &BindgenContext, - cursor: &clang::Cursor, + tokens: &[ClangToken], ) -> Option<(Vec<u8>, cexpr::expr::EvalResult)> { use cexpr::expr; - let mut cexpr_tokens = cursor.cexpr_tokens(); + let mut cexpr_tokens: Vec<_> = tokens + .iter() + .filter_map(ClangToken::as_cexpr_token) + .collect(); let parser = expr::IdentifierParser::new(ctx.parsed_macros());