Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add a callback for function-like macros #1793

Merged
merged 5 commits into from
Jun 20, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 57 additions & 2 deletions bindgen-integration/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ use std::sync::{Arc, Mutex, RwLock};
/// Parse-callback state used by this build script to check which macro
/// callbacks bindgen invokes.
struct MacroCallback {
// Shared, lock-protected set of strings; `main` passes in a clone of its own
// handle. NOTE(review): presumably holds macro names — it is filled outside
// the code visible here, confirm against the rest of the file.
macros: Arc<RwLock<HashSet<String>>>,
// Incremented by `str_macro` for the string macros it verifies.
seen_hellos: Mutex<u32>,
// Incremented by `func_macro` for each function-like macro it verifies.
seen_funcs: Mutex<u32>,
}

impl ParseCallbacks for MacroCallback {
Expand Down Expand Up @@ -45,6 +46,10 @@ impl ParseCallbacks for MacroCallback {

fn str_macro(&self, name: &str, value: &[u8]) {
match name {
"TESTMACRO_STRING_EXPR" => {
assert_eq!(value, b"string");
*self.seen_hellos.lock().unwrap() += 1;
}
"TESTMACRO_STRING_EXPANDED" |
"TESTMACRO_STRING" |
"TESTMACRO_INTEGER" => {
Expand All @@ -70,15 +75,64 @@ impl ParseCallbacks for MacroCallback {
_ => None,
}
}

/// Verifies the token expansion reported for each function-like
/// `TESTMACRO_*` macro and counts every macro that was verified.
///
/// Panics if the callback fires for `TESTMACRO_NONFUNCTIONAL`, if a reported
/// expansion differs from the expected one, or if an unhandled `TESTMACRO_*`
/// name shows up.
fn func_macro(&self, name: &str, value: &[&[u8]]) {
    // Pick the expected expansion first; the comparison and the counter
    // update are identical for every macro we know about.
    let expected: &[&[u8]] = match name {
        "TESTMACRO_NONFUNCTIONAL" => {
            panic!("func_macro was called for a non-functional macro");
        }
        "TESTMACRO_FUNCTIONAL_NONEMPTY(TESTMACRO_INTEGER)" => {
            // Reconstructing the right-hand side from the tokenization
            // inserts spaces between tokens. That may change one day, but it
            // is safe by the definition of a token in C, whereas omitting
            // the spaces could change how the text tokenizes.
            &[b"-" as &[u8], b"TESTMACRO_INTEGER"]
        }
        "TESTMACRO_FUNCTIONAL_EMPTY(TESTMACRO_INTEGER)" => &[],
        "TESTMACRO_FUNCTIONAL_TOKENIZED(a,b,c,d,e)" => {
            &[b"a" as &[u8], b"/", b"b", b"c", b"d", b"##", b"e"]
        }
        "TESTMACRO_FUNCTIONAL_SPLIT(a,b)" => &[b"b" as &[u8], b",", b"a"],
        "TESTMACRO_STRING_FUNC_NON_UTF8(x)" => {
            &[b"(" as &[u8], b"x", b"\"\xff\xff\"", b")"]
        }
        _ => {
            // The system headers may define plenty of function-like macros
            // of their own. Only make sure that none of ours slipped past
            // the arms above.
            assert!(!name.starts_with("TESTMACRO_"), "name = {}", name);
            return;
        }
    };

    assert_eq!(value, expected);
    *self.seen_funcs.lock().unwrap() += 1;
}
}

impl Drop for MacroCallback {
fn drop(&mut self) {
assert_eq!(
*self.seen_hellos.lock().unwrap(),
2,
3,
"str_macro handle was not called once for all relevant macros"
)
);
assert_eq!(
*self.seen_funcs.lock().unwrap(),
5,
"func_macro handle was not called once for all relevant macros"
);
}
}

Expand All @@ -102,6 +156,7 @@ fn main() {
.parse_callbacks(Box::new(MacroCallback {
macros: macros.clone(),
seen_hellos: Mutex::new(0),
seen_funcs: Mutex::new(0),
}))
.blacklist_function("my_prefixed_function_to_remove")
.generate()
Expand Down
13 changes: 13 additions & 0 deletions bindgen-integration/cpp/Test.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,19 @@
#define TESTMACRO_STRING_EXPANDED TESTMACRO_STRING
#define TESTMACRO_CUSTOMINTKIND_PATH 123

// The following two macros are parsed the same by cexpr, but are semantically
// different.
#define TESTMACRO_NONFUNCTIONAL (TESTMACRO_INTEGER)
#define TESTMACRO_FUNCTIONAL_EMPTY(TESTMACRO_INTEGER)
#define TESTMACRO_FUNCTIONAL_NONEMPTY(TESTMACRO_INTEGER)-TESTMACRO_INTEGER
#define TESTMACRO_FUNCTIONAL_TOKENIZED( a, b ,c,d,e ) a/b c d ## e
#define TESTMACRO_FUNCTIONAL_SPLIT( a, \
b) b,\
a
//#define TESTMACRO_INVALID("string") // A conforming preprocessor rejects this
#define TESTMACRO_STRING_EXPR ("string")
#define TESTMACRO_STRING_FUNC_NON_UTF8(x) (x "ÿÿ") /* invalid UTF-8 on purpose */

#include <cwchar>

enum {
Expand Down
11 changes: 10 additions & 1 deletion src/callbacks.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,19 @@ pub trait ParseCallbacks: fmt::Debug + UnwindSafe {
None
}

/// This will be run on every string macro. The callback can not influence the further
/// This will be run on every string macro. The callback cannot influence the further
/// treatment of the macro, but may use the value to generate additional code or configuration.
fn str_macro(&self, _name: &str, _value: &[u8]) {}

/// This will be run on every function-like macro. The callback cannot
/// influence the further treatment of the macro, but may use the value to
/// generate additional code or configuration.
///
/// The first parameter represents the name and argument list (including the
/// parentheses) of the function-like macro. It is built by joining the
/// spellings of the macro's tokens up to the closing parenthesis, so any
/// whitespace from the definition is not preserved (for example
/// `MY_MACRO(a,b)`).
///
/// The second parameter represents the expansion of the macro as a sequence
/// of tokens. Each token is given as raw bytes, which are not guaranteed to
/// be valid UTF-8.
///
/// The default implementation does nothing.
fn func_macro(&self, _name: &str, _value: &[&[u8]]) {}

/// This function should return whether, given an enum variant
/// name, and value, this enum variant will forcibly be a constant.
fn enum_variant_behavior(
Expand Down
74 changes: 48 additions & 26 deletions src/clang.rs
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,17 @@ impl Cursor {
}
}

/// Reports whether this cursor points to a function-like macro definition.
///
/// Yields `None` when the loaded libclang cannot answer the question (the
/// query needs libclang 3.9 or newer).
pub fn is_macro_function_like(&self) -> Option<bool> {
    if !clang_Cursor_isMacroFunctionLike::is_loaded() {
        return None;
    }

    // SAFETY: the symbol was confirmed to be loaded just above, and the
    // wrapped cursor is handed to libclang by value.
    let answer = unsafe { clang_Cursor_isMacroFunctionLike(self.x) };
    Some(answer != 0)
}

/// Get the kind of referent this cursor is pointing to.
pub fn kind(&self) -> CXCursorKind {
self.x.kind
Expand Down Expand Up @@ -698,30 +709,9 @@ impl Cursor {

/// Gets the tokens that correspond to that cursor as `cexpr` tokens.
pub fn cexpr_tokens(self) -> Vec<cexpr::token::Token> {
use cexpr::token;

self.tokens()
.iter()
.filter_map(|token| {
let kind = match token.kind {
CXToken_Punctuation => token::Kind::Punctuation,
CXToken_Literal => token::Kind::Literal,
CXToken_Identifier => token::Kind::Identifier,
CXToken_Keyword => token::Kind::Keyword,
// NB: cexpr is not too happy about comments inside
// expressions, so we strip them down here.
CXToken_Comment => return None,
_ => {
error!("Found unexpected token kind: {:?}", token);
return None;
}
};

Some(token::Token {
kind,
raw: token.spelling().to_vec().into_boxed_slice(),
})
})
.filter_map(|token| token.as_cexpr_token())
.collect()
}

Expand Down Expand Up @@ -793,13 +783,16 @@ impl<'a> Drop for RawTokens<'a> {
}
}

/// A raw clang token, that exposes only the kind and spelling. This is a
/// A raw clang token, that exposes only kind, spelling, and extent. This is a
/// slightly more convenient version of `CXToken` which owns the spelling
/// string.
/// string and extent.
#[derive(Debug)]
pub struct ClangToken {
spelling: CXString,
/// The kind of token, this is the same as the relevant member from
/// The extent of the token. This is the same as the relevant member from
/// `CXToken`.
pub extent: CXSourceRange,
/// The kind of the token. This is the same as the relevant member from
/// `CXToken`.
pub kind: CXTokenKind,
}
Expand All @@ -812,6 +805,30 @@ impl ClangToken {
};
c_str.to_bytes()
}

/// Builds the `cexpr` token equivalent of this token, if there is one.
///
/// Comment tokens yield `None`. Any token kind that is not handled below
/// is logged through `error!` and yields `None` as well.
pub fn as_cexpr_token(&self) -> Option<cexpr::token::Token> {
    use cexpr::token;

    let kind = match self.kind {
        // NB: cexpr does not cope well with comments inside expressions,
        // so they are dropped here.
        CXToken_Comment => return None,
        CXToken_Keyword => token::Kind::Keyword,
        CXToken_Identifier => token::Kind::Identifier,
        CXToken_Literal => token::Kind::Literal,
        CXToken_Punctuation => token::Kind::Punctuation,
        _ => {
            error!("Found unexpected token kind: {:?}", self);
            return None;
        }
    };

    // The cexpr token owns its bytes, so copy the spelling out.
    let raw = Box::<[u8]>::from(self.spelling());
    Some(token::Token { kind, raw })
}
}

impl Drop for ClangToken {
Expand All @@ -834,7 +851,12 @@ impl<'a> Iterator for ClangTokenIterator<'a> {
unsafe {
let kind = clang_getTokenKind(*raw);
let spelling = clang_getTokenSpelling(self.tu, *raw);
Some(ClangToken { kind, spelling })
let extent = clang_getTokenExtent(self.tu, *raw);
Some(ClangToken {
kind,
extent,
spelling,
})
}
}
}
Expand Down
83 changes: 80 additions & 3 deletions src/ir/var.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ use super::item::Item;
use super::ty::{FloatKind, TypeKind};
use crate::callbacks::MacroParsingBehavior;
use crate::clang;
use crate::clang::ClangToken;
use crate::parse::{
ClangItemParser, ClangSubItemParser, ParseError, ParseResult,
};
Expand Down Expand Up @@ -130,6 +131,75 @@ fn default_macro_constant_type(value: i64) -> IntKind {
}
}

/// Reports a function-like macro definition to the `func_macro` callback.
///
/// `tokens` are the tokens of the `CXCursor_MacroDefinition` that `cursor`
/// points to.
///
/// Returns `Ok(())` when the macro is not function-like, so the caller can
/// keep processing it. Returns `Err(ParseError::Continue)` when it is
/// function-like, to signal that further processing should be skipped. That
/// error is also returned, without invoking the callback, when no closing
/// parenthesis token is found or when the name and parameter list are not
/// valid UTF-8 (a conversion performed only where it should be infallible).
fn handle_function_macro(
    cursor: &clang::Cursor,
    tokens: &[ClangToken],
    callbacks: &dyn crate::callbacks::ParseCallbacks,
) -> Result<(), ParseError> {
    // Fallback used when libclang cannot tell us directly: the macro is
    // treated as function-like if its second token is a "(" that starts
    // exactly where the first token ends (that is, the two tokens abut).
    // TODO: Once this fallback is no longer needed, the
    // `is_macro_function_like` check can be hoisted into the caller, which
    // then avoids allocating the `tokens` vector for non-functional macros.
    let second_token_is_abutting_paren = || match (tokens.get(0), tokens.get(1)) {
        (Some(first), Some(second)) => {
            // SAFETY: only hands the extents stored in the two tokens to
            // libclang, by value.
            let abutting = unsafe {
                clang_sys::clang_equalLocations(
                    clang_sys::clang_getRangeEnd(first.extent),
                    clang_sys::clang_getRangeStart(second.extent),
                ) != 0
            };
            abutting && second.spelling() == b"("
        }
        _ => false,
    };

    // With libclang >= 3.9 `is_macro_function_like()` gives a definitive
    // answer and the abutting-token check is never run.
    let function_like = cursor
        .is_macro_function_like()
        .unwrap_or_else(second_token_is_abutting_paren);
    if !function_like {
        return Ok(());
    }

    // Locate the first ")" token. The cheap kind comparison runs before the
    // spelling comparison.
    let closing_paren = tokens
        .iter()
        .position(|tok| {
            tok.kind == clang_sys::CXToken_Punctuation &&
                tok.spelling() == b")"
        })
        .ok_or(ParseError::Continue)?;

    // Everything up to and including the ")" is the name and parameter list;
    // whatever follows is the expansion. The `+ 1` turns the index into a
    // length.
    let (signature, expansion) = tokens.split_at(closing_paren + 1);

    let mut name = Vec::new();
    for tok in signature {
        name.extend_from_slice(tok.spelling());
    }
    let name = String::from_utf8(name).map_err(|_| ParseError::Continue)?;

    // LLVM older than 4.0 reports one token too many, so the last token is
    // dropped there.
    //
    // See:
    // https://bugs.llvm.org/show_bug.cgi?id=9069
    let kept = match crate::clang_version().parsed {
        Some((major, _)) if major < 4 && !expansion.is_empty() => {
            expansion.len() - 1
        }
        _ => expansion.len(),
    };
    let expansion: Vec<&[u8]> = expansion
        .iter()
        .take(kept)
        .map(ClangToken::spelling)
        .collect();

    callbacks.func_macro(&name, &expansion);

    // The macro has been handled; skip any further macro processing.
    Err(ParseError::Continue)
}

impl ClangSubItemParser for Var {
fn parse(
cursor: clang::Cursor,
Expand All @@ -140,16 +210,20 @@ impl ClangSubItemParser for Var {
use clang_sys::*;
match cursor.kind() {
CXCursor_MacroDefinition => {
let tokens: Vec<_> = cursor.tokens().iter().collect();

if let Some(callbacks) = ctx.parse_callbacks() {
match callbacks.will_parse_macro(&cursor.spelling()) {
MacroParsingBehavior::Ignore => {
return Err(ParseError::Continue);
}
MacroParsingBehavior::Default => {}
}

handle_function_macro(&cursor, &tokens, callbacks)?;
}

let value = parse_macro(ctx, &cursor);
let value = parse_macro(ctx, &tokens);

let (id, value) = match value {
Some(v) => v,
Expand Down Expand Up @@ -316,11 +390,14 @@ impl ClangSubItemParser for Var {
/// Try and parse a macro using all the macros parsed until now.
fn parse_macro(
ctx: &BindgenContext,
cursor: &clang::Cursor,
tokens: &[ClangToken],
) -> Option<(Vec<u8>, cexpr::expr::EvalResult)> {
use cexpr::expr;

let mut cexpr_tokens = cursor.cexpr_tokens();
let mut cexpr_tokens: Vec<_> = tokens
.iter()
.filter_map(ClangToken::as_cexpr_token)
.collect();

let parser = expr::IdentifierParser::new(ctx.parsed_macros());

Expand Down