I need help with writing an application that expands ‘C’ macros and forwards the expanded macro text to a RecursiveASTVisitor that can rewrite the code.
I was under the impression that I could use clang::PPCallbacks to run the preprocessor phase prior to forwarding the preprocessed tokens to a RecursiveASTVisitor.
Consider the following ‘C’ source code with a number of test macros (c:\temp\test2.c):
#define TWO (2) // object like macro
#define THREE() (3) // function like macro with 0 args
#define FOUR() (4) // function like macro with 0 args
#define NUMSQUARED(x) ((x)*(x)) // function like macro with 1 arg
#define MIN(a, b) (((a) <= (b)) ? (a) : (b))
#define MAX(a, b) (((a) > (b)) ? (a) : (b))
#define FUNC_MACRO(x) ((x) + 1)
#define NESTED_MACRO(a, b) (FUNC_MACRO(a) + NUMSQUARED(b) + FUNC_MACRO(FOUR()) + TWO + THREE())
/* COMMENT */
int main() {
// comment1
int a = NESTED_MACRO(1, 2);
// comment2
int b = MIN(1, TWO); // trailing comment
// comment3
int c = MIN(1, 2);
// comment4
int d = MIN(1, THREE());
// comment5
int f = MIN(1, NUMSQUARED(3));
// comment6
int g = MIN(MAX(1, 2), 3);
// comment7
int h = (a > 3) ? 4 : 5 /*FOO*/;
// comment8
return 1;
}
If I run clang -E -D_CRT_SECURE_NO_WARNINGS c:\temp\test2.c --
the output is stripped of comments and contains fully expanded macros (including nested function-like macros), as follows:
C:\Users\johnc> clang -E -D_CRT_SECURE_NO_WARNINGS C:\temp\test2.c --
# 1 "C:\temp\test2.c"
# 1 "<built-in>" 1
# 1 "<built-in>" 3
# 374 "<built-in>" 3
# 1 "<command line>" 1
# 1 "<built-in>" 2
# 1 "C:\temp\test2.c" 2
# 10 "C:\temp\test2.c"
int main() {
int a = (((1) + 1) + ((2)*(2)) + (((4)) + 1) + (2) + (3));
int b = (((1) <= ((2))) ? (1) : ((2)));
int c = (((1) <= (2)) ? (1) : (2));
int d = (((1) <= ((3))) ? (1) : ((3)));
int f = (((1) <= (((3)*(3)))) ? (1) : (((3)*(3))));
int g = ((((((1) > (2)) ? (1) : (2))) <= (3)) ? ((((1) > (2)) ? (1) : (2))) : (3));
int h = (a > 3) ? 4 : 5 ;
return 1;
}
C:\Users\johnc>
The problem
I need to be able to do something similar in a libTooling application and run a RecursiveASTVisitor on the expanded preprocessed code.
Partial Solution
I was able to produce similar output using a very simple, stripped-down clang::PPCallbacks
libTooling application. By extending clang::PreprocessOnlyAction, I was able to set a few printing options which I discovered while reading the clang source.
// Frontend action that runs only the preprocessor and prints the preprocessed
// translation unit to standard output.
class PreprocessorOnlyAction : public clang::PreprocessOnlyAction {
public:
  // Registers the callback handler on the preprocessor and then prints the
  // preprocessed input. The base-class ExecuteAction() is deliberately not
  // called; printing is driven directly by DoPrintPreprocessedInput().
  void ExecuteAction() override {
    clang::Preprocessor &preprocessor = getCompilerInstance().getPreprocessor();
    preprocessor.addPPCallbacks(
        std::make_unique<PreprocessorOutputHandler>(preprocessor));

    // Printing options (per the original author, chosen for MSVC
    // compatibility). The assignments are independent of each other.
    clang::PreprocessorOutputOptions options;
    options.ShowComments = true;
    options.ShowMacros = true;
    options.ShowIncludeDirectives = true;
    options.ShowCPP = true;
    options.ShowLineMarkers = true;
    options.UseLineDirectives = true;

    // Emit the preprocessed text on stdout.
    clang::DoPrintPreprocessedInput(preprocessor, &llvm::outs(), options);
  }
};
I do not know how to get the raw expanded tokens from this preprocessor phase and forward them to a clang::RecursiveASTVisitor instance for further processing (rewriting – for example, rewriting the ConditionalOperators by adding a comment or something trivial). Also, I do not want to lose the comments.
LibTooling example that produces expanded macros
#include <clang/Basic/SourceManager.h>
#include <clang/Frontend/CompilerInstance.h>
#include <clang/Frontend/FrontendActions.h>
#include <clang/Lex/Preprocessor.h>
#include <clang/Tooling/CommonOptionsParser.h>
#include <clang/Tooling/Tooling.h>
#include <llvm/Support/Error.h>
#include <llvm/Support/raw_ostream.h>
class PreprocessorOutputHandler : public clang::PPCallbacks {
public:
explicit PreprocessorOutputHandler(clang::Preprocessor &PP) : PP(PP) {}
void MacroDefined(const clang::Token ¯oNameTok, const clang::MacroDirective *md) override {
//llvm::outs() << "Macro defined: " << macroNameTok.getIdentifierInfo()->getName() << "n";
}
void MacroExpands(const clang::Token ¯oNameTok, const clang::MacroDefinition &md,
clang::SourceRange range, const clang::MacroArgs *args) override {
//llvm::outs() << "Macro expands: " << macroNameTok.getIdentifierInfo()->getName() << "n";
}
void InclusionDirective(
clang::SourceLocation HashLoc,
const clang::Token &IncludeTok, llvm::StringRef FileName,
bool IsAngled, clang::CharSourceRange FilenameRange,
clang::OptionalFileEntryRef File,
llvm::StringRef SearchPath, llvm::StringRef RelativePath,
const clang::Module *Imported,
clang::SrcMgr::CharacteristicKind FileType) override {
//llvm::outs() << "Inclusion directive: " << FileType << "n";
}
private:
clang::Preprocessor &PP;
};
// Frontend action that runs only the preprocessor and prints the preprocessed
// translation unit to standard output.
class PreprocessorOnlyAction : public clang::PreprocessOnlyAction {
public:
  // Registers the callback handler on the preprocessor and then prints the
  // preprocessed input. The base-class ExecuteAction() is deliberately not
  // called; printing is driven directly by DoPrintPreprocessedInput().
  void ExecuteAction() override {
    clang::Preprocessor &preprocessor = getCompilerInstance().getPreprocessor();
    preprocessor.addPPCallbacks(
        std::make_unique<PreprocessorOutputHandler>(preprocessor));

    // Printing options (per the original author, chosen for MSVC
    // compatibility). The assignments are independent of each other.
    clang::PreprocessorOutputOptions options;
    options.ShowComments = true;
    options.ShowMacros = true;
    options.ShowIncludeDirectives = true;
    options.ShowCPP = true;
    options.ShowLineMarkers = true;
    options.UseLineDirectives = true;

    // Emit the preprocessed text on stdout.
    clang::DoPrintPreprocessedInput(preprocessor, &llvm::outs(), options);
  }
};
// Wraps clang::DumpRawTokensAction so that a PreprocessorOutputHandler is
// attached to the preprocessor before the stock action executes.
class DumpRawTokensAction : public clang::DumpRawTokensAction {
public:
  // Installs the callback handler, then delegates to the base implementation.
  void ExecuteAction() override {
    clang::Preprocessor &preprocessor = getCompilerInstance().getPreprocessor();
    preprocessor.addPPCallbacks(
        std::make_unique<PreprocessorOutputHandler>(preprocessor));

    clang::DumpRawTokensAction::ExecuteAction();
  }
};
// Wraps clang::DumpTokensAction so that a PreprocessorOutputHandler is
// attached to the preprocessor before the stock action executes.
class DumpTokensAction : public clang::DumpTokensAction {
public:
  // Installs the callback handler, then delegates to the base implementation.
  void ExecuteAction() override {
    clang::Preprocessor &preprocessor = getCompilerInstance().getPreprocessor();
    preprocessor.addPPCallbacks(
        std::make_unique<PreprocessorOutputHandler>(preprocessor));

    clang::DumpTokensAction::ExecuteAction();
  }
};
// Wraps clang::PrintPreprocessedAction so that a PreprocessorOutputHandler is
// attached to the preprocessor before the stock action executes.
class PrintPreprocessedAction : public clang::PrintPreprocessedAction {
public:
  // Installs the callback handler, then delegates to the base implementation.
  void ExecuteAction() override {
    clang::Preprocessor &preprocessor = getCompilerInstance().getPreprocessor();
    preprocessor.addPPCallbacks(
        std::make_unique<PreprocessorOutputHandler>(preprocessor));

    clang::PrintPreprocessedAction::ExecuteAction();
  }
};
// Tool entry point: parses the common libTooling command line, then runs
// PreprocessorOnlyAction over every source path given.
//
// Returns 1 when the command line cannot be parsed; otherwise returns the
// status reported by ClangTool::run.
int main(int argc, const char **argv) {
  llvm::cl::OptionCategory gToolCategory("Tool Category");
  auto expectedParser = clang::tooling::CommonOptionsParser::create(
      argc, argv, gToolCategory);
  // create() returns an llvm::Expected; it must be checked before it is
  // dereferenced. toString() consumes the error so it is not left unhandled.
  if (!expectedParser) {
    llvm::errs() << llvm::toString(expectedParser.takeError()) << "\n";
    return 1;
  }

  clang::tooling::ClangTool Tool(expectedParser->getCompilations(),
                                 expectedParser->getSourcePathList());

  // Alternative actions that can be swapped in for experimentation:
  //return Tool.run(clang::tooling::newFrontendActionFactory<DumpRawTokensAction>().get());
  //return Tool.run(clang::tooling::newFrontendActionFactory<DumpTokensAction>().get());
  //return Tool.run(clang::tooling::newFrontendActionFactory<PrintPreprocessedAction>().get());

  // Propagate the tool's status instead of always exiting with 0.
  return Tool.run(
      clang::tooling::newFrontendActionFactory<PreprocessorOnlyAction>().get());
}
Output
When the above code is run with the arguments c:\temp\test2.c --
it produces the following output:
#line 1 "<built-in>"
#line 1 "C:\temp\test2.c"
#define TWO (2)
#define THREE() (3)
#define FOUR() (4)
#define NUMSQUARED(x) ((x)*(x))
#define MIN(a,b) (((a) <= (b)) ? (a) : (b))
#define MAX(a,b) (((a) > (b)) ? (a) : (b))
#define FUNC_MACRO(x) ((x) + 1)
#define NESTED_MACRO(a,b) (FUNC_MACRO(a) + NUMSQUARED(b) + FUNC_MACRO(FOUR()) + TWO + THREE())
/* COMMENT */
int main() {
// comment1
int a = (((1) + 1) + ((2)*(2)) + (((4)) + 1) + (2) + (3));
// comment2
int b = (((1) <= ((2))) ? (1) : ((2))); // trailing comment
// comment3
int c = (((1) <= (2)) ? (1) : (2));
// comment4
int d = (((1) <= ((3))) ? (1) : ((3)));
// comment5
int f = (((1) <= (((3)*(3)))) ? (1) : (((3)*(3))));
// comment6
int g = ((((((1) > (2)) ? (1) : (2))) <= (3)) ? ((((1) > (2)) ? (1) : (2))) : (3));
// comment7
int h = (a > 3) ? 4 : 5 /*FOO*/;
// comment8
return 1;
}
The missing part of the solution
I need to integrate the following 2 classes to work with the above preprocessor – this is an example that rewrites simple ternary expressions as a starting point.
// AST visitor that prefixes every conditional (ternary) operator with a
// marker comment through the supplied Rewriter.
class CProbeVisitor : public clang::RecursiveASTVisitor<CProbeVisitor> {
public:
  // rCI       - compiler instance; used to reach the SourceManager.
  // rRewriter - rewriter that receives the inserted comment text.
  explicit CProbeVisitor(
      clang::CompilerInstance& rCI,
      clang::Rewriter& rRewriter)
      : mCI{ rCI }
      , mRewriter{ rRewriter }
  {}

  static bool shouldTraversePostOrder() {
    // Must return true to traverse the AST in post-order.
    return true;
  }

  // Inserts the marker comment in front of the conditional operator.
  // Always returns true so traversal continues with the remaining nodes.
  bool VisitConditionalOperator(clang::ConditionalOperator *CO) {
    // A ternary produced by a macro starts at a macro location, which the
    // Rewriter refuses to edit. Map it to the location in the file where the
    // expansion was written; plain file locations are returned unchanged.
    // Several ternaries from one expansion therefore share one insertion
    // point and each contributes its own comment.
    const clang::SourceLocation Loc =
        mCI.getSourceManager().getExpansionLoc(CO->getBeginLoc());
    mRewriter.InsertText(Loc, "/* Ternary operator expanded through a macro */ ", true, true);
    // Returning false from a Visit* method aborts the whole traversal, which
    // would leave every ternary after the first one unvisited.
    return true;
  }

private:
  clang::CompilerInstance& mCI;
  clang::Rewriter& mRewriter;
};
// AST consumer that owns a CProbeVisitor and runs it over the whole
// translation unit; it also attaches a MacroCommentInserter to the
// preprocessor when the consumer is initialized.
class CProbeASTConsumer : public clang::ASTConsumer {
public:
  /**
   * Explicit constructor.
   *
   * @param rCI       [in] Compiler Instance.
   * @param rRewriter [in] Clang Rewriter, shared with the visitor.
   */
  explicit CProbeASTConsumer(clang::CompilerInstance& rCI,
                             clang::Rewriter& rRewriter)
      : mCI{rCI}, mRewriter{rRewriter}, mVisitor(rCI, rRewriter) {}

  //! Walk the AST, beginning at the translation unit declaration.
  void HandleTranslationUnit(clang::ASTContext& context) override {
    clang::TranslationUnitDecl* const root = context.getTranslationUnitDecl();
    mVisitor.TraverseDecl(root);
  }

  //! Register the MacroCommentInserter callbacks on the preprocessor.
  void Initialize(clang::ASTContext& Context) override {
    clang::Preprocessor& preprocessor = mCI.getPreprocessor();
    preprocessor.addPPCallbacks(
        std::make_unique<MacroCommentInserter>(preprocessor, mRewriter));
  }

private:
  clang::CompilerInstance& mCI;
  clang::Rewriter& mRewriter;
  CProbeVisitor mVisitor;
};