I am developing a clang libTooling application that instruments (rewrites) C code. In order to prepare the code for a RecursiveASTVisitor (where the final code is rewritten), I need to pre-expand all the macro invocations in the C code. I am facing challenges with handling nested macros and function-like macros where an argument is itself a macro invocation (I think this could be considered a variant of macro nesting).
I implemented a partial solution by creating a MacroExpander (a clang::PPCallbacks subclass) that partially succeeds in expanding macros.
Examples and Expected output
Consider for example a pair of macros defined as:
#define MIN(a, b) (((a) <= (b)) ? (a) : (b))
#define MAX(a, b) (((a) > (b)) ? (a) : (b))
Here are a few simple examples of expected macro expansions:
MIN(1, 7) => (((1) <= (7)) ? (1) : (7))
MAX(5, 8) => (((5) > (8)) ? (5) : (8))
MIN(MAX(5, 8), 7) => ((((((5) > (8)) ? (5) : (8))) <= (7)) ? ((((5) > (8)) ? (5) : (8))) : (7))
The last example is nested, and can be obtained by substituting MAX(5, 8)
for a
in the MIN(a, b) macro. i.e:
MIN(MAX(5, 8), 7) => (((MAX(5, 8)) <= (7)) ? (MAX(5, 8)) : (7))
Partially working application
The code below includes a partially working MacroExpander class. This expander has the nesting and macro parameter expansion limitations described earlier. Additionally, I would like some guidance on how to correctly handle the token expansion logic. I used a brute force approach which seems overkill where I needed to handle token pasting ##
and stringizing #
operators in addition to spacing around keywords & punctuation. I am sure there has to be a better way to do this while expanding C code tokens.
The MacroExpander
class is implemented as follows:
#include <format>
#include <map>
#include <memory>
#include <ranges>
#include <string>
#include <vector>
#include <clang/Lex/PPCallbacks.h>
#include <clang/Lex/MacroArgs.h>
#include <clang/Lex/Preprocessor.h>
#include <clang/Frontend/CompilerInstance.h>
#include <clang/Tooling/Tooling.h>
#include <clang/Tooling/CommonOptionsParser.h>
#include <clang/Rewrite/Core/Rewriter.h>
using namespace llvm;
using namespace clang::driver;
using namespace clang::tooling;
namespace {
// Option category handed to CommonOptionsParser::create() in main().
llvm::cl::OptionCategory MyToolCategory{"my-tool options"};
} // namespace
class MacroExpander : public clang::PPCallbacks {
public:
explicit MacroExpander(clang::Preprocessor &rPP, clang::Rewriter &rRewriter)
: mPP(rPP)
, mRewriter(rRewriter)
{}
void MacroExpands(
const clang::Token& MacroNameTok,
const clang::MacroDefinition& MD, clang::SourceRange Range,
const clang::MacroArgs* Args) override {
// Get the macro name
const auto MacroName = mPP.getSpelling(MacroNameTok);
// Get the expanded macro body
std::string MacroBody;
const auto MI = MD.getMacroInfo();
if (!MI->isFunctionLike()) {
// I think there is always just one token in these single
// objectLike macros that do not contain parameters.
for (const auto next : MI->tokens()) {
MacroBody += mPP.getSpelling(next);
}
// Print the macro name and body
const auto debugString = std::format(
"Macro {} expands to: {}"
, MacroName
, MacroBody);
llvm::errs() << debugString << "n";
// TODO make sure macro expansion nesting works.
mRewriter.ReplaceText(Range, MacroBody);
} else {
// This is the complicated function like macro invocation.
// Create a map of macro parameter name/value pairs, so we
// can substitute them in the body as we expand the tokens.
std::vector<std::string> macroArgNames;
for (const auto next : MI->params()) {
macroArgNames.emplace_back(next->getName().str());
}
std::vector<std::string> macroArgValues;
for (unsigned i = 0u, e = Args->getNumMacroArguments(); i != e; ++i) {
if (const auto next = Args->getUnexpArgument(i); next) {
macroArgValues.emplace_back(mPP.getSpelling(*next));
}
}
// Make sure we have the same number of arguments as parameters
if (macroArgValues.size() == macroArgNames.size()) {
std::map<std::string, std::string> macroParamInfo;
for (auto i = 0; i<macroArgNames.size(); ++i) {
macroParamInfo[macroArgNames[i]] = macroArgValues[i];
}
for (const auto next : MI->tokens()) {
if (next.is(clang::tok::identifier)) {
// Identifiers that are not found in our macro
// parameters need to be forwarded directly
// to the MacroBody unmodified.
const auto identifier = mPP.getSpelling(next);
if (const auto iter = macroParamInfo.find(
identifier); iter != macroParamInfo.cend()) {
MacroBody += iter->second;
} else {
MacroBody += mPP.getSpelling(next);
}
} else {
MacroBody += mPP.getSpelling(next);
}
}
// Print the macro name and body
// For example if macro is "#define MAX(a, b) ...",
// commaSeparatedParams are "a, b".
const std::vector paramVec(MI->params().begin(), MI->params().end());
const auto& commaSeparatedParams = paramVec
| std::views::transform([&](const clang::IdentifierInfo* next) {
return next->getName().str();
})
| std::views::join_with(std::string(", "))
| std::ranges::to<std::string>();
// For example if macro usage is "MAX(1, 2) ...",
// commaSeparatedArgs are "1, 2".
std::vector<clang::Token> argTokens;
for (unsigned i = 0u, e = Args->getNumMacroArguments(); i != e; ++i) {
if (const auto next = Args->getUnexpArgument(i); next) {
argTokens.emplace_back(*next);
}
}
const auto& commaSeparatedArgs = argTokens
| std::views::transform([&](const auto& next) {
return mPP.getSpelling(next);
})
| std::views::join_with(std::string(", "))
| std::ranges::to<std::string>();
const auto debugString = std::format(
"Macro {}({}) expands to: {}"
, MacroName
, commaSeparatedArgs
, MacroBody);
llvm::errs() << debugString << "n";
// TODO make sure macro expansion nesting works.
mRewriter.ReplaceText(Range, MacroBody );
}
}
}
private:
clang::Preprocessor& mPP;
clang::Rewriter& mRewriter;
};
class MyFrontendAction : public clang::ASTFrontendAction {
public:
[[nodiscard]] std::unique_ptr<clang::ASTConsumer> CreateASTConsumer(
clang::CompilerInstance &CI, llvm::StringRef file) override {
// Initialize the Rewriter, SourceManager and Language Options
// from the provided compiler instance.
mRewriter.setSourceMgr(CI.getSourceManager(), CI.getLangOpts());
CI.getPreprocessor().addPPCallbacks(std::make_unique<
MacroExpander>(CI.getPreprocessor(), mRewriter));
return std::make_unique<clang::ASTConsumer>();
}
//! Outputs rewritten source to console.
void EndSourceFileAction() override {
mRewriter.getEditBuffer(mRewriter.getSourceMgr().
getMainFileID()).write(llvm::outs());
}
private:
clang::Rewriter mRewriter;
};
/// Tool entry point: parses the common clang-tooling command line, then runs
/// MyFrontendAction over every source path given.
///
/// \return 1 when the command line cannot be parsed, otherwise the result of
///         ClangTool::run().
int main(int argc, const char **argv) {
    auto ExpectedParser = CommonOptionsParser::create(argc, argv, MyToolCategory);
    if (!ExpectedParser) {
        // llvm::Error must be consumed before it is destroyed; merely
        // streaming it leaves it unchecked, which aborts in builds with
        // ABI-breaking checks enabled. toString() consumes the error and
        // yields the same message text.
        llvm::errs() << llvm::toString(ExpectedParser.takeError());
        return 1;
    }
    CommonOptionsParser &OptionsParser = ExpectedParser.get();
    ClangTool Tool(OptionsParser.getCompilations(), OptionsParser.getSourcePathList());
    return Tool.run(newFrontendActionFactory<MyFrontendAction>().get());
}
Test ‘C’ code containing unexpanded macros
#include <math.h>
#include <stdio.h>
#include <stdarg.h>
#include <stdint.h>
#include <stdbool.h>
/* Object-like and function-like macros used as input for the macro expander. */
#define TRUE 1
#define FALSE 0
#define SEMI ;
#define GT >
#define GE >=
#define LT <
#define LE <=
#define NE !
#define MIN(a, b) (((a) <= (b)) ? (a) : (b))
#define MAX(a, b) (((a) > (b)) ? (a) : (b))
/* Each continued line must end in a backslash, otherwise the remainder of
   the replacement list becomes ordinary file-scope code. */
#define MULTI_LINE_MACRO(a, b) (((a) > (b)) ? \
/*LHS*/pow((a), 2) : \
/*RHS*/pow((b), 3) )
#define KNOT_TO_MSEC(a) ((a)*0.514444)
#define STANDARD_GRAVITY (9.80665)
#define LON_LIMIT 180.0000241664
#define LAT_LIMIT (LON_LIMIT/2.0)
#define LOG(format, ...) printf(format, __VA_ARGS__)
/* Reads `count` variadic int arguments and prints each one, with its
   1-based position, through the LOG macro. */
void
variadicFunctionTest(int count, ...) {
    va_list args;
    va_start(args, count);
    for (int i = 0; i < count; i++) {
        int num = va_arg(args, int);
        /* Terminate each message with a newline escape, not a literal 'n'. */
        LOG("Argument %d: %d\n", i+1, num);
    }
    va_end(args);
}
// Fixture exercising object-like and function-like macro invocations.
// Each statement is preceded by a comment holding the same statement, so
// the unexpanded form remains visible next to the rewritten one.
// NOTE(review): a..g are read without ever being initialized; harmless as
// input for a source rewriter, but undefined behavior if actually executed.
void foo() {
int a, b, c, d, e, f, g;
int t = 0;
variadicFunctionTest(3, 1, 2, 3);
// double bar = MULTI_LINE_MACRO(1, 3);
double bar = MULTI_LINE_MACRO(1, 3);
// double foo = KNOT_TO_MSEC(123.0) / STANDARD_GRAVITY;
double foo = KNOT_TO_MSEC(123.0) / STANDARD_GRAVITY;
// a = b > c ? d : e;
a = b > c ? d : e;
// a = (b > c) ? (a LT b) ? c : d : TRUE;
a = (b > c) ? (a LT b) ? c : d : TRUE;
// a = b GT c ? d : e;
a = b GT c ? d : e;
// a = (b GT c) ? (a LT b) : FALSE ? d : e;
a = (b GT c) ? (a LT b) : FALSE ? d : e;
// a = MIN(1, 7);
a = MIN(1, 7);
// a = MAX( MIN(e, f), g);
a = MAX( MIN(e, f), g);
}
Test output (truncated at the top)
(We do not need to see all the #included macro expansions.)
…
Macro LOG("Argument %d: %dn", i) expands to: printf("Argument %d: %dn",i)
Macro va_end expands to: __crt_va_end
Macro __crt_va_end(args) expands to: ((void)(args=(va_list)0))
Macro MULTI_LINE_MACRO(1, 3) expands to: (((1)>(3))?pow((1),2):pow((3),3))
Macro KNOT_TO_MSEC(123.0) expands to: ((123.0)*0.514444)
Macro STANDARD_GRAVITY expands to: (9.80665)
Macro LT expands to: <
Macro TRUE expands to: 1
Macro GT expands to: >
Macro GT expands to: >
Macro LT expands to: <
Macro FALSE expands to: 0
Macro MIN(1, 7) expands to: (((1)<=(7))?(1):(7))
Macro MAX(MIN, g) expands to: (((MIN)>(g))?(MIN):(g))
Macro MIN(e, f) expands to: (((e)<=(f))?(e):(f))
#include <math.h>
#include <stdio.h>
#include <stdarg.h>
#include <stdint.h>
#include <stdbool.h>
#define TRUE 1
#define FALSE 0
#define SEMI ;
#define GT >
#define GE >=
#define LT <
#define LE <=
#define NE !
#define MIN(a, b) (((a) <= (b)) ? (a) : (b))
#define MAX(a, b) (((a) > (b)) ? (a) : (b))
#define MULTI_LINE_MACRO(a, b) (((a) > (b)) ?
/*LHS*/pow((a), 2) :
/*RHS*/pow((b), 3) )
#define KNOT_TO_MSEC(a) ((a)*0.514444)
#define STANDARD_GRAVITY (9.80665)
#define LON_LIMIT 180.0000241664
#define LAT_LIMIT (LON_LIMIT/2.0)
#define LOG(format, ...) printf(format, __VA_ARGS__)
void
variadicFunctionTest(int count, ...) {
va_list args;
__crt_va_start(args, count);
for (int i = 0; i < count; i++) {
int num = __crt_va_arg(args, int);
printf("Argument %d: %dn",i);
}
__crt_va_end(args);
}
void foo() {
int a, b, c, d, e, f, g;
int t = 0;
variadicFunctionTest(3, 1, 2, 3);
// double bar = MULTI_LINE_MACRO(1, 3);
double bar = (((1)>(3))?pow((1),2):pow((3),3));
// double bar = KNOT_TO_MSEC(123.0) / STANDARD_GRAVITY;
double foo = ((123.0)*0.514444) / (9.80665);
// a = b > c ? d : e;
a = b > c ? d : e;
// a = (b > c) ? (a LT b) ? c : d : TRUE;
a = (b > c) ? (a < b) ? c : d : 1;
// a = b GT c ? d : e;
a = b > c ? d : e;
// a = (b GT c) ? (a LT b) : FALSE ? d : e;
a = (b > c) ? (a < b) : 0 ? d : e;
// a = MIN(1, 7);
a = (((1)<=(7))?(1):(7));
// a = MAX( MIN(e, f), g);
a = (((MIN)>(g(((e)<=(f))?(e):(f))(g));
}