The following ‘C’ source code containing a nested ternary statement fails to be rewritten correctly using libTooling’s clang::Rewriter with a RecursiveASTVisitor.
I cannot figure out why. I added diagnostic code to examine the edit buffer after rewriting at each stage, unfortunately rewriting the nested ternary causes the entire rewrite buffer to become corrupted.
Starting with the following C source file which contains a recursive ConditionalOperator:
void nestedTernaryDeclStmt() {
int ii, jj, kk, ll, mm;
int foo = ii > jj ? ( kk <= ll ) ? mm : 4123 : 5321 ;
^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^ (outer ternary)
^~~~~~~~~~~~~~~~~~~~~~~^ (nested ternary)
}
At the end of the first bool VisitConditionalOperator(clang::ConditionalOperator *CO) const
callback, the contents of the file’s edit buffer is correctly rewritten as:
void nestedTernaryDeclStmt() {
int ii, jj, kk, ll, mm;
int foo = 'REPLACEMENT 0' ;
}
This is what I expected as the outermost ternary is completely rewritten. Then when the bool VisitConditionalOperator(clang::ConditionalOperator *CO) const
is called for the second time containing the nested ternary expression ( kk <= ll ) ? mm : 4123
, dumping the rewritten rewrite buffer has the following corrupted content.
void nestedTernaryDeclStmt() {
int ii, jj, kk, ll, mm;'REPLACEMENT 1'MENT 0' ;
}
Instead of using the getSourceRange from the CO parameter, I adjusted this to span the range up to the end of the last token (a frequently asked question about clang range locations). I also tried many other combinations of type of tokened and non tokened character ranges but nothing I try seems to work.
Here is the problematic visitor:
// Experimental recursive visitor class.
class MyVisitor : public clang::RecursiveASTVisitor<MyVisitor> {
public:
explicit MyVisitor(
clang::ASTContext& rContext,
clang::Rewriter& rRewriter)
: mContext{rContext}
, mRewriter{rRewriter}
{}
#if 0
// default behavior is to traverse the AST in pre-order (override to true to force post-order).
// @JC note that since this uses CRTP pattern (i.e. class Derived : public Base<Derived>),
// the method is not virtual & bypasses the need for a VTable - very clever!
bool shouldTraversePostOrder() const {
return false;
}
#endif
//! Visitor pattern callback for 'ConditionalOperator'.
bool VisitConditionalOperator(clang::ConditionalOperator *CO) const {
// This method is called for every 'ConditionalOperator' in the code.
// You can examine 'CO' to extract information about it.
const auto& SM = mContext.getSourceManager();
const auto& LO = mContext.getLangOpts();
const auto sourceRange = CO->getSourceRange();
// Assume SM is a clang::SourceManager object and Range is a clang::SourceRange for the range
const auto BLoc = sourceRange.getBegin();
const auto ELoc = sourceRange.getEnd();
// Adjust the end location to the end of the last token
const auto AdjustedELoc = clang::Lexer::getLocForEndOfToken(
ELoc, 0, SM, LO);
// Create adjusted range that includes the length of the last token
clang::SourceRange AdjustedRange(BLoc, AdjustedELoc);
auto CSR1 = clang::CharSourceRange::getCharRange(BLoc, AdjustedELoc);
//CSR1.setTokenRange(true);
unsigned BLine = SM.getSpellingLineNumber(BLoc);
unsigned BCol = SM.getSpellingColumnNumber(BLoc);
unsigned ELine = SM.getSpellingLineNumber(ELoc);
unsigned ECol = SM.getSpellingColumnNumber(ELoc);
auto cond = gStmtToString(&mContext, CO->getCond());
auto lhs = gStmtToString(&mContext, CO->getLHS());
auto rhs = gStmtToString(&mContext, CO->getRHS());
// Rewrite as follows:
const auto probeText = std::format(
"'REPLACEMENT {}'"
, gProbeIndex++);
mRewriter.ReplaceText(/*sourceRange*/CSR1, probeText);
// Get the RewriteBuffer for the main file.
std::string str;
llvm::raw_string_ostream rso(str);
clang::RewriteBuffer &RB = mRewriter.getEditBuffer(SM.getMainFileID());
RB.write(rso);
rso.flush();
// returning false aborts the traversal
return true;
}
private:
clang::ASTContext& mContext;
clang::Rewriter& mRewriter;
};