As you have discovered, the AST itself does not distinguish between the
two ways to spell the operator. However, the AST has source location
information that can be used to get the original text. The idea is to
use:
Note that when the operator results from a macro expansion, the source
retrieval method I am using does not work well, and I do not know if
that can be fixed.
Here is the core of a clang-tidy
check that does what you want (except
for cases involving macros):
// https://stackoverflow.com/questions/11083066/getting-the-source-behind-clangs-ast
std::string get_source_text_raw(SourceRange range, SourceManager const &sm)
{
return std::string(
clang::Lexer::getSourceText(
clang::CharSourceRange::getCharRange(range), sm, clang::LangOptions()
)
);
}
void FindOpAnd::registerMatchers(ast_matchers::MatchFinder *Finder)
{
// Match use of operator '&&' or 'and'.
Finder->addMatcher(
binaryOperator(hasOperatorName("&&"))
.bind("op"), this);
}
void FindOpAnd::check(const MatchFinder::MatchResult &Result)
{
ASTContext &context = *(Result.Context);
SourceManager &sm = context.getSourceManager();
// Get the node bound to "op" in the match expression.
const auto *opExpr = Result.Nodes.getNodeAs<BinaryOperator>("op");
// Location of the start of the operator.
SourceLocation opLoc = opExpr->getOperatorLoc();
// Try to find the location of the end.
SourceLocation opEndLoc =
clang::Lexer::getLocForEndOfToken(opLoc, 0, sm, clang::LangOptions());
if (opEndLoc.isValid()) {
// Get the text of the operator.
SourceRange opRange(opLoc, opEndLoc);
std::string opText = get_source_text_raw(opRange, sm);
if (opText == "&&") {
diag(opLoc, "Using '&&' instead of 'and'.")
<< FixItHint::CreateReplacement(opRange, "and");
}
else if (opText == "and") {
// This what we want.
}
else {
// This happens when the operator itself is the result of a macro
// expansion.
}
}
else {
// The end location will not be found if the operator was inside
// the expansion of a macro.
}
}
Sample input:
// op-and.cc
// Test input for clang-tidy FindOpAnd checker.
// Note: There are no digraphs or trigraphs for '&', so that is not a
// concern here.
#define MY_AMPER_OPERATOR &&
#define MY_AND_OPERATOR and
#define MY_AMPER_EXPR(a,b) ((a) && (b))
#define MY_AND_EXPR(a,b) ((a) and (b))
void f(bool a, bool b)
{
bool r;
r = a && b; // Reported.
r = a and b; // Not reported.
// Not reported: Operator name not recognized.
r = a MY_AMPER_OPERATOR b;
r = a MY_AND_OPERATOR b;
// Not reported: Operator end location is invalid so we cannot get the
// operator name.
r = MY_AMPER_EXPR(a,b);
r = MY_AND_EXPR(a,b);
}
// EOF
Sample output:
$ ./FindOpAnd.exe -checks=-*,FindOpAnd in/op-and.cc \
--export-fixes=out/op-and.cc.fixes.yaml --
1 warning generated.
[...]/in/op-and.cc:17:9: warning: Using '&&' instead of 'and'. [FindOpAnd]
r = a && b; // Reported.
^~
and
To actually apply the suggested fixes, pass the --fix
option.
For completeness, here is the full checker source and its Makefile
:
// FindOpAnd.cc
// Code for FindOpAnd.h.
#include "FindOpAnd.h" // this module
#include "clang/AST/ASTContext.h" // ASTContext
#include "clang/Lex/Lexer.h" // clang::Lexer
#include "clang-tidy/ClangTidyModule.h" // ClangTidyModule
#include "clang-tidy/ClangTidyModuleRegistry.h" // ClangTidyModuleRegistry
using namespace clang::ast_matchers;
namespace clang {
namespace tidy {
// https://stackoverflow.com/questions/11083066/getting-the-source-behind-clangs-ast
std::string get_source_text_raw(SourceRange range, SourceManager const &sm)
{
return std::string(
clang::Lexer::getSourceText(
clang::CharSourceRange::getCharRange(range), sm, clang::LangOptions()
)
);
}
void FindOpAnd::registerMatchers(ast_matchers::MatchFinder *Finder)
{
// Match use of operator '&&' or 'and'.
Finder->addMatcher(
binaryOperator(hasOperatorName("&&"))
.bind("op"), this);
}
void FindOpAnd::check(const MatchFinder::MatchResult &Result)
{
ASTContext &context = *(Result.Context);
SourceManager &sm = context.getSourceManager();
// Get the node bound to "op" in the match expression.
const auto *opExpr = Result.Nodes.getNodeAs<BinaryOperator>("op");
// Location of the start of the operator.
SourceLocation opLoc = opExpr->getOperatorLoc();
// Try to find the location of the end.
SourceLocation opEndLoc =
clang::Lexer::getLocForEndOfToken(opLoc, 0, sm, clang::LangOptions());
if (opEndLoc.isValid()) {
// Get the text of the operator.
SourceRange opRange(opLoc, opEndLoc);
std::string opText = get_source_text_raw(opRange, sm);
if (opText == "&&") {
diag(opLoc, "Using '&&' instead of 'and'.")
<< FixItHint::CreateReplacement(opRange, "and");
}
else if (opText == "and") {
// This what we want.
}
else {
// This happens when the operator itself is the result of a macro
// expansion.
}
}
else {
// The end location will not be found if the operator was inside
// the expansion of a macro.
}
}
class FindOpAndModule : public ClangTidyModule {
public:
void addCheckFactories(ClangTidyCheckFactories &CheckFactories) override {
CheckFactories.registerCheck<FindOpAnd>("FindOpAnd");
}
};
static ClangTidyModuleRegistry::Add<FindOpAndModule> X(
"FindOpAndModule",
"Adds FindOpAnd check.");
// This is defined in libclangTidyMain.a. It does not appear to be
// declared in any header file, so I doubt this is really how it is
// meant to be used.
int clangTidyMain(int argc, const char **argv);
} // namespace tidy
} // namespace clang
int main(int argc, const char **argv)
{
return clang::tidy::clangTidyMain(argc, argv);
}
// EOF
// FindOpAnd.h
// clang-tidy check to find operator '&&' as opposed to 'and'.
#ifndef FIND_OP_AND_H
#define FIND_OP_AND_H
#include "clang-tidy/ClangTidyCheck.h" // ClangTidyCheck
#include "clang/ASTMatchers/ASTMatchFinder.h" // ast_matchers::MatchFinder
namespace clang {
namespace tidy {
class FindOpAnd : public ClangTidyCheck {
public:
FindOpAnd(StringRef Name, ClangTidyContext *Context)
: ClangTidyCheck(Name, Context) {}
void registerMatchers(ast_matchers::MatchFinder *Finder) override;
void check(const ast_matchers::MatchFinder::MatchResult &Result) override;
};
} // namespace tidy
} // namespace clang
#endif // FIND_OP_AND_H
# clang-tidy-op-and/Makefile
# Default target.
all:
.PHONY: all
# Eliminate all implicit rules.
.SUFFIXES:
# Delete a target when its recipe fails.
.DELETE_ON_ERROR:
# Do not remove "intermediate" targets.
.SECONDARY:
# ---- Paths ----
# Installation directory from a binary distribution.
# Has five subdirectories: bin include lib libexec share.
# Downloaded from: https://github.com/llvm/llvm-project/releases/download/llvmorg-14.0.0/clang+llvm-14.0.0-x86_64-linux-gnu-ubuntu-18.04.tar.xz
CLANG_LLVM_INSTALL_DIR = $(HOME)/opt/clang+llvm-14.0.0-x86_64-linux-gnu-ubuntu-18.04
# Program to query the various LLVM configuration options.
LLVM_CONFIG = $(CLANG_LLVM_INSTALL_DIR)/bin/llvm-config
# ---- Compiler options ----
# C++ compiler.
CXX = g++
# Compiler options, including preprocessor options.
CXXFLAGS =
CXXFLAGS += -Wall
# Get llvm compilation flags.
CXXFLAGS += $(shell $(LLVM_CONFIG) --cxxflags)
# Linker options.
LDFLAGS =
# Needed libraries. The order is important. I do not know a principled
# way to obtain this list. I did it by chasing down each missing symbol
# in a link error.
LDFLAGS += -lclangTidy
LDFLAGS += -lclangTidyMain
LDFLAGS += -lclangTidyPlugin
LDFLAGS += -lclangToolingCore
LDFLAGS += -lclangFormat
LDFLAGS += -lclangToolingInclusions
LDFLAGS += -lclangTidyAbseilModule
LDFLAGS += -lclangTidyAlteraModule
LDFLAGS += -lclangTidyAndroidModule
LDFLAGS += -lclangTidyBoostModule
LDFLAGS += -lclangTidyBugproneModule
LDFLAGS += -lclangTidyCERTModule
LDFLAGS += -lclangTidyConcurrencyModule
LDFLAGS += -lclangTidyCppCoreGuidelinesModule
LDFLAGS += -lclangTidyDarwinModule
LDFLAGS += -lclangTidyFuchsiaModule
LDFLAGS += -lclangTidyGoogleModule
LDFLAGS += -lclangTidyHICPPModule
LDFLAGS += -lclangTidyLLVMLibcModule
LDFLAGS += -lclangTidyLLVMModule
LDFLAGS += -lclangTidyLinuxKernelModule
LDFLAGS += -lclangTidyMPIModule
LDFLAGS += -lclangTidyMiscModule
LDFLAGS += -lclangTidyModernizeModule
LDFLAGS += -lclangTidyObjCModule
LDFLAGS += -lclangTidyOpenMPModule
LDFLAGS += -lclangTidyPerformanceModule
LDFLAGS += -lclangTidyPortabilityModule
LDFLAGS += -lclangTidyReadabilityModule
LDFLAGS += -lclangTidyZirconModule
LDFLAGS += -lclangTidyUtils
LDFLAGS += -lclangTransformer
LDFLAGS += -lclangTooling
LDFLAGS += -lclangFrontendTool
LDFLAGS += -lclangFrontend
LDFLAGS += -lclangDriver
LDFLAGS += -lclangSerialization
LDFLAGS += -lclangCodeGen
LDFLAGS += -lclangParse
LDFLAGS += -lclangSema
LDFLAGS += -lclangStaticAnalyzerFrontend
LDFLAGS += -lclangStaticAnalyzerCheckers
LDFLAGS += -lclangStaticAnalyzerCore
LDFLAGS += -lclangAnalysis
LDFLAGS += -lclangARCMigrate
LDFLAGS += -lclangRewrite
LDFLAGS += -lclangRewriteFrontend
LDFLAGS += -lclangEdit
LDFLAGS += -lclangCrossTU
LDFLAGS += -lclangIndex
LDFLAGS += -lclangAST
LDFLAGS += -lclangASTMatchers
LDFLAGS += -lclangLex
LDFLAGS += -lclangBasic
LDFLAGS += -lclang
# *After* clang libs, the llvm libs.
LDFLAGS += $(shell $(LLVM_CONFIG) --ldflags --libs --system-libs)
# ---- Recipes ----
# Pull in automatic dependencies.
-include $(wildcard obj/*.d)
# Compile a C++ source file.
obj/%.o: %.cc
@mkdir -p $(dir $@)
$(CXX) -MMD -c -o $@ $(USE_PCH) $< $(CXXFLAGS)
# Sources for 'FindOpAnd.exe'.
SRCS :=
SRCS += FindOpAnd.cc
# Objects for 'FindOpAnd.exe'.
OBJS := $(patsubst %.cc,obj/%.o,$(SRCS))
# Executable.
all: FindOpAnd.exe
FindOpAnd.exe: $(OBJS)
$(CXX) -g -Wall -o $@ $(OBJS) $(LDFLAGS)
# Run program on one input.
out/%: in/% FindOpAnd.exe
@mkdir -p $(dir $@)
./FindOpAnd.exe -checks=-*,FindOpAnd in/$* \
--export-fixes=out/$*.fixes.yaml \
-- </dev/null 2>&1 | cat
touch $@
# Run tests.
.PHONY: check
check: FindOpAnd.exe
check: out/op-and.cc
# Remove test outputs.
.PHONY: check-clean
check-clean:
rm -rf out
# Remove compile and test outputs.
.PHONY: clean
clean: check-clean
$(RM) *.exe
rm -rf obj
# EOF