SuspiciousMissingCommaCheck.cpp
4.69 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
//===--- SuspiciousMissingCommaCheck.cpp - clang-tidy----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "SuspiciousMissingCommaCheck.h"
#include "clang/AST/ASTContext.h"
#include "clang/ASTMatchers/ASTMatchFinder.h"
using namespace clang::ast_matchers;
namespace clang {
namespace tidy {
namespace bugprone {
namespace {
bool isConcatenatedLiteralsOnPurpose(ASTContext *Ctx,
const StringLiteral *Lit) {
// String literals surrounded by parentheses are assumed to be on purpose.
// i.e.: const char* Array[] = { ("a" "b" "c"), "d", [...] };
TraversalKindScope RAII(*Ctx, ast_type_traits::TK_AsIs);
auto Parents = Ctx->getParents(*Lit);
if (Parents.size() == 1 && Parents[0].get<ParenExpr>() != nullptr)
return true;
// Appropriately indented string literals are assumed to be on purpose.
// The following frequent indentation is accepted:
// const char* Array[] = {
// "first literal"
// "indented literal"
// "indented literal",
// "second literal",
// [...]
// };
const SourceManager &SM = Ctx->getSourceManager();
bool IndentedCorrectly = true;
SourceLocation FirstToken = Lit->getStrTokenLoc(0);
FileID BaseFID = SM.getFileID(FirstToken);
unsigned int BaseIndent = SM.getSpellingColumnNumber(FirstToken);
unsigned int BaseLine = SM.getSpellingLineNumber(FirstToken);
for (unsigned int TokNum = 1; TokNum < Lit->getNumConcatenated(); ++TokNum) {
SourceLocation Token = Lit->getStrTokenLoc(TokNum);
FileID FID = SM.getFileID(Token);
unsigned int Indent = SM.getSpellingColumnNumber(Token);
unsigned int Line = SM.getSpellingLineNumber(Token);
if (FID != BaseFID || Line != BaseLine + TokNum || Indent <= BaseIndent) {
IndentedCorrectly = false;
break;
}
}
if (IndentedCorrectly)
return true;
// There is no pattern recognized by the checker, assume it's not on purpose.
return false;
}
AST_MATCHER_P(StringLiteral, isConcatenatedLiteral, unsigned,
MaxConcatenatedTokens) {
return Node.getNumConcatenated() > 1 &&
Node.getNumConcatenated() < MaxConcatenatedTokens &&
!isConcatenatedLiteralsOnPurpose(&Finder->getASTContext(), &Node);
}
} // namespace
SuspiciousMissingCommaCheck::SuspiciousMissingCommaCheck(
StringRef Name, ClangTidyContext *Context)
: ClangTidyCheck(Name, Context),
SizeThreshold(Options.get("SizeThreshold", 5U)),
RatioThreshold(std::stod(Options.get("RatioThreshold", ".2"))),
MaxConcatenatedTokens(Options.get("MaxConcatenatedTokens", 5U)) {}
void SuspiciousMissingCommaCheck::storeOptions(
ClangTidyOptions::OptionMap &Opts) {
Options.store(Opts, "SizeThreshold", SizeThreshold);
Options.store(Opts, "RatioThreshold", std::to_string(RatioThreshold));
Options.store(Opts, "MaxConcatenatedTokens", MaxConcatenatedTokens);
}
void SuspiciousMissingCommaCheck::registerMatchers(MatchFinder *Finder) {
const auto ConcatenatedStringLiteral =
stringLiteral(isConcatenatedLiteral(MaxConcatenatedTokens)).bind("str");
const auto StringsInitializerList =
initListExpr(hasType(constantArrayType()),
has(ignoringParenImpCasts(expr(ConcatenatedStringLiteral))));
Finder->addMatcher(StringsInitializerList.bind("list"), this);
}
void SuspiciousMissingCommaCheck::check(
const MatchFinder::MatchResult &Result) {
const auto *InitializerList = Result.Nodes.getNodeAs<InitListExpr>("list");
const auto *ConcatenatedLiteral =
Result.Nodes.getNodeAs<StringLiteral>("str");
assert(InitializerList && ConcatenatedLiteral);
// Skip small arrays as they often generate false-positive.
unsigned int Size = InitializerList->getNumInits();
if (Size < SizeThreshold)
return;
// Count the number of occurrence of concatenated string literal.
unsigned int Count = 0;
for (unsigned int i = 0; i < Size; ++i) {
const Expr *Child = InitializerList->getInit(i)->IgnoreImpCasts();
if (const auto *Literal = dyn_cast<StringLiteral>(Child)) {
if (Literal->getNumConcatenated() > 1)
++Count;
}
}
// Warn only when concatenation is not common in this initializer list.
// The current threshold is set to less than 1/5 of the string literals.
if (double(Count) / Size > RatioThreshold)
return;
diag(ConcatenatedLiteral->getBeginLoc(),
"suspicious string literal, probably missing a comma");
}
} // namespace bugprone
} // namespace tidy
} // namespace clang