clang-tools  9.0.0
BranchCloneCheck.cpp
Go to the documentation of this file.
1 //===--- BranchCloneCheck.cpp - clang-tidy --------------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "BranchCloneCheck.h"
11 #include "clang/AST/ASTContext.h"
12 #include "clang/ASTMatchers/ASTMatchFinder.h"
13 #include "clang/Analysis/CloneDetection.h"
14 #include "llvm/Support/Casting.h"
15 
16 using namespace clang;
17 using namespace clang::ast_matchers;
18 
19 /// Returns true when the statements are Type I clones of each other.
20 static bool areStatementsIdentical(const Stmt *LHS, const Stmt *RHS,
21  const ASTContext &Context) {
22  llvm::FoldingSetNodeID DataLHS, DataRHS;
23  LHS->Profile(DataLHS, Context, false);
24  RHS->Profile(DataRHS, Context, false);
25  return (DataLHS == DataRHS);
26 }
27 
28 namespace {
29 /// A branch in a switch may consist of several statements; while a branch in
30 /// an if/else if/else chain is one statement (which may be a CompoundStmt).
31 using SwitchBranch = llvm::SmallVector<const Stmt *, 2>;
32 } // anonymous namespace
33 
34 /// Determines if the bodies of two branches in a switch statements are Type I
35 /// clones of each other. This function only examines the body of the branch
36 /// and ignores the `case X:` or `default:` at the start of the branch.
37 static bool areSwitchBranchesIdentical(const SwitchBranch LHS,
38  const SwitchBranch RHS,
39  const ASTContext &Context) {
40  if (LHS.size() != RHS.size())
41  return false;
42 
43  for (size_t i = 0, Size = LHS.size(); i < Size; i++) {
44  // NOTE: We strip goto labels and annotations in addition to stripping
45  // the `case X:` or `default:` labels, but it is very unlikely that this
46  // would casue false positives in real-world code.
47  if (!areStatementsIdentical(LHS[i]->stripLabelLikeStatements(),
48  RHS[i]->stripLabelLikeStatements(), Context)) {
49  return false;
50  }
51  }
52 
53  return true;
54 }
55 
56 namespace clang {
57 namespace tidy {
58 namespace bugprone {
59 
60 void BranchCloneCheck::registerMatchers(MatchFinder *Finder) {
61  Finder->addMatcher(
62  ifStmt(stmt().bind("if"),
63  hasParent(stmt(unless(ifStmt(hasElse(equalsBoundNode("if")))))),
64  hasElse(stmt().bind("else"))),
65  this);
66  Finder->addMatcher(switchStmt().bind("switch"), this);
67  Finder->addMatcher(conditionalOperator().bind("condOp"), this);
68 }
69 
70 void BranchCloneCheck::check(const MatchFinder::MatchResult &Result) {
71  const ASTContext &Context = *Result.Context;
72 
73  if (const auto *IS = Result.Nodes.getNodeAs<IfStmt>("if")) {
74  const Stmt *Then = IS->getThen();
75  assert(Then && "An IfStmt must have a `then` branch!");
76 
77  const Stmt *Else = Result.Nodes.getNodeAs<Stmt>("else");
78  assert(Else && "We only look for `if` statements with an `else` branch!");
79 
80  if (!isa<IfStmt>(Else)) {
81  // Just a simple if with no `else if` branch.
82  if (areStatementsIdentical(Then->IgnoreContainers(),
83  Else->IgnoreContainers(), Context)) {
84  diag(IS->getBeginLoc(), "if with identical then and else branches");
85  diag(IS->getElseLoc(), "else branch starts here", DiagnosticIDs::Note);
86  }
87  return;
88  }
89 
90  // This is the complicated case when we start an if/else if/else chain.
91  // To find all the duplicates, we collect all the branches into a vector.
92  llvm::SmallVector<const Stmt *, 4> Branches;
93  const IfStmt *Cur = IS;
94  while (true) {
95  // Store the `then` branch.
96  Branches.push_back(Cur->getThen());
97 
98  Else = Cur->getElse();
99  // The chain ends if there is no `else` branch.
100  if (!Else)
101  break;
102 
103  // Check if there is another `else if`...
104  Cur = dyn_cast<IfStmt>(Else);
105  if (!Cur) {
106  // ...this is just a plain `else` branch at the end of the chain.
107  Branches.push_back(Else);
108  break;
109  }
110  }
111 
112  size_t N = Branches.size();
113  llvm::BitVector KnownAsClone(N);
114 
115  for (size_t i = 0; i + 1 < N; i++) {
116  // We have already seen Branches[i] as a clone of an earlier branch.
117  if (KnownAsClone[i])
118  continue;
119 
120  int NumCopies = 1;
121 
122  for (size_t j = i + 1; j < N; j++) {
123  if (KnownAsClone[j] ||
124  !areStatementsIdentical(Branches[i]->IgnoreContainers(),
125  Branches[j]->IgnoreContainers(), Context))
126  continue;
127 
128  NumCopies++;
129  KnownAsClone[j] = true;
130 
131  if (NumCopies == 2) {
132  // We report the first occurence only when we find the second one.
133  diag(Branches[i]->getBeginLoc(),
134  "repeated branch in conditional chain");
135  SourceLocation End =
136  Lexer::getLocForEndOfToken(Branches[i]->getEndLoc(), 0,
137  *Result.SourceManager, getLangOpts());
138  if (End.isValid()) {
139  diag(End, "end of the original", DiagnosticIDs::Note);
140  }
141  }
142 
143  diag(Branches[j]->getBeginLoc(), "clone %0 starts here",
144  DiagnosticIDs::Note)
145  << (NumCopies - 1);
146  }
147  }
148  return;
149  }
150 
151  if (const auto *CO = Result.Nodes.getNodeAs<ConditionalOperator>("condOp")) {
152  // We do not try to detect chains of ?: operators.
153  if (areStatementsIdentical(CO->getTrueExpr(), CO->getFalseExpr(), Context))
154  diag(CO->getQuestionLoc(),
155  "conditional operator with identical true and false expressions");
156 
157  return;
158  }
159 
160  if (const auto *SS = Result.Nodes.getNodeAs<SwitchStmt>("switch")) {
161  const CompoundStmt *Body = dyn_cast_or_null<CompoundStmt>(SS->getBody());
162 
163  // Code like
164  // switch (x) case 0: case 1: foobar();
165  // is legal and calls foobar() if and only if x is either 0 or 1;
166  // but we do not try to distinguish branches in such code.
167  if (!Body)
168  return;
169 
170  // We will first collect the branches of the switch statements. For the
171  // sake of simplicity we say that branches are delimited by the SwitchCase
172  // (`case:` or `default:`) children of Body; that is, we ignore `case:` or
173  // `default:` labels embedded inside other statements and we do not follow
174  // the effects of `break` and other manipulation of the control-flow.
175  llvm::SmallVector<SwitchBranch, 4> Branches;
176  for (const Stmt *S : Body->body()) {
177  // If this is a `case` or `default`, we start a new, empty branch.
178  if (isa<SwitchCase>(S))
179  Branches.emplace_back();
180 
181  // There may be code before the first branch (which can be dead code
182  // and can be code reached either through goto or through case labels
183  // that are embedded inside e.g. inner compound statements); we do not
184  // store those statements in branches.
185  if (!Branches.empty())
186  Branches.back().push_back(S);
187  }
188 
189  auto End = Branches.end();
190  auto BeginCurrent = Branches.begin();
191  while (BeginCurrent < End) {
192  auto EndCurrent = BeginCurrent + 1;
193  while (EndCurrent < End &&
194  areSwitchBranchesIdentical(*BeginCurrent, *EndCurrent, Context)) {
195  ++EndCurrent;
196  }
197  // At this point the iterator range {BeginCurrent, EndCurrent} contains a
198  // complete family of consecutive identical branches.
199  if (EndCurrent > BeginCurrent + 1) {
200  diag(BeginCurrent->front()->getBeginLoc(),
201  "switch has %0 consecutive identical branches")
202  << static_cast<int>(std::distance(BeginCurrent, EndCurrent));
203 
204  SourceLocation EndLoc = (EndCurrent - 1)->back()->getEndLoc();
205  // If the case statement is generated from a macro, it's SourceLocation
206  // may be invalid, resuling in an assertation failure down the line.
207  // While not optimal, try the begin location in this case, it's still
208  // better then nothing.
209  if (EndLoc.isInvalid())
210  EndLoc = (EndCurrent - 1)->back()->getBeginLoc();
211 
212  if (EndLoc.isMacroID())
213  EndLoc = Context.getSourceManager().getExpansionLoc(EndLoc);
214  EndLoc = Lexer::getLocForEndOfToken(EndLoc, 0, *Result.SourceManager,
215  getLangOpts());
216 
217  if (EndLoc.isValid()) {
218  diag(EndLoc, "last of these clones ends here", DiagnosticIDs::Note);
219  }
220  }
221  BeginCurrent = EndCurrent;
222  }
223  return;
224  }
225 
226  llvm_unreachable("No if statement and no switch statement.");
227 }
228 
229 } // namespace bugprone
230 } // namespace tidy
231 } // namespace clang
unsigned Branches
static bool areSwitchBranchesIdentical(const SwitchBranch LHS, const SwitchBranch RHS, const ASTContext &Context)
Determines if the bodies of two branches in a switch statements are Type I clones of each other...
===– Representation.cpp - ClangDoc Representation --------—*- C++ -*-===//
llvm::Optional< llvm::Expected< tooling::AtomicChanges > > Result
Definition: Rename.cpp:36
static bool areStatementsIdentical(const Stmt *LHS, const Stmt *RHS, const ASTContext &Context)
Returns true when the statements are Type I clones of each other.