18 #include "llvm/Support/MD5.h" 19 #include "llvm/Support/Path.h" 21 using namespace clang;
24 unsigned StartIndex,
unsigned EndIndex)
25 : S(Stmt), D(D), StartIndex(StartIndex), EndIndex(EndIndex) {
26 assert(Stmt &&
"Stmt must not be a nullptr");
27 assert(StartIndex < EndIndex &&
"Given array should not be empty");
28 assert(EndIndex <= Stmt->
size() &&
"Given array too big for this Stmt");
32 : S(Stmt), D(D), StartIndex(0), EndIndex(0) {}
35 : S(nullptr), D(nullptr), StartIndex(0), EndIndex(0) {}
47 bool StartIsInBounds =
63 auto CS = cast<CompoundStmt>(S);
64 return CS->body_begin() + StartIndex;
71 auto CS = cast<CompoundStmt>(S);
72 return CS->body_begin() + EndIndex;
116 if (Group.size() < OtherGroup.size())
127 std::vector<CloneDetector::CloneGroup> &
Result) {
128 std::vector<unsigned> IndexesToRemove;
134 for (
unsigned i = 0; i < Result.size(); ++i) {
135 for (
unsigned j = 0; j < Result.size(); ++j) {
141 IndexesToRemove.push_back(i);
150 for (
auto I = IndexesToRemove.rbegin(); I != IndexesToRemove.rend(); ++I) {
151 Result.erase(Result.begin() + *I);
158 if (IgnoredFilesPattern.empty() || Group.empty() ||
159 !IgnoredFilesRegex->isValid(Error))
164 StringRef
Filename = llvm::sys::path::filename(
165 SM.
getFilename(S.getContainingDecl()->getLocation()));
166 if (IgnoredFilesRegex->match(Filename))
182 class CloneTypeIIStmtDataCollector
188 template <
class Ty>
void addData(
const Ty &Data) {
195 : Context(Context), DataConsumer(DataConsumer) {
202 #define DEF_ADD_DATA(CLASS, CODE) \ 203 template <class = void> void Visit##CLASS(const CLASS *S) { \ 205 ConstStmtVisitor<CloneTypeIIStmtDataCollector<T>>::Visit##CLASS(S); \ 208 #include "clang/AST/StmtDataCollectors.inc" 211 #define SKIP(CLASS) \ 212 void Visit##CLASS(const CLASS *S) { \ 213 ConstStmtVisitor<CloneTypeIIStmtDataCollector<T>>::Visit##CLASS(S); \ 230 llvm::MD5::MD5Result HashResult;
231 Hash.final(HashResult);
236 std::min(
sizeof(HashCode),
sizeof(HashResult)));
252 std::vector<std::pair<size_t, StmtSequence>> &StmtsByHash) {
256 CloneTypeIIStmtDataCollector<llvm::MD5>(S, Context, Hash);
261 for (
const Stmt *Child : S->children()) {
262 if (Child ==
nullptr) {
263 ChildHashes.push_back(0);
266 size_t ChildHash =
saveHash(Child, D, StmtsByHash);
268 StringRef(reinterpret_cast<char *>(&ChildHash),
sizeof(ChildHash)));
269 ChildHashes.push_back(ChildHash);
276 for (
unsigned Pos = 0; Pos < CS->size(); ++Pos) {
281 for (
unsigned Length = 1; Length <= CS->size() - Pos; ++Length) {
284 size_t ChildHash = ChildHashes[Pos + Length - 1];
286 StringRef(reinterpret_cast<char *>(&ChildHash),
sizeof(ChildHash)));
290 llvm::MD5 SubHash = Hash;
291 StmtsByHash.push_back(std::make_pair(
299 StmtsByHash.push_back(std::make_pair(HashCode,
StmtSequence(S, D)));
306 class FoldingSetNodeIDWrapper {
308 llvm::FoldingSetNodeID &FS;
311 FoldingSetNodeIDWrapper(llvm::FoldingSetNodeID &FS) : FS(FS) {}
313 void update(StringRef Str) { FS.AddString(Str); }
320 FoldingSetNodeIDWrapper &OutputData) {
321 for (
const Stmt *S : Sequence) {
322 CloneTypeIIStmtDataCollector<FoldingSetNodeIDWrapper>(
325 for (
const Stmt *Child : S->children()) {
342 llvm::FoldingSetNodeID DataLHS, DataRHS;
343 FoldingSetNodeIDWrapper LHSWrapper(DataLHS);
344 FoldingSetNodeIDWrapper RHSWrapper(DataRHS);
349 return DataLHS == DataRHS;
353 std::vector<CloneDetector::CloneGroup> &Sequences) {
355 std::vector<CloneDetector::CloneGroup>
Result;
363 std::vector<std::pair<size_t, StmtSequence>> StmtsByHash;
367 saveHash(S.front(), S.getContainingDecl(), StmtsByHash);
371 std::stable_sort(StmtsByHash.begin(), StmtsByHash.end(),
372 [](std::pair<size_t, StmtSequence> LHS,
373 std::pair<size_t, StmtSequence> RHS) {
374 return LHS.first < RHS.first;
381 for (
unsigned i = 0; i < StmtsByHash.size() - 1; ++i) {
382 const auto Current = StmtsByHash[i];
389 size_t PrototypeHash = Current.first;
391 for (; i < StmtsByHash.size(); ++i) {
393 if (PrototypeHash != StmtsByHash[i].first) {
404 NewGroup.push_back(StmtsByHash[i].second);
409 Result.push_back(NewGroup);
417 std::vector<CloneDetector::CloneGroup> &Sequences) {
426 const std::string &ParentMacroStack) {
430 size_t Complexity = 1;
435 std::string MacroStack =
446 if (!ParentMacroStack.empty() && MacroStack == ParentMacroStack) {
453 for (
const Stmt *S : Seq) {
454 Complexity += calculateStmtComplexity(
455 StmtSequence(S, Seq.getContainingDecl()), Limit, MacroStack);
456 if (Complexity >= Limit)
461 Complexity += calculateStmtComplexity(
463 if (Complexity >= Limit)
471 std::vector<CloneDetector::CloneGroup> &CloneGroups) {
481 std::vector<CloneDetector::CloneGroup> &CloneGroups,
484 std::vector<CloneDetector::CloneGroup>
Result;
485 for (
auto &HashGroup : CloneGroups) {
488 std::vector<char> Indexes;
489 Indexes.resize(HashGroup.size());
491 for (
unsigned i = 0; i < HashGroup.size(); ++i) {
505 for (
unsigned j = i + 1; j < HashGroup.size(); ++j) {
513 if (!Compare(Prototype, Candidate))
516 PotentialGroup.push_back(Candidate);
523 Result.push_back(PotentialGroup);
526 assert(std::all_of(Indexes.begin(), Indexes.end(),
527 [](
char c) {
return c == 1; }));
532 void VariablePattern::addVariableOccurence(
const VarDecl *
VarDecl,
533 const Stmt *Mention) {
535 for (
size_t KindIndex = 0; KindIndex < Variables.size(); ++KindIndex) {
536 if (Variables[KindIndex] == VarDecl) {
539 Occurences.emplace_back(KindIndex, Mention);
545 Occurences.emplace_back(Variables.size(), Mention);
546 Variables.push_back(VarDecl);
549 void VariablePattern::addVariables(
const Stmt *S) {
557 if (
auto D = dyn_cast<DeclRefExpr>(S)) {
558 if (
auto VD = dyn_cast<VarDecl>(D->getDecl()->getCanonicalDecl()))
559 addVariableOccurence(VD, D);
571 unsigned NumberOfDifferences = 0;
573 assert(Other.Occurences.size() == Occurences.size());
574 for (
unsigned i = 0; i < Occurences.size(); ++i) {
575 auto ThisOccurence = Occurences[i];
576 auto OtherOccurence = Other.Occurences[i];
577 if (ThisOccurence.KindID == OtherOccurence.KindID)
580 ++NumberOfDifferences;
584 if (FirstMismatch ==
nullptr)
589 if (NumberOfDifferences != 1)
592 const VarDecl *FirstSuggestion =
nullptr;
596 if (OtherOccurence.KindID < Variables.size())
597 FirstSuggestion = Variables[OtherOccurence.KindID];
602 Variables[ThisOccurence.KindID], ThisOccurence.Mention,
608 const VarDecl *SecondSuggestion =
nullptr;
609 if (ThisOccurence.KindID < Other.Variables.size())
610 SecondSuggestion = Other.Variables[ThisOccurence.KindID];
615 Other.Variables[OtherOccurence.KindID], OtherOccurence.Mention,
630 return NumberOfDifferences;
SourceLocation getStartLoc() const LLVM_READONLY
Returns the start source location of the first statement in this sequence.
SourceLocation getEndLoc() const
Returns the end source location of the last statement in this sequence.
ConstStmtVisitor - This class implements a simple visitor for Stmt subclasses.
__SIZE_TYPE__ size_t
The unsigned integer type of the result of the sizeof operator.
virtual Stmt * getBody() const
getBody - If this Decl represents a declaration for a body of code, such as a function or method defi...
Stmt - This represents one statement.
ASTContext & getASTContext() const
Returns the related ASTContext for the stored Stmts.
SourceRange getSourceRange() const
Returns the source range of the whole sequence - from the beginning of the first statement to the end...
Analyzes the pattern of the referenced variables in a statement.
static bool areSequencesClones(const StmtSequence &LHS, const StmtSequence &RHS)
Returns true if both sequences are clones of each other.
Decl - This represents one declaration (or definition), e.g.
Defines the C++ template declaration subclasses.
bool isBeforeInTranslationUnit(SourceLocation LHS, SourceLocation RHS) const
Determines the order of 2 source locations in the translation unit.
const Stmt * front() const
Returns the first statement in this sequence.
bool isAutoGenerated(const CloneDetector::CloneGroup &Group)
Represents a variable declaration or definition.
StmtSequence()
Constructs an empty StmtSequence.
void analyzeCodeBody(const Decl *D)
Generates and stores search data for all statements in the body of the given Decl.
static void CollectStmtSequenceData(const StmtSequence &Sequence, FoldingSetNodeIDWrapper &OutputData)
Writes the relevant data from all statements and child statements in the given StmtSequence into the ...
This file defines classes for searching and analyzing source code clones.
static size_t saveHash(const Stmt *S, const Decl *D, std::vector< std::pair< size_t, StmtSequence >> &StmtsByHash)
Generates and saves a hash code for the given Stmt.
iterator begin() const
Returns an iterator pointing to the first statement in this sequence.
Identifies a list of statements.
static bool containsAnyInGroup(StmtSequence &Seq, CloneDetector::CloneGroup &Group)
Returns true if and only if Seq contains at least one other sequence in the Group.
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
std::string getMacroStack(SourceLocation Loc, ASTContext &Context)
Returns a string that represents all macro expansions that expanded into the given SourceLocation...
unsigned countPatternDifferences(const VariablePattern &Other, VariablePattern::SuspiciousClonePair *FirstMismatch=nullptr)
Counts the differences between this pattern and the given one.
iterator end() const
Returns an iterator pointing behind the last statement in this sequence.
bool empty() const
Returns true if and only if this StmtSequence contains no statements.
size_t calculateStmtComplexity(const StmtSequence &Seq, std::size_t Limit, const std::string &ParentMacroStack="")
Calculates the complexity of the given StmtSequence.
Describes two clones that reference their variables in a different pattern which could indicate a pro...
CompoundStmt - This represents a group of statements like { stmt stmt }.
This file declares helper methods for collecting data from AST nodes.
SuspiciousCloneInfo SecondCloneInfo
The other clone in the pair, which can have a suggested variable.
const Decl * getContainingDecl() const
Returns the declaration that contains the stored Stmts.
const VarDecl * Suggestion
The variable that should have been referenced to follow the pattern.
The result type of a method or function.
static bool containsGroup(CloneDetector::CloneGroup &Group, CloneDetector::CloneGroup &OtherGroup)
Returns true if and only if all sequences in OtherGroup are contained by a sequence in Group...
Utility class holding the relevant information about a single clone in this pair. ...
virtual bool hasBody() const
Returns true if this Decl represents a declaration for a body of code, such as a function or method d...
StringRef getFilename(SourceLocation SpellingLoc) const
Return the filename of the file containing a SourceLocation.
unsigned size() const
Returns the number of statements this object holds.
ASTContext & getASTContext() const
Encodes a location in the source.
ASTContext & getASTContext() const LLVM_READONLY
SourceLocation getLocStart() const LLVM_READONLY
void constrain(std::vector< CloneDetector::CloneGroup > &CloneGroups)
const Stmt *const * iterator
bool holdsSequence() const
Returns true if this object holds a list of statements.
SourceLocation getLocEnd() const LLVM_READONLY
static void splitCloneGroups(std::vector< CloneDetector::CloneGroup > &CloneGroups, llvm::function_ref< bool(const StmtSequence &, const StmtSequence &)> Compare)
Splits the given CloneGroups until the given Compare function returns true for all clones in a single...
void constrain(std::vector< CloneDetector::CloneGroup > &Sequences)
__DEVICE__ void * memcpy(void *__a, const void *__b, size_t __c)
void addDataToConsumer(T &DataConsumer, llvm::StringRef Str)
Utility functions for implementing addData() for a consumer that has a method update(StringRef) ...
Dataflow Directional Tag Classes.
const Stmt * back() const
Returns the last statement in this sequence.
SourceLocation getBeginLoc() const
bool contains(const StmtSequence &Other) const
Returns true if and only if this sequence covers a source range that contains the source range of the...
SourceManager & getSourceManager()
MemberExpr - [C99 6.5.2.3] Structure and Union Members.
static size_t createHash(llvm::MD5 &Hash)
void constrain(std::vector< CloneDetector::CloneGroup > &Sequences)
StringLiteral - This represents a string literal expression, e.g.
SuspiciousCloneInfo FirstCloneInfo
The first clone in the pair which always has a suggested variable.
A reference to a declared variable, function, enum, etc.
__DEVICE__ int min(int __a, int __b)
A trivial tuple used to represent a source range.
A boolean literal, per ([C++ lex.bool] Boolean literals).
This class handles loading and caching of source files into memory.
void constrain(std::vector< CloneDetector::CloneGroup > &Result)