17 #include "llvm/Support/MD5.h" 18 #include "llvm/Support/Path.h" 20 using namespace clang;
23 unsigned StartIndex,
unsigned EndIndex)
24 : S(Stmt), D(D), StartIndex(StartIndex), EndIndex(EndIndex) {
25 assert(Stmt &&
"Stmt must not be a nullptr");
26 assert(StartIndex < EndIndex &&
"Given array should not be empty");
27 assert(EndIndex <= Stmt->
size() &&
"Given array too big for this Stmt");
31 : S(Stmt), D(D), StartIndex(0), EndIndex(0) {}
34 : S(nullptr), D(nullptr), StartIndex(0), EndIndex(0) {}
46 bool StartIsInBounds =
62 auto CS = cast<CompoundStmt>(S);
63 return CS->body_begin() + StartIndex;
70 auto CS = cast<CompoundStmt>(S);
71 return CS->body_begin() + EndIndex;
115 if (Group.size() < OtherGroup.size())
126 std::vector<CloneDetector::CloneGroup> &
Result) {
127 std::vector<unsigned> IndexesToRemove;
133 for (
unsigned i = 0;
i < Result.size(); ++
i) {
134 for (
unsigned j = 0; j < Result.size(); ++j) {
140 IndexesToRemove.push_back(i);
149 for (
auto I = IndexesToRemove.rbegin(); I != IndexesToRemove.rend(); ++I) {
150 Result.erase(Result.begin() + *I);
157 if (IgnoredFilesPattern.empty() || Group.empty() ||
158 !IgnoredFilesRegex->isValid(Error))
163 StringRef
Filename = llvm::sys::path::filename(
164 SM.
getFilename(S.getContainingDecl()->getLocation()));
165 if (IgnoredFilesRegex->match(Filename))
181 class CloneTypeIIStmtDataCollector
187 template <
class Ty>
void addData(
const Ty &Data) {
194 : Context(Context), DataConsumer(DataConsumer) {
201 #define DEF_ADD_DATA(CLASS, CODE) \ 202 template <class = void> void Visit##CLASS(const CLASS *S) { \ 204 ConstStmtVisitor<CloneTypeIIStmtDataCollector<T>>::Visit##CLASS(S); \ 207 #include "clang/AST/StmtDataCollectors.inc" 210 #define SKIP(CLASS) \ 211 void Visit##CLASS(const CLASS *S) { \ 212 ConstStmtVisitor<CloneTypeIIStmtDataCollector<T>>::Visit##CLASS(S); \ 229 llvm::MD5::MD5Result HashResult;
230 Hash.final(HashResult);
235 std::min(
sizeof(HashCode),
sizeof(HashResult)));
251 std::vector<std::pair<size_t, StmtSequence>> &StmtsByHash) {
255 CloneTypeIIStmtDataCollector<llvm::MD5>(S, Context, Hash);
260 for (
const Stmt *Child : S->children()) {
261 if (Child ==
nullptr) {
262 ChildHashes.push_back(0);
265 size_t ChildHash =
saveHash(Child, D, StmtsByHash);
267 StringRef(reinterpret_cast<char *>(&ChildHash),
sizeof(ChildHash)));
268 ChildHashes.push_back(ChildHash);
275 for (
unsigned Pos = 0; Pos < CS->size(); ++Pos) {
280 for (
unsigned Length = 1; Length <= CS->size() - Pos; ++Length) {
283 size_t ChildHash = ChildHashes[Pos + Length - 1];
285 StringRef(reinterpret_cast<char *>(&ChildHash),
sizeof(ChildHash)));
289 llvm::MD5 SubHash = Hash;
290 StmtsByHash.push_back(std::make_pair(
298 StmtsByHash.push_back(std::make_pair(HashCode,
StmtSequence(S, D)));
305 class FoldingSetNodeIDWrapper {
307 llvm::FoldingSetNodeID &FS;
310 FoldingSetNodeIDWrapper(llvm::FoldingSetNodeID &FS) : FS(FS) {}
312 void update(StringRef Str) { FS.AddString(Str); }
319 FoldingSetNodeIDWrapper &OutputData) {
320 for (
const Stmt *S : Sequence) {
321 CloneTypeIIStmtDataCollector<FoldingSetNodeIDWrapper>(
324 for (
const Stmt *Child : S->children()) {
341 llvm::FoldingSetNodeID DataLHS, DataRHS;
342 FoldingSetNodeIDWrapper LHSWrapper(DataLHS);
343 FoldingSetNodeIDWrapper RHSWrapper(DataRHS);
348 return DataLHS == DataRHS;
352 std::vector<CloneDetector::CloneGroup> &Sequences) {
354 std::vector<CloneDetector::CloneGroup>
Result;
362 std::vector<std::pair<size_t, StmtSequence>> StmtsByHash;
366 saveHash(S.front(), S.getContainingDecl(), StmtsByHash);
370 llvm::stable_sort(StmtsByHash, llvm::less_first());
376 for (
unsigned i = 0;
i < StmtsByHash.size() - 1; ++
i) {
377 const auto Current = StmtsByHash[
i];
384 size_t PrototypeHash = Current.first;
386 for (;
i < StmtsByHash.size(); ++
i) {
388 if (PrototypeHash != StmtsByHash[
i].first) {
399 NewGroup.push_back(StmtsByHash[
i].second);
404 Result.push_back(NewGroup);
412 std::vector<CloneDetector::CloneGroup> &Sequences) {
421 const std::string &ParentMacroStack) {
425 size_t Complexity = 1;
430 std::string MacroStack =
441 if (!ParentMacroStack.empty() && MacroStack == ParentMacroStack) {
448 for (
const Stmt *S : Seq) {
449 Complexity += calculateStmtComplexity(
450 StmtSequence(S, Seq.getContainingDecl()), Limit, MacroStack);
451 if (Complexity >= Limit)
456 Complexity += calculateStmtComplexity(
458 if (Complexity >= Limit)
466 std::vector<CloneDetector::CloneGroup> &CloneGroups) {
476 std::vector<CloneDetector::CloneGroup> &CloneGroups,
479 std::vector<CloneDetector::CloneGroup>
Result;
480 for (
auto &HashGroup : CloneGroups) {
483 std::vector<char> Indexes;
484 Indexes.resize(HashGroup.size());
486 for (
unsigned i = 0;
i < HashGroup.size(); ++
i) {
500 for (
unsigned j = i + 1; j < HashGroup.size(); ++j) {
508 if (!Compare(Prototype, Candidate))
511 PotentialGroup.push_back(Candidate);
518 Result.push_back(PotentialGroup);
521 assert(llvm::all_of(Indexes, [](
char c) {
return c == 1; }));
526 void VariablePattern::addVariableOccurence(
const VarDecl *
VarDecl,
527 const Stmt *Mention) {
529 for (
size_t KindIndex = 0; KindIndex < Variables.size(); ++KindIndex) {
530 if (Variables[KindIndex] == VarDecl) {
533 Occurences.emplace_back(KindIndex, Mention);
539 Occurences.emplace_back(Variables.size(), Mention);
540 Variables.push_back(VarDecl);
543 void VariablePattern::addVariables(
const Stmt *S) {
551 if (
auto D = dyn_cast<DeclRefExpr>(S)) {
552 if (
auto VD = dyn_cast<VarDecl>(D->getDecl()->getCanonicalDecl()))
553 addVariableOccurence(VD, D);
565 unsigned NumberOfDifferences = 0;
567 assert(Other.Occurences.size() == Occurences.size());
568 for (
unsigned i = 0;
i < Occurences.size(); ++
i) {
569 auto ThisOccurence = Occurences[
i];
570 auto OtherOccurence = Other.Occurences[
i];
571 if (ThisOccurence.KindID == OtherOccurence.KindID)
574 ++NumberOfDifferences;
578 if (FirstMismatch ==
nullptr)
583 if (NumberOfDifferences != 1)
586 const VarDecl *FirstSuggestion =
nullptr;
590 if (OtherOccurence.KindID < Variables.size())
591 FirstSuggestion = Variables[OtherOccurence.KindID];
596 Variables[ThisOccurence.KindID], ThisOccurence.Mention,
602 const VarDecl *SecondSuggestion =
nullptr;
603 if (ThisOccurence.KindID < Other.Variables.size())
604 SecondSuggestion = Other.Variables[ThisOccurence.KindID];
609 Other.Variables[OtherOccurence.KindID], OtherOccurence.Mention,
624 return NumberOfDifferences;
SourceLocation getEndLoc() const
Returns the end source location of the last statement in this sequence.
ConstStmtVisitor - This class implements a simple visitor for Stmt subclasses.
virtual Stmt * getBody() const
getBody - If this Decl represents a declaration for a body of code, such as a function or method defi...
Stmt - This represents one statement.
ASTContext & getASTContext() const
Returns the related ASTContext for the stored Stmts.
SourceRange getSourceRange() const
Returns the source range of the whole sequence - from the beginning of the first statement to the end...
Analyzes the pattern of the referenced variables in a statement.
static bool areSequencesClones(const StmtSequence &LHS, const StmtSequence &RHS)
Returns true if both sequences are clones of each other.
Decl - This represents one declaration (or definition), e.g.
Defines the C++ template declaration subclasses.
bool isBeforeInTranslationUnit(SourceLocation LHS, SourceLocation RHS) const
Determines the order of 2 source locations in the translation unit.
const Stmt * front() const
Returns the first statement in this sequence.
bool isAutoGenerated(const CloneDetector::CloneGroup &Group)
Represents a variable declaration or definition.
StmtSequence()
Constructs an empty StmtSequence.
void analyzeCodeBody(const Decl *D)
Generates and stores search data for all statements in the body of the given Decl.
static void CollectStmtSequenceData(const StmtSequence &Sequence, FoldingSetNodeIDWrapper &OutputData)
Writes the relevant data from all statements and child statements in the given StmtSequence into the ...
This file defines classes for searching and analyzing source code clones.
static size_t saveHash(const Stmt *S, const Decl *D, std::vector< std::pair< size_t, StmtSequence >> &StmtsByHash)
Generates and saves a hash code for the given Stmt.
iterator begin() const
Returns an iterator pointing to the first statement in this sequence.
Identifies a list of statements.
static bool containsAnyInGroup(StmtSequence &Seq, CloneDetector::CloneGroup &Group)
Returns true if and only if Seq contains at least one other sequence in the Group.
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
std::string getMacroStack(SourceLocation Loc, ASTContext &Context)
Returns a string that represents all macro expansions that expanded into the given SourceLocation...
SourceLocation getBeginLoc() const LLVM_READONLY
__SIZE_TYPE__ size_t
The unsigned integer type of the result of the sizeof operator.
unsigned countPatternDifferences(const VariablePattern &Other, VariablePattern::SuspiciousClonePair *FirstMismatch=nullptr)
Counts the differences between this pattern and the given one.
iterator end() const
Returns an iterator pointing behind the last statement in this sequence.
bool empty() const
Returns true if and only if this StmtSequence contains no statements.
size_t calculateStmtComplexity(const StmtSequence &Seq, std::size_t Limit, const std::string &ParentMacroStack="")
Calculates the complexity of the given StmtSequence.
Describes two clones that reference their variables in a different pattern which could indicate a pro...
CompoundStmt - This represents a group of statements like { stmt stmt }.
This file declares helper methods for collecting data from AST nodes.
SuspiciousCloneInfo SecondCloneInfo
The other clone in the pair, which can have a suggested variable.
const Decl * getContainingDecl() const
Returns the declaration that contains the stored Stmts.
const VarDecl * Suggestion
The variable that should have been referenced to follow the pattern.
The result type of a method or function.
SourceLocation getEndLoc() const LLVM_READONLY
static bool containsGroup(CloneDetector::CloneGroup &Group, CloneDetector::CloneGroup &OtherGroup)
Returns true if and only if all sequences in OtherGroup are contained by a sequence in Group...
Utility class holding the relevant information about a single clone in this pair. ...
virtual bool hasBody() const
Returns true if this Decl represents a declaration for a body of code, such as a function or method d...
StringRef getFilename(SourceLocation SpellingLoc) const
Return the filename of the file containing a SourceLocation.
unsigned size() const
Returns the number of statements this object holds.
ASTContext & getASTContext() const
Encodes a location in the source.
ASTContext & getASTContext() const LLVM_READONLY
void constrain(std::vector< CloneDetector::CloneGroup > &CloneGroups)
const Stmt *const * iterator
bool holdsSequence() const
Returns true if this object holds a list of statements.
static void splitCloneGroups(std::vector< CloneDetector::CloneGroup > &CloneGroups, llvm::function_ref< bool(const StmtSequence &, const StmtSequence &)> Compare)
Splits the given CloneGroups until the given Compare function returns true for all clones in a single...
void constrain(std::vector< CloneDetector::CloneGroup > &Sequences)
__DEVICE__ void * memcpy(void *__a, const void *__b, size_t __c)
void addDataToConsumer(T &DataConsumer, llvm::StringRef Str)
Utility functions for implementing addData() for a consumer that has a method update(StringRef) ...
Dataflow Directional Tag Classes.
const Stmt * back() const
Returns the last statement in this sequence.
SourceLocation getBeginLoc() const
Returns the start source location of the first statement in this sequence.
bool contains(const StmtSequence &Other) const
Returns true if and only if this sequence covers a source range that contains the source range of the...
SourceManager & getSourceManager()
MemberExpr - [C99 6.5.2.3] Structure and Union Members.
static size_t createHash(llvm::MD5 &Hash)
void constrain(std::vector< CloneDetector::CloneGroup > &Sequences)
__DEVICE__ int min(int __a, int __b)
StringLiteral - This represents a string literal expression, e.g.
SuspiciousCloneInfo FirstCloneInfo
The first clone in the pair which always has a suggested variable.
A reference to a declared variable, function, enum, etc.
A trivial tuple used to represent a source range.
A boolean literal, per ([C++ lex.bool] Boolean literals).
This class handles loading and caching of source files into memory.
void constrain(std::vector< CloneDetector::CloneGroup > &Result)