20 #include "llvm/Support/MD5.h"
21 #include "llvm/Support/raw_ostream.h"
22 #include "llvm/Support/Path.h"
24 using namespace clang;
25 using namespace clang::clone_detection;
28 unsigned StartIndex,
unsigned EndIndex)
29 :
S(Stmt), D(D), StartIndex(StartIndex), EndIndex(EndIndex) {
30 assert(Stmt &&
"Stmt must not be a nullptr");
31 assert(StartIndex < EndIndex &&
"Given array should not be empty");
32 assert(EndIndex <= Stmt->
size() &&
"Given array too big for this Stmt");
36 :
S(Stmt), D(D), StartIndex(0), EndIndex(0) {}
39 :
S(nullptr), D(nullptr), StartIndex(0), EndIndex(0) {}
51 bool StartIsInBounds =
67 auto CS = cast<CompoundStmt>(
S);
68 return CS->body_begin() + StartIndex;
75 auto CS = cast<CompoundStmt>(
S);
76 return CS->body_begin() + EndIndex;
108 std::string MacroStack;
109 llvm::raw_string_ostream MacroStackStream(MacroStack);
118 MacroStackStream.flush();
148 if (Group.size() < OtherGroup.size())
159 std::vector<CloneDetector::CloneGroup> &
Result) {
160 std::vector<unsigned> IndexesToRemove;
166 for (
unsigned i = 0; i < Result.size(); ++i) {
167 for (
unsigned j = 0; j < Result.size(); ++j) {
173 IndexesToRemove.push_back(i);
182 for (
auto I = IndexesToRemove.rbegin();
I != IndexesToRemove.rend(); ++
I) {
183 Result.erase(Result.begin() + *
I);
196 S.getContainingDecl()->getLocation()));
208 llvm::MD5::MD5Result HashResult;
209 Hash.final(HashResult);
213 std::memcpy(&HashCode, &HashResult,
214 std::min(
sizeof(HashCode),
sizeof(HashResult)));
219 size_t RecursiveCloneTypeIIConstraint::saveHash(
221 std::vector<std::pair<size_t, StmtSequence>> &StmtsByHash) {
230 for (
const Stmt *Child : S->children()) {
231 if (Child ==
nullptr) {
232 ChildHashes.push_back(0);
235 size_t ChildHash = saveHash(Child, D, StmtsByHash);
237 StringRef(reinterpret_cast<char *>(&ChildHash),
sizeof(ChildHash)));
238 ChildHashes.push_back(ChildHash);
245 for (
unsigned Pos = 0; Pos < CS->size(); ++Pos) {
253 size_t ChildHash = ChildHashes[Pos +
Length - 1];
255 StringRef(reinterpret_cast<char *>(&ChildHash),
sizeof(ChildHash)));
259 llvm::MD5 SubHash = Hash;
260 StmtsByHash.push_back(std::make_pair(
268 StmtsByHash.push_back(std::make_pair(HashCode,
StmtSequence(S, D)));
275 class FoldingSetNodeIDWrapper {
277 llvm::FoldingSetNodeID &FS;
280 FoldingSetNodeIDWrapper(llvm::FoldingSetNodeID &FS) : FS(FS) {}
282 void update(StringRef Str) { FS.AddString(Str); }
289 FoldingSetNodeIDWrapper &OutputData) {
290 for (
const Stmt *S : Sequence) {
294 for (
const Stmt *Child : S->children()) {
311 llvm::FoldingSetNodeID DataLHS, DataRHS;
312 FoldingSetNodeIDWrapper LHSWrapper(DataLHS);
313 FoldingSetNodeIDWrapper RHSWrapper(DataRHS);
318 return DataLHS == DataRHS;
322 std::vector<CloneDetector::CloneGroup> &Sequences) {
324 std::vector<CloneDetector::CloneGroup>
Result;
332 std::vector<std::pair<size_t, StmtSequence>> StmtsByHash;
336 saveHash(S.front(), S.getContainingDecl(), StmtsByHash);
340 std::stable_sort(StmtsByHash.begin(), StmtsByHash.end(),
341 [](std::pair<size_t, StmtSequence> LHS,
342 std::pair<size_t, StmtSequence> RHS) {
343 return LHS.first < RHS.first;
350 for (
unsigned i = 0; i < StmtsByHash.size() - 1; ++i) {
351 const auto Current = StmtsByHash[i];
358 size_t PrototypeHash =
Current.first;
360 for (; i < StmtsByHash.size(); ++i) {
362 if (PrototypeHash != StmtsByHash[i].first ||
374 NewGroup.push_back(StmtsByHash[i].second);
379 Result.push_back(NewGroup);
387 const StmtSequence &Seq,
const std::string &ParentMacroStack) {
391 size_t Complexity = 1;
407 if (!ParentMacroStack.empty() && (StartMacroStack == ParentMacroStack &&
408 EndMacroStack == ParentMacroStack)) {
415 for (
const Stmt *S : Seq) {
417 StmtSequence(S, Seq.getContainingDecl()), StartMacroStack);
429 std::vector<CloneDetector::CloneGroup> &CloneGroups) {
439 std::vector<CloneDetector::CloneGroup> &CloneGroups,
441 std::vector<CloneDetector::CloneGroup>
Result;
442 for (
auto &HashGroup : CloneGroups) {
445 std::vector<char> Indexes;
446 Indexes.resize(HashGroup.size());
448 for (
unsigned i = 0; i < HashGroup.size(); ++i) {
462 for (
unsigned j = i + 1; j < HashGroup.size(); ++j) {
470 if (!Compare(Prototype, Candidate))
473 PotentialGroup.push_back(Candidate);
480 Result.push_back(PotentialGroup);
483 assert(std::all_of(Indexes.begin(), Indexes.end(),
484 [](
char c) {
return c == 1; }));
489 void VariablePattern::addVariableOccurence(
const VarDecl *
VarDecl,
490 const Stmt *Mention) {
492 for (
size_t KindIndex = 0; KindIndex < Variables.size(); ++KindIndex) {
493 if (Variables[KindIndex] == VarDecl) {
496 Occurences.emplace_back(KindIndex, Mention);
502 Occurences.emplace_back(Variables.size(), Mention);
503 Variables.push_back(VarDecl);
506 void VariablePattern::addVariables(
const Stmt *S) {
514 if (
auto D = dyn_cast<DeclRefExpr>(S)) {
516 addVariableOccurence(VD, D);
528 unsigned NumberOfDifferences = 0;
530 assert(Other.Occurences.size() == Occurences.size());
531 for (
unsigned i = 0; i < Occurences.size(); ++i) {
532 auto ThisOccurence = Occurences[i];
533 auto OtherOccurence = Other.Occurences[i];
534 if (ThisOccurence.KindID == OtherOccurence.KindID)
537 ++NumberOfDifferences;
541 if (FirstMismatch ==
nullptr)
546 if (NumberOfDifferences != 1)
549 const VarDecl *FirstSuggestion =
nullptr;
553 if (OtherOccurence.KindID < Variables.size())
554 FirstSuggestion = Variables[OtherOccurence.KindID];
559 Variables[ThisOccurence.KindID], ThisOccurence.Mention,
565 const VarDecl *SecondSuggestion =
nullptr;
566 if (ThisOccurence.KindID < Other.Variables.size())
567 SecondSuggestion = Other.Variables[ThisOccurence.KindID];
572 Other.Variables[OtherOccurence.KindID], OtherOccurence.Mention,
587 return NumberOfDifferences;
const Stmt * front() const
Returns the first statement in this sequence.
Defines the clang::ASTContext interface.
Stmt - This represents one statement.
Analyzes the pattern of the referenced variables in a statement.
static bool areSequencesClones(const StmtSequence &LHS, const StmtSequence &RHS)
Returns true if both sequences are clones of each other.
Decl - This represents one declaration (or definition), e.g.
static void splitCloneGroups(std::vector< CloneDetector::CloneGroup > &CloneGroups, std::function< bool(const StmtSequence &, const StmtSequence &)> Compare)
Splits the given CloneGroups until the given Compare function returns true for all clones in a single...
bool empty() const
Returns true if and only if this StmtSequence contains no statements.
bool isAutoGenerated(const CloneDetector::CloneGroup &Group)
VarDecl - An instance of this class is created to represent a variable declaration or definition...
void constrain(std::vector< CloneDetector::CloneGroup > &Sequences)
std::string getMacroStack(SourceLocation Loc, ASTContext &Context)
Returns a string that represents all macro expansions that expanded into the given SourceLocation...
StmtSequence()
Constructs an empty StmtSequence.
void analyzeCodeBody(const Decl *D)
Generates and stores search data for all statements in the body of the given Decl.
const Stmt * back() const
Returns the last statement in this sequence.
static void CollectStmtSequenceData(const StmtSequence &Sequence, FoldingSetNodeIDWrapper &OutputData)
Writes the relevant data from all statements and child statements in the given StmtSequence into the ...
Identifies a list of statements.
static bool containsAnyInGroup(StmtSequence &Seq, CloneDetector::CloneGroup &Group)
Returns true if and only if Seq contains at least one other sequence in the Group.
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
ASTContext & getASTContext() const
Returns the related ASTContext for the stored Stmts.
unsigned countPatternDifferences(const VariablePattern &Other, VariablePattern::SuspiciousClonePair *FirstMismatch=nullptr)
Counts the differences between this pattern and the given one.
const LangOptions & getLangOpts() const
SourceLocation getImmediateMacroCallerLoc(SourceLocation Loc) const
Gets the location of the immediate macro caller, one level up the stack toward the initial macro type...
static void printMacroName(llvm::raw_string_ostream &MacroStack, ASTContext &Context, SourceLocation Loc)
Prints the macro name that contains the given SourceLocation into the given raw_string_ostream.
detail::InMemoryDirectory::const_iterator I
virtual Decl * getCanonicalDecl()
Retrieves the "canonical" declaration of the given declaration.
Describes two clones that reference their variables in a different pattern which could indicate a pro...
CompoundStmt - This represents a group of statements like { stmt stmt }.
SourceLocation getLocEnd() const LLVM_READONLY
StringRef getFilename(SourceLocation SpellingLoc) const
Return the filename of the file containing a SourceLocation.
SuspiciousCloneInfo SecondCloneInfo
The other clone in the pair, which can have a suggested variable.
bool isBeforeInTranslationUnit(SourceLocation LHS, SourceLocation RHS) const
Determines the order of 2 source locations in the translation unit.
char __ovld __cnfn min(char x, char y)
Returns y if y < x, otherwise it returns x.
StringRef IgnoredFilesPattern
const VarDecl * Suggestion
The variable that should have been referenced to follow the pattern.
The result type of a method or function.
std::shared_ptr< llvm::Regex > IgnoredFilesRegex
static bool containsGroup(CloneDetector::CloneGroup &Group, CloneDetector::CloneGroup &OtherGroup)
Returns true if and only if all sequences in OtherGroup are contained by a sequence in Group...
Utility class holding the relevant information about a single clone in this pair. ...
bool contains(const StmtSequence &Other) const
Returns true if and only if this sequence covers a source range that contains the source range of the...
Encodes a location in the source.
ASTContext & getASTContext() const LLVM_READONLY
ASTContext & getASTContext() const
void constrain(std::vector< CloneDetector::CloneGroup > &CloneGroups)
size_t calculateStmtComplexity(const StmtSequence &Seq, const std::string &ParentMacroStack="")
const Stmt *const * iterator
virtual Stmt * getBody() const
getBody - If this Decl represents a declaration for a body of code, such as a function or method defi...
static StringRef getImmediateMacroName(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Retrieve the name of the immediate macro expansion.
SourceRange getSourceRange() const
Returns the source range of the whole sequence - from the beginning of the first statement to the end...
const Decl * getContainingDecl() const
Returns the declaration that contains the stored Stmts.
bool holdsSequence() const
Returns true if this object holds a list of statements.
unsigned size() const
Returns the number of statements this object holds.
iterator begin() const
Returns an iterator pointing to the first statement in this sequence.
SourceManager & getSourceManager()
SourceLocation getStartLoc() const
Returns the start source location of the first statement in this sequence.
static size_t createHash(llvm::MD5 &Hash)
SourceLocation getEndLoc() const
Returns the end source location of the last statement in this sequence.
SuspiciousCloneInfo FirstCloneInfo
The first clone in the pair which always has a suggested variable.
iterator end() const
Returns an iterator pointing behind the last statement in this sequence.
A trivial tuple used to represent a source range.
Collects the data of a single Stmt.
SourceLocation getLocStart() const LLVM_READONLY
virtual bool hasBody() const
Returns true if this Decl represents a declaration for a body of code, such as a function or method d...
This class handles loading and caching of source files into memory.
void constrain(std::vector< CloneDetector::CloneGroup > &Result)