25using namespace IRSimilarity;
31 cl::desc(
"disable similarity matching, and outlining, "
32 "across branches for debugging purposes."));
37 cl::desc(
"disable outlining indirect calls."));
41 cl::desc(
"only allow matching call instructions if the "
42 "name and type signature match."));
46 cl::desc(
"Don't match or outline intrinsics"));
51 : Inst(&
I),
Legal(Legality), IDL(&IDList) {
61 if (Predicate !=
C->getPredicate())
90 assert(isa<BranchInst>(
Inst) &&
"Instruction must be branch");
96 assert(BBNumIt != BasicBlockToInteger.
end() &&
97 "Could not find location for BasicBlock!");
99 int CurrentBlockNumber =
static_cast<int>(BBNumIt->second);
104 assert(BBNumIt != BasicBlockToInteger.
end() &&
105 "Could not find number for BasicBlock!");
106 int OtherBlockNumber =
static_cast<int>(BBNumIt->second);
108 int Relative = OtherBlockNumber - CurrentBlockNumber;
115 isa<PHINode>(
Inst)) &&
"Instruction must be branch or PHINode");
134 assert(CI &&
"Instruction must be call");
161 assert(isa<PHINode>(
Inst) &&
"Instruction must be phi node");
167 assert(BBNumIt != BasicBlockToInteger.
end() &&
168 "Could not find location for BasicBlock!");
170 int CurrentBlockNumber =
static_cast<int>(BBNumIt->second);
177 assert(BBNumIt != BasicBlockToInteger.
end() &&
178 "Could not find number for BasicBlock!");
179 int OtherBlockNumber =
static_cast<int>(BBNumIt->second);
181 int Relative = OtherBlockNumber - CurrentBlockNumber;
204 "Can only get a predicate from a compare instruction");
209 return cast<CmpInst>(
Inst)->getPredicate();
214 "Can only get a name from a call instruction");
224 if (!
A.Legal || !
B.Legal)
229 if (!
A.Inst->isSameOperationAs(
B.Inst)) {
233 if (isa<CmpInst>(
A.Inst) && isa<CmpInst>(
B.Inst)) {
235 if (
A.getPredicate() !=
B.getPredicate())
240 auto ZippedTypes =
zip(
A.OperVals,
B.OperVals);
243 ZippedTypes, [](std::tuple<llvm::Value *, llvm::Value *> R) {
244 return std::get<0>(R)->getType() == std::get<1>(R)->getType();
254 if (
auto *
GEP = dyn_cast<GetElementPtrInst>(
A.Inst)) {
255 auto *OtherGEP = cast<GetElementPtrInst>(
B.Inst);
259 if (
GEP->isInBounds() != OtherGEP->isInBounds())
262 auto ZippedOperands =
zip(
GEP->indices(), OtherGEP->indices());
268 [](std::tuple<llvm::Use &, llvm::Use &> R) {
269 return std::get<0>(R) == std::get<1>(R);
276 if (isa<CallInst>(
A.Inst) && isa<CallInst>(
B.Inst)) {
277 if (
A.getCalleeName() !=
B.getCalleeName())
281 if (isa<BranchInst>(
A.Inst) && isa<BranchInst>(
B.Inst) &&
282 A.RelativeBlockLocations.size() !=
B.RelativeBlockLocations.size())
291 BasicBlock &BB, std::vector<IRInstructionData *> &InstrList,
292 std::vector<unsigned> &IntegerMapping) {
295 std::vector<unsigned> IntegerMappingForBB;
296 std::vector<IRInstructionData *> InstrListForBB;
324 std::vector<IRInstructionData *> &InstrListForBB) {
338 InstrListForBB.push_back(
ID);
340 if (isa<BranchInst>(*It))
343 if (isa<CallInst>(*It))
346 if (isa<PHINode>(*It))
353 std::tie(ResultIt, WasInserted) =
355 unsigned INumber = ResultIt->second;
361 IntegerMappingForBB.push_back(INumber);
365 "Instruction mapping overflow!");
368 "Tried to assign DenseMap tombstone or empty key to instruction.");
370 "Tried to assign DenseMap tombstone or empty key to instruction.");
395 std::vector<IRInstructionData *> &InstrListForBB,
bool End) {
408 InstrListForBB.push_back(
ID);
416 "Instruction mapping overflow!");
419 "IllegalInstrNumber cannot be DenseMap tombstone or empty key!");
422 "IllegalInstrNumber cannot be DenseMap tombstone or empty key!");
430 : StartIdx(StartIdx), Len(Len) {
432 assert(FirstInstIt !=
nullptr &&
"Instruction is nullptr!");
433 assert(LastInstIt !=
nullptr &&
"Instruction is nullptr!");
434 assert(StartIdx + Len > StartIdx &&
435 "Overflow for IRSimilarityCandidate range?");
436 assert(Len - 1 ==
static_cast<unsigned>(std::distance(
438 "Length of the first and last IRInstructionData do not match the "
456 unsigned LocalValNumber = 1;
458 for (
unsigned Loc = StartIdx; Loc < StartIdx + Len; Loc++,
ID++) {
461 for (
Value *Arg :
ID->OperVals)
464 NumberToValue.try_emplace(LocalValNumber, Arg);
472 NumberToValue.try_emplace(LocalValNumber,
ID->Inst);
480 FirstInst = FirstInstIt;
481 LastInst = LastInstIt;
491 NumberToValue.try_emplace(LocalValNumber, BB);
498 if (
A.getLength() !=
B.getLength())
501 auto InstrDataForBoth =
504 return all_of(InstrDataForBoth,
505 [](std::tuple<IRInstructionData &, IRInstructionData &> R) {
508 if (!
A.Legal || !
B.Legal)
546 for (
Value *V : SourceOperands) {
547 ArgVal = SourceValueToNumberMapping.
find(V)->second;
550 std::tie(ValueMappingIt, WasInserted) = CurrentSrcTgtNumberMapping.insert(
551 std::make_pair(ArgVal, TargetValueNumbers));
557 for (
unsigned &Curr : ValueMappingIt->second)
559 if (TargetValueNumbers.
contains(Curr))
567 if (NewSet.
size() != ValueMappingIt->second.
size())
568 ValueMappingIt->second.
swap(NewSet);
572 if (ValueMappingIt->second.
size() != 1)
575 unsigned ValToRemove = *ValueMappingIt->second.
begin();
579 for (
Value *InnerV : SourceOperands) {
583 unsigned InnerVal = SourceValueToNumberMapping.
find(InnerV)->second;
584 ValueMappingIt = CurrentSrcTgtNumberMapping.
find(InnerVal);
585 if (ValueMappingIt == CurrentSrcTgtNumberMapping.
end())
588 ValueMappingIt->second.
erase(ValToRemove);
589 if (ValueMappingIt->second.
empty())
610 unsigned SourceArgVal,
unsigned TargetArgVal) {
629 std::tie(Val, WasInserted) = CurrentSrcTgtNumberMapping.
insert(
642 if (TargetSet.
size() > 1 && TargetSet.
contains(TargetArgVal)) {
644 TargetSet.
insert(TargetArgVal);
649 return TargetSet.
contains(TargetArgVal);
658 unsigned OperandLength =
A.OperVals.size();
661 for (
unsigned Idx = 0;
Idx < OperandLength;
Idx++, VItA++, VItB++) {
662 unsigned OperValA =
A.IRSC.ValueToNumber.find(*VItA)->second;
663 unsigned OperValB =
B.IRSC.ValueToNumber.find(*VItB)->second;
693 unsigned OperandLength =
A.OperVals.size();
696 for (
unsigned Idx = 0;
Idx < OperandLength;
697 Idx++, VItA++, VItB++) {
698 ValueNumbersA.
insert(
A.IRSC.ValueToNumber.find(*VItA)->second);
699 ValueNumbersB.
insert(
B.IRSC.ValueToNumber.find(*VItB)->second);
706 A.ValueNumberMapping,
A.OperVals,
714 B.ValueNumberMapping,
B.OperVals,
722 const unsigned InstValA,
const unsigned &InstValB,
727 std::tie(ValueMappingIt, WasInserted) = ValueNumberMappingA.insert(
729 if (!WasInserted && !ValueMappingIt->second.
contains(InstValB))
731 else if (ValueMappingIt->second.
size() != 1) {
732 for (
unsigned OtherVal : ValueMappingIt->second) {
733 if (OtherVal == InstValB)
735 if (!ValueNumberMappingA.contains(OtherVal))
737 if (!ValueNumberMappingA[OtherVal].
contains(InstValA))
739 ValueNumberMappingA[OtherVal].erase(InstValA);
741 ValueNumberMappingA.erase(ValueMappingIt);
742 std::tie(ValueMappingIt, WasInserted) = ValueNumberMappingA.insert(
758 A.IRSC.getBasicBlocks(BasicBlockA);
759 B.IRSC.getBasicBlocks(BasicBlockB);
762 bool AContained = BasicBlockA.
contains(ABB);
763 bool BContained = BasicBlockB.
contains(BBB);
767 if (AContained != BContained)
773 return A.RelativeLocation ==
B.RelativeLocation;
793 if (
A.getLength() !=
B.getLength())
796 if (
A.ValueToNumber.size() !=
B.ValueToNumber.size())
808 unsigned SectionLength =
A.getStartIdx() +
A.getLength();
809 for (
unsigned Loc =
A.getStartIdx(); Loc < SectionLength;
810 ItA++, ItB++, Loc++) {
818 if (!ItA->Legal || !ItB->Legal)
825 unsigned InstValA =
A.ValueToNumber.find(IA)->second;
826 unsigned InstValB =
B.ValueToNumber.find(IB)->second;
830 ValueNumberMappingB))
834 ValueNumberMappingA))
840 if (IA->isCommutative() && !isa<FPMathOperator>(IA) &&
841 !isa<IntrinsicInst>(IA)) {
843 {
A, OperValsA, ValueNumberMappingA},
844 {
B, OperValsB, ValueNumberMappingB}))
851 {
A, OperValsA, ValueNumberMappingA},
852 {
B, OperValsB, ValueNumberMappingB}))
866 if (!(isa<BranchInst>(IA) && isa<BranchInst>(IB)) &&
867 !(isa<PHINode>(IA) && isa<PHINode>(IB)))
877 if (RelBlockLocsA.
size() != RelBlockLocsB.
size() &&
882 "Block information vectors not the same size.");
884 "Block information vectors not the same size.");
887 zip(RelBlockLocsA, RelBlockLocsB, ABL, BBL);
888 if (
any_of(ZippedRelativeLocations,
889 [&
A, &
B](std::tuple<int, int, Value *, Value *> R) {
891 {
A, std::get<0>(R), std::get<2>(R)},
892 {
B, std::get<1>(R), std::get<3>(R)});
906 return X.StartIdx <=
Y.getEndIdx() &&
Y.StartIdx >=
X.StartIdx;
909 return DoesOverlap(
A,
B) || DoesOverlap(
B,
A);
912void IRSimilarityIdentifier::populateMapper(
913 Module &M, std::vector<IRInstructionData *> &InstrList,
914 std::vector<unsigned> &IntegerMapping) {
916 std::vector<IRInstructionData *> InstrListForModule;
917 std::vector<unsigned> IntegerMappingForModule;
933 IntegerMappingForModule);
939 if (InstrListForModule.size() > 0)
950void IRSimilarityIdentifier::populateMapper(
951 ArrayRef<std::unique_ptr<Module>> &Modules,
952 std::vector<IRInstructionData *> &InstrList,
953 std::vector<unsigned> &IntegerMapping) {
956 for (
const std::unique_ptr<Module> &M : Modules)
957 populateMapper(*M, InstrList, IntegerMapping);
972 std::vector<IRSimilarityCandidate> &CandsForRepSubstring) {
974 unsigned StringLen = RS.
Length;
980 unsigned EndIdx = StartIdx + StringLen - 1;
983 bool ContainsIllegal =
false;
984 for (
unsigned CurrIdx = StartIdx; CurrIdx <= EndIdx; CurrIdx++) {
985 unsigned Key = IntegerMapping[CurrIdx];
987 ContainsIllegal =
true;
1000 std::vector<IRInstructionData *>::iterator StartIt = InstrList.
begin();
1001 std::advance(StartIt, StartIdx);
1002 std::vector<IRInstructionData *>::iterator EndIt = InstrList.
begin();
1003 std::advance(EndIt, EndIdx);
1005 CandsForRepSubstring.emplace_back(StartIdx, StringLen, *StartIt, *EndIt);
1013 assert(SourceCand.CanonNumToNumber.
size() != 0 &&
1014 "Base canonical relationship is empty!");
1015 assert(SourceCand.NumberToCanonNum.
size() != 0 &&
1016 "Base canonical relationship is empty!");
1018 assert(CanonNumToNumber.
size() == 0 &&
"Canonical Relationship is non-empty");
1019 assert(NumberToCanonNum.
size() == 0 &&
"Canonical Relationship is non-empty");
1026 unsigned SourceGVN = GVNMapping.first;
1028 assert(GVNMapping.second.size() != 0 &&
"Possible GVNs is 0!");
1035 if (GVNMapping.second.size() > 1) {
1037 for (
unsigned Val : GVNMapping.second) {
1044 FromSourceMapping.find(Val);
1046 if (!It->second.
contains(SourceGVN))
1055 assert(Found &&
"Could not find matching value for source GVN");
1059 ResultGVN = *GVNMapping.second.begin();
1062 UsedGVNs.
insert(ResultGVN);
1065 CanonNumToNumber.
insert(std::make_pair(CanonNum, SourceGVN));
1066 NumberToCanonNum.
insert(std::make_pair(SourceGVN, CanonNum));
1078 unsigned BBGVNForCurrCand = ValueToNumber.
find(BB)->second;
1082 if (NumberToCanonNum.
contains(BBGVNForCurrCand))
1090 : &*BB->instructionsWithoutDebug().begin();
1092 unsigned FirstInstGVN = *
getGVN(FirstOutlineInst);
1097 unsigned SourceBBGVN = *SourceCand.
getGVN(SourceBB);
1099 CanonNumToNumber.
insert(std::make_pair(SourceCanonBBGVN, BBGVNForCurrCand));
1100 NumberToCanonNum.
insert(std::make_pair(BBGVNForCurrCand, SourceCanonBBGVN));
1108 "Canonical Relationship is non-empty");
1110 "Canonical Relationship is non-empty");
1112 assert(!SourceCandLarge.CanonNumToNumber.
empty() &&
1113 "Canonical Relationship is non-empty");
1114 assert(!SourceCandLarge.NumberToCanonNum.
empty() &&
1115 "Canonical Relationship is non-empty");
1117 assert(!TargetCandLarge.CanonNumToNumber.
empty() &&
1118 "Canonical Relationship is non-empty");
1119 assert(!TargetCandLarge.NumberToCanonNum.
empty() &&
1120 "Canonical Relationship is non-empty");
1122 assert(CanonNumToNumber.
empty() &&
"Canonical Relationship is non-empty");
1123 assert(NumberToCanonNum.
empty() &&
"Canonical Relationship is non-empty");
1129 for (std::pair<Value *, unsigned> &ValueNumPair : ValueToNumber) {
1130 Value *CurrVal = ValueNumPair.first;
1131 unsigned TargetCandGVN = ValueNumPair.second;
1135 std::optional<unsigned> OLargeTargetGVN = TargetCandLarge.
getGVN(CurrVal);
1136 assert(OLargeTargetGVN.has_value() &&
"GVN not found for Value");
1139 std::optional<unsigned> OTargetCandCanon =
1141 assert(OTargetCandCanon.has_value() &&
1142 "Canononical Number not found for GVN");
1145 std::optional<unsigned> OLargeSourceGVN =
1147 assert(OLargeSourceGVN.has_value() &&
1148 "GVN Number not found for Canonical Number");
1151 std::optional<Value *> OLargeSourceV =
1152 SourceCandLarge.
fromGVN(OLargeSourceGVN.value());
1153 assert(OLargeSourceV.has_value() &&
"Value not found for GVN");
1156 std::optional<unsigned> OSourceGVN =
1157 SourceCand.
getGVN(OLargeSourceV.value());
1158 assert(OSourceGVN.has_value() &&
"GVN Number not found for Value");
1161 std::optional<unsigned> OSourceCanon =
1163 assert(OSourceCanon.has_value() &&
"Canon Number not found for GVN");
1168 std::make_pair(OSourceCanon.value(), TargetCandGVN));
1170 std::make_pair(TargetCandGVN, OSourceCanon.value()));
1176 assert(CurrCand.CanonNumToNumber.
size() == 0 &&
1177 "Canonical Relationship is non-empty");
1178 assert(CurrCand.NumberToCanonNum.
size() == 0 &&
1179 "Canonical Relationship is non-empty");
1181 unsigned CanonNum = 0;
1184 for (std::pair<unsigned, Value *> &NumToVal : CurrCand.NumberToValue) {
1185 CurrCand.NumberToCanonNum.
insert(std::make_pair(NumToVal.first, CanonNum));
1186 CurrCand.CanonNumToNumber.
insert(std::make_pair(CanonNum, NumToVal.first));
1204static std::optional<
1205 std::pair<IRSimilarityCandidate *, IRSimilarityCandidate *>>
1217 auto IdxToCandidateIt = IndexToIncludedCand.
find(CandA.
getStartIdx());
1218 std::optional<std::pair<IRSimilarityCandidate *, IRSimilarityCandidate *>>
1225 if (IdxToCandidateIt == IndexToIncludedCand.end())
1231 unsigned GroupNum = CandToGroup.
find(MatchedCand)->second;
1232 IncludedGroupAndCandA.
insert(std::make_pair(GroupNum, MatchedCand));
1233 IncludedGroupsA.
insert(GroupNum);
1238 IdxToCandidateIt = IndexToIncludedCand.find(CandBStart);
1239 if (IdxToCandidateIt == IndexToIncludedCand.end())
1245 unsigned GroupNum = CandToGroup.
find(MatchedCand)->second;
1246 IncludedGroupAndCandB.
insert(std::make_pair(GroupNum, MatchedCand));
1247 IncludedGroupsB.
insert(GroupNum);
1256 if (IncludedGroupsA.
empty())
1260 auto ItA = IncludedGroupAndCandA.
find(*IncludedGroupsA.
begin());
1261 auto ItB = IncludedGroupAndCandB.
find(*IncludedGroupsA.
begin());
1262 Result = std::make_pair(ItA->second, ItB->second);
1280 std::vector<IRSimilarityCandidate> &CandsForRepSubstring,
1285 std::vector<IRSimilarityCandidate>::iterator CandIt, CandEndIt, InnerCandIt,
1300 unsigned CurrentGroupNum = 0;
1301 unsigned OuterGroupNum;
1310 for (CandIt = CandsForRepSubstring.begin(),
1311 CandEndIt = CandsForRepSubstring.end();
1312 CandIt != CandEndIt; CandIt++) {
1315 CandToGroupIt = CandToGroup.
find(&*CandIt);
1316 if (CandToGroupIt == CandToGroup.
end()) {
1318 std::tie(CandToGroupIt, Inserted) =
1319 CandToGroup.
insert(std::make_pair(&*CandIt, CurrentGroupNum++));
1323 OuterGroupNum = CandToGroupIt->second;
1327 CurrentGroupPair = StructuralGroups.
find(OuterGroupNum);
1328 if (CurrentGroupPair == StructuralGroups.
end()) {
1330 std::tie(CurrentGroupPair, Inserted) = StructuralGroups.
insert(
1338 for (InnerCandIt = std::next(CandIt),
1339 InnerCandEndIt = CandsForRepSubstring.end();
1340 InnerCandIt != InnerCandEndIt; InnerCandIt++) {
1343 CandToGroupItInner = CandToGroup.
find(&*InnerCandIt);
1344 if (CandToGroupItInner != CandToGroup.
end())
1349 std::optional<std::pair<IRSimilarityCandidate *, IRSimilarityCandidate *>>
1351 *CandIt, *InnerCandIt, IndexToIncludedCand, CandToOverallGroup);
1356 if (LargerPair.has_value()) {
1357 SameStructure =
true;
1358 InnerCandIt->createCanonicalRelationFrom(
1359 *CandIt, *LargerPair.value().first, *LargerPair.value().second);
1360 CandToGroup.
insert(std::make_pair(&*InnerCandIt, OuterGroupNum));
1361 CurrentGroupPair->second.push_back(*InnerCandIt);
1367 ValueNumberMappingA.
clear();
1368 ValueNumberMappingB.
clear();
1370 *CandIt, *InnerCandIt, ValueNumberMappingA, ValueNumberMappingB);
1374 InnerCandIt->createCanonicalRelationFrom(*CandIt, ValueNumberMappingA,
1375 ValueNumberMappingB);
1376 CandToGroup.
insert(std::make_pair(&*InnerCandIt, OuterGroupNum));
1377 CurrentGroupPair->second.push_back(*InnerCandIt);
1382void IRSimilarityIdentifier::findCandidates(
1383 std::vector<IRInstructionData *> &InstrList,
1384 std::vector<unsigned> &IntegerMapping) {
1387 std::vector<IRSimilarityCandidate> CandsForRepSubstring;
1388 std::vector<SimilarityGroup> NewCandidateGroups;
1399 std::vector<SuffixTree::RepeatedSubstring> RSes;
1405 return LHS.Length >
RHS.Length;
1409 CandsForRepSubstring);
1411 if (CandsForRepSubstring.size() < 2)
1415 IndexToIncludedCand, CandToGroup);
1416 for (std::pair<unsigned, SimilarityGroup> &Group : StructuralGroups) {
1420 if (Group.second.size() > 1) {
1421 SimilarityCandidates->push_back(Group.second);
1426 for (
unsigned Idx = IRCand.getStartIdx(), Edx = IRCand.getEndIdx();
1430 IdIt = IndexToIncludedCand.
find(
Idx);
1432 if (IdIt == IndexToIncludedCand.
end())
1433 std::tie(IdIt, Inserted) = IndexToIncludedCand.
insert(
1435 IdIt->second.
insert(&IRCand);
1439 std::make_pair(&IRCand, SimilarityCandidates->size() - 1));
1444 CandsForRepSubstring.clear();
1445 StructuralGroups.clear();
1446 NewCandidateGroups.clear();
1451 ArrayRef<std::unique_ptr<Module>> Modules) {
1454 std::vector<IRInstructionData *> InstrList;
1455 std::vector<unsigned> IntegerMapping;
1462 populateMapper(Modules, InstrList, IntegerMapping);
1463 findCandidates(InstrList, IntegerMapping);
1465 return *SimilarityCandidates;
1476 std::vector<IRInstructionData *> InstrList;
1477 std::vector<unsigned> IntegerMapping;
1479 populateMapper(M, InstrList, IntegerMapping);
1480 findCandidates(InstrList, IntegerMapping);
1482 return *SimilarityCandidates;
1486 "ir-similarity-identifier",
false,
true)
1507 IRSI->findSimilarity(M);
1517 IRSI.findSimilarity(M);
1524 std::optional<SimilarityGroupList> &SimilarityCandidatesOpt =
1527 for (std::vector<IRSimilarityCandidate> &CandVec : *SimilarityCandidatesOpt) {
1528 OS << CandVec.size() <<
" candidates of length "
1529 << CandVec.begin()->getLength() <<
". Found in: \n";
1531 OS <<
" Function: " << Cand.front()->Inst->getFunction()->getName().str()
1532 <<
", Basic Block: ";
1533 if (Cand.front()->Inst->getParent()->getName().str() ==
"")
1536 OS << Cand.front()->Inst->getParent()->getName().str();
1537 OS <<
"\n Start Instruction: ";
1538 Cand.frontInstruction()->print(
OS);
1539 OS <<
"\n End Instruction: ";
1540 Cand.backInstruction()->print(
OS);
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file defines the DenseMap class.
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
bool checkNumberingAndReplace(DenseMap< unsigned, DenseSet< unsigned > > &CurrentSrcTgtNumberMapping, unsigned SourceArgVal, unsigned TargetArgVal)
Determine if operand number TargetArgVal is in the current mapping set for operand number SourceArgVa...
detail::zippy< detail::zip_shortest, SmallVector< int, 4 > &, SmallVector< int, 4 > &, ArrayRef< Value * > &, ArrayRef< Value * > & > ZippedRelativeLocationsT
static void findCandidateStructures(std::vector< IRSimilarityCandidate > &CandsForRepSubstring, DenseMap< unsigned, SimilarityGroup > &StructuralGroups, DenseMap< unsigned, DenseSet< IRSimilarityCandidate * > > &IndexToIncludedCand, DenseMap< IRSimilarityCandidate *, unsigned > &CandToOverallGroup)
From the list of IRSimilarityCandidates, perform a comparison between each IRSimilarityCandidate to d...
static void createCandidatesFromSuffixTree(const IRInstructionMapper &Mapper, std::vector< IRInstructionData * > &InstrList, std::vector< unsigned > &IntegerMapping, SuffixTree::RepeatedSubstring &RS, std::vector< IRSimilarityCandidate > &CandsForRepSubstring)
From a repeated subsequence, find all the different instances of the subsequence from the InstrList,...
static bool checkNumberingAndReplaceCommutative(const DenseMap< Value *, unsigned > &SourceValueToNumberMapping, DenseMap< unsigned, DenseSet< unsigned > > &CurrentSrcTgtNumberMapping, ArrayRef< Value * > &SourceOperands, DenseSet< unsigned > &TargetValueNumbers)
Determine if one or more of the assigned global value numbers for the operands in TargetValueNumbers ...
static std::optional< std::pair< IRSimilarityCandidate *, IRSimilarityCandidate * > > CheckLargerCands(IRSimilarityCandidate &CandA, IRSimilarityCandidate &CandB, DenseMap< unsigned, DenseSet< IRSimilarityCandidate * > > &IndexToIncludedCand, DenseMap< IRSimilarityCandidate *, unsigned > &CandToGroup)
Look for larger IRSimilarityCandidates From the previously matched IRSimilarityCandidates that fully ...
uint64_t IntrinsicInst * II
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines generic set operations that may be used on set's of different types,...
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
LLVM Basic Block Representation.
iterator begin()
Instruction iterator methods.
const Function * getParent() const
Return the enclosing method, or null if none.
InstListType::iterator iterator
Instruction iterators...
Conditional or Unconditional Branch instruction.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
bool isIndirectCall() const
Return true if the callsite is an indirect call.
This class represents a function call, abstracting a target machine's calling convention.
This class is the base class for the comparison instructions.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
@ ICMP_UGE
unsigned greater or equal
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
@ ICMP_SGE
signed greater or equal
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Predicate getPredicate() const
Return the predicate for this instruction.
iterator find(const_arg_type_t< KeyT > Val)
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&... Args)
bool erase(const KeyT &Val)
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Implements a dense probed hash-table based set.
Class to represent function types.
ArrayRef< Type * > params() const
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
An analysis pass that runs and returns the IRSimilarityIdentifier run on the Module.
Result run(Module &M, ModuleAnalysisManager &)
An analysis pass based on legacy pass manager that runs and returns IRSimilarityIdentifier run on the...
bool doInitialization(Module &M) override
doInitialization - Virtual method overridden by subclasses to do any necessary initialization before ...
bool doFinalization(Module &M) override
doFinalization - Virtual method overriden by subclasses to do any necessary clean up after all passes...
bool runOnModule(Module &M) override
runOnModule - Virtual method overriden by subclasses to process the module being operated on.
This is a class that wraps a range of IRInstructionData from one point to another in the vector of IR...
void createCanonicalRelationFrom(IRSimilarityCandidate &SourceCand, DenseMap< unsigned, DenseSet< unsigned > > &ToSourceMapping, DenseMap< unsigned, DenseSet< unsigned > > &FromSourceMapping)
Create a mapping for the value numbering of the calling IRSimilarityCandidate, to a different separat...
static bool isSimilar(const IRSimilarityCandidate &A, const IRSimilarityCandidate &B)
std::optional< unsigned > getGVN(Value *V)
Finds the positive number associated with V if it has been mapped.
unsigned getEndIdx() const
static bool compareAssignmentMapping(const unsigned InstValA, const unsigned &InstValB, DenseMap< unsigned, DenseSet< unsigned > > &ValueNumberMappingA, DenseMap< unsigned, DenseSet< unsigned > > &ValueNumberMappingB)
Compare the GVN of the assignment value in corresponding instructions in IRSimilarityCandidates A and...
void getBasicBlocks(DenseSet< BasicBlock * > &BBSet) const
std::optional< Value * > fromGVN(unsigned Num)
Finds the Value associate with Num if it exists.
IRInstructionDataList::iterator iterator
Instruction * frontInstruction()
static bool checkRelativeLocations(RelativeLocMapping A, RelativeLocMapping B)
Compare the relative locations in A and B and check that the distances match if both locations are co...
static bool compareStructure(const IRSimilarityCandidate &A, const IRSimilarityCandidate &B)
static void createCanonicalMappingFor(IRSimilarityCandidate &CurrCand)
Create a mapping from the value numbering to a different separate set of numbers.
static bool overlap(const IRSimilarityCandidate &A, const IRSimilarityCandidate &B)
Compare the start and end indices of the two IRSimilarityCandidates for whether they overlap.
unsigned getStartIdx() const
BasicBlock * getStartBB()
IRSimilarityCandidate(unsigned StartIdx, unsigned Len, IRInstructionData *FirstInstIt, IRInstructionData *LastInstIt)
std::optional< unsigned > getCanonicalNum(unsigned N)
Find the canonical number from the global value number N stored in the candidate.
std::optional< unsigned > fromCanonicalNum(unsigned N)
Find the global value number from the canonical number N stored in the candidate.
static bool compareNonCommutativeOperandMapping(OperandMapping A, OperandMapping B)
Compare the operands in A and B and check that the current mapping of global value numbers from A to ...
static bool compareCommutativeOperandMapping(OperandMapping A, OperandMapping B)
Compare the operands in A and B and check that the current mapping of global value numbers from A to ...
This class puts all the pieces of the IRInstructionData, IRInstructionMapper, IRSimilarityCandidate t...
void resetSimilarityCandidates()
SimilarityGroupList & findSimilarity(ArrayRef< std::unique_ptr< Module > > Modules)
std::optional< SimilarityGroupList > & getSimilarity()
void visit(Iterator Start, Iterator End)
A wrapper class for inspecting calls to intrinsic functions.
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
A Module instance is used to store all the information related to an LLVM module.
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
iterator insert(iterator I, T &&Elt)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
std::string str() const
str - Get the contents as an std::string.
A Use represents the edge between a Value definition and its users.
LLVM Value Representation.
StringRef getName() const
Return a constant reference to the value's name.
std::pair< iterator, bool > insert(const ValueT &V)
iterator find(const_arg_type_t< ValueT > V)
bool contains(const_arg_type_t< ValueT > V) const
Check if the set contains the given element.
const ParentTy * getParent() const
typename ilist_select_iterator_type< OptionsT::has_iterator_bits, OptionsT, false, false >::type iterator
void push_back(reference Node)
Insert a node at the back; never copies.
@ C
The default llvm calling convention, compatible with C.
std::vector< SimilarityGroup > SimilarityGroupList
std::vector< IRSimilarityCandidate > SimilarityGroup
bool isClose(const IRInstructionData &A, const IRInstructionData &B)
Compare one IRInstructionData class to another IRInstructionData class for whether they are performin...
StringRef getName(ID id)
Return the LLVM name for an intrinsic, such as "llvm.ppc.altivec.lvx".
bool isOverloaded(ID id)
Returns true if the intrinsic can be overloaded.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
void stable_sort(R &&Range)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
void set_intersect(S1Ty &S1, const S2Ty &S2)
set_intersect(A, B) - Compute A := A ^ B Identical to set_intersection, except that it works on set<>...
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
cl::opt< bool > DisableIndirectCalls("no-ir-sim-indirect-calls", cl::init(false), cl::ReallyHidden, cl::desc("disable outlining indirect calls."))
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
cl::opt< bool > DisableBranches("no-ir-sim-branch-matching", cl::init(false), cl::ReallyHidden, cl::desc("disable similarity matching, and outlining, " "across branches for debugging purposes."))
cl::opt< bool > DisableIntrinsics("no-ir-sim-intrinsics", cl::init(false), cl::ReallyHidden, cl::desc("Don't match or outline intrinsics"))
void initializeIRSimilarityIdentifierWrapperPassPass(PassRegistry &)
cl::opt< bool > MatchCallsByName("ir-sim-calls-by-name", cl::init(false), cl::ReallyHidden, cl::desc("only allow matching call instructions if the " "name and type signature match."))
A special type used by analysis passes to provide an address that identifies that particular analysis...
An information struct used to provide DenseMap with the various necessary components for a given valu...
This provides the utilities for hashing an Instruction to an unsigned integer.
StringRef getCalleeName() const
Get the callee name that the call instruction is using for hashing the instruction.
void initializeInstruction()
Fills data stuctures for IRInstructionData when it is constructed from a.
SmallVector< int, 4 > RelativeBlockLocations
This structure holds the distances of how far "ahead of" or "behind" the target blocks of a branch,...
std::optional< std::string > CalleeName
This is only relevant if we are wrapping a CallInst.
IRInstructionData(Instruction &I, bool Legality, IRInstructionDataList &IDL)
Gather the information that is difficult to gather for an Instruction, or is changed.
ArrayRef< Value * > getBlockOperVals()
Get the BasicBlock based operands for PHINodes and BranchInsts.
Instruction * Inst
The source Instruction that is being wrapped.
static CmpInst::Predicate predicateForConsistency(CmpInst *CI)
A function that swaps the predicates to their less than form if they are in a greater than form.
void setPHIPredecessors(DenseMap< BasicBlock *, unsigned > &BasicBlockToInteger)
For an IRInstructionData containing a PHINode, finds the relative distances from the incoming basic b...
SmallVector< Value *, 4 > OperVals
The values of the operands in the Instruction.
void setBranchSuccessors(DenseMap< BasicBlock *, unsigned > &BasicBlockToInteger)
For an IRInstructionData containing a branch, finds the relative distances from the source basic bloc...
void setCalleeName(bool MatchByName=true)
For an IRInstructionData containing a CallInst, set the function name appropriately.
CmpInst::Predicate getPredicate() const
Get the predicate that the compare instruction is using for hashing the instruction.
std::optional< CmpInst::Predicate > RevisedPredicate
This is only relevant if we are wrapping a CmpInst where we needed to change the predicate of a compa...
Helper struct for converting the Instructions in a Module into a vector of unsigned integers.
DenseMap< IRInstructionData *, unsigned, IRInstructionDataTraits > InstructionIntegerMap
Correspondence from IRInstructionData to unsigned integers.
SpecificBumpPtrAllocator< IRInstructionDataList > * IDLAllocator
This allocator pointer is in charge of creating the IRInstructionDataList so it is not deallocated un...
SpecificBumpPtrAllocator< IRInstructionData > * InstDataAllocator
This allocator pointer is in charge of holding on to the IRInstructionData so it is not deallocated u...
bool EnableMatchCallsByName
Marks whether we should use exact function names, as well as types to find similarity between calls.
unsigned LegalInstrNumber
The next available integer to assign to a legal Instruction to.
unsigned IllegalInstrNumber
The starting illegal instruction number to map to.
InstructionClassification InstClassifier
Maps an Instruction to a member of InstrType.
void initializeForBBs(Function &F, unsigned &BBNumber)
Assigns values to all the basic blocks in function F starting from integer BBNumber.
bool HaveLegalRange
Marks whether we have found a set of instructions that is long enough to be considered for similarity...
IRInstructionData * allocateIRInstructionData(Instruction &I, bool Legality, IRInstructionDataList &IDL)
Get an allocated IRInstructionData struct using the InstDataAllocator.
bool CanCombineWithPrevInstr
Marks whether we found a illegal instruction in the previous step.
IRInstructionDataList * IDL
DenseMap< BasicBlock *, unsigned > BasicBlockToInteger
A mapping for a basic block in a module to its assigned number/location in the module.
IRInstructionDataList * allocateIRInstructionDataList()
Get an allocated IRInstructionDataList object using the IDLAllocator.
unsigned mapToLegalUnsigned(BasicBlock::iterator &It, std::vector< unsigned > &IntegerMappingForBB, std::vector< IRInstructionData * > &InstrListForBB)
Maps an Instruction to a legal integer.
void convertToUnsignedVec(BasicBlock &BB, std::vector< IRInstructionData * > &InstrList, std::vector< unsigned > &IntegerMapping)
Maps the Instructions in a BasicBlock BB to legal or illegal integers determined by InstrType.
bool AddedIllegalLastTime
Set if we added an illegal number in the previous step.
unsigned mapToIllegalUnsigned(BasicBlock::iterator &It, std::vector< unsigned > &IntegerMappingForBB, std::vector< IRInstructionData * > &InstrListForBB, bool End=false)
Maps an Instruction to an illegal integer.
A helper struct to hold the candidate, for a branch instruction, the relative location of a label,...
Incoming for lane maks phi as machine instruction, incoming register Reg and incoming block Block are...
A repeated substring in the tree.
SmallVector< unsigned > StartIndices
The start indices of each occurrence.
unsigned Length
The length of the string.