25using namespace IRSimilarity;
31 cl::desc(
"disable similarity matching, and outlining, "
32 "across branches for debugging purposes."));
37 cl::desc(
"disable outlining indirect calls."));
41 cl::desc(
"only allow matching call instructions if the "
42 "name and type signature match."));
46 cl::desc(
"Don't match or outline intrinsics"));
51 : Inst(&
I),
Legal(Legality), IDL(&IDList) {
61 if (Predicate !=
C->getPredicate())
90 assert(isa<BranchInst>(
Inst) &&
"Instruction must be branch");
96 assert(BBNumIt != BasicBlockToInteger.
end() &&
97 "Could not find location for BasicBlock!");
99 int CurrentBlockNumber =
static_cast<int>(BBNumIt->second);
104 assert(BBNumIt != BasicBlockToInteger.
end() &&
105 "Could not find number for BasicBlock!");
106 int OtherBlockNumber =
static_cast<int>(BBNumIt->second);
108 int Relative = OtherBlockNumber - CurrentBlockNumber;
115 isa<PHINode>(
Inst)) &&
"Instruction must be branch or PHINode");
134 assert(CI &&
"Instruction must be call");
161 assert(isa<PHINode>(
Inst) &&
"Instruction must be phi node");
167 assert(BBNumIt != BasicBlockToInteger.
end() &&
168 "Could not find location for BasicBlock!");
170 int CurrentBlockNumber =
static_cast<int>(BBNumIt->second);
177 assert(BBNumIt != BasicBlockToInteger.
end() &&
178 "Could not find number for BasicBlock!");
179 int OtherBlockNumber =
static_cast<int>(BBNumIt->second);
181 int Relative = OtherBlockNumber - CurrentBlockNumber;
204 "Can only get a predicate from a compare instruction");
209 return cast<CmpInst>(
Inst)->getPredicate();
214 "Can only get a name from a call instruction");
224 if (!
A.Legal || !
B.Legal)
229 if (!
A.Inst->isSameOperationAs(
B.Inst)) {
233 if (isa<CmpInst>(
A.Inst) && isa<CmpInst>(
B.Inst)) {
235 if (
A.getPredicate() !=
B.getPredicate())
240 auto ZippedTypes =
zip(
A.OperVals,
B.OperVals);
243 ZippedTypes, [](std::tuple<llvm::Value *, llvm::Value *> R) {
244 return std::get<0>(R)->getType() == std::get<1>(R)->getType();
254 if (
auto *
GEP = dyn_cast<GetElementPtrInst>(
A.Inst)) {
255 auto *OtherGEP = cast<GetElementPtrInst>(
B.Inst);
259 if (
GEP->isInBounds() != OtherGEP->isInBounds())
262 auto ZippedOperands =
zip(
GEP->indices(), OtherGEP->indices());
268 [](std::tuple<llvm::Use &, llvm::Use &> R) {
269 return std::get<0>(R) == std::get<1>(R);
276 if (isa<CallInst>(
A.Inst) && isa<CallInst>(
B.Inst)) {
277 if (
A.getCalleeName() !=
B.getCalleeName())
281 if (isa<BranchInst>(
A.Inst) && isa<BranchInst>(
B.Inst) &&
282 A.RelativeBlockLocations.size() !=
B.RelativeBlockLocations.size())
291 BasicBlock &BB, std::vector<IRInstructionData *> &InstrList,
292 std::vector<unsigned> &IntegerMapping) {
295 std::vector<unsigned> IntegerMappingForBB;
296 std::vector<IRInstructionData *> InstrListForBB;
324 std::vector<IRInstructionData *> &InstrListForBB) {
338 InstrListForBB.push_back(
ID);
340 if (isa<BranchInst>(*It))
343 if (isa<CallInst>(*It))
346 if (isa<PHINode>(*It))
353 std::tie(ResultIt, WasInserted) =
355 unsigned INumber = ResultIt->second;
361 IntegerMappingForBB.push_back(INumber);
365 "Instruction mapping overflow!");
368 "Tried to assign DenseMap tombstone or empty key to instruction.");
370 "Tried to assign DenseMap tombstone or empty key to instruction.");
395 std::vector<IRInstructionData *> &InstrListForBB,
bool End) {
408 InstrListForBB.push_back(
ID);
416 "Instruction mapping overflow!");
419 "IllegalInstrNumber cannot be DenseMap tombstone or empty key!");
422 "IllegalInstrNumber cannot be DenseMap tombstone or empty key!");
430 : StartIdx(StartIdx), Len(Len) {
432 assert(FirstInstIt !=
nullptr &&
"Instruction is nullptr!");
433 assert(LastInstIt !=
nullptr &&
"Instruction is nullptr!");
434 assert(StartIdx + Len > StartIdx &&
435 "Overflow for IRSimilarityCandidate range?");
436 assert(Len - 1 ==
static_cast<unsigned>(std::distance(
438 "Length of the first and last IRInstructionData do not match the "
456 unsigned LocalValNumber = 1;
458 for (
unsigned Loc = StartIdx; Loc < StartIdx + Len; Loc++,
ID++) {
461 for (
Value *Arg :
ID->OperVals)
462 if (ValueToNumber.
try_emplace(Arg, LocalValNumber).second) {
463 NumberToValue.try_emplace(LocalValNumber, Arg);
469 if (ValueToNumber.
try_emplace(
ID->Inst, LocalValNumber).second) {
470 NumberToValue.try_emplace(LocalValNumber,
ID->Inst);
478 FirstInst = FirstInstIt;
479 LastInst = LastInstIt;
485 if (ValueToNumber.
try_emplace(BB, LocalValNumber).second) {
486 NumberToValue.try_emplace(LocalValNumber, BB);
494 if (
A.getLength() !=
B.getLength())
497 auto InstrDataForBoth =
500 return all_of(InstrDataForBoth,
501 [](std::tuple<IRInstructionData &, IRInstructionData &> R) {
504 if (!
A.Legal || !
B.Legal)
542 for (
Value *V : SourceOperands) {
543 ArgVal = SourceValueToNumberMapping.
find(V)->second;
546 std::tie(ValueMappingIt, WasInserted) = CurrentSrcTgtNumberMapping.insert(
547 std::make_pair(ArgVal, TargetValueNumbers));
553 for (
unsigned &Curr : ValueMappingIt->second)
555 if (TargetValueNumbers.
contains(Curr))
563 if (NewSet.
size() != ValueMappingIt->second.
size())
564 ValueMappingIt->second.
swap(NewSet);
568 if (ValueMappingIt->second.
size() != 1)
571 unsigned ValToRemove = *ValueMappingIt->second.
begin();
575 for (
Value *InnerV : SourceOperands) {
579 unsigned InnerVal = SourceValueToNumberMapping.
find(InnerV)->second;
580 ValueMappingIt = CurrentSrcTgtNumberMapping.
find(InnerVal);
581 if (ValueMappingIt == CurrentSrcTgtNumberMapping.
end())
584 ValueMappingIt->second.
erase(ValToRemove);
585 if (ValueMappingIt->second.
empty())
606 unsigned SourceArgVal,
unsigned TargetArgVal) {
625 std::tie(Val, WasInserted) = CurrentSrcTgtNumberMapping.
insert(
638 if (TargetSet.
size() > 1 && TargetSet.
contains(TargetArgVal)) {
640 TargetSet.
insert(TargetArgVal);
645 return TargetSet.
contains(TargetArgVal);
654 unsigned OperandLength =
A.OperVals.size();
657 for (
unsigned Idx = 0;
Idx < OperandLength;
Idx++, VItA++, VItB++) {
658 unsigned OperValA =
A.IRSC.ValueToNumber.find(*VItA)->second;
659 unsigned OperValB =
B.IRSC.ValueToNumber.find(*VItB)->second;
689 unsigned OperandLength =
A.OperVals.size();
692 for (
unsigned Idx = 0;
Idx < OperandLength;
693 Idx++, VItA++, VItB++) {
694 ValueNumbersA.
insert(
A.IRSC.ValueToNumber.find(*VItA)->second);
695 ValueNumbersB.
insert(
B.IRSC.ValueToNumber.find(*VItB)->second);
702 A.ValueNumberMapping,
A.OperVals,
710 B.ValueNumberMapping,
B.OperVals,
718 const unsigned InstValA,
const unsigned &InstValB,
723 std::tie(ValueMappingIt, WasInserted) = ValueNumberMappingA.insert(
725 if (!WasInserted && !ValueMappingIt->second.
contains(InstValB))
727 else if (ValueMappingIt->second.
size() != 1) {
728 for (
unsigned OtherVal : ValueMappingIt->second) {
729 if (OtherVal == InstValB)
731 if (!ValueNumberMappingA.contains(OtherVal))
733 if (!ValueNumberMappingA[OtherVal].
contains(InstValA))
735 ValueNumberMappingA[OtherVal].erase(InstValA);
737 ValueNumberMappingA.erase(ValueMappingIt);
738 std::tie(ValueMappingIt, WasInserted) = ValueNumberMappingA.insert(
754 A.IRSC.getBasicBlocks(BasicBlockA);
755 B.IRSC.getBasicBlocks(BasicBlockB);
758 bool AContained = BasicBlockA.
contains(ABB);
759 bool BContained = BasicBlockB.
contains(BBB);
763 if (AContained != BContained)
769 return A.RelativeLocation ==
B.RelativeLocation;
789 if (
A.getLength() !=
B.getLength())
792 if (
A.ValueToNumber.size() !=
B.ValueToNumber.size())
804 unsigned SectionLength =
A.getStartIdx() +
A.getLength();
805 for (
unsigned Loc =
A.getStartIdx(); Loc < SectionLength;
806 ItA++, ItB++, Loc++) {
814 if (!ItA->Legal || !ItB->Legal)
821 unsigned InstValA =
A.ValueToNumber.find(IA)->second;
822 unsigned InstValB =
B.ValueToNumber.find(IB)->second;
826 ValueNumberMappingB))
830 ValueNumberMappingA))
836 if (IA->isCommutative() && !isa<FPMathOperator>(IA) &&
837 !isa<IntrinsicInst>(IA)) {
839 {
A, OperValsA, ValueNumberMappingA},
840 {
B, OperValsB, ValueNumberMappingB}))
847 {
A, OperValsA, ValueNumberMappingA},
848 {
B, OperValsB, ValueNumberMappingB}))
862 if (!(isa<BranchInst>(IA) && isa<BranchInst>(IB)) &&
863 !(isa<PHINode>(IA) && isa<PHINode>(IB)))
873 if (RelBlockLocsA.
size() != RelBlockLocsB.
size() &&
878 "Block information vectors not the same size.");
880 "Block information vectors not the same size.");
883 zip(RelBlockLocsA, RelBlockLocsB, ABL, BBL);
884 if (
any_of(ZippedRelativeLocations,
885 [&
A, &
B](std::tuple<int, int, Value *, Value *> R) {
887 {
A, std::get<0>(R), std::get<2>(R)},
888 {
B, std::get<1>(R), std::get<3>(R)});
902 return X.StartIdx <=
Y.getEndIdx() &&
Y.StartIdx >=
X.StartIdx;
905 return DoesOverlap(
A,
B) || DoesOverlap(
B,
A);
908void IRSimilarityIdentifier::populateMapper(
909 Module &M, std::vector<IRInstructionData *> &InstrList,
910 std::vector<unsigned> &IntegerMapping) {
912 std::vector<IRInstructionData *> InstrListForModule;
913 std::vector<unsigned> IntegerMappingForModule;
929 IntegerMappingForModule);
935 if (InstrListForModule.size() > 0)
946void IRSimilarityIdentifier::populateMapper(
947 ArrayRef<std::unique_ptr<Module>> &Modules,
948 std::vector<IRInstructionData *> &InstrList,
949 std::vector<unsigned> &IntegerMapping) {
952 for (
const std::unique_ptr<Module> &M : Modules)
953 populateMapper(*M, InstrList, IntegerMapping);
968 std::vector<IRSimilarityCandidate> &CandsForRepSubstring) {
970 unsigned StringLen = RS.
Length;
976 unsigned EndIdx = StartIdx + StringLen - 1;
979 bool ContainsIllegal =
false;
980 for (
unsigned CurrIdx = StartIdx; CurrIdx <= EndIdx; CurrIdx++) {
981 unsigned Key = IntegerMapping[CurrIdx];
983 ContainsIllegal =
true;
996 std::vector<IRInstructionData *>::iterator StartIt = InstrList.
begin();
997 std::advance(StartIt, StartIdx);
998 std::vector<IRInstructionData *>::iterator EndIt = InstrList.
begin();
999 std::advance(EndIt, EndIdx);
1001 CandsForRepSubstring.emplace_back(StartIdx, StringLen, *StartIt, *EndIt);
1009 assert(SourceCand.CanonNumToNumber.
size() != 0 &&
1010 "Base canonical relationship is empty!");
1011 assert(SourceCand.NumberToCanonNum.
size() != 0 &&
1012 "Base canonical relationship is empty!");
1014 assert(CanonNumToNumber.
size() == 0 &&
"Canonical Relationship is non-empty");
1015 assert(NumberToCanonNum.
size() == 0 &&
"Canonical Relationship is non-empty");
1022 unsigned SourceGVN = GVNMapping.first;
1024 assert(GVNMapping.second.size() != 0 &&
"Possible GVNs is 0!");
1031 if (GVNMapping.second.size() > 1) {
1033 for (
unsigned Val : GVNMapping.second) {
1040 FromSourceMapping.find(Val);
1042 if (!It->second.
contains(SourceGVN))
1051 assert(Found &&
"Could not find matching value for source GVN");
1055 ResultGVN = *GVNMapping.second.begin();
1058 UsedGVNs.
insert(ResultGVN);
1061 CanonNumToNumber.
insert(std::make_pair(CanonNum, SourceGVN));
1062 NumberToCanonNum.
insert(std::make_pair(SourceGVN, CanonNum));
1074 unsigned BBGVNForCurrCand = ValueToNumber.
find(BB)->second;
1078 if (NumberToCanonNum.
contains(BBGVNForCurrCand))
1086 : &*BB->instructionsWithoutDebug().begin();
1088 unsigned FirstInstGVN = *
getGVN(FirstOutlineInst);
1093 unsigned SourceBBGVN = *SourceCand.
getGVN(SourceBB);
1095 CanonNumToNumber.
insert(std::make_pair(SourceCanonBBGVN, BBGVNForCurrCand));
1096 NumberToCanonNum.
insert(std::make_pair(BBGVNForCurrCand, SourceCanonBBGVN));
1104 "Canonical Relationship is non-empty");
1106 "Canonical Relationship is non-empty");
1108 assert(!SourceCandLarge.CanonNumToNumber.
empty() &&
1109 "Canonical Relationship is non-empty");
1110 assert(!SourceCandLarge.NumberToCanonNum.
empty() &&
1111 "Canonical Relationship is non-empty");
1113 assert(!TargetCandLarge.CanonNumToNumber.
empty() &&
1114 "Canonical Relationship is non-empty");
1115 assert(!TargetCandLarge.NumberToCanonNum.
empty() &&
1116 "Canonical Relationship is non-empty");
1118 assert(CanonNumToNumber.
empty() &&
"Canonical Relationship is non-empty");
1119 assert(NumberToCanonNum.
empty() &&
"Canonical Relationship is non-empty");
1125 for (std::pair<Value *, unsigned> &ValueNumPair : ValueToNumber) {
1126 Value *CurrVal = ValueNumPair.first;
1127 unsigned TargetCandGVN = ValueNumPair.second;
1131 std::optional<unsigned> OLargeTargetGVN = TargetCandLarge.
getGVN(CurrVal);
1132 assert(OLargeTargetGVN.has_value() &&
"GVN not found for Value");
1135 std::optional<unsigned> OTargetCandCanon =
1137 assert(OTargetCandCanon.has_value() &&
1138 "Canononical Number not found for GVN");
1141 std::optional<unsigned> OLargeSourceGVN =
1143 assert(OLargeSourceGVN.has_value() &&
1144 "GVN Number not found for Canonical Number");
1147 std::optional<Value *> OLargeSourceV =
1148 SourceCandLarge.
fromGVN(OLargeSourceGVN.value());
1149 assert(OLargeSourceV.has_value() &&
"Value not found for GVN");
1152 std::optional<unsigned> OSourceGVN =
1153 SourceCand.
getGVN(OLargeSourceV.value());
1154 assert(OSourceGVN.has_value() &&
"GVN Number not found for Value");
1157 std::optional<unsigned> OSourceCanon =
1159 assert(OSourceCanon.has_value() &&
"Canon Number not found for GVN");
1164 std::make_pair(OSourceCanon.value(), TargetCandGVN));
1166 std::make_pair(TargetCandGVN, OSourceCanon.value()));
1172 assert(CurrCand.CanonNumToNumber.
size() == 0 &&
1173 "Canonical Relationship is non-empty");
1174 assert(CurrCand.NumberToCanonNum.
size() == 0 &&
1175 "Canonical Relationship is non-empty");
1177 unsigned CanonNum = 0;
1180 for (std::pair<unsigned, Value *> &NumToVal : CurrCand.NumberToValue) {
1181 CurrCand.NumberToCanonNum.
insert(std::make_pair(NumToVal.first, CanonNum));
1182 CurrCand.CanonNumToNumber.
insert(std::make_pair(CanonNum, NumToVal.first));
1200static std::optional<
1201 std::pair<IRSimilarityCandidate *, IRSimilarityCandidate *>>
1213 auto IdxToCandidateIt = IndexToIncludedCand.
find(CandA.
getStartIdx());
1214 std::optional<std::pair<IRSimilarityCandidate *, IRSimilarityCandidate *>>
1221 if (IdxToCandidateIt == IndexToIncludedCand.end())
1227 unsigned GroupNum = CandToGroup.
find(MatchedCand)->second;
1228 IncludedGroupAndCandA.
insert(std::make_pair(GroupNum, MatchedCand));
1229 IncludedGroupsA.
insert(GroupNum);
1234 IdxToCandidateIt = IndexToIncludedCand.find(CandBStart);
1235 if (IdxToCandidateIt == IndexToIncludedCand.end())
1241 unsigned GroupNum = CandToGroup.
find(MatchedCand)->second;
1242 IncludedGroupAndCandB.
insert(std::make_pair(GroupNum, MatchedCand));
1243 IncludedGroupsB.
insert(GroupNum);
1252 if (IncludedGroupsA.
empty())
1256 auto ItA = IncludedGroupAndCandA.
find(*IncludedGroupsA.
begin());
1257 auto ItB = IncludedGroupAndCandB.
find(*IncludedGroupsA.
begin());
1258 Result = std::make_pair(ItA->second, ItB->second);
1276 std::vector<IRSimilarityCandidate> &CandsForRepSubstring,
1281 std::vector<IRSimilarityCandidate>::iterator CandIt, CandEndIt, InnerCandIt,
1296 unsigned CurrentGroupNum = 0;
1297 unsigned OuterGroupNum;
1306 for (CandIt = CandsForRepSubstring.begin(),
1307 CandEndIt = CandsForRepSubstring.end();
1308 CandIt != CandEndIt; CandIt++) {
1311 CandToGroupIt = CandToGroup.
find(&*CandIt);
1312 if (CandToGroupIt == CandToGroup.
end()) {
1314 std::tie(CandToGroupIt, Inserted) =
1315 CandToGroup.
insert(std::make_pair(&*CandIt, CurrentGroupNum++));
1319 OuterGroupNum = CandToGroupIt->second;
1323 CurrentGroupPair = StructuralGroups.
find(OuterGroupNum);
1324 if (CurrentGroupPair == StructuralGroups.
end()) {
1326 std::tie(CurrentGroupPair, Inserted) = StructuralGroups.
insert(
1334 for (InnerCandIt = std::next(CandIt),
1335 InnerCandEndIt = CandsForRepSubstring.end();
1336 InnerCandIt != InnerCandEndIt; InnerCandIt++) {
1339 CandToGroupItInner = CandToGroup.
find(&*InnerCandIt);
1340 if (CandToGroupItInner != CandToGroup.
end())
1345 std::optional<std::pair<IRSimilarityCandidate *, IRSimilarityCandidate *>>
1347 *CandIt, *InnerCandIt, IndexToIncludedCand, CandToOverallGroup);
1352 if (LargerPair.has_value()) {
1353 SameStructure =
true;
1354 InnerCandIt->createCanonicalRelationFrom(
1355 *CandIt, *LargerPair.value().first, *LargerPair.value().second);
1356 CandToGroup.
insert(std::make_pair(&*InnerCandIt, OuterGroupNum));
1357 CurrentGroupPair->second.push_back(*InnerCandIt);
1363 ValueNumberMappingA.
clear();
1364 ValueNumberMappingB.
clear();
1366 *CandIt, *InnerCandIt, ValueNumberMappingA, ValueNumberMappingB);
1370 InnerCandIt->createCanonicalRelationFrom(*CandIt, ValueNumberMappingA,
1371 ValueNumberMappingB);
1372 CandToGroup.
insert(std::make_pair(&*InnerCandIt, OuterGroupNum));
1373 CurrentGroupPair->second.push_back(*InnerCandIt);
1378void IRSimilarityIdentifier::findCandidates(
1379 std::vector<IRInstructionData *> &InstrList,
1380 std::vector<unsigned> &IntegerMapping) {
1383 std::vector<IRSimilarityCandidate> CandsForRepSubstring;
1384 std::vector<SimilarityGroup> NewCandidateGroups;
1395 std::vector<SuffixTree::RepeatedSubstring> RSes;
1401 return LHS.Length >
RHS.Length;
1405 CandsForRepSubstring);
1407 if (CandsForRepSubstring.size() < 2)
1411 IndexToIncludedCand, CandToGroup);
1412 for (std::pair<unsigned, SimilarityGroup> &Group : StructuralGroups) {
1416 if (Group.second.size() > 1) {
1417 SimilarityCandidates->push_back(Group.second);
1422 for (
unsigned Idx = IRCand.getStartIdx(), Edx = IRCand.getEndIdx();
1424 IndexToIncludedCand[
Idx].
insert(&IRCand);
1427 std::make_pair(&IRCand, SimilarityCandidates->size() - 1));
1432 CandsForRepSubstring.clear();
1433 StructuralGroups.clear();
1434 NewCandidateGroups.clear();
1439 ArrayRef<std::unique_ptr<Module>> Modules) {
1442 std::vector<IRInstructionData *> InstrList;
1443 std::vector<unsigned> IntegerMapping;
1450 populateMapper(Modules, InstrList, IntegerMapping);
1451 findCandidates(InstrList, IntegerMapping);
1453 return *SimilarityCandidates;
1464 std::vector<IRInstructionData *> InstrList;
1465 std::vector<unsigned> IntegerMapping;
1467 populateMapper(M, InstrList, IntegerMapping);
1468 findCandidates(InstrList, IntegerMapping);
1470 return *SimilarityCandidates;
1474 "ir-similarity-identifier",
false,
true)
1495 IRSI->findSimilarity(M);
1505 IRSI.findSimilarity(M);
1512 std::optional<SimilarityGroupList> &SimilarityCandidatesOpt =
1515 for (std::vector<IRSimilarityCandidate> &CandVec : *SimilarityCandidatesOpt) {
1516 OS << CandVec.size() <<
" candidates of length "
1517 << CandVec.begin()->getLength() <<
". Found in: \n";
1519 OS <<
" Function: " << Cand.front()->Inst->getFunction()->getName().str()
1520 <<
", Basic Block: ";
1521 if (Cand.front()->Inst->getParent()->getName().str() ==
"")
1524 OS << Cand.front()->Inst->getParent()->getName().str();
1525 OS <<
"\n Start Instruction: ";
1526 Cand.frontInstruction()->print(
OS);
1527 OS <<
"\n End Instruction: ";
1528 Cand.backInstruction()->print(
OS);
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file defines the DenseMap class.
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
bool checkNumberingAndReplace(DenseMap< unsigned, DenseSet< unsigned > > &CurrentSrcTgtNumberMapping, unsigned SourceArgVal, unsigned TargetArgVal)
Determine if operand number TargetArgVal is in the current mapping set for operand number SourceArgVa...
detail::zippy< detail::zip_shortest, SmallVector< int, 4 > &, SmallVector< int, 4 > &, ArrayRef< Value * > &, ArrayRef< Value * > & > ZippedRelativeLocationsT
static void findCandidateStructures(std::vector< IRSimilarityCandidate > &CandsForRepSubstring, DenseMap< unsigned, SimilarityGroup > &StructuralGroups, DenseMap< unsigned, DenseSet< IRSimilarityCandidate * > > &IndexToIncludedCand, DenseMap< IRSimilarityCandidate *, unsigned > &CandToOverallGroup)
From the list of IRSimilarityCandidates, perform a comparison between each IRSimilarityCandidate to d...
static void createCandidatesFromSuffixTree(const IRInstructionMapper &Mapper, std::vector< IRInstructionData * > &InstrList, std::vector< unsigned > &IntegerMapping, SuffixTree::RepeatedSubstring &RS, std::vector< IRSimilarityCandidate > &CandsForRepSubstring)
From a repeated subsequence, find all the different instances of the subsequence from the InstrList,...
static bool checkNumberingAndReplaceCommutative(const DenseMap< Value *, unsigned > &SourceValueToNumberMapping, DenseMap< unsigned, DenseSet< unsigned > > &CurrentSrcTgtNumberMapping, ArrayRef< Value * > &SourceOperands, DenseSet< unsigned > &TargetValueNumbers)
Determine if one or more of the assigned global value numbers for the operands in TargetValueNumbers ...
static std::optional< std::pair< IRSimilarityCandidate *, IRSimilarityCandidate * > > CheckLargerCands(IRSimilarityCandidate &CandA, IRSimilarityCandidate &CandB, DenseMap< unsigned, DenseSet< IRSimilarityCandidate * > > &IndexToIncludedCand, DenseMap< IRSimilarityCandidate *, unsigned > &CandToGroup)
Look for larger IRSimilarityCandidates From the previously matched IRSimilarityCandidates that fully ...
uint64_t IntrinsicInst * II
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
This file defines generic set operations that may be used on set's of different types,...
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
LLVM Basic Block Representation.
iterator begin()
Instruction iterator methods.
const Function * getParent() const
Return the enclosing method, or null if none.
InstListType::iterator iterator
Instruction iterators...
Conditional or Unconditional Branch instruction.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
bool isIndirectCall() const
Return true if the callsite is an indirect call.
This class represents a function call, abstracting a target machine's calling convention.
This class is the base class for the comparison instructions.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
@ ICMP_UGE
unsigned greater or equal
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
@ ICMP_SGE
signed greater or equal
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Predicate getPredicate() const
Return the predicate for this instruction.
iterator find(const_arg_type_t< KeyT > Val)
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
bool erase(const KeyT &Val)
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Implements a dense probed hash-table based set.
Class to represent function types.
ArrayRef< Type * > params() const
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
An analysis pass that runs and returns the IRSimilarityIdentifier run on the Module.
Result run(Module &M, ModuleAnalysisManager &)
An analysis pass based on legacy pass manager that runs and returns IRSimilarityIdentifier run on the...
bool doInitialization(Module &M) override
doInitialization - Virtual method overridden by subclasses to do any necessary initialization before ...
bool doFinalization(Module &M) override
doFinalization - Virtual method overriden by subclasses to do any necessary clean up after all passes...
bool runOnModule(Module &M) override
runOnModule - Virtual method overriden by subclasses to process the module being operated on.
This is a class that wraps a range of IRInstructionData from one point to another in the vector of IR...
void createCanonicalRelationFrom(IRSimilarityCandidate &SourceCand, DenseMap< unsigned, DenseSet< unsigned > > &ToSourceMapping, DenseMap< unsigned, DenseSet< unsigned > > &FromSourceMapping)
Create a mapping for the value numbering of the calling IRSimilarityCandidate, to a different separat...
static bool isSimilar(const IRSimilarityCandidate &A, const IRSimilarityCandidate &B)
std::optional< unsigned > getGVN(Value *V)
Finds the positive number associated with V if it has been mapped.
unsigned getEndIdx() const
static bool compareAssignmentMapping(const unsigned InstValA, const unsigned &InstValB, DenseMap< unsigned, DenseSet< unsigned > > &ValueNumberMappingA, DenseMap< unsigned, DenseSet< unsigned > > &ValueNumberMappingB)
Compare the GVN of the assignment value in corresponding instructions in IRSimilarityCandidates A and...
void getBasicBlocks(DenseSet< BasicBlock * > &BBSet) const
std::optional< Value * > fromGVN(unsigned Num)
Finds the Value associate with Num if it exists.
IRInstructionDataList::iterator iterator
Instruction * frontInstruction()
static bool checkRelativeLocations(RelativeLocMapping A, RelativeLocMapping B)
Compare the relative locations in A and B and check that the distances match if both locations are co...
static bool compareStructure(const IRSimilarityCandidate &A, const IRSimilarityCandidate &B)
static void createCanonicalMappingFor(IRSimilarityCandidate &CurrCand)
Create a mapping from the value numbering to a different separate set of numbers.
static bool overlap(const IRSimilarityCandidate &A, const IRSimilarityCandidate &B)
Compare the start and end indices of the two IRSimilarityCandidates for whether they overlap.
unsigned getStartIdx() const
BasicBlock * getStartBB()
IRSimilarityCandidate(unsigned StartIdx, unsigned Len, IRInstructionData *FirstInstIt, IRInstructionData *LastInstIt)
std::optional< unsigned > getCanonicalNum(unsigned N)
Find the canonical number from the global value number N stored in the candidate.
std::optional< unsigned > fromCanonicalNum(unsigned N)
Find the global value number from the canonical number N stored in the candidate.
static bool compareNonCommutativeOperandMapping(OperandMapping A, OperandMapping B)
Compare the operands in A and B and check that the current mapping of global value numbers from A to ...
static bool compareCommutativeOperandMapping(OperandMapping A, OperandMapping B)
Compare the operands in A and B and check that the current mapping of global value numbers from A to ...
This class puts all the pieces of the IRInstructionData, IRInstructionMapper, IRSimilarityCandidate t...
void resetSimilarityCandidates()
SimilarityGroupList & findSimilarity(ArrayRef< std::unique_ptr< Module > > Modules)
std::optional< SimilarityGroupList > & getSimilarity()
void visit(Iterator Start, Iterator End)
A wrapper class for inspecting calls to intrinsic functions.
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
A Module instance is used to store all the information related to an LLVM module.
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
iterator insert(iterator I, T &&Elt)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
std::string str() const
str - Get the contents as an std::string.
A Use represents the edge between a Value definition and its users.
LLVM Value Representation.
StringRef getName() const
Return a constant reference to the value's name.
std::pair< iterator, bool > insert(const ValueT &V)
iterator find(const_arg_type_t< ValueT > V)
bool contains(const_arg_type_t< ValueT > V) const
Check if the set contains the given element.
const ParentTy * getParent() const
typename ilist_select_iterator_type< OptionsT::has_iterator_bits, OptionsT, false, false >::type iterator
void push_back(reference Node)
Insert a node at the back; never copies.
@ C
The default llvm calling convention, compatible with C.
std::vector< SimilarityGroup > SimilarityGroupList
std::vector< IRSimilarityCandidate > SimilarityGroup
bool isClose(const IRInstructionData &A, const IRInstructionData &B)
Compare one IRInstructionData class to another IRInstructionData class for whether they are performin...
StringRef getName(ID id)
Return the LLVM name for an intrinsic, such as "llvm.ppc.altivec.lvx".
bool isOverloaded(ID id)
Returns true if the intrinsic can be overloaded.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
void stable_sort(R &&Range)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
void set_intersect(S1Ty &S1, const S2Ty &S2)
set_intersect(A, B) - Compute A := A ^ B Identical to set_intersection, except that it works on set<>...
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
cl::opt< bool > DisableIndirectCalls("no-ir-sim-indirect-calls", cl::init(false), cl::ReallyHidden, cl::desc("disable outlining indirect calls."))
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
cl::opt< bool > DisableBranches("no-ir-sim-branch-matching", cl::init(false), cl::ReallyHidden, cl::desc("disable similarity matching, and outlining, " "across branches for debugging purposes."))
cl::opt< bool > DisableIntrinsics("no-ir-sim-intrinsics", cl::init(false), cl::ReallyHidden, cl::desc("Don't match or outline intrinsics"))
void initializeIRSimilarityIdentifierWrapperPassPass(PassRegistry &)
cl::opt< bool > MatchCallsByName("ir-sim-calls-by-name", cl::init(false), cl::ReallyHidden, cl::desc("only allow matching call instructions if the " "name and type signature match."))
A special type used by analysis passes to provide an address that identifies that particular analysis...
An information struct used to provide DenseMap with the various necessary components for a given valu...
This provides the utilities for hashing an Instruction to an unsigned integer.
StringRef getCalleeName() const
Get the callee name that the call instruction is using for hashing the instruction.
void initializeInstruction()
Fills data stuctures for IRInstructionData when it is constructed from a.
SmallVector< int, 4 > RelativeBlockLocations
This structure holds the distances of how far "ahead of" or "behind" the target blocks of a branch,...
std::optional< std::string > CalleeName
This is only relevant if we are wrapping a CallInst.
IRInstructionData(Instruction &I, bool Legality, IRInstructionDataList &IDL)
Gather the information that is difficult to gather for an Instruction, or is changed.
ArrayRef< Value * > getBlockOperVals()
Get the BasicBlock based operands for PHINodes and BranchInsts.
Instruction * Inst
The source Instruction that is being wrapped.
static CmpInst::Predicate predicateForConsistency(CmpInst *CI)
A function that swaps the predicates to their less than form if they are in a greater than form.
void setPHIPredecessors(DenseMap< BasicBlock *, unsigned > &BasicBlockToInteger)
For an IRInstructionData containing a PHINode, finds the relative distances from the incoming basic b...
SmallVector< Value *, 4 > OperVals
The values of the operands in the Instruction.
void setBranchSuccessors(DenseMap< BasicBlock *, unsigned > &BasicBlockToInteger)
For an IRInstructionData containing a branch, finds the relative distances from the source basic bloc...
void setCalleeName(bool MatchByName=true)
For an IRInstructionData containing a CallInst, set the function name appropriately.
CmpInst::Predicate getPredicate() const
Get the predicate that the compare instruction is using for hashing the instruction.
std::optional< CmpInst::Predicate > RevisedPredicate
This is only relevant if we are wrapping a CmpInst where we needed to change the predicate of a compa...
Helper struct for converting the Instructions in a Module into a vector of unsigned integers.
DenseMap< IRInstructionData *, unsigned, IRInstructionDataTraits > InstructionIntegerMap
Correspondence from IRInstructionData to unsigned integers.
SpecificBumpPtrAllocator< IRInstructionDataList > * IDLAllocator
This allocator pointer is in charge of creating the IRInstructionDataList so it is not deallocated un...
SpecificBumpPtrAllocator< IRInstructionData > * InstDataAllocator
This allocator pointer is in charge of holding on to the IRInstructionData so it is not deallocated u...
bool EnableMatchCallsByName
Marks whether we should use exact function names, as well as types to find similarity between calls.
unsigned LegalInstrNumber
The next available integer to assign to a legal Instruction to.
unsigned IllegalInstrNumber
The starting illegal instruction number to map to.
InstructionClassification InstClassifier
Maps an Instruction to a member of InstrType.
void initializeForBBs(Function &F, unsigned &BBNumber)
Assigns values to all the basic blocks in function F starting from integer BBNumber.
bool HaveLegalRange
Marks whether we have found a set of instructions that is long enough to be considered for similarity...
IRInstructionData * allocateIRInstructionData(Instruction &I, bool Legality, IRInstructionDataList &IDL)
Get an allocated IRInstructionData struct using the InstDataAllocator.
bool CanCombineWithPrevInstr
Marks whether we found a illegal instruction in the previous step.
IRInstructionDataList * IDL
DenseMap< BasicBlock *, unsigned > BasicBlockToInteger
A mapping for a basic block in a module to its assigned number/location in the module.
IRInstructionDataList * allocateIRInstructionDataList()
Get an allocated IRInstructionDataList object using the IDLAllocator.
unsigned mapToLegalUnsigned(BasicBlock::iterator &It, std::vector< unsigned > &IntegerMappingForBB, std::vector< IRInstructionData * > &InstrListForBB)
Maps an Instruction to a legal integer.
void convertToUnsignedVec(BasicBlock &BB, std::vector< IRInstructionData * > &InstrList, std::vector< unsigned > &IntegerMapping)
Maps the Instructions in a BasicBlock BB to legal or illegal integers determined by InstrType.
bool AddedIllegalLastTime
Set if we added an illegal number in the previous step.
unsigned mapToIllegalUnsigned(BasicBlock::iterator &It, std::vector< unsigned > &IntegerMappingForBB, std::vector< IRInstructionData * > &InstrListForBB, bool End=false)
Maps an Instruction to an illegal integer.
A helper struct to hold the candidate, for a branch instruction, the relative location of a label,...
Incoming for lane maks phi as machine instruction, incoming register Reg and incoming block Block are...
A repeated substring in the tree.
SmallVector< unsigned > StartIndices
The start indices of each occurrence.
unsigned Length
The length of the string.