LLVM 20.0.0git
IRSimilarityIdentifier.cpp
Go to the documentation of this file.
1//===- IRSimilarityIdentifier.cpp - Find similarity in a module -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// \file
10// Implementation file for the IRSimilarityIdentifier for identifying
11// similarities in IR including the IRInstructionMapper.
12//
13//===----------------------------------------------------------------------===//
14
16#include "llvm/ADT/DenseMap.h"
18#include "llvm/IR/Intrinsics.h"
19#include "llvm/IR/Operator.h"
20#include "llvm/IR/User.h"
23
24using namespace llvm;
25using namespace IRSimilarity;
26
27namespace llvm {
29 DisableBranches("no-ir-sim-branch-matching", cl::init(false),
31 cl::desc("disable similarity matching, and outlining, "
32 "across branches for debugging purposes."));
33
35 DisableIndirectCalls("no-ir-sim-indirect-calls", cl::init(false),
37 cl::desc("disable outlining indirect calls."));
38
40 MatchCallsByName("ir-sim-calls-by-name", cl::init(false), cl::ReallyHidden,
41 cl::desc("only allow matching call instructions if the "
42 "name and type signature match."));
43
45 DisableIntrinsics("no-ir-sim-intrinsics", cl::init(false), cl::ReallyHidden,
46 cl::desc("Don't match or outline intrinsics"));
47} // namespace llvm
48
51 : Inst(&I), Legal(Legality), IDL(&IDList) {
53}
54
56 // We check for whether we have a comparison instruction. If it is, we
57 // find the "less than" version of the predicate for consistency for
58 // comparison instructions throught the program.
59 if (CmpInst *C = dyn_cast<CmpInst>(Inst)) {
61 if (Predicate != C->getPredicate())
62 RevisedPredicate = Predicate;
63 }
64
65 // Here we collect the operands and their types for determining whether
66 // the structure of the operand use matches between two different candidates.
67 for (Use &OI : Inst->operands()) {
68 if (isa<CmpInst>(Inst) && RevisedPredicate) {
69 // If we have a CmpInst where the predicate is reversed, it means the
70 // operands must be reversed as well.
71 OperVals.insert(OperVals.begin(), OI.get());
72 continue;
73 }
74
75 OperVals.push_back(OI.get());
76 }
77
78 // We capture the incoming BasicBlocks as values as well as the incoming
79 // Values in order to check for structural similarity.
80 if (PHINode *PN = dyn_cast<PHINode>(Inst))
81 for (BasicBlock *BB : PN->blocks())
83}
84
86 : IDL(&IDList) {}
87
89 DenseMap<BasicBlock *, unsigned> &BasicBlockToInteger) {
90 assert(isa<BranchInst>(Inst) && "Instruction must be branch");
91
92 BranchInst *BI = cast<BranchInst>(Inst);
94
95 BBNumIt = BasicBlockToInteger.find(BI->getParent());
96 assert(BBNumIt != BasicBlockToInteger.end() &&
97 "Could not find location for BasicBlock!");
98
99 int CurrentBlockNumber = static_cast<int>(BBNumIt->second);
100
101 for (Value *V : getBlockOperVals()) {
102 BasicBlock *Successor = cast<BasicBlock>(V);
103 BBNumIt = BasicBlockToInteger.find(Successor);
104 assert(BBNumIt != BasicBlockToInteger.end() &&
105 "Could not find number for BasicBlock!");
106 int OtherBlockNumber = static_cast<int>(BBNumIt->second);
107
108 int Relative = OtherBlockNumber - CurrentBlockNumber;
110 }
111}
112
114 assert((isa<BranchInst>(Inst) ||
115 isa<PHINode>(Inst)) && "Instruction must be branch or PHINode");
116
117 if (BranchInst *BI = dyn_cast<BranchInst>(Inst))
118 return ArrayRef<Value *>(
119 std::next(OperVals.begin(), BI->isConditional() ? 1 : 0),
120 OperVals.end()
121 );
122
123 if (PHINode *PN = dyn_cast<PHINode>(Inst))
124 return ArrayRef<Value *>(
125 std::next(OperVals.begin(), PN->getNumIncomingValues()),
126 OperVals.end()
127 );
128
129 return ArrayRef<Value *>();
130}
131
132void IRInstructionData::setCalleeName(bool MatchByName) {
133 CallInst *CI = dyn_cast<CallInst>(Inst);
134 assert(CI && "Instruction must be call");
135
136 CalleeName = "";
137 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
138 // To hash intrinsics, we use the opcode, and types like the other
139 // instructions, but also, the Intrinsic ID, and the Name of the
140 // intrinsic.
141 Intrinsic::ID IntrinsicID = II->getIntrinsicID();
142 FunctionType *FT = II->getFunctionType();
143 // If there is an overloaded name, we have to use the complex version
144 // of getName to get the entire string.
145 if (Intrinsic::isOverloaded(IntrinsicID))
146 CalleeName =
147 Intrinsic::getName(IntrinsicID, FT->params(), II->getModule(), FT);
148 // If there is not an overloaded name, we only need to use this version.
149 else
150 CalleeName = Intrinsic::getName(IntrinsicID).str();
151
152 return;
153 }
154
155 if (!CI->isIndirectCall() && MatchByName)
157}
158
160 DenseMap<BasicBlock *, unsigned> &BasicBlockToInteger) {
161 assert(isa<PHINode>(Inst) && "Instruction must be phi node");
162
163 PHINode *PN = cast<PHINode>(Inst);
165
166 BBNumIt = BasicBlockToInteger.find(PN->getParent());
167 assert(BBNumIt != BasicBlockToInteger.end() &&
168 "Could not find location for BasicBlock!");
169
170 int CurrentBlockNumber = static_cast<int>(BBNumIt->second);
171
172 // Convert the incoming blocks of the PHINode to an integer value, based on
173 // the relative distances between the current block and the incoming block.
174 for (unsigned Idx = 0; Idx < PN->getNumIncomingValues(); Idx++) {
176 BBNumIt = BasicBlockToInteger.find(Incoming);
177 assert(BBNumIt != BasicBlockToInteger.end() &&
178 "Could not find number for BasicBlock!");
179 int OtherBlockNumber = static_cast<int>(BBNumIt->second);
180
181 int Relative = OtherBlockNumber - CurrentBlockNumber;
183 }
184}
185
187 switch (CI->getPredicate()) {
196 return CI->getSwappedPredicate();
197 default:
198 return CI->getPredicate();
199 }
200}
201
203 assert(isa<CmpInst>(Inst) &&
204 "Can only get a predicate from a compare instruction");
205
207 return *RevisedPredicate;
208
209 return cast<CmpInst>(Inst)->getPredicate();
210}
211
213 assert(isa<CallInst>(Inst) &&
214 "Can only get a name from a call instruction");
215
216 assert(CalleeName && "CalleeName has not been set");
217
218 return *CalleeName;
219}
220
222 const IRInstructionData &B) {
223
224 if (!A.Legal || !B.Legal)
225 return false;
226
227 // Check if we are performing the same sort of operation on the same types
228 // but not on the same values.
229 if (!A.Inst->isSameOperationAs(B.Inst)) {
230 // If there is a predicate, this means that either there is a swapped
231 // predicate, or that the types are different, we want to make sure that
232 // the predicates are equivalent via swapping.
233 if (isa<CmpInst>(A.Inst) && isa<CmpInst>(B.Inst)) {
234
235 if (A.getPredicate() != B.getPredicate())
236 return false;
237
238 // If the predicates are the same via swap, make sure that the types are
239 // still the same.
240 auto ZippedTypes = zip(A.OperVals, B.OperVals);
241
242 return all_of(
243 ZippedTypes, [](std::tuple<llvm::Value *, llvm::Value *> R) {
244 return std::get<0>(R)->getType() == std::get<1>(R)->getType();
245 });
246 }
247
248 return false;
249 }
250
251 // Since any GEP Instruction operands after the first operand cannot be
252 // defined by a register, we must make sure that the operands after the first
253 // are the same in the two instructions
254 if (auto *GEP = dyn_cast<GetElementPtrInst>(A.Inst)) {
255 auto *OtherGEP = cast<GetElementPtrInst>(B.Inst);
256
257 // If the instructions do not have the same inbounds restrictions, we do
258 // not consider them the same.
259 if (GEP->isInBounds() != OtherGEP->isInBounds())
260 return false;
261
262 auto ZippedOperands = zip(GEP->indices(), OtherGEP->indices());
263
264 // We increment here since we do not care about the first instruction,
265 // we only care about the following operands since they must be the
266 // exact same to be considered similar.
267 return all_of(drop_begin(ZippedOperands),
268 [](std::tuple<llvm::Use &, llvm::Use &> R) {
269 return std::get<0>(R) == std::get<1>(R);
270 });
271 }
272
273 // If the instructions are functions calls, we make sure that the function
274 // name is the same. We already know that the types are since is
275 // isSameOperationAs is true.
276 if (isa<CallInst>(A.Inst) && isa<CallInst>(B.Inst)) {
277 if (A.getCalleeName() != B.getCalleeName())
278 return false;
279 }
280
281 if (isa<BranchInst>(A.Inst) && isa<BranchInst>(B.Inst) &&
282 A.RelativeBlockLocations.size() != B.RelativeBlockLocations.size())
283 return false;
284
285 return true;
286}
287
288// TODO: This is the same as the MachineOutliner, and should be consolidated
289// into the same interface.
291 BasicBlock &BB, std::vector<IRInstructionData *> &InstrList,
292 std::vector<unsigned> &IntegerMapping) {
293 BasicBlock::iterator It = BB.begin();
294
295 std::vector<unsigned> IntegerMappingForBB;
296 std::vector<IRInstructionData *> InstrListForBB;
297
298 for (BasicBlock::iterator Et = BB.end(); It != Et; ++It) {
299 switch (InstClassifier.visit(*It)) {
300 case InstrType::Legal:
301 mapToLegalUnsigned(It, IntegerMappingForBB, InstrListForBB);
302 break;
304 mapToIllegalUnsigned(It, IntegerMappingForBB, InstrListForBB);
305 break;
307 AddedIllegalLastTime = false;
308 break;
309 }
310 }
311
313 mapToIllegalUnsigned(It, IntegerMappingForBB, InstrListForBB, true);
314 for (IRInstructionData *ID : InstrListForBB)
315 this->IDL->push_back(*ID);
316 llvm::append_range(InstrList, InstrListForBB);
317 llvm::append_range(IntegerMapping, IntegerMappingForBB);
318}
319
320// TODO: This is the same as the MachineOutliner, and should be consolidated
321// into the same interface.
323 BasicBlock::iterator &It, std::vector<unsigned> &IntegerMappingForBB,
324 std::vector<IRInstructionData *> &InstrListForBB) {
325 // We added something legal, so we should unset the AddedLegalLastTime
326 // flag.
327 AddedIllegalLastTime = false;
328
329 // If we have at least two adjacent legal instructions (which may have
330 // invisible instructions in between), remember that.
332 HaveLegalRange = true;
334
335 // Get the integer for this instruction or give it the current
336 // LegalInstrNumber.
338 InstrListForBB.push_back(ID);
339
340 if (isa<BranchInst>(*It))
341 ID->setBranchSuccessors(BasicBlockToInteger);
342
343 if (isa<CallInst>(*It))
344 ID->setCalleeName(EnableMatchCallsByName);
345
346 if (isa<PHINode>(*It))
347 ID->setPHIPredecessors(BasicBlockToInteger);
348
349 // Add to the instruction list
350 bool WasInserted;
352 ResultIt;
353 std::tie(ResultIt, WasInserted) =
354 InstructionIntegerMap.insert(std::make_pair(ID, LegalInstrNumber));
355 unsigned INumber = ResultIt->second;
356
357 // There was an insertion.
358 if (WasInserted)
360
361 IntegerMappingForBB.push_back(INumber);
362
363 // Make sure we don't overflow or use any integers reserved by the DenseMap.
365 "Instruction mapping overflow!");
366
368 "Tried to assign DenseMap tombstone or empty key to instruction.");
370 "Tried to assign DenseMap tombstone or empty key to instruction.");
371
372 return INumber;
373}
374
378 return new (InstDataAllocator->Allocate()) IRInstructionData(I, Legality, IDL);
379}
380
383 return new (InstDataAllocator->Allocate()) IRInstructionData(IDL);
384}
385
388 return new (IDLAllocator->Allocate()) IRInstructionDataList();
389}
390
391// TODO: This is the same as the MachineOutliner, and should be consolidated
392// into the same interface.
394 BasicBlock::iterator &It, std::vector<unsigned> &IntegerMappingForBB,
395 std::vector<IRInstructionData *> &InstrListForBB, bool End) {
396 // Can't combine an illegal instruction. Set the flag.
398
399 // Only add one illegal number per range of legal numbers.
401 return IllegalInstrNumber;
402
403 IRInstructionData *ID = nullptr;
404 if (!End)
405 ID = allocateIRInstructionData(*It, false, *IDL);
406 else
408 InstrListForBB.push_back(ID);
409
410 // Remember that we added an illegal number last time.
412 unsigned INumber = IllegalInstrNumber;
413 IntegerMappingForBB.push_back(IllegalInstrNumber--);
414
416 "Instruction mapping overflow!");
417
419 "IllegalInstrNumber cannot be DenseMap tombstone or empty key!");
420
422 "IllegalInstrNumber cannot be DenseMap tombstone or empty key!");
423
424 return INumber;
425}
426
427IRSimilarityCandidate::IRSimilarityCandidate(unsigned StartIdx, unsigned Len,
428 IRInstructionData *FirstInstIt,
429 IRInstructionData *LastInstIt)
430 : StartIdx(StartIdx), Len(Len) {
431
432 assert(FirstInstIt != nullptr && "Instruction is nullptr!");
433 assert(LastInstIt != nullptr && "Instruction is nullptr!");
434 assert(StartIdx + Len > StartIdx &&
435 "Overflow for IRSimilarityCandidate range?");
436 assert(Len - 1 == static_cast<unsigned>(std::distance(
437 iterator(FirstInstIt), iterator(LastInstIt))) &&
438 "Length of the first and last IRInstructionData do not match the "
439 "given length");
440
441 // We iterate over the given instructions, and map each unique value
442 // to a unique number in the IRSimilarityCandidate ValueToNumber and
443 // NumberToValue maps. A constant get its own value globally, the individual
444 // uses of the constants are not considered to be unique.
445 //
446 // IR: Mapping Added:
447 // %add1 = add i32 %a, c1 %add1 -> 3, %a -> 1, c1 -> 2
448 // %add2 = add i32 %a, %1 %add2 -> 4
449 // %add3 = add i32 c2, c1 %add3 -> 6, c2 -> 5
450 //
451 // when replace with global values, starting from 1, would be
452 //
453 // 3 = add i32 1, 2
454 // 4 = add i32 1, 3
455 // 6 = add i32 5, 2
456 unsigned LocalValNumber = 1;
458 for (unsigned Loc = StartIdx; Loc < StartIdx + Len; Loc++, ID++) {
459 // Map the operand values to an unsigned integer if it does not already
460 // have an unsigned integer assigned to it.
461 for (Value *Arg : ID->OperVals)
462 if (ValueToNumber.try_emplace(Arg, LocalValNumber).second) {
463 NumberToValue.try_emplace(LocalValNumber, Arg);
464 LocalValNumber++;
465 }
466
467 // Mapping the instructions to an unsigned integer if it is not already
468 // exist in the mapping.
469 if (ValueToNumber.try_emplace(ID->Inst, LocalValNumber).second) {
470 NumberToValue.try_emplace(LocalValNumber, ID->Inst);
471 LocalValNumber++;
472 }
473 }
474
475 // Setting the first and last instruction data pointers for the candidate. If
476 // we got through the entire for loop without hitting an assert, we know
477 // that both of these instructions are not nullptrs.
478 FirstInst = FirstInstIt;
479 LastInst = LastInstIt;
480
481 // Add the basic blocks contained in the set into the global value numbering.
483 getBasicBlocks(BBSet);
484 for (BasicBlock *BB : BBSet) {
485 if (ValueToNumber.try_emplace(BB, LocalValNumber).second) {
486 NumberToValue.try_emplace(LocalValNumber, BB);
487 LocalValNumber++;
488 }
489 }
490}
491
493 const IRSimilarityCandidate &B) {
494 if (A.getLength() != B.getLength())
495 return false;
496
497 auto InstrDataForBoth =
498 zip(make_range(A.begin(), A.end()), make_range(B.begin(), B.end()));
499
500 return all_of(InstrDataForBoth,
501 [](std::tuple<IRInstructionData &, IRInstructionData &> R) {
502 IRInstructionData &A = std::get<0>(R);
503 IRInstructionData &B = std::get<1>(R);
504 if (!A.Legal || !B.Legal)
505 return false;
506 return isClose(A, B);
507 });
508}
509
510/// Determine if one or more of the assigned global value numbers for the
511/// operands in \p TargetValueNumbers is in the current mapping set for operand
512/// numbers in \p SourceOperands. The set of possible corresponding global
513/// value numbers are replaced with the most recent version of compatible
514/// values.
515///
516/// \param [in] SourceValueToNumberMapping - The mapping of a Value to global
517/// value number for the source IRInstructionCandidate.
518/// \param [in, out] CurrentSrcTgtNumberMapping - The current mapping of source
519/// IRSimilarityCandidate global value numbers to a set of possible numbers in
520/// the target.
521/// \param [in] SourceOperands - The operands in the original
522/// IRSimilarityCandidate in the current instruction.
523/// \param [in] TargetValueNumbers - The global value numbers of the operands in
524/// the corresponding Instruction in the other IRSimilarityCandidate.
525/// \returns true if there exists a possible mapping between the source
526/// Instruction operands and the target Instruction operands, and false if not.
528 const DenseMap<Value *, unsigned> &SourceValueToNumberMapping,
529 DenseMap<unsigned, DenseSet<unsigned>> &CurrentSrcTgtNumberMapping,
530 ArrayRef<Value *> &SourceOperands,
531 DenseSet<unsigned> &TargetValueNumbers){
532
533 DenseMap<unsigned, DenseSet<unsigned>>::iterator ValueMappingIt;
534
535 unsigned ArgVal;
536 bool WasInserted;
537
538 // Iterate over the operands in the source IRSimilarityCandidate to determine
539 // whether there exists an operand in the other IRSimilarityCandidate that
540 // creates a valid mapping of Value to Value between the
541 // IRSimilarityCaniddates.
542 for (Value *V : SourceOperands) {
543 ArgVal = SourceValueToNumberMapping.find(V)->second;
544
545 // Instead of finding a current mapping, we attempt to insert a set.
546 std::tie(ValueMappingIt, WasInserted) = CurrentSrcTgtNumberMapping.insert(
547 std::make_pair(ArgVal, TargetValueNumbers));
548
549 // We need to iterate over the items in other IRSimilarityCandidate's
550 // Instruction to determine whether there is a valid mapping of
551 // Value to Value.
552 DenseSet<unsigned> NewSet;
553 for (unsigned &Curr : ValueMappingIt->second)
554 // If we can find the value in the mapping, we add it to the new set.
555 if (TargetValueNumbers.contains(Curr))
556 NewSet.insert(Curr);
557
558 // If we could not find a Value, return 0.
559 if (NewSet.empty())
560 return false;
561
562 // Otherwise replace the old mapping with the newly constructed one.
563 if (NewSet.size() != ValueMappingIt->second.size())
564 ValueMappingIt->second.swap(NewSet);
565
566 // We have reached no conclusions about the mapping, and cannot remove
567 // any items from the other operands, so we move to check the next operand.
568 if (ValueMappingIt->second.size() != 1)
569 continue;
570
571 unsigned ValToRemove = *ValueMappingIt->second.begin();
572 // When there is only one item left in the mapping for and operand, remove
573 // the value from the other operands. If it results in there being no
574 // mapping, return false, it means the mapping is wrong
575 for (Value *InnerV : SourceOperands) {
576 if (V == InnerV)
577 continue;
578
579 unsigned InnerVal = SourceValueToNumberMapping.find(InnerV)->second;
580 ValueMappingIt = CurrentSrcTgtNumberMapping.find(InnerVal);
581 if (ValueMappingIt == CurrentSrcTgtNumberMapping.end())
582 continue;
583
584 ValueMappingIt->second.erase(ValToRemove);
585 if (ValueMappingIt->second.empty())
586 return false;
587 }
588 }
589
590 return true;
591}
592
593/// Determine if operand number \p TargetArgVal is in the current mapping set
594/// for operand number \p SourceArgVal.
595///
596/// \param [in, out] CurrentSrcTgtNumberMapping current mapping of global
597/// value numbers from source IRSimilarityCandidate to target
598/// IRSimilarityCandidate.
599/// \param [in] SourceArgVal The global value number for an operand in the
600/// in the original candidate.
601/// \param [in] TargetArgVal The global value number for the corresponding
602/// operand in the other candidate.
603/// \returns True if there exists a mapping and false if not.
605 DenseMap<unsigned, DenseSet<unsigned>> &CurrentSrcTgtNumberMapping,
606 unsigned SourceArgVal, unsigned TargetArgVal) {
607 // We are given two unsigned integers representing the global values of
608 // the operands in different IRSimilarityCandidates and a current mapping
609 // between the two.
610 //
611 // Source Operand GVN: 1
612 // Target Operand GVN: 2
613 // CurrentMapping: {1: {1, 2}}
614 //
615 // Since we have mapping, and the target operand is contained in the set, we
616 // update it to:
617 // CurrentMapping: {1: {2}}
618 // and can return true. But, if the mapping was
619 // CurrentMapping: {1: {3}}
620 // we would return false.
621
622 bool WasInserted;
624
625 std::tie(Val, WasInserted) = CurrentSrcTgtNumberMapping.insert(
626 std::make_pair(SourceArgVal, DenseSet<unsigned>({TargetArgVal})));
627
628 // If we created a new mapping, then we are done.
629 if (WasInserted)
630 return true;
631
632 // If there is more than one option in the mapping set, and the target value
633 // is included in the mapping set replace that set with one that only includes
634 // the target value, as it is the only valid mapping via the non commutative
635 // instruction.
636
637 DenseSet<unsigned> &TargetSet = Val->second;
638 if (TargetSet.size() > 1 && TargetSet.contains(TargetArgVal)) {
639 TargetSet.clear();
640 TargetSet.insert(TargetArgVal);
641 return true;
642 }
643
644 // Return true if we can find the value in the set.
645 return TargetSet.contains(TargetArgVal);
646}
647
650 // Iterators to keep track of where we are in the operands for each
651 // Instruction.
652 ArrayRef<Value *>::iterator VItA = A.OperVals.begin();
653 ArrayRef<Value *>::iterator VItB = B.OperVals.begin();
654 unsigned OperandLength = A.OperVals.size();
655
656 // For each operand, get the value numbering and ensure it is consistent.
657 for (unsigned Idx = 0; Idx < OperandLength; Idx++, VItA++, VItB++) {
658 unsigned OperValA = A.IRSC.ValueToNumber.find(*VItA)->second;
659 unsigned OperValB = B.IRSC.ValueToNumber.find(*VItB)->second;
660
661 // Attempt to add a set with only the target value. If there is no mapping
662 // we can create it here.
663 //
664 // For an instruction like a subtraction:
665 // IRSimilarityCandidateA: IRSimilarityCandidateB:
666 // %resultA = sub %a, %b %resultB = sub %d, %e
667 //
668 // We map %a -> %d and %b -> %e.
669 //
670 // And check to see whether their mapping is consistent in
671 // checkNumberingAndReplace.
672
673 if (!checkNumberingAndReplace(A.ValueNumberMapping, OperValA, OperValB))
674 return false;
675
676 if (!checkNumberingAndReplace(B.ValueNumberMapping, OperValB, OperValA))
677 return false;
678 }
679 return true;
680}
681
684 DenseSet<unsigned> ValueNumbersA;
685 DenseSet<unsigned> ValueNumbersB;
686
687 ArrayRef<Value *>::iterator VItA = A.OperVals.begin();
688 ArrayRef<Value *>::iterator VItB = B.OperVals.begin();
689 unsigned OperandLength = A.OperVals.size();
690
691 // Find the value number sets for the operands.
692 for (unsigned Idx = 0; Idx < OperandLength;
693 Idx++, VItA++, VItB++) {
694 ValueNumbersA.insert(A.IRSC.ValueToNumber.find(*VItA)->second);
695 ValueNumbersB.insert(B.IRSC.ValueToNumber.find(*VItB)->second);
696 }
697
698 // Iterate over the operands in the first IRSimilarityCandidate and make sure
699 // there exists a possible mapping with the operands in the second
700 // IRSimilarityCandidate.
701 if (!checkNumberingAndReplaceCommutative(A.IRSC.ValueToNumber,
702 A.ValueNumberMapping, A.OperVals,
703 ValueNumbersB))
704 return false;
705
706 // Iterate over the operands in the second IRSimilarityCandidate and make sure
707 // there exists a possible mapping with the operands in the first
708 // IRSimilarityCandidate.
709 if (!checkNumberingAndReplaceCommutative(B.IRSC.ValueToNumber,
710 B.ValueNumberMapping, B.OperVals,
711 ValueNumbersA))
712 return false;
713
714 return true;
715}
716
718 const unsigned InstValA, const unsigned &InstValB,
719 DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingA,
720 DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingB) {
722 bool WasInserted;
723 std::tie(ValueMappingIt, WasInserted) = ValueNumberMappingA.insert(
724 std::make_pair(InstValA, DenseSet<unsigned>({InstValB})));
725 if (!WasInserted && !ValueMappingIt->second.contains(InstValB))
726 return false;
727 else if (ValueMappingIt->second.size() != 1) {
728 for (unsigned OtherVal : ValueMappingIt->second) {
729 if (OtherVal == InstValB)
730 continue;
731 if (!ValueNumberMappingA.contains(OtherVal))
732 continue;
733 if (!ValueNumberMappingA[OtherVal].contains(InstValA))
734 continue;
735 ValueNumberMappingA[OtherVal].erase(InstValA);
736 }
737 ValueNumberMappingA.erase(ValueMappingIt);
738 std::tie(ValueMappingIt, WasInserted) = ValueNumberMappingA.insert(
739 std::make_pair(InstValA, DenseSet<unsigned>({InstValB})));
740 }
741
742 return true;
743}
744
747 // Get the basic blocks the label refers to.
748 BasicBlock *ABB = cast<BasicBlock>(A.OperVal);
749 BasicBlock *BBB = cast<BasicBlock>(B.OperVal);
750
751 // Get the basic blocks contained in each region.
752 DenseSet<BasicBlock *> BasicBlockA;
753 DenseSet<BasicBlock *> BasicBlockB;
754 A.IRSC.getBasicBlocks(BasicBlockA);
755 B.IRSC.getBasicBlocks(BasicBlockB);
756
757 // Determine if the block is contained in the region.
758 bool AContained = BasicBlockA.contains(ABB);
759 bool BContained = BasicBlockB.contains(BBB);
760
761 // Both blocks need to be contained in the region, or both need to be outside
762 // the region.
763 if (AContained != BContained)
764 return false;
765
766 // If both are contained, then we need to make sure that the relative
767 // distance to the target blocks are the same.
768 if (AContained)
769 return A.RelativeLocation == B.RelativeLocation;
770 return true;
771}
772
774 const IRSimilarityCandidate &B) {
777 return IRSimilarityCandidate::compareStructure(A, B, MappingA, MappingB);
778}
779
784
787 DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingA,
788 DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingB) {
789 if (A.getLength() != B.getLength())
790 return false;
791
792 if (A.ValueToNumber.size() != B.ValueToNumber.size())
793 return false;
794
795 iterator ItA = A.begin();
796 iterator ItB = B.begin();
797
798 // These ValueNumber Mapping sets create a create a mapping between the values
799 // in one candidate to values in the other candidate. If we create a set with
800 // one element, and that same element maps to the original element in the
801 // candidate we have a good mapping.
802
803 // Iterate over the instructions contained in each candidate
804 unsigned SectionLength = A.getStartIdx() + A.getLength();
805 for (unsigned Loc = A.getStartIdx(); Loc < SectionLength;
806 ItA++, ItB++, Loc++) {
807 // Make sure the instructions are similar to one another.
808 if (!isClose(*ItA, *ItB))
809 return false;
810
811 Instruction *IA = ItA->Inst;
812 Instruction *IB = ItB->Inst;
813
814 if (!ItA->Legal || !ItB->Legal)
815 return false;
816
817 // Get the operand sets for the instructions.
818 ArrayRef<Value *> OperValsA = ItA->OperVals;
819 ArrayRef<Value *> OperValsB = ItB->OperVals;
820
821 unsigned InstValA = A.ValueToNumber.find(IA)->second;
822 unsigned InstValB = B.ValueToNumber.find(IB)->second;
823
824 // Ensure that the mappings for the instructions exists.
825 if (!compareAssignmentMapping(InstValA, InstValB, ValueNumberMappingA,
826 ValueNumberMappingB))
827 return false;
828
829 if (!compareAssignmentMapping(InstValB, InstValA, ValueNumberMappingB,
830 ValueNumberMappingA))
831 return false;
832
833 // We have different paths for commutative instructions and non-commutative
834 // instructions since commutative instructions could allow multiple mappings
835 // to certain values.
836 if (IA->isCommutative() && !isa<FPMathOperator>(IA) &&
837 !isa<IntrinsicInst>(IA)) {
839 {A, OperValsA, ValueNumberMappingA},
840 {B, OperValsB, ValueNumberMappingB}))
841 return false;
842 continue;
843 }
844
845 // Handle the non-commutative cases.
847 {A, OperValsA, ValueNumberMappingA},
848 {B, OperValsB, ValueNumberMappingB}))
849 return false;
850
851 // Here we check that between two corresponding instructions,
852 // when referring to a basic block in the same region, the
853 // relative locations are the same. And, that the instructions refer to
854 // basic blocks outside the region in the same corresponding locations.
855
856 // We are able to make the assumption about blocks outside of the region
857 // since the target block labels are considered values and will follow the
858 // same number matching that we defined for the other instructions in the
859 // region. So, at this point, in each location we target a specific block
860 // outside the region, we are targeting a corresponding block in each
861 // analagous location in the region we are comparing to.
862 if (!(isa<BranchInst>(IA) && isa<BranchInst>(IB)) &&
863 !(isa<PHINode>(IA) && isa<PHINode>(IB)))
864 continue;
865
866 SmallVector<int, 4> &RelBlockLocsA = ItA->RelativeBlockLocations;
867 SmallVector<int, 4> &RelBlockLocsB = ItB->RelativeBlockLocations;
868 ArrayRef<Value *> ABL = ItA->getBlockOperVals();
869 ArrayRef<Value *> BBL = ItB->getBlockOperVals();
870
871 // Check to make sure that the number of operands, and branching locations
872 // between BranchInsts is the same.
873 if (RelBlockLocsA.size() != RelBlockLocsB.size() &&
874 ABL.size() != BBL.size())
875 return false;
876
877 assert(RelBlockLocsA.size() == ABL.size() &&
878 "Block information vectors not the same size.");
879 assert(RelBlockLocsB.size() == BBL.size() &&
880 "Block information vectors not the same size.");
881
882 ZippedRelativeLocationsT ZippedRelativeLocations =
883 zip(RelBlockLocsA, RelBlockLocsB, ABL, BBL);
884 if (any_of(ZippedRelativeLocations,
885 [&A, &B](std::tuple<int, int, Value *, Value *> R) {
887 {A, std::get<0>(R), std::get<2>(R)},
888 {B, std::get<1>(R), std::get<3>(R)});
889 }))
890 return false;
891 }
892 return true;
893}
894
896 const IRSimilarityCandidate &B) {
897 auto DoesOverlap = [](const IRSimilarityCandidate &X,
898 const IRSimilarityCandidate &Y) {
899 // Check:
900 // XXXXXX X starts before Y ends
901 // YYYYYYY Y starts after X starts
902 return X.StartIdx <= Y.getEndIdx() && Y.StartIdx >= X.StartIdx;
903 };
904
905 return DoesOverlap(A, B) || DoesOverlap(B, A);
906}
907
908void IRSimilarityIdentifier::populateMapper(
909 Module &M, std::vector<IRInstructionData *> &InstrList,
910 std::vector<unsigned> &IntegerMapping) {
911
912 std::vector<IRInstructionData *> InstrListForModule;
913 std::vector<unsigned> IntegerMappingForModule;
914 // Iterate over the functions in the module to map each Instruction in each
915 // BasicBlock to an unsigned integer.
916 Mapper.initializeForBBs(M);
917
918 for (Function &F : M) {
919
920 if (F.empty())
921 continue;
922
923 for (BasicBlock &BB : F) {
924
925 // BB has potential to have similarity since it has a size greater than 2
926 // and can therefore match other regions greater than 2. Map it to a list
927 // of unsigned integers.
928 Mapper.convertToUnsignedVec(BB, InstrListForModule,
929 IntegerMappingForModule);
930 }
931
932 BasicBlock::iterator It = F.begin()->end();
933 Mapper.mapToIllegalUnsigned(It, IntegerMappingForModule, InstrListForModule,
934 true);
935 if (InstrListForModule.size() > 0)
936 Mapper.IDL->push_back(*InstrListForModule.back());
937 }
938
939 // Insert the InstrListForModule at the end of the overall InstrList so that
940 // we can have a long InstrList for the entire set of Modules being analyzed.
941 llvm::append_range(InstrList, InstrListForModule);
942 // Do the same as above, but for IntegerMapping.
943 llvm::append_range(IntegerMapping, IntegerMappingForModule);
944}
945
946void IRSimilarityIdentifier::populateMapper(
947 ArrayRef<std::unique_ptr<Module>> &Modules,
948 std::vector<IRInstructionData *> &InstrList,
949 std::vector<unsigned> &IntegerMapping) {
950
951 // Iterate over, and map the instructions in each module.
952 for (const std::unique_ptr<Module> &M : Modules)
953 populateMapper(*M, InstrList, IntegerMapping);
954}
955
956/// From a repeated subsequence, find all the different instances of the
957/// subsequence from the \p InstrList, and create an IRSimilarityCandidate from
958/// the IRInstructionData in subsequence.
959///
960/// \param [in] Mapper - The instruction mapper for basic correctness checks.
961/// \param [in] InstrList - The vector that holds the instruction data.
962/// \param [in] IntegerMapping - The vector that holds the mapped integers.
963/// \param [out] CandsForRepSubstring - The vector to store the generated
964/// IRSimilarityCandidates.
966 const IRInstructionMapper& Mapper, std::vector<IRInstructionData *> &InstrList,
967 std::vector<unsigned> &IntegerMapping, SuffixTree::RepeatedSubstring &RS,
968 std::vector<IRSimilarityCandidate> &CandsForRepSubstring) {
969
970 unsigned StringLen = RS.Length;
971 if (StringLen < 2)
972 return;
973
974 // Create an IRSimilarityCandidate for instance of this subsequence \p RS.
975 for (const unsigned &StartIdx : RS.StartIndices) {
976 unsigned EndIdx = StartIdx + StringLen - 1;
977
978 // Check that this subsequence does not contain an illegal instruction.
979 bool ContainsIllegal = false;
980 for (unsigned CurrIdx = StartIdx; CurrIdx <= EndIdx; CurrIdx++) {
981 unsigned Key = IntegerMapping[CurrIdx];
982 if (Key > Mapper.IllegalInstrNumber) {
983 ContainsIllegal = true;
984 break;
985 }
986 }
987
988 // If we have an illegal instruction, we should not create an
989 // IRSimilarityCandidate for this region.
990 if (ContainsIllegal)
991 continue;
992
993 // We are getting iterators to the instructions in this region of code
994 // by advancing the start and end indices from the start of the
995 // InstrList.
996 std::vector<IRInstructionData *>::iterator StartIt = InstrList.begin();
997 std::advance(StartIt, StartIdx);
998 std::vector<IRInstructionData *>::iterator EndIt = InstrList.begin();
999 std::advance(EndIt, EndIdx);
1000
1001 CandsForRepSubstring.emplace_back(StartIdx, StringLen, *StartIt, *EndIt);
1002 }
1003}
1004
1006 IRSimilarityCandidate &SourceCand,
1007 DenseMap<unsigned, DenseSet<unsigned>> &ToSourceMapping,
1008 DenseMap<unsigned, DenseSet<unsigned>> &FromSourceMapping) {
1009 assert(SourceCand.CanonNumToNumber.size() != 0 &&
1010 "Base canonical relationship is empty!");
1011 assert(SourceCand.NumberToCanonNum.size() != 0 &&
1012 "Base canonical relationship is empty!");
1013
1014 assert(CanonNumToNumber.size() == 0 && "Canonical Relationship is non-empty");
1015 assert(NumberToCanonNum.size() == 0 && "Canonical Relationship is non-empty");
1016
1017 DenseSet<unsigned> UsedGVNs;
1018 // Iterate over the mappings provided from this candidate to SourceCand. We
1019 // are then able to map the GVN in this candidate to the same canonical number
1020 // given to the corresponding GVN in SourceCand.
1021 for (std::pair<unsigned, DenseSet<unsigned>> &GVNMapping : ToSourceMapping) {
1022 unsigned SourceGVN = GVNMapping.first;
1023
1024 assert(GVNMapping.second.size() != 0 && "Possible GVNs is 0!");
1025
1026 unsigned ResultGVN;
1027 // We need special handling if we have more than one potential value. This
1028 // means that there are at least two GVNs that could correspond to this GVN.
1029 // This could lead to potential swapping later on, so we make a decision
1030 // here to ensure a one-to-one mapping.
1031 if (GVNMapping.second.size() > 1) {
1032 bool Found = false;
1033 for (unsigned Val : GVNMapping.second) {
1034 // We make sure the target value number hasn't already been reserved.
1035 if (UsedGVNs.contains(Val))
1036 continue;
1037
1038 // We make sure that the opposite mapping is still consistent.
1040 FromSourceMapping.find(Val);
1041
1042 if (!It->second.contains(SourceGVN))
1043 continue;
1044
1045 // We pick the first item that satisfies these conditions.
1046 Found = true;
1047 ResultGVN = Val;
1048 break;
1049 }
1050
1051 assert(Found && "Could not find matching value for source GVN");
1052 (void)Found;
1053
1054 } else
1055 ResultGVN = *GVNMapping.second.begin();
1056
1057 // Whatever GVN is found, we mark it as used.
1058 UsedGVNs.insert(ResultGVN);
1059
1060 unsigned CanonNum = *SourceCand.getCanonicalNum(ResultGVN);
1061 CanonNumToNumber.insert(std::make_pair(CanonNum, SourceGVN));
1062 NumberToCanonNum.insert(std::make_pair(SourceGVN, CanonNum));
1063 }
1064
1066 getBasicBlocks(BBSet);
1067 // Find canonical numbers for the BasicBlocks in the current candidate.
1068 // This is done by finding the corresponding value for the first instruction
1069 // in the block in the current candidate, finding the matching value in the
1070 // source candidate. Then by finding the parent of this value, use the
1071 // canonical number of the block in the source candidate for the canonical
1072 // number in the current candidate.
1073 for (BasicBlock *BB : BBSet) {
1074 unsigned BBGVNForCurrCand = ValueToNumber.find(BB)->second;
1075
1076 // We can skip the BasicBlock if the canonical numbering has already been
1077 // found in a separate instruction.
1078 if (NumberToCanonNum.contains(BBGVNForCurrCand))
1079 continue;
1080
1081 // If the basic block is the starting block, then the shared instruction may
1082 // not be the first instruction in the block, it will be the first
1083 // instruction in the similarity region.
1084 Value *FirstOutlineInst = BB == getStartBB()
1086 : &*BB->instructionsWithoutDebug().begin();
1087
1088 unsigned FirstInstGVN = *getGVN(FirstOutlineInst);
1089 unsigned FirstInstCanonNum = *getCanonicalNum(FirstInstGVN);
1090 unsigned SourceGVN = *SourceCand.fromCanonicalNum(FirstInstCanonNum);
1091 Value *SourceV = *SourceCand.fromGVN(SourceGVN);
1092 BasicBlock *SourceBB = cast<Instruction>(SourceV)->getParent();
1093 unsigned SourceBBGVN = *SourceCand.getGVN(SourceBB);
1094 unsigned SourceCanonBBGVN = *SourceCand.getCanonicalNum(SourceBBGVN);
1095 CanonNumToNumber.insert(std::make_pair(SourceCanonBBGVN, BBGVNForCurrCand));
1096 NumberToCanonNum.insert(std::make_pair(BBGVNForCurrCand, SourceCanonBBGVN));
1097 }
1098}
1099
1101 IRSimilarityCandidate &SourceCand, IRSimilarityCandidate &SourceCandLarge,
1102 IRSimilarityCandidate &TargetCandLarge) {
1103 assert(!SourceCand.CanonNumToNumber.empty() &&
1104 "Canonical Relationship is non-empty");
1105 assert(!SourceCand.NumberToCanonNum.empty() &&
1106 "Canonical Relationship is non-empty");
1107
1108 assert(!SourceCandLarge.CanonNumToNumber.empty() &&
1109 "Canonical Relationship is non-empty");
1110 assert(!SourceCandLarge.NumberToCanonNum.empty() &&
1111 "Canonical Relationship is non-empty");
1112
1113 assert(!TargetCandLarge.CanonNumToNumber.empty() &&
1114 "Canonical Relationship is non-empty");
1115 assert(!TargetCandLarge.NumberToCanonNum.empty() &&
1116 "Canonical Relationship is non-empty");
1117
1118 assert(CanonNumToNumber.empty() && "Canonical Relationship is non-empty");
1119 assert(NumberToCanonNum.empty() && "Canonical Relationship is non-empty");
1120
1121 // We're going to use the larger candidates as a "bridge" to create the
1122 // canonical number for the target candidate since we have idetified two
1123 // candidates as subsequences of larger sequences, and therefore must be
1124 // structurally similar.
1125 for (std::pair<Value *, unsigned> &ValueNumPair : ValueToNumber) {
1126 Value *CurrVal = ValueNumPair.first;
1127 unsigned TargetCandGVN = ValueNumPair.second;
1128
1129 // Find the numbering in the large candidate that surrounds the
1130 // current candidate.
1131 std::optional<unsigned> OLargeTargetGVN = TargetCandLarge.getGVN(CurrVal);
1132 assert(OLargeTargetGVN.has_value() && "GVN not found for Value");
1133
1134 // Get the canonical numbering in the large target candidate.
1135 std::optional<unsigned> OTargetCandCanon =
1136 TargetCandLarge.getCanonicalNum(OLargeTargetGVN.value());
1137 assert(OTargetCandCanon.has_value() &&
1138 "Canononical Number not found for GVN");
1139
1140 // Get the GVN in the large source candidate from the canonical numbering.
1141 std::optional<unsigned> OLargeSourceGVN =
1142 SourceCandLarge.fromCanonicalNum(OTargetCandCanon.value());
1143 assert(OLargeSourceGVN.has_value() &&
1144 "GVN Number not found for Canonical Number");
1145
1146 // Get the Value from the GVN in the large source candidate.
1147 std::optional<Value *> OLargeSourceV =
1148 SourceCandLarge.fromGVN(OLargeSourceGVN.value());
1149 assert(OLargeSourceV.has_value() && "Value not found for GVN");
1150
1151 // Get the GVN number for the Value in the source candidate.
1152 std::optional<unsigned> OSourceGVN =
1153 SourceCand.getGVN(OLargeSourceV.value());
1154 assert(OSourceGVN.has_value() && "GVN Number not found for Value");
1155
1156 // Get the canonical numbering from the GVN/
1157 std::optional<unsigned> OSourceCanon =
1158 SourceCand.getCanonicalNum(OSourceGVN.value());
1159 assert(OSourceCanon.has_value() && "Canon Number not found for GVN");
1160
1161 // Insert the canonical numbering and GVN pair into their respective
1162 // mappings.
1163 CanonNumToNumber.insert(
1164 std::make_pair(OSourceCanon.value(), TargetCandGVN));
1165 NumberToCanonNum.insert(
1166 std::make_pair(TargetCandGVN, OSourceCanon.value()));
1167 }
1168}
1169
1171 IRSimilarityCandidate &CurrCand) {
1172 assert(CurrCand.CanonNumToNumber.size() == 0 &&
1173 "Canonical Relationship is non-empty");
1174 assert(CurrCand.NumberToCanonNum.size() == 0 &&
1175 "Canonical Relationship is non-empty");
1176
1177 unsigned CanonNum = 0;
1178 // Iterate over the value numbers found, the order does not matter in this
1179 // case.
1180 for (std::pair<unsigned, Value *> &NumToVal : CurrCand.NumberToValue) {
1181 CurrCand.NumberToCanonNum.insert(std::make_pair(NumToVal.first, CanonNum));
1182 CurrCand.CanonNumToNumber.insert(std::make_pair(CanonNum, NumToVal.first));
1183 CanonNum++;
1184 }
1185}
1186
1187/// Look for larger IRSimilarityCandidates From the previously matched
1188/// IRSimilarityCandidates that fully contain \p CandA or \p CandB. If there is
1189/// an overlap, return a pair of structurally similar, larger
1190/// IRSimilarityCandidates.
1191///
1192/// \param [in] CandA - The first candidate we are trying to determine the
1193/// structure of.
1194/// \param [in] CandB - The second candidate we are trying to determine the
1195/// structure of.
1196/// \param [in] IndexToIncludedCand - Mapping of index of the an instruction in
1197/// a circuit to the IRSimilarityCandidates that include this instruction.
1198/// \param [in] CandToOverallGroup - Mapping of IRSimilarityCandidate to a
1199/// number representing the structural group assigned to it.
1200static std::optional<
1201 std::pair<IRSimilarityCandidate *, IRSimilarityCandidate *>>
1204 DenseMap<unsigned, DenseSet<IRSimilarityCandidate *>> &IndexToIncludedCand,
1206 DenseMap<unsigned, IRSimilarityCandidate *> IncludedGroupAndCandA;
1207 DenseMap<unsigned, IRSimilarityCandidate *> IncludedGroupAndCandB;
1208 DenseSet<unsigned> IncludedGroupsA;
1209 DenseSet<unsigned> IncludedGroupsB;
1210
1211 // Find the overall similarity group numbers that fully contain the candidate,
1212 // and record the larger candidate for each group.
1213 auto IdxToCandidateIt = IndexToIncludedCand.find(CandA.getStartIdx());
1214 std::optional<std::pair<IRSimilarityCandidate *, IRSimilarityCandidate *>>
1215 Result;
1216
1217 unsigned CandAStart = CandA.getStartIdx();
1218 unsigned CandAEnd = CandA.getEndIdx();
1219 unsigned CandBStart = CandB.getStartIdx();
1220 unsigned CandBEnd = CandB.getEndIdx();
1221 if (IdxToCandidateIt == IndexToIncludedCand.end())
1222 return Result;
1223 for (IRSimilarityCandidate *MatchedCand : IdxToCandidateIt->second) {
1224 if (MatchedCand->getStartIdx() > CandAStart ||
1225 (MatchedCand->getEndIdx() < CandAEnd))
1226 continue;
1227 unsigned GroupNum = CandToGroup.find(MatchedCand)->second;
1228 IncludedGroupAndCandA.insert(std::make_pair(GroupNum, MatchedCand));
1229 IncludedGroupsA.insert(GroupNum);
1230 }
1231
1232 // Find the overall similarity group numbers that fully contain the next
1233 // candidate, and record the larger candidate for each group.
1234 IdxToCandidateIt = IndexToIncludedCand.find(CandBStart);
1235 if (IdxToCandidateIt == IndexToIncludedCand.end())
1236 return Result;
1237 for (IRSimilarityCandidate *MatchedCand : IdxToCandidateIt->second) {
1238 if (MatchedCand->getStartIdx() > CandBStart ||
1239 MatchedCand->getEndIdx() < CandBEnd)
1240 continue;
1241 unsigned GroupNum = CandToGroup.find(MatchedCand)->second;
1242 IncludedGroupAndCandB.insert(std::make_pair(GroupNum, MatchedCand));
1243 IncludedGroupsB.insert(GroupNum);
1244 }
1245
1246 // Find the intersection between the two groups, these are the groups where
1247 // the larger candidates exist.
1248 set_intersect(IncludedGroupsA, IncludedGroupsB);
1249
1250 // If there is no intersection between the sets, then we cannot determine
1251 // whether or not there is a match.
1252 if (IncludedGroupsA.empty())
1253 return Result;
1254
1255 // Create a pair that contains the larger candidates.
1256 auto ItA = IncludedGroupAndCandA.find(*IncludedGroupsA.begin());
1257 auto ItB = IncludedGroupAndCandB.find(*IncludedGroupsA.begin());
1258 Result = std::make_pair(ItA->second, ItB->second);
1259 return Result;
1260}
1261
1262/// From the list of IRSimilarityCandidates, perform a comparison between each
1263/// IRSimilarityCandidate to determine if there are overlapping
1264/// IRInstructionData, or if they do not have the same structure.
1265///
1266/// \param [in] CandsForRepSubstring - The vector containing the
1267/// IRSimilarityCandidates.
1268/// \param [out] StructuralGroups - the mapping of unsigned integers to vector
1269/// of IRSimilarityCandidates where each of the IRSimilarityCandidates in the
1270/// vector are structurally similar to one another.
1271/// \param [in] IndexToIncludedCand - Mapping of index of the an instruction in
1272/// a circuit to the IRSimilarityCandidates that include this instruction.
1273/// \param [in] CandToOverallGroup - Mapping of IRSimilarityCandidate to a
1274/// number representing the structural group assigned to it.
1276 std::vector<IRSimilarityCandidate> &CandsForRepSubstring,
1277 DenseMap<unsigned, SimilarityGroup> &StructuralGroups,
1278 DenseMap<unsigned, DenseSet<IRSimilarityCandidate *>> &IndexToIncludedCand,
1280 ) {
1281 std::vector<IRSimilarityCandidate>::iterator CandIt, CandEndIt, InnerCandIt,
1282 InnerCandEndIt;
1283
1284 // IRSimilarityCandidates each have a structure for operand use. It is
1285 // possible that two instances of the same subsequences have different
1286 // structure. Each type of structure found is assigned a number. This
1287 // DenseMap maps an IRSimilarityCandidate to which type of similarity
1288 // discovered it fits within.
1290
1291 // Find the compatibility from each candidate to the others to determine
1292 // which candidates overlap and which have the same structure by mapping
1293 // each structure to a different group.
1294 bool SameStructure;
1295 bool Inserted;
1296 unsigned CurrentGroupNum = 0;
1297 unsigned OuterGroupNum;
1301
1302 // Iterate over the candidates to determine its structural and overlapping
1303 // compatibility with other instructions
1304 DenseMap<unsigned, DenseSet<unsigned>> ValueNumberMappingA;
1305 DenseMap<unsigned, DenseSet<unsigned>> ValueNumberMappingB;
1306 for (CandIt = CandsForRepSubstring.begin(),
1307 CandEndIt = CandsForRepSubstring.end();
1308 CandIt != CandEndIt; CandIt++) {
1309
1310 // Determine if it has an assigned structural group already.
1311 CandToGroupIt = CandToGroup.find(&*CandIt);
1312 if (CandToGroupIt == CandToGroup.end()) {
1313 // If not, we assign it one, and add it to our mapping.
1314 std::tie(CandToGroupIt, Inserted) =
1315 CandToGroup.insert(std::make_pair(&*CandIt, CurrentGroupNum++));
1316 }
1317
1318 // Get the structural group number from the iterator.
1319 OuterGroupNum = CandToGroupIt->second;
1320
1321 // Check if we already have a list of IRSimilarityCandidates for the current
1322 // structural group. Create one if one does not exist.
1323 CurrentGroupPair = StructuralGroups.find(OuterGroupNum);
1324 if (CurrentGroupPair == StructuralGroups.end()) {
1326 std::tie(CurrentGroupPair, Inserted) = StructuralGroups.insert(
1327 std::make_pair(OuterGroupNum, SimilarityGroup({*CandIt})));
1328 }
1329
1330 // Iterate over the IRSimilarityCandidates following the current
1331 // IRSimilarityCandidate in the list to determine whether the two
1332 // IRSimilarityCandidates are compatible. This is so we do not repeat pairs
1333 // of IRSimilarityCandidates.
1334 for (InnerCandIt = std::next(CandIt),
1335 InnerCandEndIt = CandsForRepSubstring.end();
1336 InnerCandIt != InnerCandEndIt; InnerCandIt++) {
1337
1338 // We check if the inner item has a group already, if it does, we skip it.
1339 CandToGroupItInner = CandToGroup.find(&*InnerCandIt);
1340 if (CandToGroupItInner != CandToGroup.end())
1341 continue;
1342
1343 // Check if we have found structural similarity between two candidates
1344 // that fully contains the first and second candidates.
1345 std::optional<std::pair<IRSimilarityCandidate *, IRSimilarityCandidate *>>
1346 LargerPair = CheckLargerCands(
1347 *CandIt, *InnerCandIt, IndexToIncludedCand, CandToOverallGroup);
1348
1349 // If a pair was found, it means that we can assume that these smaller
1350 // substrings are also structurally similar. Use the larger candidates to
1351 // determine the canonical mapping between the two sections.
1352 if (LargerPair.has_value()) {
1353 SameStructure = true;
1354 InnerCandIt->createCanonicalRelationFrom(
1355 *CandIt, *LargerPair.value().first, *LargerPair.value().second);
1356 CandToGroup.insert(std::make_pair(&*InnerCandIt, OuterGroupNum));
1357 CurrentGroupPair->second.push_back(*InnerCandIt);
1358 continue;
1359 }
1360
1361 // Otherwise we determine if they have the same structure and add it to
1362 // vector if they match.
1363 ValueNumberMappingA.clear();
1364 ValueNumberMappingB.clear();
1366 *CandIt, *InnerCandIt, ValueNumberMappingA, ValueNumberMappingB);
1367 if (!SameStructure)
1368 continue;
1369
1370 InnerCandIt->createCanonicalRelationFrom(*CandIt, ValueNumberMappingA,
1371 ValueNumberMappingB);
1372 CandToGroup.insert(std::make_pair(&*InnerCandIt, OuterGroupNum));
1373 CurrentGroupPair->second.push_back(*InnerCandIt);
1374 }
1375 }
1376}
1377
1378void IRSimilarityIdentifier::findCandidates(
1379 std::vector<IRInstructionData *> &InstrList,
1380 std::vector<unsigned> &IntegerMapping) {
1381 SuffixTree ST(IntegerMapping);
1382
1383 std::vector<IRSimilarityCandidate> CandsForRepSubstring;
1384 std::vector<SimilarityGroup> NewCandidateGroups;
1385
1386 DenseMap<unsigned, SimilarityGroup> StructuralGroups;
1389
1390 // Iterate over the subsequences found by the Suffix Tree to create
1391 // IRSimilarityCandidates for each repeated subsequence and determine which
1392 // instances are structurally similar to one another.
1393
1394 // Sort the suffix tree from longest substring to shortest.
1395 std::vector<SuffixTree::RepeatedSubstring> RSes;
1396 for (SuffixTree::RepeatedSubstring &RS : ST)
1397 RSes.push_back(RS);
1398
1400 const SuffixTree::RepeatedSubstring &RHS) {
1401 return LHS.Length > RHS.Length;
1402 });
1403 for (SuffixTree::RepeatedSubstring &RS : RSes) {
1404 createCandidatesFromSuffixTree(Mapper, InstrList, IntegerMapping, RS,
1405 CandsForRepSubstring);
1406
1407 if (CandsForRepSubstring.size() < 2)
1408 continue;
1409
1410 findCandidateStructures(CandsForRepSubstring, StructuralGroups,
1411 IndexToIncludedCand, CandToGroup);
1412 for (std::pair<unsigned, SimilarityGroup> &Group : StructuralGroups) {
1413 // We only add the group if it contains more than one
1414 // IRSimilarityCandidate. If there is only one, that means there is no
1415 // other repeated subsequence with the same structure.
1416 if (Group.second.size() > 1) {
1417 SimilarityCandidates->push_back(Group.second);
1418 // Iterate over each candidate in the group, and add an entry for each
1419 // instruction included with a mapping to a set of
1420 // IRSimilarityCandidates that include that instruction.
1421 for (IRSimilarityCandidate &IRCand : SimilarityCandidates->back()) {
1422 for (unsigned Idx = IRCand.getStartIdx(), Edx = IRCand.getEndIdx();
1423 Idx <= Edx; ++Idx)
1424 IndexToIncludedCand[Idx].insert(&IRCand);
1425 // Add mapping of candidate to the overall similarity group number.
1426 CandToGroup.insert(
1427 std::make_pair(&IRCand, SimilarityCandidates->size() - 1));
1428 }
1429 }
1430 }
1431
1432 CandsForRepSubstring.clear();
1433 StructuralGroups.clear();
1434 NewCandidateGroups.clear();
1435 }
1436}
1437
1439 ArrayRef<std::unique_ptr<Module>> Modules) {
1441
1442 std::vector<IRInstructionData *> InstrList;
1443 std::vector<unsigned> IntegerMapping;
1444 Mapper.InstClassifier.EnableBranches = this->EnableBranches;
1445 Mapper.InstClassifier.EnableIndirectCalls = EnableIndirectCalls;
1446 Mapper.EnableMatchCallsByName = EnableMatchingCallsByName;
1447 Mapper.InstClassifier.EnableIntrinsics = EnableIntrinsics;
1448 Mapper.InstClassifier.EnableMustTailCalls = EnableMustTailCalls;
1449
1450 populateMapper(Modules, InstrList, IntegerMapping);
1451 findCandidates(InstrList, IntegerMapping);
1452
1453 return *SimilarityCandidates;
1454}
1455
1458 Mapper.InstClassifier.EnableBranches = this->EnableBranches;
1459 Mapper.InstClassifier.EnableIndirectCalls = EnableIndirectCalls;
1460 Mapper.EnableMatchCallsByName = EnableMatchingCallsByName;
1461 Mapper.InstClassifier.EnableIntrinsics = EnableIntrinsics;
1462 Mapper.InstClassifier.EnableMustTailCalls = EnableMustTailCalls;
1463
1464 std::vector<IRInstructionData *> InstrList;
1465 std::vector<unsigned> IntegerMapping;
1466
1467 populateMapper(M, InstrList, IntegerMapping);
1468 findCandidates(InstrList, IntegerMapping);
1469
1470 return *SimilarityCandidates;
1471}
1472
1474 "ir-similarity-identifier", false, true)
1475
1477 : ModulePass(ID) {
1480}
1481
1485 false));
1486 return false;
1487}
1488
1490 IRSI.reset();
1491 return false;
1492}
1493
1495 IRSI->findSimilarity(M);
1496 return false;
1497}
1498
1499AnalysisKey IRSimilarityAnalysis::Key;
1504 false);
1505 IRSI.findSimilarity(M);
1506 return IRSI;
1507}
1508
1512 std::optional<SimilarityGroupList> &SimilarityCandidatesOpt =
1513 IRSI.getSimilarity();
1514
1515 for (std::vector<IRSimilarityCandidate> &CandVec : *SimilarityCandidatesOpt) {
1516 OS << CandVec.size() << " candidates of length "
1517 << CandVec.begin()->getLength() << ". Found in: \n";
1518 for (IRSimilarityCandidate &Cand : CandVec) {
1519 OS << " Function: " << Cand.front()->Inst->getFunction()->getName().str()
1520 << ", Basic Block: ";
1521 if (Cand.front()->Inst->getParent()->getName().str() == "")
1522 OS << "(unnamed)";
1523 else
1524 OS << Cand.front()->Inst->getParent()->getName().str();
1525 OS << "\n Start Instruction: ";
1526 Cand.frontInstruction()->print(OS);
1527 OS << "\n End Instruction: ";
1528 Cand.backInstruction()->print(OS);
1529 OS << "\n";
1530 }
1531 }
1532
1533 return PreservedAnalyses::all();
1534}
1535
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file defines the DenseMap class.
bool End
Definition: ELF_riscv.cpp:480
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
Hexagon Common GEP
bool checkNumberingAndReplace(DenseMap< unsigned, DenseSet< unsigned > > &CurrentSrcTgtNumberMapping, unsigned SourceArgVal, unsigned TargetArgVal)
Determine if operand number TargetArgVal is in the current mapping set for operand number SourceArgVa...
detail::zippy< detail::zip_shortest, SmallVector< int, 4 > &, SmallVector< int, 4 > &, ArrayRef< Value * > &, ArrayRef< Value * > & > ZippedRelativeLocationsT
static void findCandidateStructures(std::vector< IRSimilarityCandidate > &CandsForRepSubstring, DenseMap< unsigned, SimilarityGroup > &StructuralGroups, DenseMap< unsigned, DenseSet< IRSimilarityCandidate * > > &IndexToIncludedCand, DenseMap< IRSimilarityCandidate *, unsigned > &CandToOverallGroup)
From the list of IRSimilarityCandidates, perform a comparison between each IRSimilarityCandidate to d...
static void createCandidatesFromSuffixTree(const IRInstructionMapper &Mapper, std::vector< IRInstructionData * > &InstrList, std::vector< unsigned > &IntegerMapping, SuffixTree::RepeatedSubstring &RS, std::vector< IRSimilarityCandidate > &CandsForRepSubstring)
From a repeated subsequence, find all the different instances of the subsequence from the InstrList,...
static bool checkNumberingAndReplaceCommutative(const DenseMap< Value *, unsigned > &SourceValueToNumberMapping, DenseMap< unsigned, DenseSet< unsigned > > &CurrentSrcTgtNumberMapping, ArrayRef< Value * > &SourceOperands, DenseSet< unsigned > &TargetValueNumbers)
Determine if one or more of the assigned global value numbers for the operands in TargetValueNumbers ...
static std::optional< std::pair< IRSimilarityCandidate *, IRSimilarityCandidate * > > CheckLargerCands(IRSimilarityCandidate &CandA, IRSimilarityCandidate &CandB, DenseMap< unsigned, DenseSet< IRSimilarityCandidate * > > &IndexToIncludedCand, DenseMap< IRSimilarityCandidate *, unsigned > &CandToGroup)
Look for larger IRSimilarityCandidates From the previously matched IRSimilarityCandidates that fully ...
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
uint64_t IntrinsicInst * II
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
if(PassOpts->AAPipeline)
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:38
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
raw_pwrite_stream & OS
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition: Value.cpp:469
This file defines generic set operations that may be used on set's of different types,...
Value * RHS
Value * LHS
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:410
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:168
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
iterator end()
Definition: BasicBlock.h:461
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:448
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:219
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:177
Conditional or Unconditional Branch instruction.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Definition: InstrTypes.h:1349
bool isIndirectCall() const
Return true if the callsite is an indirect call.
This class represents a function call, abstracting a target machine's calling convention.
This class is the base class for the comparison instructions.
Definition: InstrTypes.h:661
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:673
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition: InstrTypes.h:677
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition: InstrTypes.h:678
@ ICMP_UGE
unsigned greater or equal
Definition: InstrTypes.h:697
@ ICMP_UGT
unsigned greater than
Definition: InstrTypes.h:696
@ ICMP_SGT
signed greater than
Definition: InstrTypes.h:700
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
Definition: InstrTypes.h:685
@ ICMP_SGE
signed greater or equal
Definition: InstrTypes.h:701
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
Definition: InstrTypes.h:686
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition: InstrTypes.h:825
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:763
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:156
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Definition: DenseMap.h:226
bool erase(const KeyT &Val)
Definition: DenseMap.h:321
unsigned size() const
Definition: DenseMap.h:99
bool empty() const
Definition: DenseMap.h:98
iterator begin()
Definition: DenseMap.h:75
iterator end()
Definition: DenseMap.h:84
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Definition: DenseMap.h:147
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:211
void swap(DenseMap &RHS)
Definition: DenseMap.h:769
Implements a dense probed hash-table based set.
Definition: DenseSet.h:278
Class to represent function types.
Definition: DerivedTypes.h:105
ArrayRef< Type * > params() const
Definition: DerivedTypes.h:132
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
An analysis pass that runs and returns the IRSimilarityIdentifier run on the Module.
Result run(Module &M, ModuleAnalysisManager &)
An analysis pass based on legacy pass manager that runs and returns IRSimilarityIdentifier run on the...
bool doInitialization(Module &M) override
doInitialization - Virtual method overridden by subclasses to do any necessary initialization before ...
bool doFinalization(Module &M) override
doFinalization - Virtual method overriden by subclasses to do any necessary clean up after all passes...
bool runOnModule(Module &M) override
runOnModule - Virtual method overriden by subclasses to process the module being operated on.
This is a class that wraps a range of IRInstructionData from one point to another in the vector of IR...
void createCanonicalRelationFrom(IRSimilarityCandidate &SourceCand, DenseMap< unsigned, DenseSet< unsigned > > &ToSourceMapping, DenseMap< unsigned, DenseSet< unsigned > > &FromSourceMapping)
Create a mapping for the value numbering of the calling IRSimilarityCandidate, to a different separat...
static bool isSimilar(const IRSimilarityCandidate &A, const IRSimilarityCandidate &B)
std::optional< unsigned > getGVN(Value *V)
Finds the positive number associated with V if it has been mapped.
static bool compareAssignmentMapping(const unsigned InstValA, const unsigned &InstValB, DenseMap< unsigned, DenseSet< unsigned > > &ValueNumberMappingA, DenseMap< unsigned, DenseSet< unsigned > > &ValueNumberMappingB)
Compare the GVN of the assignment value in corresponding instructions in IRSimilarityCandidates A and...
void getBasicBlocks(DenseSet< BasicBlock * > &BBSet) const
std::optional< Value * > fromGVN(unsigned Num)
Finds the Value associate with Num if it exists.
static bool checkRelativeLocations(RelativeLocMapping A, RelativeLocMapping B)
Compare the relative locations in A and B and check that the distances match if both locations are co...
static bool compareStructure(const IRSimilarityCandidate &A, const IRSimilarityCandidate &B)
static void createCanonicalMappingFor(IRSimilarityCandidate &CurrCand)
Create a mapping from the value numbering to a different separate set of numbers.
static bool overlap(const IRSimilarityCandidate &A, const IRSimilarityCandidate &B)
Compare the start and end indices of the two IRSimilarityCandidates for whether they overlap.
IRSimilarityCandidate(unsigned StartIdx, unsigned Len, IRInstructionData *FirstInstIt, IRInstructionData *LastInstIt)
std::optional< unsigned > getCanonicalNum(unsigned N)
Find the canonical number from the global value number N stored in the candidate.
std::optional< unsigned > fromCanonicalNum(unsigned N)
Find the global value number from the canonical number N stored in the candidate.
static bool compareNonCommutativeOperandMapping(OperandMapping A, OperandMapping B)
Compare the operands in A and B and check that the current mapping of global value numbers from A to ...
static bool compareCommutativeOperandMapping(OperandMapping A, OperandMapping B)
Compare the operands in A and B and check that the current mapping of global value numbers from A to ...
This class puts all the pieces of the IRInstructionData, IRInstructionMapper, IRSimilarityCandidate t...
SimilarityGroupList & findSimilarity(ArrayRef< std::unique_ptr< Module > > Modules)
std::optional< SimilarityGroupList > & getSimilarity()
void visit(Iterator Start, Iterator End)
Definition: InstVisitor.h:87
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:48
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
Definition: Pass.h:251
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:111
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:117
size_t size() const
Definition: SmallVector.h:78
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:805
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
std::string str() const
str - Get the contents as an std::string.
Definition: StringRef.h:229
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
op_range operands()
Definition: User.h:288
LLVM Value Representation.
Definition: Value.h:74
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
std::pair< iterator, bool > insert(const ValueT &V)
Definition: DenseSet.h:213
iterator find(const_arg_type_t< ValueT > V)
Definition: DenseSet.h:187
size_type size() const
Definition: DenseSet.h:81
bool contains(const_arg_type_t< ValueT > V) const
Check if the set contains the given element.
Definition: DenseSet.h:193
const ParentTy * getParent() const
Definition: ilist_node.h:32
typename ilist_select_iterator_type< OptionsT::has_iterator_bits, OptionsT, false, false >::type iterator
Definition: simple_ilist.h:97
void push_back(reference Node)
Insert a node at the back; never copies.
Definition: simple_ilist.h:153
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
std::vector< SimilarityGroup > SimilarityGroupList
std::vector< IRSimilarityCandidate > SimilarityGroup
bool isClose(const IRInstructionData &A, const IRInstructionData &B)
Compare one IRInstructionData class to another IRInstructionData class for whether they are performin...
StringRef getName(ID id)
Return the LLVM name for an intrinsic, such as "llvm.ppc.altivec.lvx".
Definition: Intrinsics.cpp:46
bool isOverloaded(ID id)
Returns true if the intrinsic can be overloaded.
Definition: Intrinsics.cpp:606
@ ReallyHidden
Definition: CommandLine.h:138
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
Definition: STLExtras.h:854
void stable_sort(R &&Range)
Definition: STLExtras.h:2037
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1739
void set_intersect(S1Ty &S1, const S2Ty &S2)
set_intersect(A, B) - Compute A := A ^ B Identical to set_intersection, except that it works on set<>...
Definition: SetOperations.h:58
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition: STLExtras.h:2115
cl::opt< bool > DisableIndirectCalls("no-ir-sim-indirect-calls", cl::init(false), cl::ReallyHidden, cl::desc("disable outlining indirect calls."))
Definition: IROutliner.cpp:42
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1746
cl::opt< bool > DisableBranches("no-ir-sim-branch-matching", cl::init(false), cl::ReallyHidden, cl::desc("disable similarity matching, and outlining, " "across branches for debugging purposes."))
Definition: IROutliner.cpp:38
cl::opt< bool > DisableIntrinsics("no-ir-sim-intrinsics", cl::init(false), cl::ReallyHidden, cl::desc("Don't match or outline intrinsics"))
Definition: IROutliner.cpp:46
void initializeIRSimilarityIdentifierWrapperPassPass(PassRegistry &)
cl::opt< bool > MatchCallsByName("ir-sim-calls-by-name", cl::init(false), cl::ReallyHidden, cl::desc("only allow matching call instructions if the " "name and type signature match."))
A special type used by analysis passes to provide an address that identifies that particular analysis...
Definition: Analysis.h:28
An information struct used to provide DenseMap with the various necessary components for a given valu...
Definition: DenseMapInfo.h:52
This provides the utilities for hashing an Instruction to an unsigned integer.
StringRef getCalleeName() const
Get the callee name that the call instruction is using for hashing the instruction.
void initializeInstruction()
Fills data stuctures for IRInstructionData when it is constructed from a.
SmallVector< int, 4 > RelativeBlockLocations
This structure holds the distances of how far "ahead of" or "behind" the target blocks of a branch,...
std::optional< std::string > CalleeName
This is only relevant if we are wrapping a CallInst.
IRInstructionData(Instruction &I, bool Legality, IRInstructionDataList &IDL)
Gather the information that is difficult to gather for an Instruction, or is changed.
ArrayRef< Value * > getBlockOperVals()
Get the BasicBlock based operands for PHINodes and BranchInsts.
Instruction * Inst
The source Instruction that is being wrapped.
static CmpInst::Predicate predicateForConsistency(CmpInst *CI)
A function that swaps the predicates to their less than form if they are in a greater than form.
void setPHIPredecessors(DenseMap< BasicBlock *, unsigned > &BasicBlockToInteger)
For an IRInstructionData containing a PHINode, finds the relative distances from the incoming basic b...
SmallVector< Value *, 4 > OperVals
The values of the operands in the Instruction.
void setBranchSuccessors(DenseMap< BasicBlock *, unsigned > &BasicBlockToInteger)
For an IRInstructionData containing a branch, finds the relative distances from the source basic bloc...
void setCalleeName(bool MatchByName=true)
For an IRInstructionData containing a CallInst, set the function name appropriately.
CmpInst::Predicate getPredicate() const
Get the predicate that the compare instruction is using for hashing the instruction.
std::optional< CmpInst::Predicate > RevisedPredicate
This is only relevant if we are wrapping a CmpInst where we needed to change the predicate of a compa...
Helper struct for converting the Instructions in a Module into a vector of unsigned integers.
DenseMap< IRInstructionData *, unsigned, IRInstructionDataTraits > InstructionIntegerMap
Correspondence from IRInstructionData to unsigned integers.
SpecificBumpPtrAllocator< IRInstructionDataList > * IDLAllocator
This allocator pointer is in charge of creating the IRInstructionDataList so it is not deallocated un...
SpecificBumpPtrAllocator< IRInstructionData > * InstDataAllocator
This allocator pointer is in charge of holding on to the IRInstructionData so it is not deallocated u...
bool EnableMatchCallsByName
Marks whether we should use exact function names, as well as types to find similarity between calls.
unsigned LegalInstrNumber
The next available integer to assign to a legal Instruction to.
unsigned IllegalInstrNumber
The starting illegal instruction number to map to.
InstructionClassification InstClassifier
Maps an Instruction to a member of InstrType.
void initializeForBBs(Function &F, unsigned &BBNumber)
Assigns values to all the basic blocks in function F starting from integer BBNumber.
bool HaveLegalRange
Marks whether we have found a set of instructions that is long enough to be considered for similarity...
IRInstructionData * allocateIRInstructionData(Instruction &I, bool Legality, IRInstructionDataList &IDL)
Get an allocated IRInstructionData struct using the InstDataAllocator.
bool CanCombineWithPrevInstr
Marks whether we found a illegal instruction in the previous step.
DenseMap< BasicBlock *, unsigned > BasicBlockToInteger
A mapping for a basic block in a module to its assigned number/location in the module.
IRInstructionDataList * allocateIRInstructionDataList()
Get an allocated IRInstructionDataList object using the IDLAllocator.
unsigned mapToLegalUnsigned(BasicBlock::iterator &It, std::vector< unsigned > &IntegerMappingForBB, std::vector< IRInstructionData * > &InstrListForBB)
Maps an Instruction to a legal integer.
void convertToUnsignedVec(BasicBlock &BB, std::vector< IRInstructionData * > &InstrList, std::vector< unsigned > &IntegerMapping)
Maps the Instructions in a BasicBlock BB to legal or illegal integers determined by InstrType.
bool AddedIllegalLastTime
Set if we added an illegal number in the previous step.
unsigned mapToIllegalUnsigned(BasicBlock::iterator &It, std::vector< unsigned > &IntegerMappingForBB, std::vector< IRInstructionData * > &InstrListForBB, bool End=false)
Maps an Instruction to an illegal integer.
A helper struct to hold the candidate, for a branch instruction, the relative location of a label,...
Incoming for lane maks phi as machine instruction, incoming register Reg and incoming block Block are...
A repeated substring in the tree.
Definition: SuffixTree.h:49
SmallVector< unsigned > StartIndices
The start indices of each occurrence.
Definition: SuffixTree.h:54
unsigned Length
The length of the string.
Definition: SuffixTree.h:51