LLVM 23.0.0git
IRSimilarityIdentifier.cpp
Go to the documentation of this file.
1//===- IRSimilarityIdentifier.cpp - Find similarity in a module -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// \file
10// Implementation file for the IRSimilarityIdentifier for identifying
11// similarities in IR including the IRInstructionMapper.
12//
13//===----------------------------------------------------------------------===//
14
16#include "llvm/ADT/DenseMap.h"
18#include "llvm/IR/Intrinsics.h"
19#include "llvm/IR/Operator.h"
20#include "llvm/IR/User.h"
23
24using namespace llvm;
25using namespace IRSimilarity;
26
27namespace llvm {
29 DisableBranches("no-ir-sim-branch-matching", cl::init(false),
31 cl::desc("disable similarity matching, and outlining, "
32 "across branches for debugging purposes."));
33
35 DisableIndirectCalls("no-ir-sim-indirect-calls", cl::init(false),
37 cl::desc("disable outlining indirect calls."));
38
39static cl::opt<bool>
40 MatchCallsByName("ir-sim-calls-by-name", cl::init(false), cl::ReallyHidden,
41 cl::desc("only allow matching call instructions if the "
42 "name and type signature match."));
43
45 DisableIntrinsics("no-ir-sim-intrinsics", cl::init(false), cl::ReallyHidden,
46 cl::desc("Don't match or outline intrinsics"));
47} // namespace llvm
48
51 : Inst(&I), Legal(Legality), IDL(&IDList) {
53}
54
56 // We check for whether we have a comparison instruction. If it is, we
57 // find the "less than" version of the predicate for consistency for
58 // comparison instructions throught the program.
61 if (Predicate != C->getPredicate())
62 RevisedPredicate = Predicate;
63 }
64
65 // Here we collect the operands and their types for determining whether
66 // the structure of the operand use matches between two different candidates.
67 for (Use &OI : Inst->operands()) {
69 // If we have a CmpInst where the predicate is reversed, it means the
70 // operands must be reversed as well.
71 OperVals.insert(OperVals.begin(), OI.get());
72 continue;
73 }
74
75 OperVals.push_back(OI.get());
76 }
77
78 // We capture the incoming BasicBlocks as values as well as the incoming
79 // Values in order to check for structural similarity.
81 llvm::append_range(OperVals, PN->blocks());
82}
83
86
88 DenseMap<BasicBlock *, unsigned> &BasicBlockToInteger) {
89 assert((isa<UncondBrInst, CondBrInst>(Inst)) && "Instruction must be branch");
90
92
93 BBNumIt = BasicBlockToInteger.find(Inst->getParent());
94 assert(BBNumIt != BasicBlockToInteger.end() &&
95 "Could not find location for BasicBlock!");
96
97 int CurrentBlockNumber = static_cast<int>(BBNumIt->second);
98
99 for (Value *V : getBlockOperVals()) {
101 BBNumIt = BasicBlockToInteger.find(Successor);
102 assert(BBNumIt != BasicBlockToInteger.end() &&
103 "Could not find number for BasicBlock!");
104 int OtherBlockNumber = static_cast<int>(BBNumIt->second);
105
106 int Relative = OtherBlockNumber - CurrentBlockNumber;
107 RelativeBlockLocations.push_back(Relative);
108 }
109}
110
113 return OperVals;
115 return ArrayRef<Value *>(OperVals).drop_front(1);
116
117 if (PHINode *PN = dyn_cast<PHINode>(Inst))
118 return ArrayRef<Value *>(
119 std::next(OperVals.begin(), PN->getNumIncomingValues()),
120 OperVals.end()
121 );
122
123 llvm_unreachable("Instruction must be branch or PHINode");
124}
125
126void IRInstructionData::setCalleeName(bool MatchByName) {
128 assert(CI && "Instruction must be call");
129
130 CalleeName = "";
132 // To hash intrinsics, we use the opcode, and types like the other
133 // instructions, but also, the Intrinsic ID, and the Name of the
134 // intrinsic.
135 Intrinsic::ID IntrinsicID = II->getIntrinsicID();
136 FunctionType *FT = II->getFunctionType();
137 // If there is an overloaded name, we have to use the complex version
138 // of getName to get the entire string.
139 if (Intrinsic::isOverloaded(IntrinsicID))
140 CalleeName =
141 Intrinsic::getName(IntrinsicID, FT->params(), II->getModule(), FT);
142 // If there is not an overloaded name, we only need to use this version.
143 else
144 CalleeName = Intrinsic::getName(IntrinsicID).str();
145
146 return;
147 }
148
149 if (!CI->isIndirectCall() && MatchByName)
151}
152
154 DenseMap<BasicBlock *, unsigned> &BasicBlockToInteger) {
155 assert(isa<PHINode>(Inst) && "Instruction must be phi node");
156
159
160 BBNumIt = BasicBlockToInteger.find(PN->getParent());
161 assert(BBNumIt != BasicBlockToInteger.end() &&
162 "Could not find location for BasicBlock!");
163
164 int CurrentBlockNumber = static_cast<int>(BBNumIt->second);
165
166 // Convert the incoming blocks of the PHINode to an integer value, based on
167 // the relative distances between the current block and the incoming block.
168 for (unsigned Idx = 0; Idx < PN->getNumIncomingValues(); Idx++) {
170 BBNumIt = BasicBlockToInteger.find(Incoming);
171 assert(BBNumIt != BasicBlockToInteger.end() &&
172 "Could not find number for BasicBlock!");
173 int OtherBlockNumber = static_cast<int>(BBNumIt->second);
174
175 int Relative = OtherBlockNumber - CurrentBlockNumber;
176 RelativeBlockLocations.push_back(Relative);
177 }
178}
179
181 switch (CI->getPredicate()) {
190 return CI->getSwappedPredicate();
191 default:
192 return CI->getPredicate();
193 }
194}
195
198 "Can only get a predicate from a compare instruction");
199
201 return *RevisedPredicate;
202
203 return cast<CmpInst>(Inst)->getPredicate();
204}
205
208 "Can only get a name from a call instruction");
209
210 assert(CalleeName && "CalleeName has not been set");
211
212 return *CalleeName;
213}
214
216 const IRInstructionData &B) {
217
218 if (!A.Legal || !B.Legal)
219 return false;
220
221 // Check if we are performing the same sort of operation on the same types
222 // but not on the same values.
223 if (!A.Inst->isSameOperationAs(B.Inst)) {
224 // If there is a predicate, this means that either there is a swapped
225 // predicate, or that the types are different, we want to make sure that
226 // the predicates are equivalent via swapping.
227 if (isa<CmpInst>(A.Inst) && isa<CmpInst>(B.Inst)) {
228
229 if (A.getPredicate() != B.getPredicate())
230 return false;
231
232 // If the predicates are the same via swap, make sure that the types are
233 // still the same.
234 auto ZippedTypes = zip(A.OperVals, B.OperVals);
235
236 return all_of(
237 ZippedTypes, [](std::tuple<llvm::Value *, llvm::Value *> R) {
238 return std::get<0>(R)->getType() == std::get<1>(R)->getType();
239 });
240 }
241
242 return false;
243 }
244
245 // Since any GEP Instruction operands after the first operand cannot be
246 // defined by a register, we must make sure that the operands after the first
247 // are the same in the two instructions
248 if (auto *GEP = dyn_cast<GetElementPtrInst>(A.Inst)) {
249 auto *OtherGEP = cast<GetElementPtrInst>(B.Inst);
250
251 // If the instructions do not have the same inbounds restrictions, we do
252 // not consider them the same.
253 if (GEP->isInBounds() != OtherGEP->isInBounds())
254 return false;
255
256 auto ZippedOperands = zip(GEP->indices(), OtherGEP->indices());
257
258 // We increment here since we do not care about the first instruction,
259 // we only care about the following operands since they must be the
260 // exact same to be considered similar.
261 return all_of(drop_begin(ZippedOperands),
262 [](std::tuple<llvm::Use &, llvm::Use &> R) {
263 return std::get<0>(R) == std::get<1>(R);
264 });
265 }
266
267 // If the instructions are functions calls, we make sure that the function
268 // name is the same. We already know that the types are since is
269 // isSameOperationAs is true.
270 if (isa<CallInst>(A.Inst) && isa<CallInst>(B.Inst)) {
271 if (A.getCalleeName() != B.getCalleeName())
272 return false;
273 }
274
277 A.RelativeBlockLocations.size() != B.RelativeBlockLocations.size())
278 return false;
279
280 return true;
281}
282
283// TODO: This is the same as the MachineOutliner, and should be consolidated
284// into the same interface.
286 BasicBlock &BB, std::vector<IRInstructionData *> &InstrList,
287 std::vector<unsigned> &IntegerMapping) {
288 BasicBlock::iterator It = BB.begin();
289
290 std::vector<unsigned> IntegerMappingForBB;
291 std::vector<IRInstructionData *> InstrListForBB;
292
293 for (BasicBlock::iterator Et = BB.end(); It != Et; ++It) {
294 switch (InstClassifier.visit(*It)) {
295 case InstrType::Legal:
296 mapToLegalUnsigned(It, IntegerMappingForBB, InstrListForBB);
297 break;
299 mapToIllegalUnsigned(It, IntegerMappingForBB, InstrListForBB);
300 break;
302 AddedIllegalLastTime = false;
303 break;
304 }
305 }
306
308 mapToIllegalUnsigned(It, IntegerMappingForBB, InstrListForBB, true);
309 for (IRInstructionData *ID : InstrListForBB)
310 this->IDL->push_back(*ID);
311 llvm::append_range(InstrList, InstrListForBB);
312 llvm::append_range(IntegerMapping, IntegerMappingForBB);
313}
314
315// TODO: This is the same as the MachineOutliner, and should be consolidated
316// into the same interface.
318 BasicBlock::iterator &It, std::vector<unsigned> &IntegerMappingForBB,
319 std::vector<IRInstructionData *> &InstrListForBB) {
320 // We added something legal, so we should unset the AddedLegalLastTime
321 // flag.
322 AddedIllegalLastTime = false;
323
324 // If we have at least two adjacent legal instructions (which may have
325 // invisible instructions in between), remember that.
327 HaveLegalRange = true;
329
330 // Get the integer for this instruction or give it the current
331 // LegalInstrNumber.
333 InstrListForBB.push_back(ID);
334
336 ID->setBranchSuccessors(BasicBlockToInteger);
337
338 if (isa<CallInst>(*It))
339 ID->setCalleeName(EnableMatchCallsByName);
340
341 if (isa<PHINode>(*It))
342 ID->setPHIPredecessors(BasicBlockToInteger);
343
344 // Add to the instruction list
345 bool WasInserted;
347 ResultIt;
348 std::tie(ResultIt, WasInserted) =
349 InstructionIntegerMap.insert(std::make_pair(ID, LegalInstrNumber));
350 unsigned INumber = ResultIt->second;
351
352 // There was an insertion.
353 if (WasInserted)
355
356 IntegerMappingForBB.push_back(INumber);
357
358 // Make sure we don't overflow or use any integers reserved by the DenseMap.
360 "Instruction mapping overflow!");
361
363 "Tried to assign DenseMap tombstone or empty key to instruction.");
365 "Tried to assign DenseMap tombstone or empty key to instruction.");
366
367 return INumber;
368}
369
375
380
385
386// TODO: This is the same as the MachineOutliner, and should be consolidated
387// into the same interface.
389 BasicBlock::iterator &It, std::vector<unsigned> &IntegerMappingForBB,
390 std::vector<IRInstructionData *> &InstrListForBB, bool End) {
391 // Can't combine an illegal instruction. Set the flag.
393
394 // Only add one illegal number per range of legal numbers.
396 return IllegalInstrNumber;
397
398 IRInstructionData *ID = nullptr;
399 if (!End)
400 ID = allocateIRInstructionData(*It, false, *IDL);
401 else
403 InstrListForBB.push_back(ID);
404
405 // Remember that we added an illegal number last time.
407 unsigned INumber = IllegalInstrNumber;
408 IntegerMappingForBB.push_back(IllegalInstrNumber--);
409
411 "Instruction mapping overflow!");
412
414 "IllegalInstrNumber cannot be DenseMap tombstone or empty key!");
415
417 "IllegalInstrNumber cannot be DenseMap tombstone or empty key!");
418
419 return INumber;
420}
421
422IRSimilarityCandidate::IRSimilarityCandidate(unsigned StartIdx, unsigned Len,
423 IRInstructionData *FirstInstIt,
424 IRInstructionData *LastInstIt)
425 : StartIdx(StartIdx), Len(Len) {
426
427 assert(FirstInstIt != nullptr && "Instruction is nullptr!");
428 assert(LastInstIt != nullptr && "Instruction is nullptr!");
429 assert(StartIdx + Len > StartIdx &&
430 "Overflow for IRSimilarityCandidate range?");
431 assert(Len - 1 == static_cast<unsigned>(std::distance(
432 iterator(FirstInstIt), iterator(LastInstIt))) &&
433 "Length of the first and last IRInstructionData do not match the "
434 "given length");
435
436 // We iterate over the given instructions, and map each unique value
437 // to a unique number in the IRSimilarityCandidate ValueToNumber and
438 // NumberToValue maps. A constant get its own value globally, the individual
439 // uses of the constants are not considered to be unique.
440 //
441 // IR: Mapping Added:
442 // %add1 = add i32 %a, c1 %add1 -> 3, %a -> 1, c1 -> 2
443 // %add2 = add i32 %a, %1 %add2 -> 4
444 // %add3 = add i32 c2, c1 %add3 -> 6, c2 -> 5
445 //
446 // when replace with global values, starting from 1, would be
447 //
448 // 3 = add i32 1, 2
449 // 4 = add i32 1, 3
450 // 6 = add i32 5, 2
451 unsigned LocalValNumber = 1;
453 for (unsigned Loc = StartIdx; Loc < StartIdx + Len; Loc++, ID++) {
454 // Map the operand values to an unsigned integer if it does not already
455 // have an unsigned integer assigned to it.
456 for (Value *Arg : ID->OperVals)
457 if (ValueToNumber.try_emplace(Arg, LocalValNumber).second) {
458 NumberToValue.try_emplace(LocalValNumber, Arg);
459 LocalValNumber++;
460 }
461
462 // Mapping the instructions to an unsigned integer if it is not already
463 // exist in the mapping.
464 if (ValueToNumber.try_emplace(ID->Inst, LocalValNumber).second) {
465 NumberToValue.try_emplace(LocalValNumber, ID->Inst);
466 LocalValNumber++;
467 }
468 }
469
470 // Setting the first and last instruction data pointers for the candidate. If
471 // we got through the entire for loop without hitting an assert, we know
472 // that both of these instructions are not nullptrs.
473 FirstInst = FirstInstIt;
474 LastInst = LastInstIt;
475
476 // Add the basic blocks contained in the set into the global value numbering.
478 getBasicBlocks(BBSet);
479 for (BasicBlock *BB : BBSet) {
480 if (ValueToNumber.try_emplace(BB, LocalValNumber).second) {
481 NumberToValue.try_emplace(LocalValNumber, BB);
482 LocalValNumber++;
483 }
484 }
485}
486
488 const IRSimilarityCandidate &B) {
489 if (A.getLength() != B.getLength())
490 return false;
491
492 auto InstrDataForBoth =
493 zip(make_range(A.begin(), A.end()), make_range(B.begin(), B.end()));
494
495 return all_of(InstrDataForBoth,
496 [](std::tuple<IRInstructionData &, IRInstructionData &> R) {
497 IRInstructionData &A = std::get<0>(R);
498 IRInstructionData &B = std::get<1>(R);
499 if (!A.Legal || !B.Legal)
500 return false;
501 return isClose(A, B);
502 });
503}
504
505/// Determine if one or more of the assigned global value numbers for the
506/// operands in \p TargetValueNumbers is in the current mapping set for operand
507/// numbers in \p SourceOperands. The set of possible corresponding global
508/// value numbers are replaced with the most recent version of compatible
509/// values.
510///
511/// \param [in] SourceValueToNumberMapping - The mapping of a Value to global
512/// value number for the source IRInstructionCandidate.
513/// \param [in, out] CurrentSrcTgtNumberMapping - The current mapping of source
514/// IRSimilarityCandidate global value numbers to a set of possible numbers in
515/// the target.
516/// \param [in] SourceOperands - The operands in the original
517/// IRSimilarityCandidate in the current instruction.
518/// \param [in] TargetValueNumbers - The global value numbers of the operands in
519/// the corresponding Instruction in the other IRSimilarityCandidate.
520/// \returns true if there exists a possible mapping between the source
521/// Instruction operands and the target Instruction operands, and false if not.
523 const DenseMap<Value *, unsigned> &SourceValueToNumberMapping,
524 DenseMap<unsigned, DenseSet<unsigned>> &CurrentSrcTgtNumberMapping,
525 ArrayRef<Value *> &SourceOperands,
526 DenseSet<unsigned> &TargetValueNumbers){
527
529
530 unsigned ArgVal;
531 bool WasInserted;
532
533 // Iterate over the operands in the source IRSimilarityCandidate to determine
534 // whether there exists an operand in the other IRSimilarityCandidate that
535 // creates a valid mapping of Value to Value between the
536 // IRSimilarityCaniddates.
537 for (Value *V : SourceOperands) {
538 ArgVal = SourceValueToNumberMapping.find(V)->second;
539
540 // Instead of finding a current mapping, we attempt to insert a set.
541 std::tie(ValueMappingIt, WasInserted) = CurrentSrcTgtNumberMapping.insert(
542 std::make_pair(ArgVal, TargetValueNumbers));
543
544 // We need to iterate over the items in other IRSimilarityCandidate's
545 // Instruction to determine whether there is a valid mapping of
546 // Value to Value.
547 DenseSet<unsigned> NewSet;
548 for (unsigned &Curr : ValueMappingIt->second)
549 // If we can find the value in the mapping, we add it to the new set.
550 if (TargetValueNumbers.contains(Curr))
551 NewSet.insert(Curr);
552
553 // If we could not find a Value, return 0.
554 if (NewSet.empty())
555 return false;
556
557 // Otherwise replace the old mapping with the newly constructed one.
558 if (NewSet.size() != ValueMappingIt->second.size())
559 ValueMappingIt->second.swap(NewSet);
560
561 // We have reached no conclusions about the mapping, and cannot remove
562 // any items from the other operands, so we move to check the next operand.
563 if (ValueMappingIt->second.size() != 1)
564 continue;
565
566 unsigned ValToRemove = *ValueMappingIt->second.begin();
567 // When there is only one item left in the mapping for and operand, remove
568 // the value from the other operands. If it results in there being no
569 // mapping, return false, it means the mapping is wrong
570 for (Value *InnerV : SourceOperands) {
571 if (V == InnerV)
572 continue;
573
574 unsigned InnerVal = SourceValueToNumberMapping.find(InnerV)->second;
575 ValueMappingIt = CurrentSrcTgtNumberMapping.find(InnerVal);
576 if (ValueMappingIt == CurrentSrcTgtNumberMapping.end())
577 continue;
578
579 ValueMappingIt->second.erase(ValToRemove);
580 if (ValueMappingIt->second.empty())
581 return false;
582 }
583 }
584
585 return true;
586}
587
588/// Determine if operand number \p TargetArgVal is in the current mapping set
589/// for operand number \p SourceArgVal.
590///
591/// \param [in, out] CurrentSrcTgtNumberMapping current mapping of global
592/// value numbers from source IRSimilarityCandidate to target
593/// IRSimilarityCandidate.
594/// \param [in] SourceArgVal The global value number for an operand in the
595/// in the original candidate.
596/// \param [in] TargetArgVal The global value number for the corresponding
597/// operand in the other candidate.
598/// \returns True if there exists a mapping and false if not.
600 DenseMap<unsigned, DenseSet<unsigned>> &CurrentSrcTgtNumberMapping,
601 unsigned SourceArgVal, unsigned TargetArgVal) {
602 // We are given two unsigned integers representing the global values of
603 // the operands in different IRSimilarityCandidates and a current mapping
604 // between the two.
605 //
606 // Source Operand GVN: 1
607 // Target Operand GVN: 2
608 // CurrentMapping: {1: {1, 2}}
609 //
610 // Since we have mapping, and the target operand is contained in the set, we
611 // update it to:
612 // CurrentMapping: {1: {2}}
613 // and can return true. But, if the mapping was
614 // CurrentMapping: {1: {3}}
615 // we would return false.
616
617 bool WasInserted;
619
620 std::tie(Val, WasInserted) = CurrentSrcTgtNumberMapping.insert(
621 std::make_pair(SourceArgVal, DenseSet<unsigned>({TargetArgVal})));
622
623 // If we created a new mapping, then we are done.
624 if (WasInserted)
625 return true;
626
627 // If there is more than one option in the mapping set, and the target value
628 // is included in the mapping set replace that set with one that only includes
629 // the target value, as it is the only valid mapping via the non commutative
630 // instruction.
631
632 DenseSet<unsigned> &TargetSet = Val->second;
633 if (TargetSet.size() > 1 && TargetSet.contains(TargetArgVal)) {
634 TargetSet.clear();
635 TargetSet.insert(TargetArgVal);
636 return true;
637 }
638
639 // Return true if we can find the value in the set.
640 return TargetSet.contains(TargetArgVal);
641}
642
645 // Iterators to keep track of where we are in the operands for each
646 // Instruction.
647 ArrayRef<Value *>::iterator VItA = A.OperVals.begin();
648 ArrayRef<Value *>::iterator VItB = B.OperVals.begin();
649 unsigned OperandLength = A.OperVals.size();
650
651 // For each operand, get the value numbering and ensure it is consistent.
652 for (unsigned Idx = 0; Idx < OperandLength; Idx++, VItA++, VItB++) {
653 unsigned OperValA = A.IRSC.ValueToNumber.find(*VItA)->second;
654 unsigned OperValB = B.IRSC.ValueToNumber.find(*VItB)->second;
655
656 // Attempt to add a set with only the target value. If there is no mapping
657 // we can create it here.
658 //
659 // For an instruction like a subtraction:
660 // IRSimilarityCandidateA: IRSimilarityCandidateB:
661 // %resultA = sub %a, %b %resultB = sub %d, %e
662 //
663 // We map %a -> %d and %b -> %e.
664 //
665 // And check to see whether their mapping is consistent in
666 // checkNumberingAndReplace.
667
668 if (!checkNumberingAndReplace(A.ValueNumberMapping, OperValA, OperValB))
669 return false;
670
671 if (!checkNumberingAndReplace(B.ValueNumberMapping, OperValB, OperValA))
672 return false;
673 }
674 return true;
675}
676
679 DenseSet<unsigned> ValueNumbersA;
680 DenseSet<unsigned> ValueNumbersB;
681
682 ArrayRef<Value *>::iterator VItA = A.OperVals.begin();
683 ArrayRef<Value *>::iterator VItB = B.OperVals.begin();
684 unsigned OperandLength = A.OperVals.size();
685
686 // Find the value number sets for the operands.
687 for (unsigned Idx = 0; Idx < OperandLength;
688 Idx++, VItA++, VItB++) {
689 ValueNumbersA.insert(A.IRSC.ValueToNumber.find(*VItA)->second);
690 ValueNumbersB.insert(B.IRSC.ValueToNumber.find(*VItB)->second);
691 }
692
693 // Iterate over the operands in the first IRSimilarityCandidate and make sure
694 // there exists a possible mapping with the operands in the second
695 // IRSimilarityCandidate.
696 if (!checkNumberingAndReplaceCommutative(A.IRSC.ValueToNumber,
697 A.ValueNumberMapping, A.OperVals,
698 ValueNumbersB))
699 return false;
700
701 // Iterate over the operands in the second IRSimilarityCandidate and make sure
702 // there exists a possible mapping with the operands in the first
703 // IRSimilarityCandidate.
704 if (!checkNumberingAndReplaceCommutative(B.IRSC.ValueToNumber,
705 B.ValueNumberMapping, B.OperVals,
706 ValueNumbersA))
707 return false;
708
709 return true;
710}
711
713 const unsigned InstValA, const unsigned &InstValB,
714 DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingA,
715 DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingB) {
717 bool WasInserted;
718 std::tie(ValueMappingIt, WasInserted) = ValueNumberMappingA.insert(
719 std::make_pair(InstValA, DenseSet<unsigned>({InstValB})));
720 if (!WasInserted && !ValueMappingIt->second.contains(InstValB))
721 return false;
722 else if (ValueMappingIt->second.size() != 1) {
723 for (unsigned OtherVal : ValueMappingIt->second) {
724 if (OtherVal == InstValB)
725 continue;
726 auto OtherValIt = ValueNumberMappingA.find(OtherVal);
727 if (OtherValIt == ValueNumberMappingA.end())
728 continue;
729 OtherValIt->second.erase(InstValA);
730 }
731 ValueNumberMappingA.erase(ValueMappingIt);
732 std::tie(ValueMappingIt, WasInserted) = ValueNumberMappingA.insert(
733 std::make_pair(InstValA, DenseSet<unsigned>({InstValB})));
734 }
735
736 return true;
737}
738
741 // Get the basic blocks the label refers to.
742 BasicBlock *ABB = cast<BasicBlock>(A.OperVal);
743 BasicBlock *BBB = cast<BasicBlock>(B.OperVal);
744
745 // Get the basic blocks contained in each region.
746 DenseSet<BasicBlock *> BasicBlockA;
747 DenseSet<BasicBlock *> BasicBlockB;
748 A.IRSC.getBasicBlocks(BasicBlockA);
749 B.IRSC.getBasicBlocks(BasicBlockB);
750
751 // Determine if the block is contained in the region.
752 bool AContained = BasicBlockA.contains(ABB);
753 bool BContained = BasicBlockB.contains(BBB);
754
755 // Both blocks need to be contained in the region, or both need to be outside
756 // the region.
757 if (AContained != BContained)
758 return false;
759
760 // If both are contained, then we need to make sure that the relative
761 // distance to the target blocks are the same.
762 if (AContained)
763 return A.RelativeLocation == B.RelativeLocation;
764 return true;
765}
766
773
778
781 DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingA,
782 DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingB) {
783 if (A.getLength() != B.getLength())
784 return false;
785
786 if (A.ValueToNumber.size() != B.ValueToNumber.size())
787 return false;
788
789 iterator ItA = A.begin();
790 iterator ItB = B.begin();
791
792 // These ValueNumber Mapping sets create a create a mapping between the values
793 // in one candidate to values in the other candidate. If we create a set with
794 // one element, and that same element maps to the original element in the
795 // candidate we have a good mapping.
796
797 // Iterate over the instructions contained in each candidate
798 unsigned SectionLength = A.getStartIdx() + A.getLength();
799 for (unsigned Loc = A.getStartIdx(); Loc < SectionLength;
800 ItA++, ItB++, Loc++) {
801 // Make sure the instructions are similar to one another.
802 if (!isClose(*ItA, *ItB))
803 return false;
804
805 Instruction *IA = ItA->Inst;
806 Instruction *IB = ItB->Inst;
807
808 if (!ItA->Legal || !ItB->Legal)
809 return false;
810
811 // Get the operand sets for the instructions.
812 ArrayRef<Value *> OperValsA = ItA->OperVals;
813 ArrayRef<Value *> OperValsB = ItB->OperVals;
814
815 unsigned InstValA = A.ValueToNumber.find(IA)->second;
816 unsigned InstValB = B.ValueToNumber.find(IB)->second;
817
818 // Ensure that the mappings for the instructions exists.
819 if (!compareAssignmentMapping(InstValA, InstValB, ValueNumberMappingA,
820 ValueNumberMappingB))
821 return false;
822
823 if (!compareAssignmentMapping(InstValB, InstValA, ValueNumberMappingB,
824 ValueNumberMappingA))
825 return false;
826
827 // We have different paths for commutative instructions and non-commutative
828 // instructions since commutative instructions could allow multiple mappings
829 // to certain values.
830 if (IA->isCommutative() && !isa<FPMathOperator>(IA) &&
831 !isa<IntrinsicInst>(IA)) {
833 {A, OperValsA, ValueNumberMappingA},
834 {B, OperValsB, ValueNumberMappingB}))
835 return false;
836 continue;
837 }
838
839 // Handle the non-commutative cases.
841 {A, OperValsA, ValueNumberMappingA},
842 {B, OperValsB, ValueNumberMappingB}))
843 return false;
844
845 // Here we check that between two corresponding instructions,
846 // when referring to a basic block in the same region, the
847 // relative locations are the same. And, that the instructions refer to
848 // basic blocks outside the region in the same corresponding locations.
849
850 // We are able to make the assumption about blocks outside of the region
851 // since the target block labels are considered values and will follow the
852 // same number matching that we defined for the other instructions in the
853 // region. So, at this point, in each location we target a specific block
854 // outside the region, we are targeting a corresponding block in each
855 // analagous location in the region we are comparing to.
857 IA->getOpcode() != IB->getOpcode())
858 continue;
859
860 SmallVector<int, 4> &RelBlockLocsA = ItA->RelativeBlockLocations;
861 SmallVector<int, 4> &RelBlockLocsB = ItB->RelativeBlockLocations;
862 ArrayRef<Value *> ABL = ItA->getBlockOperVals();
863 ArrayRef<Value *> BBL = ItB->getBlockOperVals();
864
865 // Check to make sure that the number of operands, and branching locations
866 // between BranchInsts is the same.
867 if (RelBlockLocsA.size() != RelBlockLocsB.size() &&
868 ABL.size() != BBL.size())
869 return false;
870
871 assert(RelBlockLocsA.size() == ABL.size() &&
872 "Block information vectors not the same size.");
873 assert(RelBlockLocsB.size() == BBL.size() &&
874 "Block information vectors not the same size.");
875
876 ZippedRelativeLocationsT ZippedRelativeLocations =
877 zip(RelBlockLocsA, RelBlockLocsB, ABL, BBL);
878 if (any_of(ZippedRelativeLocations,
879 [&A, &B](std::tuple<int, int, Value *, Value *> R) {
881 {A, std::get<0>(R), std::get<2>(R)},
882 {B, std::get<1>(R), std::get<3>(R)});
883 }))
884 return false;
885 }
886 return true;
887}
888
890 const IRSimilarityCandidate &B) {
891 auto DoesOverlap = [](const IRSimilarityCandidate &X,
892 const IRSimilarityCandidate &Y) {
893 // Check:
894 // XXXXXX X starts before Y ends
895 // YYYYYYY Y starts after X starts
896 return X.StartIdx <= Y.getEndIdx() && Y.StartIdx >= X.StartIdx;
897 };
898
899 return DoesOverlap(A, B) || DoesOverlap(B, A);
900}
901
902void IRSimilarityIdentifier::populateMapper(
903 Module &M, std::vector<IRInstructionData *> &InstrList,
904 std::vector<unsigned> &IntegerMapping) {
905
906 std::vector<IRInstructionData *> InstrListForModule;
907 std::vector<unsigned> IntegerMappingForModule;
908 // Iterate over the functions in the module to map each Instruction in each
909 // BasicBlock to an unsigned integer.
910 Mapper.initializeForBBs(M);
911
912 for (Function &F : M) {
913
914 if (F.empty())
915 continue;
916
917 for (BasicBlock &BB : F) {
918
919 // BB has potential to have similarity since it has a size greater than 2
920 // and can therefore match other regions greater than 2. Map it to a list
921 // of unsigned integers.
922 Mapper.convertToUnsignedVec(BB, InstrListForModule,
923 IntegerMappingForModule);
924 }
925
926 BasicBlock::iterator It = F.begin()->end();
927 Mapper.mapToIllegalUnsigned(It, IntegerMappingForModule, InstrListForModule,
928 true);
929 if (InstrListForModule.size() > 0)
930 Mapper.IDL->push_back(*InstrListForModule.back());
931 }
932
933 // Insert the InstrListForModule at the end of the overall InstrList so that
934 // we can have a long InstrList for the entire set of Modules being analyzed.
935 llvm::append_range(InstrList, InstrListForModule);
936 // Do the same as above, but for IntegerMapping.
937 llvm::append_range(IntegerMapping, IntegerMappingForModule);
938}
939
940void IRSimilarityIdentifier::populateMapper(
941 ArrayRef<std::unique_ptr<Module>> &Modules,
942 std::vector<IRInstructionData *> &InstrList,
943 std::vector<unsigned> &IntegerMapping) {
944
945 // Iterate over, and map the instructions in each module.
946 for (const std::unique_ptr<Module> &M : Modules)
947 populateMapper(*M, InstrList, IntegerMapping);
948}
949
950/// From a repeated subsequence, find all the different instances of the
951/// subsequence from the \p InstrList, and create an IRSimilarityCandidate from
952/// the IRInstructionData in subsequence.
953///
954/// \param [in] Mapper - The instruction mapper for basic correctness checks.
955/// \param [in] InstrList - The vector that holds the instruction data.
956/// \param [in] IntegerMapping - The vector that holds the mapped integers.
957/// \param [out] CandsForRepSubstring - The vector to store the generated
958/// IRSimilarityCandidates.
960 const IRInstructionMapper& Mapper, std::vector<IRInstructionData *> &InstrList,
961 std::vector<unsigned> &IntegerMapping, SuffixTree::RepeatedSubstring &RS,
962 std::vector<IRSimilarityCandidate> &CandsForRepSubstring) {
963
964 unsigned StringLen = RS.Length;
965 if (StringLen < 2)
966 return;
967
968 // Create an IRSimilarityCandidate for instance of this subsequence \p RS.
969 for (const unsigned &StartIdx : RS.StartIndices) {
970 unsigned EndIdx = StartIdx + StringLen - 1;
971
972 // Check that this subsequence does not contain an illegal instruction.
973 bool ContainsIllegal = false;
974 for (unsigned CurrIdx = StartIdx; CurrIdx <= EndIdx; CurrIdx++) {
975 unsigned Key = IntegerMapping[CurrIdx];
976 if (Key > Mapper.IllegalInstrNumber) {
977 ContainsIllegal = true;
978 break;
979 }
980 }
981
982 // If we have an illegal instruction, we should not create an
983 // IRSimilarityCandidate for this region.
984 if (ContainsIllegal)
985 continue;
986
987 // We are getting iterators to the instructions in this region of code
988 // by advancing the start and end indices from the start of the
989 // InstrList.
990 std::vector<IRInstructionData *>::iterator StartIt = InstrList.begin();
991 std::advance(StartIt, StartIdx);
992 std::vector<IRInstructionData *>::iterator EndIt = InstrList.begin();
993 std::advance(EndIt, EndIdx);
994
995 CandsForRepSubstring.emplace_back(StartIdx, StringLen, *StartIt, *EndIt);
996 }
997}
998
1000 IRSimilarityCandidate &SourceCand,
1001 DenseMap<unsigned, DenseSet<unsigned>> &ToSourceMapping,
1002 DenseMap<unsigned, DenseSet<unsigned>> &FromSourceMapping) {
1003 assert(SourceCand.CanonNumToNumber.size() != 0 &&
1004 "Base canonical relationship is empty!");
1005 assert(SourceCand.NumberToCanonNum.size() != 0 &&
1006 "Base canonical relationship is empty!");
1007
1008 assert(CanonNumToNumber.size() == 0 && "Canonical Relationship is non-empty");
1009 assert(NumberToCanonNum.size() == 0 && "Canonical Relationship is non-empty");
1010
1011 DenseSet<unsigned> UsedGVNs;
1012 // Iterate over the mappings provided from this candidate to SourceCand. We
1013 // are then able to map the GVN in this candidate to the same canonical number
1014 // given to the corresponding GVN in SourceCand.
1015 for (std::pair<unsigned, DenseSet<unsigned>> &GVNMapping : ToSourceMapping) {
1016 unsigned SourceGVN = GVNMapping.first;
1017
1018 assert(GVNMapping.second.size() != 0 && "Possible GVNs is 0!");
1019
1020 unsigned ResultGVN;
1021 // We need special handling if we have more than one potential value. This
1022 // means that there are at least two GVNs that could correspond to this GVN.
1023 // This could lead to potential swapping later on, so we make a decision
1024 // here to ensure a one-to-one mapping.
1025 if (GVNMapping.second.size() > 1) {
1026 bool Found = false;
1027 for (unsigned Val : GVNMapping.second) {
1028 // We make sure the target value number hasn't already been reserved.
1029 if (UsedGVNs.contains(Val))
1030 continue;
1031
1032 // We make sure that the opposite mapping is still consistent.
1034 FromSourceMapping.find(Val);
1035
1036 if (!It->second.contains(SourceGVN))
1037 continue;
1038
1039 // We pick the first item that satisfies these conditions.
1040 Found = true;
1041 ResultGVN = Val;
1042 break;
1043 }
1044
1045 assert(Found && "Could not find matching value for source GVN");
1046 (void)Found;
1047
1048 } else
1049 ResultGVN = *GVNMapping.second.begin();
1050
1051 // Whatever GVN is found, we mark it as used.
1052 UsedGVNs.insert(ResultGVN);
1053
1054 unsigned CanonNum = *SourceCand.getCanonicalNum(ResultGVN);
1055 CanonNumToNumber.insert(std::make_pair(CanonNum, SourceGVN));
1056 NumberToCanonNum.insert(std::make_pair(SourceGVN, CanonNum));
1057 }
1058
1060 getBasicBlocks(BBSet);
1061 // Find canonical numbers for the BasicBlocks in the current candidate.
1062 // This is done by finding the corresponding value for the first instruction
1063 // in the block in the current candidate, finding the matching value in the
1064 // source candidate. Then by finding the parent of this value, use the
1065 // canonical number of the block in the source candidate for the canonical
1066 // number in the current candidate.
1067 for (BasicBlock *BB : BBSet) {
1068 unsigned BBGVNForCurrCand = ValueToNumber.find(BB)->second;
1069
1070 // We can skip the BasicBlock if the canonical numbering has already been
1071 // found in a separate instruction.
1072 if (NumberToCanonNum.contains(BBGVNForCurrCand))
1073 continue;
1074
1075 // If the basic block is the starting block, then the shared instruction may
1076 // not be the first instruction in the block, it will be the first
1077 // instruction in the similarity region.
1078 Value *FirstOutlineInst =
1079 BB == getStartBB() ? frontInstruction() : &*BB->begin();
1080
1081 unsigned FirstInstGVN = *getGVN(FirstOutlineInst);
1082 unsigned FirstInstCanonNum = *getCanonicalNum(FirstInstGVN);
1083 unsigned SourceGVN = *SourceCand.fromCanonicalNum(FirstInstCanonNum);
1084 Value *SourceV = *SourceCand.fromGVN(SourceGVN);
1085 BasicBlock *SourceBB = cast<Instruction>(SourceV)->getParent();
1086 unsigned SourceBBGVN = *SourceCand.getGVN(SourceBB);
1087 unsigned SourceCanonBBGVN = *SourceCand.getCanonicalNum(SourceBBGVN);
1088 CanonNumToNumber.insert(std::make_pair(SourceCanonBBGVN, BBGVNForCurrCand));
1089 NumberToCanonNum.insert(std::make_pair(BBGVNForCurrCand, SourceCanonBBGVN));
1090 }
1091}
1092
1094 IRSimilarityCandidate &SourceCand, IRSimilarityCandidate &SourceCandLarge,
1095 IRSimilarityCandidate &TargetCandLarge) {
1096 assert(!SourceCand.CanonNumToNumber.empty() &&
1097 "Canonical Relationship is non-empty");
1098 assert(!SourceCand.NumberToCanonNum.empty() &&
1099 "Canonical Relationship is non-empty");
1100
1101 assert(!SourceCandLarge.CanonNumToNumber.empty() &&
1102 "Canonical Relationship is non-empty");
1103 assert(!SourceCandLarge.NumberToCanonNum.empty() &&
1104 "Canonical Relationship is non-empty");
1105
1106 assert(!TargetCandLarge.CanonNumToNumber.empty() &&
1107 "Canonical Relationship is non-empty");
1108 assert(!TargetCandLarge.NumberToCanonNum.empty() &&
1109 "Canonical Relationship is non-empty");
1110
1111 assert(CanonNumToNumber.empty() && "Canonical Relationship is non-empty");
1112 assert(NumberToCanonNum.empty() && "Canonical Relationship is non-empty");
1113
1114 // We're going to use the larger candidates as a "bridge" to create the
1115 // canonical number for the target candidate since we have idetified two
1116 // candidates as subsequences of larger sequences, and therefore must be
1117 // structurally similar.
1118 for (std::pair<Value *, unsigned> &ValueNumPair : ValueToNumber) {
1119 Value *CurrVal = ValueNumPair.first;
1120 unsigned TargetCandGVN = ValueNumPair.second;
1121
1122 // Find the numbering in the large candidate that surrounds the
1123 // current candidate.
1124 std::optional<unsigned> OLargeTargetGVN = TargetCandLarge.getGVN(CurrVal);
1125 assert(OLargeTargetGVN.has_value() && "GVN not found for Value");
1126
1127 // Get the canonical numbering in the large target candidate.
1128 std::optional<unsigned> OTargetCandCanon =
1129 TargetCandLarge.getCanonicalNum(OLargeTargetGVN.value());
1130 assert(OTargetCandCanon.has_value() &&
1131 "Canononical Number not found for GVN");
1132
1133 // Get the GVN in the large source candidate from the canonical numbering.
1134 std::optional<unsigned> OLargeSourceGVN =
1135 SourceCandLarge.fromCanonicalNum(OTargetCandCanon.value());
1136 assert(OLargeSourceGVN.has_value() &&
1137 "GVN Number not found for Canonical Number");
1138
1139 // Get the Value from the GVN in the large source candidate.
1140 std::optional<Value *> OLargeSourceV =
1141 SourceCandLarge.fromGVN(OLargeSourceGVN.value());
1142 assert(OLargeSourceV.has_value() && "Value not found for GVN");
1143
1144 // Get the GVN number for the Value in the source candidate.
1145 std::optional<unsigned> OSourceGVN =
1146 SourceCand.getGVN(OLargeSourceV.value());
1147 assert(OSourceGVN.has_value() && "GVN Number not found for Value");
1148
1149 // Get the canonical numbering from the GVN/
1150 std::optional<unsigned> OSourceCanon =
1151 SourceCand.getCanonicalNum(OSourceGVN.value());
1152 assert(OSourceCanon.has_value() && "Canon Number not found for GVN");
1153
1154 // Insert the canonical numbering and GVN pair into their respective
1155 // mappings.
1156 CanonNumToNumber.insert(
1157 std::make_pair(OSourceCanon.value(), TargetCandGVN));
1158 NumberToCanonNum.insert(
1159 std::make_pair(TargetCandGVN, OSourceCanon.value()));
1160 }
1161}
1162
1164 IRSimilarityCandidate &CurrCand) {
1165 assert(CurrCand.CanonNumToNumber.size() == 0 &&
1166 "Canonical Relationship is non-empty");
1167 assert(CurrCand.NumberToCanonNum.size() == 0 &&
1168 "Canonical Relationship is non-empty");
1169
1170 unsigned CanonNum = 0;
1171 // Iterate over the value numbers found, the order does not matter in this
1172 // case.
1173 for (std::pair<unsigned, Value *> &NumToVal : CurrCand.NumberToValue) {
1174 CurrCand.NumberToCanonNum.insert(std::make_pair(NumToVal.first, CanonNum));
1175 CurrCand.CanonNumToNumber.insert(std::make_pair(CanonNum, NumToVal.first));
1176 CanonNum++;
1177 }
1178}
1179
1180/// Look for larger IRSimilarityCandidates From the previously matched
1181/// IRSimilarityCandidates that fully contain \p CandA or \p CandB. If there is
1182/// an overlap, return a pair of structurally similar, larger
1183/// IRSimilarityCandidates.
1184///
1185/// \param [in] CandA - The first candidate we are trying to determine the
1186/// structure of.
1187/// \param [in] CandB - The second candidate we are trying to determine the
1188/// structure of.
1189/// \param [in] IndexToIncludedCand - Mapping of index of the an instruction in
1190/// a circuit to the IRSimilarityCandidates that include this instruction.
1191/// \param [in] CandToOverallGroup - Mapping of IRSimilarityCandidate to a
1192/// number representing the structural group assigned to it.
1193static std::optional<
1194 std::pair<IRSimilarityCandidate *, IRSimilarityCandidate *>>
1197 DenseMap<unsigned, DenseSet<IRSimilarityCandidate *>> &IndexToIncludedCand,
1199 DenseMap<unsigned, IRSimilarityCandidate *> IncludedGroupAndCandA;
1200 DenseMap<unsigned, IRSimilarityCandidate *> IncludedGroupAndCandB;
1201 DenseSet<unsigned> IncludedGroupsA;
1202 DenseSet<unsigned> IncludedGroupsB;
1203
1204 // Find the overall similarity group numbers that fully contain the candidate,
1205 // and record the larger candidate for each group.
1206 auto IdxToCandidateIt = IndexToIncludedCand.find(CandA.getStartIdx());
1207 std::optional<std::pair<IRSimilarityCandidate *, IRSimilarityCandidate *>>
1208 Result;
1209
1210 unsigned CandAStart = CandA.getStartIdx();
1211 unsigned CandAEnd = CandA.getEndIdx();
1212 unsigned CandBStart = CandB.getStartIdx();
1213 unsigned CandBEnd = CandB.getEndIdx();
1214 if (IdxToCandidateIt == IndexToIncludedCand.end())
1215 return Result;
1216 for (IRSimilarityCandidate *MatchedCand : IdxToCandidateIt->second) {
1217 if (MatchedCand->getStartIdx() > CandAStart ||
1218 (MatchedCand->getEndIdx() < CandAEnd))
1219 continue;
1220 unsigned GroupNum = CandToGroup.find(MatchedCand)->second;
1221 IncludedGroupAndCandA.insert(std::make_pair(GroupNum, MatchedCand));
1222 IncludedGroupsA.insert(GroupNum);
1223 }
1224
1225 // Find the overall similarity group numbers that fully contain the next
1226 // candidate, and record the larger candidate for each group.
1227 IdxToCandidateIt = IndexToIncludedCand.find(CandBStart);
1228 if (IdxToCandidateIt == IndexToIncludedCand.end())
1229 return Result;
1230 for (IRSimilarityCandidate *MatchedCand : IdxToCandidateIt->second) {
1231 if (MatchedCand->getStartIdx() > CandBStart ||
1232 MatchedCand->getEndIdx() < CandBEnd)
1233 continue;
1234 unsigned GroupNum = CandToGroup.find(MatchedCand)->second;
1235 IncludedGroupAndCandB.insert(std::make_pair(GroupNum, MatchedCand));
1236 IncludedGroupsB.insert(GroupNum);
1237 }
1238
1239 // Find the intersection between the two groups, these are the groups where
1240 // the larger candidates exist.
1241 set_intersect(IncludedGroupsA, IncludedGroupsB);
1242
1243 // If there is no intersection between the sets, then we cannot determine
1244 // whether or not there is a match.
1245 if (IncludedGroupsA.empty())
1246 return Result;
1247
1248 // Create a pair that contains the larger candidates.
1249 auto ItA = IncludedGroupAndCandA.find(*IncludedGroupsA.begin());
1250 auto ItB = IncludedGroupAndCandB.find(*IncludedGroupsA.begin());
1251 Result = std::make_pair(ItA->second, ItB->second);
1252 return Result;
1253}
1254
1255/// From the list of IRSimilarityCandidates, perform a comparison between each
1256/// IRSimilarityCandidate to determine if there are overlapping
1257/// IRInstructionData, or if they do not have the same structure.
1258///
1259/// \param [in] CandsForRepSubstring - The vector containing the
1260/// IRSimilarityCandidates.
1261/// \param [out] StructuralGroups - the mapping of unsigned integers to vector
1262/// of IRSimilarityCandidates where each of the IRSimilarityCandidates in the
1263/// vector are structurally similar to one another.
1264/// \param [in] IndexToIncludedCand - Mapping of index of the an instruction in
1265/// a circuit to the IRSimilarityCandidates that include this instruction.
1266/// \param [in] CandToOverallGroup - Mapping of IRSimilarityCandidate to a
1267/// number representing the structural group assigned to it.
1269 std::vector<IRSimilarityCandidate> &CandsForRepSubstring,
1270 DenseMap<unsigned, SimilarityGroup> &StructuralGroups,
1271 DenseMap<unsigned, DenseSet<IRSimilarityCandidate *>> &IndexToIncludedCand,
1273 ) {
1274 std::vector<IRSimilarityCandidate>::iterator CandIt, CandEndIt, InnerCandIt,
1275 InnerCandEndIt;
1276
1277 // IRSimilarityCandidates each have a structure for operand use. It is
1278 // possible that two instances of the same subsequences have different
1279 // structure. Each type of structure found is assigned a number. This
1280 // DenseMap maps an IRSimilarityCandidate to which type of similarity
1281 // discovered it fits within.
1283
1284 // Find the compatibility from each candidate to the others to determine
1285 // which candidates overlap and which have the same structure by mapping
1286 // each structure to a different group.
1287 bool SameStructure;
1288 bool Inserted;
1289 unsigned CurrentGroupNum = 0;
1290 unsigned OuterGroupNum;
1294
1295 // Iterate over the candidates to determine its structural and overlapping
1296 // compatibility with other instructions
1297 DenseMap<unsigned, DenseSet<unsigned>> ValueNumberMappingA;
1298 DenseMap<unsigned, DenseSet<unsigned>> ValueNumberMappingB;
1299 for (CandIt = CandsForRepSubstring.begin(),
1300 CandEndIt = CandsForRepSubstring.end();
1301 CandIt != CandEndIt; CandIt++) {
1302
1303 // Determine if it has an assigned structural group already.
1304 // If not, we assign it one, and add it to our mapping.
1305 std::tie(CandToGroupIt, Inserted) =
1306 CandToGroup.try_emplace(&*CandIt, CurrentGroupNum);
1307 if (Inserted)
1308 ++CurrentGroupNum;
1309
1310 // Get the structural group number from the iterator.
1311 OuterGroupNum = CandToGroupIt->second;
1312
1313 // Check if we already have a list of IRSimilarityCandidates for the current
1314 // structural group. Create one if one does not exist.
1315 CurrentGroupPair = StructuralGroups.find(OuterGroupNum);
1316 if (CurrentGroupPair == StructuralGroups.end()) {
1318 std::tie(CurrentGroupPair, Inserted) = StructuralGroups.insert(
1319 std::make_pair(OuterGroupNum, SimilarityGroup({*CandIt})));
1320 }
1321
1322 // Iterate over the IRSimilarityCandidates following the current
1323 // IRSimilarityCandidate in the list to determine whether the two
1324 // IRSimilarityCandidates are compatible. This is so we do not repeat pairs
1325 // of IRSimilarityCandidates.
1326 for (InnerCandIt = std::next(CandIt),
1327 InnerCandEndIt = CandsForRepSubstring.end();
1328 InnerCandIt != InnerCandEndIt; InnerCandIt++) {
1329
1330 // We check if the inner item has a group already, if it does, we skip it.
1331 CandToGroupItInner = CandToGroup.find(&*InnerCandIt);
1332 if (CandToGroupItInner != CandToGroup.end())
1333 continue;
1334
1335 // Check if we have found structural similarity between two candidates
1336 // that fully contains the first and second candidates.
1337 std::optional<std::pair<IRSimilarityCandidate *, IRSimilarityCandidate *>>
1338 LargerPair = CheckLargerCands(
1339 *CandIt, *InnerCandIt, IndexToIncludedCand, CandToOverallGroup);
1340
1341 // If a pair was found, it means that we can assume that these smaller
1342 // substrings are also structurally similar. Use the larger candidates to
1343 // determine the canonical mapping between the two sections.
1344 if (LargerPair.has_value()) {
1345 SameStructure = true;
1346 InnerCandIt->createCanonicalRelationFrom(
1347 *CandIt, *LargerPair.value().first, *LargerPair.value().second);
1348 CandToGroup.insert(std::make_pair(&*InnerCandIt, OuterGroupNum));
1349 CurrentGroupPair->second.push_back(*InnerCandIt);
1350 continue;
1351 }
1352
1353 // Otherwise we determine if they have the same structure and add it to
1354 // vector if they match.
1355 ValueNumberMappingA.clear();
1356 ValueNumberMappingB.clear();
1358 *CandIt, *InnerCandIt, ValueNumberMappingA, ValueNumberMappingB);
1359 if (!SameStructure)
1360 continue;
1361
1362 InnerCandIt->createCanonicalRelationFrom(*CandIt, ValueNumberMappingA,
1363 ValueNumberMappingB);
1364 CandToGroup.insert(std::make_pair(&*InnerCandIt, OuterGroupNum));
1365 CurrentGroupPair->second.push_back(*InnerCandIt);
1366 }
1367 }
1368}
1369
1370void IRSimilarityIdentifier::findCandidates(
1371 std::vector<IRInstructionData *> &InstrList,
1372 std::vector<unsigned> &IntegerMapping) {
1373 SuffixTree ST(IntegerMapping);
1374
1375 std::vector<IRSimilarityCandidate> CandsForRepSubstring;
1376 std::vector<SimilarityGroup> NewCandidateGroups;
1377
1378 DenseMap<unsigned, SimilarityGroup> StructuralGroups;
1379 DenseMap<unsigned, DenseSet<IRSimilarityCandidate *>> IndexToIncludedCand;
1380 DenseMap<IRSimilarityCandidate *, unsigned> CandToGroup;
1381
1382 // Iterate over the subsequences found by the Suffix Tree to create
1383 // IRSimilarityCandidates for each repeated subsequence and determine which
1384 // instances are structurally similar to one another.
1385
1386 // Sort the suffix tree from longest substring to shortest.
1387 std::vector<SuffixTree::RepeatedSubstring> RSes;
1388 for (SuffixTree::RepeatedSubstring &RS : ST)
1389 RSes.push_back(RS);
1390
1391 llvm::stable_sort(RSes, [](const SuffixTree::RepeatedSubstring &LHS,
1392 const SuffixTree::RepeatedSubstring &RHS) {
1393 return LHS.Length > RHS.Length;
1394 });
1395 for (SuffixTree::RepeatedSubstring &RS : RSes) {
1396 createCandidatesFromSuffixTree(Mapper, InstrList, IntegerMapping, RS,
1397 CandsForRepSubstring);
1398
1399 if (CandsForRepSubstring.size() < 2)
1400 continue;
1401
1402 findCandidateStructures(CandsForRepSubstring, StructuralGroups,
1403 IndexToIncludedCand, CandToGroup);
1404 for (std::pair<unsigned, SimilarityGroup> &Group : StructuralGroups) {
1405 // We only add the group if it contains more than one
1406 // IRSimilarityCandidate. If there is only one, that means there is no
1407 // other repeated subsequence with the same structure.
1408 if (Group.second.size() > 1) {
1409 SimilarityCandidates->push_back(Group.second);
1410 // Iterate over each candidate in the group, and add an entry for each
1411 // instruction included with a mapping to a set of
1412 // IRSimilarityCandidates that include that instruction.
1413 for (IRSimilarityCandidate &IRCand : SimilarityCandidates->back()) {
1414 for (unsigned Idx = IRCand.getStartIdx(), Edx = IRCand.getEndIdx();
1415 Idx <= Edx; ++Idx)
1416 IndexToIncludedCand[Idx].insert(&IRCand);
1417 // Add mapping of candidate to the overall similarity group number.
1418 CandToGroup.insert(
1419 std::make_pair(&IRCand, SimilarityCandidates->size() - 1));
1420 }
1421 }
1422 }
1423
1424 CandsForRepSubstring.clear();
1425 StructuralGroups.clear();
1426 NewCandidateGroups.clear();
1427 }
1428}
1429
1431 ArrayRef<std::unique_ptr<Module>> Modules) {
1433
1434 std::vector<IRInstructionData *> InstrList;
1435 std::vector<unsigned> IntegerMapping;
1436 Mapper.InstClassifier.EnableBranches = this->EnableBranches;
1437 Mapper.InstClassifier.EnableIndirectCalls = EnableIndirectCalls;
1438 Mapper.EnableMatchCallsByName = EnableMatchingCallsByName;
1439 Mapper.InstClassifier.EnableIntrinsics = EnableIntrinsics;
1440 Mapper.InstClassifier.EnableMustTailCalls = EnableMustTailCalls;
1441
1442 populateMapper(Modules, InstrList, IntegerMapping);
1443 findCandidates(InstrList, IntegerMapping);
1444
1445 return *SimilarityCandidates;
1446}
1447
1450 Mapper.InstClassifier.EnableBranches = this->EnableBranches;
1451 Mapper.InstClassifier.EnableIndirectCalls = EnableIndirectCalls;
1452 Mapper.EnableMatchCallsByName = EnableMatchingCallsByName;
1453 Mapper.InstClassifier.EnableIntrinsics = EnableIntrinsics;
1454 Mapper.InstClassifier.EnableMustTailCalls = EnableMustTailCalls;
1455
1456 std::vector<IRInstructionData *> InstrList;
1457 std::vector<unsigned> IntegerMapping;
1458
1459 populateMapper(M, InstrList, IntegerMapping);
1460 findCandidates(InstrList, IntegerMapping);
1461
1462 return *SimilarityCandidates;
1463}
1464
1466 "ir-similarity-identifier", false, true)
1467
1469 : ModulePass(ID) {}
1470
1477
1479 IRSI.reset();
1480 return false;
1481}
1482
1484 IRSI->findSimilarity(M);
1485 return false;
1486}
1487
1488AnalysisKey IRSimilarityAnalysis::Key;
1493 false);
1494 IRSI.findSimilarity(M);
1495 return IRSI;
1496}
1497
1501 std::optional<SimilarityGroupList> &SimilarityCandidatesOpt =
1502 IRSI.getSimilarity();
1503
1504 for (std::vector<IRSimilarityCandidate> &CandVec : *SimilarityCandidatesOpt) {
1505 OS << CandVec.size() << " candidates of length "
1506 << CandVec.begin()->getLength() << ". Found in: \n";
1507 for (IRSimilarityCandidate &Cand : CandVec) {
1508 OS << " Function: " << Cand.front()->Inst->getFunction()->getName().str()
1509 << ", Basic Block: ";
1510 if (Cand.front()->Inst->getParent()->getName().str() == "")
1511 OS << "(unnamed)";
1512 else
1513 OS << Cand.front()->Inst->getParent()->getName().str();
1514 OS << "\n Start Instruction: ";
1515 Cand.frontInstruction()->print(OS);
1516 OS << "\n End Instruction: ";
1517 Cand.backInstruction()->print(OS);
1518 OS << "\n";
1519 }
1520 }
1521
1522 return PreservedAnalyses::all();
1523}
1524
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
#define X(NUM, ENUM, NAME)
Definition ELF.h:849
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file defines the DenseMap class.
Hexagon Common GEP
bool checkNumberingAndReplace(DenseMap< unsigned, DenseSet< unsigned > > &CurrentSrcTgtNumberMapping, unsigned SourceArgVal, unsigned TargetArgVal)
Determine if operand number TargetArgVal is in the current mapping set for operand number SourceArgVa...
detail::zippy< detail::zip_shortest, SmallVector< int, 4 > &, SmallVector< int, 4 > &, ArrayRef< Value * > &, ArrayRef< Value * > & > ZippedRelativeLocationsT
static void findCandidateStructures(std::vector< IRSimilarityCandidate > &CandsForRepSubstring, DenseMap< unsigned, SimilarityGroup > &StructuralGroups, DenseMap< unsigned, DenseSet< IRSimilarityCandidate * > > &IndexToIncludedCand, DenseMap< IRSimilarityCandidate *, unsigned > &CandToOverallGroup)
From the list of IRSimilarityCandidates, perform a comparison between each IRSimilarityCandidate to d...
static void createCandidatesFromSuffixTree(const IRInstructionMapper &Mapper, std::vector< IRInstructionData * > &InstrList, std::vector< unsigned > &IntegerMapping, SuffixTree::RepeatedSubstring &RS, std::vector< IRSimilarityCandidate > &CandsForRepSubstring)
From a repeated subsequence, find all the different instances of the subsequence from the InstrList,...
static bool checkNumberingAndReplaceCommutative(const DenseMap< Value *, unsigned > &SourceValueToNumberMapping, DenseMap< unsigned, DenseSet< unsigned > > &CurrentSrcTgtNumberMapping, ArrayRef< Value * > &SourceOperands, DenseSet< unsigned > &TargetValueNumbers)
Determine if one or more of the assigned global value numbers for the operands in TargetValueNumbers ...
static std::optional< std::pair< IRSimilarityCandidate *, IRSimilarityCandidate * > > CheckLargerCands(IRSimilarityCandidate &CandA, IRSimilarityCandidate &CandB, DenseMap< unsigned, DenseSet< IRSimilarityCandidate * > > &IndexToIncludedCand, DenseMap< IRSimilarityCandidate *, unsigned > &CandToGroup)
Look for larger IRSimilarityCandidates From the previously matched IRSimilarityCandidates that fully ...
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
uint64_t IntrinsicInst * II
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition PassSupport.h:56
This file defines generic set operations that may be used on set's of different types,...
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
Value * RHS
Value * LHS
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
const_pointer iterator
Definition ArrayRef.h:47
LLVM Basic Block Representation.
Definition BasicBlock.h:62
iterator end()
Definition BasicBlock.h:462
iterator begin()
Instruction iterator methods.
Definition BasicBlock.h:449
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
LLVM_ABI bool isIndirectCall() const
Return true if the callsite is an indirect call.
This class represents a function call, abstracting a target machine's calling convention.
This class is the base class for the comparison instructions.
Definition InstrTypes.h:664
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition InstrTypes.h:680
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition InstrTypes.h:681
@ ICMP_UGE
unsigned greater or equal
Definition InstrTypes.h:700
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:699
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:703
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
Definition InstrTypes.h:688
@ ICMP_SGE
signed greater or equal
Definition InstrTypes.h:704
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
Definition InstrTypes.h:689
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition InstrTypes.h:827
Predicate getPredicate() const
Return the predicate for this instruction.
Definition InstrTypes.h:765
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Definition DenseMap.h:256
bool erase(const KeyT &Val)
Definition DenseMap.h:330
DenseMapIterator< KeyT, ValueT, KeyInfoT, BucketT > iterator
Definition DenseMap.h:74
unsigned size() const
Definition DenseMap.h:110
bool empty() const
Definition DenseMap.h:109
iterator begin()
Definition DenseMap.h:78
iterator end()
Definition DenseMap.h:81
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Definition DenseMap.h:169
void swap(DerivedT &RHS)
Definition DenseMap.h:371
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:241
Implements a dense probed hash-table based set.
Definition DenseSet.h:279
Class to represent function types.
ArrayRef< Type * > params() const
LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
An analysis pass that runs and returns the IRSimilarityIdentifier run on the Module.
LLVM_ABI Result run(Module &M, ModuleAnalysisManager &)
An analysis pass based on legacy pass manager that runs and returns IRSimilarityIdentifier run on the...
bool doInitialization(Module &M) override
doInitialization - Virtual method overridden by subclasses to do any necessary initialization before ...
bool doFinalization(Module &M) override
doFinalization - Virtual method overriden by subclasses to do any necessary clean up after all passes...
bool runOnModule(Module &M) override
runOnModule - Virtual method overriden by subclasses to process the module being operated on.
This is a class that wraps a range of IRInstructionData from one point to another in the vector of IR...
LLVM_ABI void createCanonicalRelationFrom(IRSimilarityCandidate &SourceCand, DenseMap< unsigned, DenseSet< unsigned > > &ToSourceMapping, DenseMap< unsigned, DenseSet< unsigned > > &FromSourceMapping)
Create a mapping for the value numbering of the calling IRSimilarityCandidate, to a different separat...
static LLVM_ABI bool isSimilar(const IRSimilarityCandidate &A, const IRSimilarityCandidate &B)
std::optional< unsigned > getGVN(Value *V)
Finds the positive number associated with V if it has been mapped.
static LLVM_ABI bool compareAssignmentMapping(const unsigned InstValA, const unsigned &InstValB, DenseMap< unsigned, DenseSet< unsigned > > &ValueNumberMappingA, DenseMap< unsigned, DenseSet< unsigned > > &ValueNumberMappingB)
Compare the GVN of the assignment value in corresponding instructions in IRSimilarityCandidates A and...
void getBasicBlocks(DenseSet< BasicBlock * > &BBSet) const
std::optional< Value * > fromGVN(unsigned Num)
Finds the Value associate with Num if it exists.
static LLVM_ABI bool checkRelativeLocations(RelativeLocMapping A, RelativeLocMapping B)
Compare the relative locations in A and B and check that the distances match if both locations are co...
static LLVM_ABI bool compareStructure(const IRSimilarityCandidate &A, const IRSimilarityCandidate &B)
static LLVM_ABI void createCanonicalMappingFor(IRSimilarityCandidate &CurrCand)
Create a mapping from the value numbering to a different separate set of numbers.
static LLVM_ABI bool overlap(const IRSimilarityCandidate &A, const IRSimilarityCandidate &B)
Compare the start and end indices of the two IRSimilarityCandidates for whether they overlap.
LLVM_ABI IRSimilarityCandidate(unsigned StartIdx, unsigned Len, IRInstructionData *FirstInstIt, IRInstructionData *LastInstIt)
std::optional< unsigned > getCanonicalNum(unsigned N)
Find the canonical number from the global value number N stored in the candidate.
std::optional< unsigned > fromCanonicalNum(unsigned N)
Find the global value number from the canonical number N stored in the candidate.
static LLVM_ABI bool compareNonCommutativeOperandMapping(OperandMapping A, OperandMapping B)
Compare the operands in A and B and check that the current mapping of global value numbers from A to ...
static LLVM_ABI bool compareCommutativeOperandMapping(OperandMapping A, OperandMapping B)
Compare the operands in A and B and check that the current mapping of global value numbers from A to ...
This class puts all the pieces of the IRInstructionData, IRInstructionMapper, IRSimilarityCandidate t...
LLVM_ABI SimilarityGroupList & findSimilarity(ArrayRef< std::unique_ptr< Module > > Modules)
std::optional< SimilarityGroupList > & getSimilarity()
A wrapper class for inspecting calls to intrinsic functions.
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
Definition Pass.h:255
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
std::string str() const
str - Get the contents as an std::string.
Definition StringRef.h:222
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
LLVM Value Representation.
Definition Value.h:75
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
std::pair< iterator, bool > insert(const ValueT &V)
Definition DenseSet.h:202
iterator find(const_arg_type_t< ValueT > V)
Definition DenseSet.h:167
size_type size() const
Definition DenseSet.h:87
bool contains(const_arg_type_t< ValueT > V) const
Check if the set contains the given element.
Definition DenseSet.h:175
const ParentTy * getParent() const
Definition ilist_node.h:34
ilist_select_iterator_type< OptionsT, false, false > iterator
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
std::vector< SimilarityGroup > SimilarityGroupList
std::vector< IRSimilarityCandidate > SimilarityGroup
LLVM_ABI bool isClose(const IRInstructionData &A, const IRInstructionData &B)
Compare one IRInstructionData class to another IRInstructionData class for whether they are performin...
LLVM_ABI StringRef getName(ID id)
Return the LLVM name for an intrinsic, such as "llvm.ppc.altivec.lvx".
LLVM_ABI bool isOverloaded(ID id)
Returns true if the intrinsic can be overloaded.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
Definition STLExtras.h:831
void stable_sort(R &&Range)
Definition STLExtras.h:2116
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1739
void set_intersect(S1Ty &S1, const S2Ty &S2)
set_intersect(A, B) - Compute A := A ^ B Identical to set_intersection, except that it works on set<>...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2208
cl::opt< bool > DisableIndirectCalls("no-ir-sim-indirect-calls", cl::init(false), cl::ReallyHidden, cl::desc("disable outlining indirect calls."))
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1746
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
cl::opt< bool > DisableBranches("no-ir-sim-branch-matching", cl::init(false), cl::ReallyHidden, cl::desc("disable similarity matching, and outlining, " "across branches for debugging purposes."))
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
cl::opt< bool > DisableIntrinsics("no-ir-sim-intrinsics", cl::init(false), cl::ReallyHidden, cl::desc("Don't match or outline intrinsics"))
ArrayRef(const T &OneElt) -> ArrayRef< T >
static cl::opt< bool > MatchCallsByName("ir-sim-calls-by-name", cl::init(false), cl::ReallyHidden, cl::desc("only allow matching call instructions if the " "name and type signature match."))
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
AnalysisManager< Module > ModuleAnalysisManager
Convenience typedef for the Module analysis manager.
Definition MIRParser.h:39
A special type used by analysis passes to provide an address that identifies that particular analysis...
Definition Analysis.h:29
An information struct used to provide DenseMap with the various necessary components for a given valu...
This provides the utilities for hashing an Instruction to an unsigned integer.
LLVM_ABI StringRef getCalleeName() const
Get the callee name that the call instruction is using for hashing the instruction.
LLVM_ABI void initializeInstruction()
Fills data stuctures for IRInstructionData when it is constructed from a.
SmallVector< int, 4 > RelativeBlockLocations
This structure holds the distances of how far "ahead of" or "behind" the target blocks of a branch,...
std::optional< std::string > CalleeName
This is only relevant if we are wrapping a CallInst.
LLVM_ABI IRInstructionData(Instruction &I, bool Legality, IRInstructionDataList &IDL)
Gather the information that is difficult to gather for an Instruction, or is changed.
LLVM_ABI ArrayRef< Value * > getBlockOperVals()
Get the BasicBlock based operands for PHINodes and BranchInsts.
Instruction * Inst
The source Instruction that is being wrapped.
static LLVM_ABI CmpInst::Predicate predicateForConsistency(CmpInst *CI)
A function that swaps the predicates to their less than form if they are in a greater than form.
LLVM_ABI void setPHIPredecessors(DenseMap< BasicBlock *, unsigned > &BasicBlockToInteger)
For an IRInstructionData containing a PHINode, finds the relative distances from the incoming basic b...
SmallVector< Value *, 4 > OperVals
The values of the operands in the Instruction.
LLVM_ABI void setBranchSuccessors(DenseMap< BasicBlock *, unsigned > &BasicBlockToInteger)
For an IRInstructionData containing a branch, finds the relative distances from the source basic bloc...
bool Legal
The legality of the wrapped instruction.
LLVM_ABI void setCalleeName(bool MatchByName=true)
For an IRInstructionData containing a CallInst, set the function name appropriately.
LLVM_ABI CmpInst::Predicate getPredicate() const
Get the predicate that the compare instruction is using for hashing the instruction.
std::optional< CmpInst::Predicate > RevisedPredicate
This is only relevant if we are wrapping a CmpInst where we needed to change the predicate of a compa...
Helper struct for converting the Instructions in a Module into a vector of unsigned integers.
DenseMap< IRInstructionData *, unsigned, IRInstructionDataTraits > InstructionIntegerMap
Correspondence from IRInstructionData to unsigned integers.
SpecificBumpPtrAllocator< IRInstructionDataList > * IDLAllocator
This allocator pointer is in charge of creating the IRInstructionDataList so it is not deallocated un...
SpecificBumpPtrAllocator< IRInstructionData > * InstDataAllocator
This allocator pointer is in charge of holding on to the IRInstructionData so it is not deallocated u...
bool EnableMatchCallsByName
Marks whether we should use exact function names, as well as types to find similarity between calls.
unsigned LegalInstrNumber
The next available integer to assign to a legal Instruction to.
unsigned IllegalInstrNumber
The starting illegal instruction number to map to.
InstructionClassification InstClassifier
Maps an Instruction to a member of InstrType.
bool HaveLegalRange
Marks whether we have found a set of instructions that is long enough to be considered for similarity...
LLVM_ABI IRInstructionData * allocateIRInstructionData(Instruction &I, bool Legality, IRInstructionDataList &IDL)
Get an allocated IRInstructionData struct using the InstDataAllocator.
bool CanCombineWithPrevInstr
Marks whether we found a illegal instruction in the previous step.
DenseMap< BasicBlock *, unsigned > BasicBlockToInteger
A mapping for a basic block in a module to its assigned number/location in the module.
LLVM_ABI IRInstructionDataList * allocateIRInstructionDataList()
Get an allocated IRInstructionDataList object using the IDLAllocator.
LLVM_ABI unsigned mapToLegalUnsigned(BasicBlock::iterator &It, std::vector< unsigned > &IntegerMappingForBB, std::vector< IRInstructionData * > &InstrListForBB)
Maps an Instruction to a legal integer.
LLVM_ABI void convertToUnsignedVec(BasicBlock &BB, std::vector< IRInstructionData * > &InstrList, std::vector< unsigned > &IntegerMapping)
Maps the Instructions in a BasicBlock BB to legal or illegal integers determined by InstrType.
bool AddedIllegalLastTime
Set if we added an illegal number in the previous step.
LLVM_ABI unsigned mapToIllegalUnsigned(BasicBlock::iterator &It, std::vector< unsigned > &IntegerMappingForBB, std::vector< IRInstructionData * > &InstrListForBB, bool End=false)
Maps an Instruction to an illegal integer.
A helper struct to hold the candidate, for a branch instruction, the relative location of a label,...
Incoming for lane mask phi as machine instruction, incoming register Reg and incoming block Block are...
A repeated substring in the tree.
Definition SuffixTree.h:50