LLVM 23.0.0git
IRSimilarityIdentifier.cpp
Go to the documentation of this file.
1//===- IRSimilarityIdentifier.cpp - Find similarity in a module -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// \file
10// Implementation file for the IRSimilarityIdentifier for identifying
11// similarities in IR including the IRInstructionMapper.
12//
13//===----------------------------------------------------------------------===//
14
16#include "llvm/ADT/DenseMap.h"
18#include "llvm/IR/Intrinsics.h"
19#include "llvm/IR/Operator.h"
20#include "llvm/IR/User.h"
23
24using namespace llvm;
25using namespace IRSimilarity;
26
27namespace llvm {
29 DisableBranches("no-ir-sim-branch-matching", cl::init(false),
31 cl::desc("disable similarity matching, and outlining, "
32 "across branches for debugging purposes."));
33
35 DisableIndirectCalls("no-ir-sim-indirect-calls", cl::init(false),
37 cl::desc("disable outlining indirect calls."));
38
39static cl::opt<bool>
40 MatchCallsByName("ir-sim-calls-by-name", cl::init(false), cl::ReallyHidden,
41 cl::desc("only allow matching call instructions if the "
42 "name and type signature match."));
43
45 DisableIntrinsics("no-ir-sim-intrinsics", cl::init(false), cl::ReallyHidden,
46 cl::desc("Don't match or outline intrinsics"));
47} // namespace llvm
48
51 : Inst(&I), Legal(Legality), IDL(&IDList) {
53}
54
56 // We check for whether we have a comparison instruction. If it is, we
57 // find the "less than" version of the predicate for consistency for
58 // comparison instructions throught the program.
61 if (Predicate != C->getPredicate())
62 RevisedPredicate = Predicate;
63 }
64
65 // Here we collect the operands and their types for determining whether
66 // the structure of the operand use matches between two different candidates.
67 for (Use &OI : Inst->operands()) {
69 // If we have a CmpInst where the predicate is reversed, it means the
70 // operands must be reversed as well.
71 OperVals.insert(OperVals.begin(), OI.get());
72 continue;
73 }
74
75 OperVals.push_back(OI.get());
76 }
77
78 // We capture the incoming BasicBlocks as values as well as the incoming
79 // Values in order to check for structural similarity.
81 llvm::append_range(OperVals, PN->blocks());
82}
83
86
88 DenseMap<BasicBlock *, unsigned> &BasicBlockToInteger) {
89 assert((isa<UncondBrInst, CondBrInst>(Inst)) && "Instruction must be branch");
90
92
93 BBNumIt = BasicBlockToInteger.find(Inst->getParent());
94 assert(BBNumIt != BasicBlockToInteger.end() &&
95 "Could not find location for BasicBlock!");
96
97 int CurrentBlockNumber = static_cast<int>(BBNumIt->second);
98
99 for (Value *V : getBlockOperVals()) {
101 BBNumIt = BasicBlockToInteger.find(Successor);
102 assert(BBNumIt != BasicBlockToInteger.end() &&
103 "Could not find number for BasicBlock!");
104 int OtherBlockNumber = static_cast<int>(BBNumIt->second);
105
106 int Relative = OtherBlockNumber - CurrentBlockNumber;
107 RelativeBlockLocations.push_back(Relative);
108 }
109}
110
113 return OperVals;
115 return ArrayRef<Value *>(OperVals).drop_front(1);
116
117 if (PHINode *PN = dyn_cast<PHINode>(Inst))
118 return ArrayRef<Value *>(
119 std::next(OperVals.begin(), PN->getNumIncomingValues()),
120 OperVals.end()
121 );
122
123 llvm_unreachable("Instruction must be branch or PHINode");
124}
125
126void IRInstructionData::setCalleeName(bool MatchByName) {
128 assert(CI && "Instruction must be call");
129
130 CalleeName = "";
132 // To hash intrinsics, we use the opcode, and types like the other
133 // instructions, but also, the Intrinsic ID, and the Name of the
134 // intrinsic.
135 Intrinsic::ID IntrinsicID = II->getIntrinsicID();
136 FunctionType *FT = II->getFunctionType();
137 // If there is an overloaded name, we have to use the complex version
138 // of getName to get the entire string.
139 if (Intrinsic::isOverloaded(IntrinsicID))
140 CalleeName =
141 Intrinsic::getName(IntrinsicID, FT->params(), II->getModule(), FT);
142 // If there is not an overloaded name, we only need to use this version.
143 else
144 CalleeName = Intrinsic::getName(IntrinsicID).str();
145
146 return;
147 }
148
149 if (!CI->isIndirectCall() && MatchByName)
151}
152
154 DenseMap<BasicBlock *, unsigned> &BasicBlockToInteger) {
155 assert(isa<PHINode>(Inst) && "Instruction must be phi node");
156
159
160 BBNumIt = BasicBlockToInteger.find(PN->getParent());
161 assert(BBNumIt != BasicBlockToInteger.end() &&
162 "Could not find location for BasicBlock!");
163
164 int CurrentBlockNumber = static_cast<int>(BBNumIt->second);
165
166 // Convert the incoming blocks of the PHINode to an integer value, based on
167 // the relative distances between the current block and the incoming block.
168 for (unsigned Idx = 0; Idx < PN->getNumIncomingValues(); Idx++) {
169 BasicBlock *Incoming = PN->getIncomingBlock(Idx);
170 BBNumIt = BasicBlockToInteger.find(Incoming);
171 assert(BBNumIt != BasicBlockToInteger.end() &&
172 "Could not find number for BasicBlock!");
173 int OtherBlockNumber = static_cast<int>(BBNumIt->second);
174
175 int Relative = OtherBlockNumber - CurrentBlockNumber;
176 RelativeBlockLocations.push_back(Relative);
177 }
178}
179
181 switch (CI->getPredicate()) {
190 return CI->getSwappedPredicate();
191 default:
192 return CI->getPredicate();
193 }
194}
195
198 "Can only get a predicate from a compare instruction");
199
201 return *RevisedPredicate;
202
203 return cast<CmpInst>(Inst)->getPredicate();
204}
205
208 "Can only get a name from a call instruction");
209
210 assert(CalleeName && "CalleeName has not been set");
211
212 return *CalleeName;
213}
214
216 const IRInstructionData &B) {
217
218 if (!A.Legal || !B.Legal)
219 return false;
220
221 // Check if we are performing the same sort of operation on the same types
222 // but not on the same values.
223 if (!A.Inst->isSameOperationAs(B.Inst)) {
224 // If there is a predicate, this means that either there is a swapped
225 // predicate, or that the types are different, we want to make sure that
226 // the predicates are equivalent via swapping.
227 if (isa<CmpInst>(A.Inst) && isa<CmpInst>(B.Inst)) {
228
229 if (A.getPredicate() != B.getPredicate())
230 return false;
231
232 // If the predicates are the same via swap, make sure that the types are
233 // still the same.
234 auto ZippedTypes = zip(A.OperVals, B.OperVals);
235
236 return all_of(
237 ZippedTypes, [](std::tuple<llvm::Value *, llvm::Value *> R) {
238 return std::get<0>(R)->getType() == std::get<1>(R)->getType();
239 });
240 }
241
242 return false;
243 }
244
245 // Since any GEP Instruction operands after the first operand cannot be
246 // defined by a register, we must make sure that the operands after the first
247 // are the same in the two instructions
248 if (auto *GEP = dyn_cast<GetElementPtrInst>(A.Inst)) {
249 auto *OtherGEP = cast<GetElementPtrInst>(B.Inst);
250
251 // If the instructions do not have the same inbounds restrictions, we do
252 // not consider them the same.
253 if (GEP->isInBounds() != OtherGEP->isInBounds())
254 return false;
255
256 auto ZippedOperands = zip(GEP->indices(), OtherGEP->indices());
257
258 // We increment here since we do not care about the first instruction,
259 // we only care about the following operands since they must be the
260 // exact same to be considered similar.
261 return all_of(drop_begin(ZippedOperands),
262 [](std::tuple<llvm::Use &, llvm::Use &> R) {
263 return std::get<0>(R) == std::get<1>(R);
264 });
265 }
266
267 // If the instructions are functions calls, we make sure that the function
268 // name is the same. We already know that the types are since is
269 // isSameOperationAs is true.
270 if (isa<CallInst>(A.Inst) && isa<CallInst>(B.Inst)) {
271 if (A.getCalleeName() != B.getCalleeName())
272 return false;
273 }
274
277 A.RelativeBlockLocations.size() != B.RelativeBlockLocations.size())
278 return false;
279
280 return true;
281}
282
283// TODO: This is the same as the MachineOutliner, and should be consolidated
284// into the same interface.
286 BasicBlock &BB, std::vector<IRInstructionData *> &InstrList,
287 std::vector<unsigned> &IntegerMapping) {
288 BasicBlock::iterator It = BB.begin();
289
290 std::vector<unsigned> IntegerMappingForBB;
291 std::vector<IRInstructionData *> InstrListForBB;
292
293 for (BasicBlock::iterator Et = BB.end(); It != Et; ++It) {
294 switch (InstClassifier.visit(*It)) {
295 case InstrType::Legal:
296 mapToLegalUnsigned(It, IntegerMappingForBB, InstrListForBB);
297 break;
299 mapToIllegalUnsigned(It, IntegerMappingForBB, InstrListForBB);
300 break;
302 AddedIllegalLastTime = false;
303 break;
304 }
305 }
306
308 mapToIllegalUnsigned(It, IntegerMappingForBB, InstrListForBB, true);
309 for (IRInstructionData *ID : InstrListForBB)
310 this->IDL->push_back(*ID);
311 llvm::append_range(InstrList, InstrListForBB);
312 llvm::append_range(IntegerMapping, IntegerMappingForBB);
313}
314
315// TODO: This is the same as the MachineOutliner, and should be consolidated
316// into the same interface.
318 BasicBlock::iterator &It, std::vector<unsigned> &IntegerMappingForBB,
319 std::vector<IRInstructionData *> &InstrListForBB) {
320 // We added something legal, so we should unset the AddedLegalLastTime
321 // flag.
322 AddedIllegalLastTime = false;
323
324 // If we have at least two adjacent legal instructions (which may have
325 // invisible instructions in between), remember that.
327 HaveLegalRange = true;
329
330 // Get the integer for this instruction or give it the current
331 // LegalInstrNumber.
333 InstrListForBB.push_back(ID);
334
336 ID->setBranchSuccessors(BasicBlockToInteger);
337
338 if (isa<CallInst>(*It))
339 ID->setCalleeName(EnableMatchCallsByName);
340
341 if (isa<PHINode>(*It))
342 ID->setPHIPredecessors(BasicBlockToInteger);
343
344 // Add to the instruction list
345 bool WasInserted;
347 ResultIt;
348 std::tie(ResultIt, WasInserted) =
349 InstructionIntegerMap.insert(std::make_pair(ID, LegalInstrNumber));
350 unsigned INumber = ResultIt->second;
351
352 // There was an insertion.
353 if (WasInserted)
355
356 IntegerMappingForBB.push_back(INumber);
357
358 // Make sure we don't overflow or use any integers reserved by the DenseMap.
360 "Instruction mapping overflow!");
361
362 return INumber;
363}
364
370
375
380
381// TODO: This is the same as the MachineOutliner, and should be consolidated
382// into the same interface.
384 BasicBlock::iterator &It, std::vector<unsigned> &IntegerMappingForBB,
385 std::vector<IRInstructionData *> &InstrListForBB, bool End) {
386 // Can't combine an illegal instruction. Set the flag.
388
389 // Only add one illegal number per range of legal numbers.
391 return IllegalInstrNumber;
392
393 IRInstructionData *ID = nullptr;
394 if (!End)
395 ID = allocateIRInstructionData(*It, false, *IDL);
396 else
398 InstrListForBB.push_back(ID);
399
400 // Remember that we added an illegal number last time.
402 unsigned INumber = IllegalInstrNumber;
403 IntegerMappingForBB.push_back(IllegalInstrNumber--);
404
406 "Instruction mapping overflow!");
407
408 return INumber;
409}
410
411IRSimilarityCandidate::IRSimilarityCandidate(unsigned StartIdx, unsigned Len,
412 IRInstructionData *FirstInstIt,
413 IRInstructionData *LastInstIt)
414 : StartIdx(StartIdx), Len(Len) {
415
416 assert(FirstInstIt != nullptr && "Instruction is nullptr!");
417 assert(LastInstIt != nullptr && "Instruction is nullptr!");
418 assert(StartIdx + Len > StartIdx &&
419 "Overflow for IRSimilarityCandidate range?");
420 assert(Len - 1 == static_cast<unsigned>(std::distance(
421 iterator(FirstInstIt), iterator(LastInstIt))) &&
422 "Length of the first and last IRInstructionData do not match the "
423 "given length");
424
425 // We iterate over the given instructions, and map each unique value
426 // to a unique number in the IRSimilarityCandidate ValueToNumber and
427 // NumberToValue maps. A constant get its own value globally, the individual
428 // uses of the constants are not considered to be unique.
429 //
430 // IR: Mapping Added:
431 // %add1 = add i32 %a, c1 %add1 -> 3, %a -> 1, c1 -> 2
432 // %add2 = add i32 %a, %1 %add2 -> 4
433 // %add3 = add i32 c2, c1 %add3 -> 6, c2 -> 5
434 //
435 // when replace with global values, starting from 1, would be
436 //
437 // 3 = add i32 1, 2
438 // 4 = add i32 1, 3
439 // 6 = add i32 5, 2
440 unsigned LocalValNumber = 1;
442 for (unsigned Loc = StartIdx; Loc < StartIdx + Len; Loc++, ID++) {
443 // Map the operand values to an unsigned integer if it does not already
444 // have an unsigned integer assigned to it.
445 for (Value *Arg : ID->OperVals)
446 if (ValueToNumber.try_emplace(Arg, LocalValNumber).second) {
447 NumberToValue.try_emplace(LocalValNumber, Arg);
448 LocalValNumber++;
449 }
450
451 // Mapping the instructions to an unsigned integer if it is not already
452 // exist in the mapping.
453 if (ValueToNumber.try_emplace(ID->Inst, LocalValNumber).second) {
454 NumberToValue.try_emplace(LocalValNumber, ID->Inst);
455 LocalValNumber++;
456 }
457 }
458
459 // Setting the first and last instruction data pointers for the candidate. If
460 // we got through the entire for loop without hitting an assert, we know
461 // that both of these instructions are not nullptrs.
462 FirstInst = FirstInstIt;
463 LastInst = LastInstIt;
464
465 // Add the basic blocks contained in the set into the global value numbering.
467 getBasicBlocks(BBSet);
468 for (BasicBlock *BB : BBSet) {
469 if (ValueToNumber.try_emplace(BB, LocalValNumber).second) {
470 NumberToValue.try_emplace(LocalValNumber, BB);
471 LocalValNumber++;
472 }
473 }
474}
475
477 const IRSimilarityCandidate &B) {
478 if (A.getLength() != B.getLength())
479 return false;
480
481 auto InstrDataForBoth =
482 zip(make_range(A.begin(), A.end()), make_range(B.begin(), B.end()));
483
484 return all_of(InstrDataForBoth,
485 [](std::tuple<IRInstructionData &, IRInstructionData &> R) {
486 IRInstructionData &A = std::get<0>(R);
487 IRInstructionData &B = std::get<1>(R);
488 if (!A.Legal || !B.Legal)
489 return false;
490 return isClose(A, B);
491 });
492}
493
494/// Determine if one or more of the assigned global value numbers for the
495/// operands in \p TargetValueNumbers is in the current mapping set for operand
496/// numbers in \p SourceOperands. The set of possible corresponding global
497/// value numbers are replaced with the most recent version of compatible
498/// values.
499///
500/// \param [in] SourceValueToNumberMapping - The mapping of a Value to global
501/// value number for the source IRInstructionCandidate.
502/// \param [in, out] CurrentSrcTgtNumberMapping - The current mapping of source
503/// IRSimilarityCandidate global value numbers to a set of possible numbers in
504/// the target.
505/// \param [in] SourceOperands - The operands in the original
506/// IRSimilarityCandidate in the current instruction.
507/// \param [in] TargetValueNumbers - The global value numbers of the operands in
508/// the corresponding Instruction in the other IRSimilarityCandidate.
509/// \returns true if there exists a possible mapping between the source
510/// Instruction operands and the target Instruction operands, and false if not.
512 const DenseMap<Value *, unsigned> &SourceValueToNumberMapping,
513 DenseMap<unsigned, DenseSet<unsigned>> &CurrentSrcTgtNumberMapping,
514 ArrayRef<Value *> &SourceOperands,
515 DenseSet<unsigned> &TargetValueNumbers){
516
518
519 unsigned ArgVal;
520 bool WasInserted;
521
522 // Iterate over the operands in the source IRSimilarityCandidate to determine
523 // whether there exists an operand in the other IRSimilarityCandidate that
524 // creates a valid mapping of Value to Value between the
525 // IRSimilarityCaniddates.
526 for (Value *V : SourceOperands) {
527 ArgVal = SourceValueToNumberMapping.find(V)->second;
528
529 // Instead of finding a current mapping, we attempt to insert a set.
530 std::tie(ValueMappingIt, WasInserted) = CurrentSrcTgtNumberMapping.insert(
531 std::make_pair(ArgVal, TargetValueNumbers));
532
533 // We need to iterate over the items in other IRSimilarityCandidate's
534 // Instruction to determine whether there is a valid mapping of
535 // Value to Value.
536 DenseSet<unsigned> NewSet;
537 for (unsigned &Curr : ValueMappingIt->second)
538 // If we can find the value in the mapping, we add it to the new set.
539 if (TargetValueNumbers.contains(Curr))
540 NewSet.insert(Curr);
541
542 // If we could not find a Value, return 0.
543 if (NewSet.empty())
544 return false;
545
546 // Otherwise replace the old mapping with the newly constructed one.
547 if (NewSet.size() != ValueMappingIt->second.size())
548 ValueMappingIt->second.swap(NewSet);
549
550 // We have reached no conclusions about the mapping, and cannot remove
551 // any items from the other operands, so we move to check the next operand.
552 if (ValueMappingIt->second.size() != 1)
553 continue;
554
555 unsigned ValToRemove = *ValueMappingIt->second.begin();
556 // When there is only one item left in the mapping for and operand, remove
557 // the value from the other operands. If it results in there being no
558 // mapping, return false, it means the mapping is wrong
559 for (Value *InnerV : SourceOperands) {
560 if (V == InnerV)
561 continue;
562
563 unsigned InnerVal = SourceValueToNumberMapping.find(InnerV)->second;
564 ValueMappingIt = CurrentSrcTgtNumberMapping.find(InnerVal);
565 if (ValueMappingIt == CurrentSrcTgtNumberMapping.end())
566 continue;
567
568 ValueMappingIt->second.erase(ValToRemove);
569 if (ValueMappingIt->second.empty())
570 return false;
571 }
572 }
573
574 return true;
575}
576
577/// Determine if operand number \p TargetArgVal is in the current mapping set
578/// for operand number \p SourceArgVal.
579///
580/// \param [in, out] CurrentSrcTgtNumberMapping current mapping of global
581/// value numbers from source IRSimilarityCandidate to target
582/// IRSimilarityCandidate.
583/// \param [in] SourceArgVal The global value number for an operand in the
584/// in the original candidate.
585/// \param [in] TargetArgVal The global value number for the corresponding
586/// operand in the other candidate.
587/// \returns True if there exists a mapping and false if not.
589 DenseMap<unsigned, DenseSet<unsigned>> &CurrentSrcTgtNumberMapping,
590 unsigned SourceArgVal, unsigned TargetArgVal) {
591 // We are given two unsigned integers representing the global values of
592 // the operands in different IRSimilarityCandidates and a current mapping
593 // between the two.
594 //
595 // Source Operand GVN: 1
596 // Target Operand GVN: 2
597 // CurrentMapping: {1: {1, 2}}
598 //
599 // Since we have mapping, and the target operand is contained in the set, we
600 // update it to:
601 // CurrentMapping: {1: {2}}
602 // and can return true. But, if the mapping was
603 // CurrentMapping: {1: {3}}
604 // we would return false.
605
606 bool WasInserted;
608
609 std::tie(Val, WasInserted) = CurrentSrcTgtNumberMapping.insert(
610 std::make_pair(SourceArgVal, DenseSet<unsigned>({TargetArgVal})));
611
612 // If we created a new mapping, then we are done.
613 if (WasInserted)
614 return true;
615
616 // If there is more than one option in the mapping set, and the target value
617 // is included in the mapping set replace that set with one that only includes
618 // the target value, as it is the only valid mapping via the non commutative
619 // instruction.
620
621 DenseSet<unsigned> &TargetSet = Val->second;
622 if (TargetSet.size() > 1 && TargetSet.contains(TargetArgVal)) {
623 TargetSet.clear();
624 TargetSet.insert(TargetArgVal);
625 return true;
626 }
627
628 // Return true if we can find the value in the set.
629 return TargetSet.contains(TargetArgVal);
630}
631
634 // Iterators to keep track of where we are in the operands for each
635 // Instruction.
636 ArrayRef<Value *>::iterator VItA = A.OperVals.begin();
637 ArrayRef<Value *>::iterator VItB = B.OperVals.begin();
638 unsigned OperandLength = A.OperVals.size();
639
640 // For each operand, get the value numbering and ensure it is consistent.
641 for (unsigned Idx = 0; Idx < OperandLength; Idx++, VItA++, VItB++) {
642 unsigned OperValA = A.IRSC.ValueToNumber.find(*VItA)->second;
643 unsigned OperValB = B.IRSC.ValueToNumber.find(*VItB)->second;
644
645 // Attempt to add a set with only the target value. If there is no mapping
646 // we can create it here.
647 //
648 // For an instruction like a subtraction:
649 // IRSimilarityCandidateA: IRSimilarityCandidateB:
650 // %resultA = sub %a, %b %resultB = sub %d, %e
651 //
652 // We map %a -> %d and %b -> %e.
653 //
654 // And check to see whether their mapping is consistent in
655 // checkNumberingAndReplace.
656
657 if (!checkNumberingAndReplace(A.ValueNumberMapping, OperValA, OperValB))
658 return false;
659
660 if (!checkNumberingAndReplace(B.ValueNumberMapping, OperValB, OperValA))
661 return false;
662 }
663 return true;
664}
665
668 DenseSet<unsigned> ValueNumbersA;
669 DenseSet<unsigned> ValueNumbersB;
670
671 ArrayRef<Value *>::iterator VItA = A.OperVals.begin();
672 ArrayRef<Value *>::iterator VItB = B.OperVals.begin();
673 unsigned OperandLength = A.OperVals.size();
674
675 // Find the value number sets for the operands.
676 for (unsigned Idx = 0; Idx < OperandLength;
677 Idx++, VItA++, VItB++) {
678 ValueNumbersA.insert(A.IRSC.ValueToNumber.find(*VItA)->second);
679 ValueNumbersB.insert(B.IRSC.ValueToNumber.find(*VItB)->second);
680 }
681
682 // Iterate over the operands in the first IRSimilarityCandidate and make sure
683 // there exists a possible mapping with the operands in the second
684 // IRSimilarityCandidate.
685 if (!checkNumberingAndReplaceCommutative(A.IRSC.ValueToNumber,
686 A.ValueNumberMapping, A.OperVals,
687 ValueNumbersB))
688 return false;
689
690 // Iterate over the operands in the second IRSimilarityCandidate and make sure
691 // there exists a possible mapping with the operands in the first
692 // IRSimilarityCandidate.
693 if (!checkNumberingAndReplaceCommutative(B.IRSC.ValueToNumber,
694 B.ValueNumberMapping, B.OperVals,
695 ValueNumbersA))
696 return false;
697
698 return true;
699}
700
702 const unsigned InstValA, const unsigned &InstValB,
703 DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingA,
704 DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingB) {
706 bool WasInserted;
707 std::tie(ValueMappingIt, WasInserted) = ValueNumberMappingA.insert(
708 std::make_pair(InstValA, DenseSet<unsigned>({InstValB})));
709 if (!WasInserted && !ValueMappingIt->second.contains(InstValB))
710 return false;
711 else if (ValueMappingIt->second.size() != 1) {
712 // Snapshot the set before iterating: when InstValA maps to itself the
713 // erase below removes InstValA from the very set being iterated, which
714 // invalidates the range iterator under backward-shift deletion.
715 SmallVector<unsigned> OtherVals(ValueMappingIt->second.begin(),
716 ValueMappingIt->second.end());
717 for (unsigned OtherVal : OtherVals) {
718 if (OtherVal == InstValB)
719 continue;
720 auto OtherValIt = ValueNumberMappingA.find(OtherVal);
721 if (OtherValIt == ValueNumberMappingA.end())
722 continue;
723 OtherValIt->second.erase(InstValA);
724 }
725 ValueNumberMappingA.erase(ValueMappingIt);
726 std::tie(ValueMappingIt, WasInserted) = ValueNumberMappingA.insert(
727 std::make_pair(InstValA, DenseSet<unsigned>({InstValB})));
728 }
729
730 return true;
731}
732
735 // Get the basic blocks the label refers to.
736 BasicBlock *ABB = cast<BasicBlock>(A.OperVal);
737 BasicBlock *BBB = cast<BasicBlock>(B.OperVal);
738
739 // Get the basic blocks contained in each region.
740 DenseSet<BasicBlock *> BasicBlockA;
741 DenseSet<BasicBlock *> BasicBlockB;
742 A.IRSC.getBasicBlocks(BasicBlockA);
743 B.IRSC.getBasicBlocks(BasicBlockB);
744
745 // Determine if the block is contained in the region.
746 bool AContained = BasicBlockA.contains(ABB);
747 bool BContained = BasicBlockB.contains(BBB);
748
749 // Both blocks need to be contained in the region, or both need to be outside
750 // the region.
751 if (AContained != BContained)
752 return false;
753
754 // If both are contained, then we need to make sure that the relative
755 // distance to the target blocks are the same.
756 if (AContained)
757 return A.RelativeLocation == B.RelativeLocation;
758 return true;
759}
760
767
772
775 DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingA,
776 DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingB) {
777 if (A.getLength() != B.getLength())
778 return false;
779
780 if (A.ValueToNumber.size() != B.ValueToNumber.size())
781 return false;
782
783 iterator ItA = A.begin();
784 iterator ItB = B.begin();
785
786 // These ValueNumber Mapping sets create a create a mapping between the values
787 // in one candidate to values in the other candidate. If we create a set with
788 // one element, and that same element maps to the original element in the
789 // candidate we have a good mapping.
790
791 // Iterate over the instructions contained in each candidate
792 unsigned SectionLength = A.getStartIdx() + A.getLength();
793 for (unsigned Loc = A.getStartIdx(); Loc < SectionLength;
794 ItA++, ItB++, Loc++) {
795 // Make sure the instructions are similar to one another.
796 if (!isClose(*ItA, *ItB))
797 return false;
798
799 Instruction *IA = ItA->Inst;
800 Instruction *IB = ItB->Inst;
801
802 if (!ItA->Legal || !ItB->Legal)
803 return false;
804
805 // Get the operand sets for the instructions.
806 ArrayRef<Value *> OperValsA = ItA->OperVals;
807 ArrayRef<Value *> OperValsB = ItB->OperVals;
808
809 unsigned InstValA = A.ValueToNumber.find(IA)->second;
810 unsigned InstValB = B.ValueToNumber.find(IB)->second;
811
812 // Ensure that the mappings for the instructions exists.
813 if (!compareAssignmentMapping(InstValA, InstValB, ValueNumberMappingA,
814 ValueNumberMappingB))
815 return false;
816
817 if (!compareAssignmentMapping(InstValB, InstValA, ValueNumberMappingB,
818 ValueNumberMappingA))
819 return false;
820
821 // We have different paths for commutative instructions and non-commutative
822 // instructions since commutative instructions could allow multiple mappings
823 // to certain values.
824 if (IA->isCommutative() && !isa<FPMathOperator>(IA) &&
825 !isa<IntrinsicInst>(IA)) {
827 {A, OperValsA, ValueNumberMappingA},
828 {B, OperValsB, ValueNumberMappingB}))
829 return false;
830 continue;
831 }
832
833 // Handle the non-commutative cases.
835 {A, OperValsA, ValueNumberMappingA},
836 {B, OperValsB, ValueNumberMappingB}))
837 return false;
838
839 // Here we check that between two corresponding instructions,
840 // when referring to a basic block in the same region, the
841 // relative locations are the same. And, that the instructions refer to
842 // basic blocks outside the region in the same corresponding locations.
843
844 // We are able to make the assumption about blocks outside of the region
845 // since the target block labels are considered values and will follow the
846 // same number matching that we defined for the other instructions in the
847 // region. So, at this point, in each location we target a specific block
848 // outside the region, we are targeting a corresponding block in each
849 // analagous location in the region we are comparing to.
851 IA->getOpcode() != IB->getOpcode())
852 continue;
853
854 SmallVector<int, 4> &RelBlockLocsA = ItA->RelativeBlockLocations;
855 SmallVector<int, 4> &RelBlockLocsB = ItB->RelativeBlockLocations;
856 ArrayRef<Value *> ABL = ItA->getBlockOperVals();
857 ArrayRef<Value *> BBL = ItB->getBlockOperVals();
858
859 // Check to make sure that the number of operands, and branching locations
860 // between BranchInsts is the same.
861 if (RelBlockLocsA.size() != RelBlockLocsB.size() &&
862 ABL.size() != BBL.size())
863 return false;
864
865 assert(RelBlockLocsA.size() == ABL.size() &&
866 "Block information vectors not the same size.");
867 assert(RelBlockLocsB.size() == BBL.size() &&
868 "Block information vectors not the same size.");
869
870 ZippedRelativeLocationsT ZippedRelativeLocations =
871 zip(RelBlockLocsA, RelBlockLocsB, ABL, BBL);
872 if (any_of(ZippedRelativeLocations,
873 [&A, &B](std::tuple<int, int, Value *, Value *> R) {
875 {A, std::get<0>(R), std::get<2>(R)},
876 {B, std::get<1>(R), std::get<3>(R)});
877 }))
878 return false;
879 }
880 return true;
881}
882
884 const IRSimilarityCandidate &B) {
885 auto DoesOverlap = [](const IRSimilarityCandidate &X,
886 const IRSimilarityCandidate &Y) {
887 // Check:
888 // XXXXXX X starts before Y ends
889 // YYYYYYY Y starts after X starts
890 return X.StartIdx <= Y.getEndIdx() && Y.StartIdx >= X.StartIdx;
891 };
892
893 return DoesOverlap(A, B) || DoesOverlap(B, A);
894}
895
896void IRSimilarityIdentifier::populateMapper(
897 Module &M, std::vector<IRInstructionData *> &InstrList,
898 std::vector<unsigned> &IntegerMapping) {
899
900 std::vector<IRInstructionData *> InstrListForModule;
901 std::vector<unsigned> IntegerMappingForModule;
902 // Iterate over the functions in the module to map each Instruction in each
903 // BasicBlock to an unsigned integer.
904 Mapper.initializeForBBs(M);
905
906 for (Function &F : M) {
907
908 if (F.empty())
909 continue;
910
911 for (BasicBlock &BB : F) {
912
913 // BB has potential to have similarity since it has a size greater than 2
914 // and can therefore match other regions greater than 2. Map it to a list
915 // of unsigned integers.
916 Mapper.convertToUnsignedVec(BB, InstrListForModule,
917 IntegerMappingForModule);
918 }
919
920 BasicBlock::iterator It = F.begin()->end();
921 Mapper.mapToIllegalUnsigned(It, IntegerMappingForModule, InstrListForModule,
922 true);
923 if (InstrListForModule.size() > 0)
924 Mapper.IDL->push_back(*InstrListForModule.back());
925 }
926
927 // Insert the InstrListForModule at the end of the overall InstrList so that
928 // we can have a long InstrList for the entire set of Modules being analyzed.
929 llvm::append_range(InstrList, InstrListForModule);
930 // Do the same as above, but for IntegerMapping.
931 llvm::append_range(IntegerMapping, IntegerMappingForModule);
932}
933
934void IRSimilarityIdentifier::populateMapper(
935 ArrayRef<std::unique_ptr<Module>> &Modules,
936 std::vector<IRInstructionData *> &InstrList,
937 std::vector<unsigned> &IntegerMapping) {
938
939 // Iterate over, and map the instructions in each module.
940 for (const std::unique_ptr<Module> &M : Modules)
941 populateMapper(*M, InstrList, IntegerMapping);
942}
943
944/// From a repeated subsequence, find all the different instances of the
945/// subsequence from the \p InstrList, and create an IRSimilarityCandidate from
946/// the IRInstructionData in subsequence.
947///
948/// \param [in] Mapper - The instruction mapper for basic correctness checks.
949/// \param [in] InstrList - The vector that holds the instruction data.
950/// \param [in] IntegerMapping - The vector that holds the mapped integers.
951/// \param [out] CandsForRepSubstring - The vector to store the generated
952/// IRSimilarityCandidates.
954 const IRInstructionMapper& Mapper, std::vector<IRInstructionData *> &InstrList,
955 std::vector<unsigned> &IntegerMapping, SuffixTree::RepeatedSubstring &RS,
956 std::vector<IRSimilarityCandidate> &CandsForRepSubstring) {
957
958 unsigned StringLen = RS.Length;
959 if (StringLen < 2)
960 return;
961
962 // Create an IRSimilarityCandidate for instance of this subsequence \p RS.
963 for (const unsigned &StartIdx : RS.StartIndices) {
964 unsigned EndIdx = StartIdx + StringLen - 1;
965
966 // Check that this subsequence does not contain an illegal instruction.
967 bool ContainsIllegal = false;
968 for (unsigned CurrIdx = StartIdx; CurrIdx <= EndIdx; CurrIdx++) {
969 unsigned Key = IntegerMapping[CurrIdx];
970 if (Key > Mapper.IllegalInstrNumber) {
971 ContainsIllegal = true;
972 break;
973 }
974 }
975
976 // If we have an illegal instruction, we should not create an
977 // IRSimilarityCandidate for this region.
978 if (ContainsIllegal)
979 continue;
980
981 // We are getting iterators to the instructions in this region of code
982 // by advancing the start and end indices from the start of the
983 // InstrList.
984 std::vector<IRInstructionData *>::iterator StartIt = InstrList.begin();
985 std::advance(StartIt, StartIdx);
986 std::vector<IRInstructionData *>::iterator EndIt = InstrList.begin();
987 std::advance(EndIt, EndIdx);
988
989 CandsForRepSubstring.emplace_back(StartIdx, StringLen, *StartIt, *EndIt);
990 }
991}
992
994 IRSimilarityCandidate &SourceCand,
995 DenseMap<unsigned, DenseSet<unsigned>> &ToSourceMapping,
996 DenseMap<unsigned, DenseSet<unsigned>> &FromSourceMapping) {
997 assert(SourceCand.CanonNumToNumber.size() != 0 &&
998 "Base canonical relationship is empty!");
999 assert(SourceCand.NumberToCanonNum.size() != 0 &&
1000 "Base canonical relationship is empty!");
1001
1002 assert(CanonNumToNumber.size() == 0 && "Canonical Relationship is non-empty");
1003 assert(NumberToCanonNum.size() == 0 && "Canonical Relationship is non-empty");
1004
1005 DenseSet<unsigned> UsedGVNs;
1006 // Iterate over the mappings provided from this candidate to SourceCand. We
1007 // are then able to map the GVN in this candidate to the same canonical number
1008 // given to the corresponding GVN in SourceCand.
1009 for (std::pair<unsigned, DenseSet<unsigned>> &GVNMapping : ToSourceMapping) {
1010 unsigned SourceGVN = GVNMapping.first;
1011
1012 assert(GVNMapping.second.size() != 0 && "Possible GVNs is 0!");
1013
1014 unsigned ResultGVN;
1015 // We need special handling if we have more than one potential value. This
1016 // means that there are at least two GVNs that could correspond to this GVN.
1017 // This could lead to potential swapping later on, so we make a decision
1018 // here to ensure a one-to-one mapping.
1019 if (GVNMapping.second.size() > 1) {
1020 bool Found = false;
1021 for (unsigned Val : GVNMapping.second) {
1022 // We make sure the target value number hasn't already been reserved.
1023 if (UsedGVNs.contains(Val))
1024 continue;
1025
1026 // We make sure that the opposite mapping is still consistent.
1028 FromSourceMapping.find(Val);
1029
1030 if (!It->second.contains(SourceGVN))
1031 continue;
1032
1033 // We pick the first item that satisfies these conditions.
1034 Found = true;
1035 ResultGVN = Val;
1036 break;
1037 }
1038
1039 assert(Found && "Could not find matching value for source GVN");
1040 (void)Found;
1041
1042 } else
1043 ResultGVN = *GVNMapping.second.begin();
1044
1045 // Whatever GVN is found, we mark it as used.
1046 UsedGVNs.insert(ResultGVN);
1047
1048 unsigned CanonNum = *SourceCand.getCanonicalNum(ResultGVN);
1049 CanonNumToNumber.insert(std::make_pair(CanonNum, SourceGVN));
1050 NumberToCanonNum.insert(std::make_pair(SourceGVN, CanonNum));
1051 }
1052
1054 getBasicBlocks(BBSet);
1055 // Find canonical numbers for the BasicBlocks in the current candidate.
1056 // This is done by finding the corresponding value for the first instruction
1057 // in the block in the current candidate, finding the matching value in the
1058 // source candidate. Then by finding the parent of this value, use the
1059 // canonical number of the block in the source candidate for the canonical
1060 // number in the current candidate.
1061 for (BasicBlock *BB : BBSet) {
1062 unsigned BBGVNForCurrCand = ValueToNumber.find(BB)->second;
1063
1064 // We can skip the BasicBlock if the canonical numbering has already been
1065 // found in a separate instruction.
1066 if (NumberToCanonNum.contains(BBGVNForCurrCand))
1067 continue;
1068
1069 // If the basic block is the starting block, then the shared instruction may
1070 // not be the first instruction in the block, it will be the first
1071 // instruction in the similarity region.
1072 Value *FirstOutlineInst =
1073 BB == getStartBB() ? frontInstruction() : &*BB->begin();
1074
1075 unsigned FirstInstGVN = *getGVN(FirstOutlineInst);
1076 unsigned FirstInstCanonNum = *getCanonicalNum(FirstInstGVN);
1077 unsigned SourceGVN = *SourceCand.fromCanonicalNum(FirstInstCanonNum);
1078 Value *SourceV = *SourceCand.fromGVN(SourceGVN);
1079 BasicBlock *SourceBB = cast<Instruction>(SourceV)->getParent();
1080 unsigned SourceBBGVN = *SourceCand.getGVN(SourceBB);
1081 unsigned SourceCanonBBGVN = *SourceCand.getCanonicalNum(SourceBBGVN);
1082 CanonNumToNumber.insert(std::make_pair(SourceCanonBBGVN, BBGVNForCurrCand));
1083 NumberToCanonNum.insert(std::make_pair(BBGVNForCurrCand, SourceCanonBBGVN));
1084 }
1085}
1086
1088 IRSimilarityCandidate &SourceCand, IRSimilarityCandidate &SourceCandLarge,
1089 IRSimilarityCandidate &TargetCandLarge) {
1090 assert(!SourceCand.CanonNumToNumber.empty() &&
1091 "Canonical Relationship is non-empty");
1092 assert(!SourceCand.NumberToCanonNum.empty() &&
1093 "Canonical Relationship is non-empty");
1094
1095 assert(!SourceCandLarge.CanonNumToNumber.empty() &&
1096 "Canonical Relationship is non-empty");
1097 assert(!SourceCandLarge.NumberToCanonNum.empty() &&
1098 "Canonical Relationship is non-empty");
1099
1100 assert(!TargetCandLarge.CanonNumToNumber.empty() &&
1101 "Canonical Relationship is non-empty");
1102 assert(!TargetCandLarge.NumberToCanonNum.empty() &&
1103 "Canonical Relationship is non-empty");
1104
1105 assert(CanonNumToNumber.empty() && "Canonical Relationship is non-empty");
1106 assert(NumberToCanonNum.empty() && "Canonical Relationship is non-empty");
1107
1108 // We're going to use the larger candidates as a "bridge" to create the
1109 // canonical number for the target candidate since we have idetified two
1110 // candidates as subsequences of larger sequences, and therefore must be
1111 // structurally similar.
1112 for (std::pair<Value *, unsigned> &ValueNumPair : ValueToNumber) {
1113 Value *CurrVal = ValueNumPair.first;
1114 unsigned TargetCandGVN = ValueNumPair.second;
1115
1116 // Find the numbering in the large candidate that surrounds the
1117 // current candidate.
1118 std::optional<unsigned> OLargeTargetGVN = TargetCandLarge.getGVN(CurrVal);
1119 assert(OLargeTargetGVN.has_value() && "GVN not found for Value");
1120
1121 // Get the canonical numbering in the large target candidate.
1122 std::optional<unsigned> OTargetCandCanon =
1123 TargetCandLarge.getCanonicalNum(OLargeTargetGVN.value());
1124 assert(OTargetCandCanon.has_value() &&
1125 "Canononical Number not found for GVN");
1126
1127 // Get the GVN in the large source candidate from the canonical numbering.
1128 std::optional<unsigned> OLargeSourceGVN =
1129 SourceCandLarge.fromCanonicalNum(OTargetCandCanon.value());
1130 assert(OLargeSourceGVN.has_value() &&
1131 "GVN Number not found for Canonical Number");
1132
1133 // Get the Value from the GVN in the large source candidate.
1134 std::optional<Value *> OLargeSourceV =
1135 SourceCandLarge.fromGVN(OLargeSourceGVN.value());
1136 assert(OLargeSourceV.has_value() && "Value not found for GVN");
1137
1138 // Get the GVN number for the Value in the source candidate.
1139 std::optional<unsigned> OSourceGVN =
1140 SourceCand.getGVN(OLargeSourceV.value());
1141 assert(OSourceGVN.has_value() && "GVN Number not found for Value");
1142
1143 // Get the canonical numbering from the GVN/
1144 std::optional<unsigned> OSourceCanon =
1145 SourceCand.getCanonicalNum(OSourceGVN.value());
1146 assert(OSourceCanon.has_value() && "Canon Number not found for GVN");
1147
1148 // Insert the canonical numbering and GVN pair into their respective
1149 // mappings.
1150 CanonNumToNumber.insert(
1151 std::make_pair(OSourceCanon.value(), TargetCandGVN));
1152 NumberToCanonNum.insert(
1153 std::make_pair(TargetCandGVN, OSourceCanon.value()));
1154 }
1155}
1156
1158 IRSimilarityCandidate &CurrCand) {
1159 assert(CurrCand.CanonNumToNumber.size() == 0 &&
1160 "Canonical Relationship is non-empty");
1161 assert(CurrCand.NumberToCanonNum.size() == 0 &&
1162 "Canonical Relationship is non-empty");
1163
1164 unsigned CanonNum = 0;
1165 // Iterate over the value numbers found, the order does not matter in this
1166 // case.
1167 for (std::pair<unsigned, Value *> &NumToVal : CurrCand.NumberToValue) {
1168 CurrCand.NumberToCanonNum.insert(std::make_pair(NumToVal.first, CanonNum));
1169 CurrCand.CanonNumToNumber.insert(std::make_pair(CanonNum, NumToVal.first));
1170 CanonNum++;
1171 }
1172}
1173
1174/// Look for larger IRSimilarityCandidates From the previously matched
1175/// IRSimilarityCandidates that fully contain \p CandA or \p CandB. If there is
1176/// an overlap, return a pair of structurally similar, larger
1177/// IRSimilarityCandidates.
1178///
1179/// \param [in] CandA - The first candidate we are trying to determine the
1180/// structure of.
1181/// \param [in] CandB - The second candidate we are trying to determine the
1182/// structure of.
1183/// \param [in] IndexToIncludedCand - Mapping of index of the an instruction in
1184/// a circuit to the IRSimilarityCandidates that include this instruction.
1185/// \param [in] CandToOverallGroup - Mapping of IRSimilarityCandidate to a
1186/// number representing the structural group assigned to it.
1187static std::optional<
1188 std::pair<IRSimilarityCandidate *, IRSimilarityCandidate *>>
1191 DenseMap<unsigned, DenseSet<IRSimilarityCandidate *>> &IndexToIncludedCand,
1193 DenseMap<unsigned, IRSimilarityCandidate *> IncludedGroupAndCandA;
1194 DenseMap<unsigned, IRSimilarityCandidate *> IncludedGroupAndCandB;
1195 DenseSet<unsigned> IncludedGroupsA;
1196 DenseSet<unsigned> IncludedGroupsB;
1197
1198 // Find the overall similarity group numbers that fully contain the candidate,
1199 // and record the larger candidate for each group.
1200 auto IdxToCandidateIt = IndexToIncludedCand.find(CandA.getStartIdx());
1201 std::optional<std::pair<IRSimilarityCandidate *, IRSimilarityCandidate *>>
1202 Result;
1203
1204 unsigned CandAStart = CandA.getStartIdx();
1205 unsigned CandAEnd = CandA.getEndIdx();
1206 unsigned CandBStart = CandB.getStartIdx();
1207 unsigned CandBEnd = CandB.getEndIdx();
1208 if (IdxToCandidateIt == IndexToIncludedCand.end())
1209 return Result;
1210 for (IRSimilarityCandidate *MatchedCand : IdxToCandidateIt->second) {
1211 if (MatchedCand->getStartIdx() > CandAStart ||
1212 (MatchedCand->getEndIdx() < CandAEnd))
1213 continue;
1214 unsigned GroupNum = CandToGroup.find(MatchedCand)->second;
1215 IncludedGroupAndCandA.insert(std::make_pair(GroupNum, MatchedCand));
1216 IncludedGroupsA.insert(GroupNum);
1217 }
1218
1219 // Find the overall similarity group numbers that fully contain the next
1220 // candidate, and record the larger candidate for each group.
1221 IdxToCandidateIt = IndexToIncludedCand.find(CandBStart);
1222 if (IdxToCandidateIt == IndexToIncludedCand.end())
1223 return Result;
1224 for (IRSimilarityCandidate *MatchedCand : IdxToCandidateIt->second) {
1225 if (MatchedCand->getStartIdx() > CandBStart ||
1226 MatchedCand->getEndIdx() < CandBEnd)
1227 continue;
1228 unsigned GroupNum = CandToGroup.find(MatchedCand)->second;
1229 IncludedGroupAndCandB.insert(std::make_pair(GroupNum, MatchedCand));
1230 IncludedGroupsB.insert(GroupNum);
1231 }
1232
1233 // Find the intersection between the two groups, these are the groups where
1234 // the larger candidates exist.
1235 set_intersect(IncludedGroupsA, IncludedGroupsB);
1236
1237 // If there is no intersection between the sets, then we cannot determine
1238 // whether or not there is a match.
1239 if (IncludedGroupsA.empty())
1240 return Result;
1241
1242 // Create a pair that contains the larger candidates.
1243 auto ItA = IncludedGroupAndCandA.find(*IncludedGroupsA.begin());
1244 auto ItB = IncludedGroupAndCandB.find(*IncludedGroupsA.begin());
1245 Result = std::make_pair(ItA->second, ItB->second);
1246 return Result;
1247}
1248
1249/// From the list of IRSimilarityCandidates, perform a comparison between each
1250/// IRSimilarityCandidate to determine if there are overlapping
1251/// IRInstructionData, or if they do not have the same structure.
1252///
1253/// \param [in] CandsForRepSubstring - The vector containing the
1254/// IRSimilarityCandidates.
1255/// \param [out] StructuralGroups - the mapping of unsigned integers to vector
1256/// of IRSimilarityCandidates where each of the IRSimilarityCandidates in the
1257/// vector are structurally similar to one another.
1258/// \param [in] IndexToIncludedCand - Mapping of index of the an instruction in
1259/// a circuit to the IRSimilarityCandidates that include this instruction.
1260/// \param [in] CandToOverallGroup - Mapping of IRSimilarityCandidate to a
1261/// number representing the structural group assigned to it.
1263 std::vector<IRSimilarityCandidate> &CandsForRepSubstring,
1264 DenseMap<unsigned, SimilarityGroup> &StructuralGroups,
1265 DenseMap<unsigned, DenseSet<IRSimilarityCandidate *>> &IndexToIncludedCand,
1267 ) {
1268 std::vector<IRSimilarityCandidate>::iterator CandIt, CandEndIt, InnerCandIt,
1269 InnerCandEndIt;
1270
1271 // IRSimilarityCandidates each have a structure for operand use. It is
1272 // possible that two instances of the same subsequences have different
1273 // structure. Each type of structure found is assigned a number. This
1274 // DenseMap maps an IRSimilarityCandidate to which type of similarity
1275 // discovered it fits within.
1277
1278 // Find the compatibility from each candidate to the others to determine
1279 // which candidates overlap and which have the same structure by mapping
1280 // each structure to a different group.
1281 bool SameStructure;
1282 bool Inserted;
1283 unsigned CurrentGroupNum = 0;
1284 unsigned OuterGroupNum;
1288
1289 // Iterate over the candidates to determine its structural and overlapping
1290 // compatibility with other instructions
1291 DenseMap<unsigned, DenseSet<unsigned>> ValueNumberMappingA;
1292 DenseMap<unsigned, DenseSet<unsigned>> ValueNumberMappingB;
1293 for (CandIt = CandsForRepSubstring.begin(),
1294 CandEndIt = CandsForRepSubstring.end();
1295 CandIt != CandEndIt; CandIt++) {
1296
1297 // Determine if it has an assigned structural group already.
1298 // If not, we assign it one, and add it to our mapping.
1299 std::tie(CandToGroupIt, Inserted) =
1300 CandToGroup.try_emplace(&*CandIt, CurrentGroupNum);
1301 if (Inserted)
1302 ++CurrentGroupNum;
1303
1304 // Get the structural group number from the iterator.
1305 OuterGroupNum = CandToGroupIt->second;
1306
1307 // Check if we already have a list of IRSimilarityCandidates for the current
1308 // structural group. Create one if one does not exist.
1309 CurrentGroupPair = StructuralGroups.find(OuterGroupNum);
1310 if (CurrentGroupPair == StructuralGroups.end()) {
1312 std::tie(CurrentGroupPair, Inserted) = StructuralGroups.insert(
1313 std::make_pair(OuterGroupNum, SimilarityGroup({*CandIt})));
1314 }
1315
1316 // Iterate over the IRSimilarityCandidates following the current
1317 // IRSimilarityCandidate in the list to determine whether the two
1318 // IRSimilarityCandidates are compatible. This is so we do not repeat pairs
1319 // of IRSimilarityCandidates.
1320 for (InnerCandIt = std::next(CandIt),
1321 InnerCandEndIt = CandsForRepSubstring.end();
1322 InnerCandIt != InnerCandEndIt; InnerCandIt++) {
1323
1324 // We check if the inner item has a group already, if it does, we skip it.
1325 CandToGroupItInner = CandToGroup.find(&*InnerCandIt);
1326 if (CandToGroupItInner != CandToGroup.end())
1327 continue;
1328
1329 // Check if we have found structural similarity between two candidates
1330 // that fully contains the first and second candidates.
1331 std::optional<std::pair<IRSimilarityCandidate *, IRSimilarityCandidate *>>
1332 LargerPair = CheckLargerCands(
1333 *CandIt, *InnerCandIt, IndexToIncludedCand, CandToOverallGroup);
1334
1335 // If a pair was found, it means that we can assume that these smaller
1336 // substrings are also structurally similar. Use the larger candidates to
1337 // determine the canonical mapping between the two sections.
1338 if (LargerPair.has_value()) {
1339 SameStructure = true;
1340 InnerCandIt->createCanonicalRelationFrom(
1341 *CandIt, *LargerPair.value().first, *LargerPair.value().second);
1342 CandToGroup.insert(std::make_pair(&*InnerCandIt, OuterGroupNum));
1343 CurrentGroupPair->second.push_back(*InnerCandIt);
1344 continue;
1345 }
1346
1347 // Otherwise we determine if they have the same structure and add it to
1348 // vector if they match.
1349 ValueNumberMappingA.clear();
1350 ValueNumberMappingB.clear();
1352 *CandIt, *InnerCandIt, ValueNumberMappingA, ValueNumberMappingB);
1353 if (!SameStructure)
1354 continue;
1355
1356 InnerCandIt->createCanonicalRelationFrom(*CandIt, ValueNumberMappingA,
1357 ValueNumberMappingB);
1358 CandToGroup.insert(std::make_pair(&*InnerCandIt, OuterGroupNum));
1359 CurrentGroupPair->second.push_back(*InnerCandIt);
1360 }
1361 }
1362}
1363
1364void IRSimilarityIdentifier::findCandidates(
1365 std::vector<IRInstructionData *> &InstrList,
1366 std::vector<unsigned> &IntegerMapping) {
1367 SuffixTree ST(IntegerMapping);
1368
1369 std::vector<IRSimilarityCandidate> CandsForRepSubstring;
1370 std::vector<SimilarityGroup> NewCandidateGroups;
1371
1372 DenseMap<unsigned, SimilarityGroup> StructuralGroups;
1373 DenseMap<unsigned, DenseSet<IRSimilarityCandidate *>> IndexToIncludedCand;
1374 DenseMap<IRSimilarityCandidate *, unsigned> CandToGroup;
1375
1376 // Iterate over the subsequences found by the Suffix Tree to create
1377 // IRSimilarityCandidates for each repeated subsequence and determine which
1378 // instances are structurally similar to one another.
1379
1380 // Sort the suffix tree from longest substring to shortest.
1381 std::vector<SuffixTree::RepeatedSubstring> RSes;
1382 for (SuffixTree::RepeatedSubstring &RS : ST)
1383 RSes.push_back(RS);
1384
1385 llvm::stable_sort(RSes, [](const SuffixTree::RepeatedSubstring &LHS,
1386 const SuffixTree::RepeatedSubstring &RHS) {
1387 return LHS.Length > RHS.Length;
1388 });
1389 for (SuffixTree::RepeatedSubstring &RS : RSes) {
1390 createCandidatesFromSuffixTree(Mapper, InstrList, IntegerMapping, RS,
1391 CandsForRepSubstring);
1392
1393 if (CandsForRepSubstring.size() < 2)
1394 continue;
1395
1396 findCandidateStructures(CandsForRepSubstring, StructuralGroups,
1397 IndexToIncludedCand, CandToGroup);
1398 for (std::pair<unsigned, SimilarityGroup> &Group : StructuralGroups) {
1399 // We only add the group if it contains more than one
1400 // IRSimilarityCandidate. If there is only one, that means there is no
1401 // other repeated subsequence with the same structure.
1402 if (Group.second.size() > 1) {
1403 SimilarityCandidates->push_back(Group.second);
1404 // Iterate over each candidate in the group, and add an entry for each
1405 // instruction included with a mapping to a set of
1406 // IRSimilarityCandidates that include that instruction.
1407 for (IRSimilarityCandidate &IRCand : SimilarityCandidates->back()) {
1408 for (unsigned Idx = IRCand.getStartIdx(), Edx = IRCand.getEndIdx();
1409 Idx <= Edx; ++Idx)
1410 IndexToIncludedCand[Idx].insert(&IRCand);
1411 // Add mapping of candidate to the overall similarity group number.
1412 CandToGroup.insert(
1413 std::make_pair(&IRCand, SimilarityCandidates->size() - 1));
1414 }
1415 }
1416 }
1417
1418 CandsForRepSubstring.clear();
1419 StructuralGroups.clear();
1420 NewCandidateGroups.clear();
1421 }
1422}
1423
1425 ArrayRef<std::unique_ptr<Module>> Modules) {
1427
1428 std::vector<IRInstructionData *> InstrList;
1429 std::vector<unsigned> IntegerMapping;
1430 Mapper.InstClassifier.EnableBranches = this->EnableBranches;
1431 Mapper.InstClassifier.EnableIndirectCalls = EnableIndirectCalls;
1432 Mapper.EnableMatchCallsByName = EnableMatchingCallsByName;
1433 Mapper.InstClassifier.EnableIntrinsics = EnableIntrinsics;
1434 Mapper.InstClassifier.EnableMustTailCalls = EnableMustTailCalls;
1435
1436 populateMapper(Modules, InstrList, IntegerMapping);
1437 findCandidates(InstrList, IntegerMapping);
1438
1439 return *SimilarityCandidates;
1440}
1441
1444 Mapper.InstClassifier.EnableBranches = this->EnableBranches;
1445 Mapper.InstClassifier.EnableIndirectCalls = EnableIndirectCalls;
1446 Mapper.EnableMatchCallsByName = EnableMatchingCallsByName;
1447 Mapper.InstClassifier.EnableIntrinsics = EnableIntrinsics;
1448 Mapper.InstClassifier.EnableMustTailCalls = EnableMustTailCalls;
1449
1450 std::vector<IRInstructionData *> InstrList;
1451 std::vector<unsigned> IntegerMapping;
1452
1453 populateMapper(M, InstrList, IntegerMapping);
1454 findCandidates(InstrList, IntegerMapping);
1455
1456 return *SimilarityCandidates;
1457}
1458
1460 "ir-similarity-identifier", false, true)
1461
1463 : ModulePass(ID) {}
1464
1471
1473 IRSI.reset();
1474 return false;
1475}
1476
1478 IRSI->findSimilarity(M);
1479 return false;
1480}
1481
1482AnalysisKey IRSimilarityAnalysis::Key;
1487 false);
1488 IRSI.findSimilarity(M);
1489 return IRSI;
1490}
1491
1495 std::optional<SimilarityGroupList> &SimilarityCandidatesOpt =
1496 IRSI.getSimilarity();
1497
1498 for (std::vector<IRSimilarityCandidate> &CandVec : *SimilarityCandidatesOpt) {
1499 OS << CandVec.size() << " candidates of length "
1500 << CandVec.begin()->getLength() << ". Found in: \n";
1501 for (IRSimilarityCandidate &Cand : CandVec) {
1502 OS << " Function: " << Cand.front()->Inst->getFunction()->getName().str()
1503 << ", Basic Block: ";
1504 if (Cand.front()->Inst->getParent()->getName().str() == "")
1505 OS << "(unnamed)";
1506 else
1507 OS << Cand.front()->Inst->getParent()->getName().str();
1508 OS << "\n Start Instruction: ";
1509 Cand.frontInstruction()->print(OS);
1510 OS << "\n End Instruction: ";
1511 Cand.backInstruction()->print(OS);
1512 OS << "\n";
1513 }
1514 }
1515
1516 return PreservedAnalyses::all();
1517}
1518
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
#define X(NUM, ENUM, NAME)
Definition ELF.h:853
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file defines the DenseMap class.
Hexagon Common GEP
bool checkNumberingAndReplace(DenseMap< unsigned, DenseSet< unsigned > > &CurrentSrcTgtNumberMapping, unsigned SourceArgVal, unsigned TargetArgVal)
Determine if operand number TargetArgVal is in the current mapping set for operand number SourceArgVa...
detail::zippy< detail::zip_shortest, SmallVector< int, 4 > &, SmallVector< int, 4 > &, ArrayRef< Value * > &, ArrayRef< Value * > & > ZippedRelativeLocationsT
static void findCandidateStructures(std::vector< IRSimilarityCandidate > &CandsForRepSubstring, DenseMap< unsigned, SimilarityGroup > &StructuralGroups, DenseMap< unsigned, DenseSet< IRSimilarityCandidate * > > &IndexToIncludedCand, DenseMap< IRSimilarityCandidate *, unsigned > &CandToOverallGroup)
From the list of IRSimilarityCandidates, perform a comparison between each IRSimilarityCandidate to d...
static void createCandidatesFromSuffixTree(const IRInstructionMapper &Mapper, std::vector< IRInstructionData * > &InstrList, std::vector< unsigned > &IntegerMapping, SuffixTree::RepeatedSubstring &RS, std::vector< IRSimilarityCandidate > &CandsForRepSubstring)
From a repeated subsequence, find all the different instances of the subsequence from the InstrList,...
static bool checkNumberingAndReplaceCommutative(const DenseMap< Value *, unsigned > &SourceValueToNumberMapping, DenseMap< unsigned, DenseSet< unsigned > > &CurrentSrcTgtNumberMapping, ArrayRef< Value * > &SourceOperands, DenseSet< unsigned > &TargetValueNumbers)
Determine if one or more of the assigned global value numbers for the operands in TargetValueNumbers ...
static std::optional< std::pair< IRSimilarityCandidate *, IRSimilarityCandidate * > > CheckLargerCands(IRSimilarityCandidate &CandA, IRSimilarityCandidate &CandB, DenseMap< unsigned, DenseSet< IRSimilarityCandidate * > > &IndexToIncludedCand, DenseMap< IRSimilarityCandidate *, unsigned > &CandToGroup)
Look for larger IRSimilarityCandidates From the previously matched IRSimilarityCandidates that fully ...
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
uint64_t IntrinsicInst * II
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition PassSupport.h:56
This file defines generic set operations that may be used on set's of different types,...
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
Value * RHS
Value * LHS
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
Get the array size.
Definition ArrayRef.h:141
const_pointer iterator
Definition ArrayRef.h:47
LLVM Basic Block Representation.
Definition BasicBlock.h:62
iterator end()
Definition BasicBlock.h:474
iterator begin()
Instruction iterator methods.
Definition BasicBlock.h:461
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
LLVM_ABI bool isIndirectCall() const
Return true if the callsite is an indirect call.
This class represents a function call, abstracting a target machine's calling convention.
This class is the base class for the comparison instructions.
Definition InstrTypes.h:728
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:740
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition InstrTypes.h:744
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition InstrTypes.h:745
@ ICMP_UGE
unsigned greater or equal
Definition InstrTypes.h:764
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:763
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:767
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
Definition InstrTypes.h:752
@ ICMP_SGE
signed greater or equal
Definition InstrTypes.h:768
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
Definition InstrTypes.h:753
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition InstrTypes.h:890
Predicate getPredicate() const
Return the predicate for this instruction.
Definition InstrTypes.h:828
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:225
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Definition DenseMap.h:301
bool erase(const KeyT &Val)
Definition DenseMap.h:379
DenseMapIterator< KeyT, ValueT, KeyInfoT, BucketT > iterator
Definition DenseMap.h:135
unsigned size() const
Definition DenseMap.h:174
bool empty() const
Definition DenseMap.h:173
iterator begin()
Definition DenseMap.h:139
iterator end()
Definition DenseMap.h:143
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Definition DenseMap.h:216
void swap(DerivedT &RHS)
Definition DenseMap.h:439
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:286
Implements a dense probed hash-table based set.
Definition DenseSet.h:289
Class to represent function types.
ArrayRef< Type * > params() const
LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
An analysis pass that runs and returns the IRSimilarityIdentifier run on the Module.
LLVM_ABI Result run(Module &M, ModuleAnalysisManager &)
An analysis pass based on legacy pass manager that runs and returns IRSimilarityIdentifier run on the...
bool doInitialization(Module &M) override
doInitialization - Virtual method overridden by subclasses to do any necessary initialization before ...
bool doFinalization(Module &M) override
doFinalization - Virtual method overriden by subclasses to do any necessary clean up after all passes...
bool runOnModule(Module &M) override
runOnModule - Virtual method overriden by subclasses to process the module being operated on.
This is a class that wraps a range of IRInstructionData from one point to another in the vector of IR...
LLVM_ABI void createCanonicalRelationFrom(IRSimilarityCandidate &SourceCand, DenseMap< unsigned, DenseSet< unsigned > > &ToSourceMapping, DenseMap< unsigned, DenseSet< unsigned > > &FromSourceMapping)
Create a mapping for the value numbering of the calling IRSimilarityCandidate, to a different separat...
static LLVM_ABI bool isSimilar(const IRSimilarityCandidate &A, const IRSimilarityCandidate &B)
std::optional< unsigned > getGVN(Value *V)
Finds the positive number associated with V if it has been mapped.
static LLVM_ABI bool compareAssignmentMapping(const unsigned InstValA, const unsigned &InstValB, DenseMap< unsigned, DenseSet< unsigned > > &ValueNumberMappingA, DenseMap< unsigned, DenseSet< unsigned > > &ValueNumberMappingB)
Compare the GVN of the assignment value in corresponding instructions in IRSimilarityCandidates A and...
void getBasicBlocks(DenseSet< BasicBlock * > &BBSet) const
std::optional< Value * > fromGVN(unsigned Num)
Finds the Value associate with Num if it exists.
static LLVM_ABI bool checkRelativeLocations(RelativeLocMapping A, RelativeLocMapping B)
Compare the relative locations in A and B and check that the distances match if both locations are co...
static LLVM_ABI bool compareStructure(const IRSimilarityCandidate &A, const IRSimilarityCandidate &B)
static LLVM_ABI void createCanonicalMappingFor(IRSimilarityCandidate &CurrCand)
Create a mapping from the value numbering to a different separate set of numbers.
static LLVM_ABI bool overlap(const IRSimilarityCandidate &A, const IRSimilarityCandidate &B)
Compare the start and end indices of the two IRSimilarityCandidates for whether they overlap.
LLVM_ABI IRSimilarityCandidate(unsigned StartIdx, unsigned Len, IRInstructionData *FirstInstIt, IRInstructionData *LastInstIt)
std::optional< unsigned > getCanonicalNum(unsigned N)
Find the canonical number from the global value number N stored in the candidate.
std::optional< unsigned > fromCanonicalNum(unsigned N)
Find the global value number from the canonical number N stored in the candidate.
static LLVM_ABI bool compareNonCommutativeOperandMapping(OperandMapping A, OperandMapping B)
Compare the operands in A and B and check that the current mapping of global value numbers from A to ...
static LLVM_ABI bool compareCommutativeOperandMapping(OperandMapping A, OperandMapping B)
Compare the operands in A and B and check that the current mapping of global value numbers from A to ...
This class puts all the pieces of the IRInstructionData, IRInstructionMapper, IRSimilarityCandidate t...
LLVM_ABI SimilarityGroupList & findSimilarity(ArrayRef< std::unique_ptr< Module > > Modules)
std::optional< SimilarityGroupList > & getSimilarity()
A wrapper class for inspecting calls to intrinsic functions.
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
Definition Pass.h:255
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
std::string str() const
Get the contents as an std::string.
Definition StringRef.h:222
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
LLVM Value Representation.
Definition Value.h:75
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:318
std::pair< iterator, bool > insert(const ValueT &V)
Definition DenseSet.h:212
iterator find(const_arg_type_t< ValueT > V)
Definition DenseSet.h:177
size_type size() const
Definition DenseSet.h:87
bool contains(const_arg_type_t< ValueT > V) const
Check if the set contains the given element.
Definition DenseSet.h:185
const ParentTy * getParent() const
Definition ilist_node.h:34
ilist_select_iterator_type< OptionsT, false, false > iterator
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
std::vector< SimilarityGroup > SimilarityGroupList
std::vector< IRSimilarityCandidate > SimilarityGroup
LLVM_ABI bool isClose(const IRInstructionData &A, const IRInstructionData &B)
Compare one IRInstructionData class to another IRInstructionData class for whether they are performin...
LLVM_ABI StringRef getName(ID id)
Return the LLVM name for an intrinsic, such as "llvm.ppc.altivec.lvx".
LLVM_ABI bool isOverloaded(ID id)
Returns true if the intrinsic can be overloaded.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:315
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
Definition STLExtras.h:830
void stable_sort(R &&Range)
Definition STLExtras.h:2115
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1738
void set_intersect(S1Ty &S1, const S2Ty &S2)
set_intersect(A, B) - Compute A := A ^ B Identical to set_intersection, except that it works on set<>...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2207
cl::opt< bool > DisableIndirectCalls("no-ir-sim-indirect-calls", cl::init(false), cl::ReallyHidden, cl::desc("disable outlining indirect calls."))
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1745
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
cl::opt< bool > DisableBranches("no-ir-sim-branch-matching", cl::init(false), cl::ReallyHidden, cl::desc("disable similarity matching, and outlining, " "across branches for debugging purposes."))
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
cl::opt< bool > DisableIntrinsics("no-ir-sim-intrinsics", cl::init(false), cl::ReallyHidden, cl::desc("Don't match or outline intrinsics"))
ArrayRef(const T &OneElt) -> ArrayRef< T >
static cl::opt< bool > MatchCallsByName("ir-sim-calls-by-name", cl::init(false), cl::ReallyHidden, cl::desc("only allow matching call instructions if the " "name and type signature match."))
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
AnalysisManager< Module > ModuleAnalysisManager
Convenience typedef for the Module analysis manager.
Definition MIRParser.h:39
A special type used by analysis passes to provide an address that identifies that particular analysis...
Definition Analysis.h:29
This provides the utilities for hashing an Instruction to an unsigned integer.
LLVM_ABI StringRef getCalleeName() const
Get the callee name that the call instruction is using for hashing the instruction.
LLVM_ABI void initializeInstruction()
Fills data stuctures for IRInstructionData when it is constructed from a.
SmallVector< int, 4 > RelativeBlockLocations
This structure holds the distances of how far "ahead of" or "behind" the target blocks of a branch,...
std::optional< std::string > CalleeName
This is only relevant if we are wrapping a CallInst.
LLVM_ABI IRInstructionData(Instruction &I, bool Legality, IRInstructionDataList &IDL)
Gather the information that is difficult to gather for an Instruction, or is changed.
LLVM_ABI ArrayRef< Value * > getBlockOperVals()
Get the BasicBlock based operands for PHINodes and BranchInsts.
Instruction * Inst
The source Instruction that is being wrapped.
static LLVM_ABI CmpInst::Predicate predicateForConsistency(CmpInst *CI)
A function that swaps the predicates to their less than form if they are in a greater than form.
LLVM_ABI void setPHIPredecessors(DenseMap< BasicBlock *, unsigned > &BasicBlockToInteger)
For an IRInstructionData containing a PHINode, finds the relative distances from the incoming basic b...
SmallVector< Value *, 4 > OperVals
The values of the operands in the Instruction.
LLVM_ABI void setBranchSuccessors(DenseMap< BasicBlock *, unsigned > &BasicBlockToInteger)
For an IRInstructionData containing a branch, finds the relative distances from the source basic bloc...
bool Legal
The legality of the wrapped instruction.
LLVM_ABI void setCalleeName(bool MatchByName=true)
For an IRInstructionData containing a CallInst, set the function name appropriately.
LLVM_ABI CmpInst::Predicate getPredicate() const
Get the predicate that the compare instruction is using for hashing the instruction.
std::optional< CmpInst::Predicate > RevisedPredicate
This is only relevant if we are wrapping a CmpInst where we needed to change the predicate of a compa...
Helper struct for converting the Instructions in a Module into a vector of unsigned integers.
DenseMap< IRInstructionData *, unsigned, IRInstructionDataTraits > InstructionIntegerMap
Correspondence from IRInstructionData to unsigned integers.
SpecificBumpPtrAllocator< IRInstructionDataList > * IDLAllocator
This allocator pointer is in charge of creating the IRInstructionDataList so it is not deallocated un...
SpecificBumpPtrAllocator< IRInstructionData > * InstDataAllocator
This allocator pointer is in charge of holding on to the IRInstructionData so it is not deallocated u...
bool EnableMatchCallsByName
Marks whether we should use exact function names, as well as types to find similarity between calls.
unsigned LegalInstrNumber
The next available integer to assign to a legal Instruction to.
unsigned IllegalInstrNumber
The starting illegal instruction number to map to.
InstructionClassification InstClassifier
Maps an Instruction to a member of InstrType.
bool HaveLegalRange
Marks whether we have found a set of instructions that is long enough to be considered for similarity...
LLVM_ABI IRInstructionData * allocateIRInstructionData(Instruction &I, bool Legality, IRInstructionDataList &IDL)
Get an allocated IRInstructionData struct using the InstDataAllocator.
bool CanCombineWithPrevInstr
Marks whether we found a illegal instruction in the previous step.
DenseMap< BasicBlock *, unsigned > BasicBlockToInteger
A mapping for a basic block in a module to its assigned number/location in the module.
LLVM_ABI IRInstructionDataList * allocateIRInstructionDataList()
Get an allocated IRInstructionDataList object using the IDLAllocator.
LLVM_ABI unsigned mapToLegalUnsigned(BasicBlock::iterator &It, std::vector< unsigned > &IntegerMappingForBB, std::vector< IRInstructionData * > &InstrListForBB)
Maps an Instruction to a legal integer.
LLVM_ABI void convertToUnsignedVec(BasicBlock &BB, std::vector< IRInstructionData * > &InstrList, std::vector< unsigned > &IntegerMapping)
Maps the Instructions in a BasicBlock BB to legal or illegal integers determined by InstrType.
bool AddedIllegalLastTime
Set if we added an illegal number in the previous step.
LLVM_ABI unsigned mapToIllegalUnsigned(BasicBlock::iterator &It, std::vector< unsigned > &IntegerMappingForBB, std::vector< IRInstructionData * > &InstrListForBB, bool End=false)
Maps an Instruction to an illegal integer.
A helper struct to hold the candidate, for a branch instruction, the relative location of a label,...
A repeated substring in the tree.
Definition SuffixTree.h:50