//===- GVNSink.cpp - sink expressions into successors ---------------------===//
//
// This pass attempts to sink instructions into successors, reducing static
// instruction count and enabling if-conversion.
//
#define DEBUG_TYPE "gvn-sink"

STATISTIC(NumRemoved, "Number of instructions removed");

namespace GVNExpression {
// Returns true if I must be treated as a memory instruction: loads, stores,
// and any call or invoke that may access memory.
static bool isMemoryInst(const Instruction *I) {
  return isa<LoadInst>(I) || isa<StoreInst>(I) ||
         (isa<InvokeInst>(I) && !cast<InvokeInst>(I)->doesNotAccessMemory()) ||
         (isa<CallInst>(I) && !cast<CallInst>(I)->doesNotAccessMemory());
}
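A hedged illustration of the classification (hypothetical IR, not from this file):

// %l = load i32, ptr %p        ; memory instruction
// store i32 %l, ptr %q         ; memory instruction
// %v = call i32 @f(i32 %l)     ; counted only if @f may access memory; if @f
//                              ; is readnone, doesNotAccessMemory() is true
//                              ; and the call is not a memory instruction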
class LockstepReverseIterator {
  // ...
  // Start every block at its last real (non-terminator) instruction.
  ActiveBlocks.clear();
  // ...
  if (BB->size() <= 1) {
    // Block is empty apart from its terminator; it cannot take part.
    // ...
  }
  Insts.push_back(BB->getTerminator()->getPrevNode());
  // Erase instructions (and deactivate blocks) that fall outside the current
  // set of active blocks.
  for (auto II = Insts.begin(); II != Insts.end();) {
    // ...
    ActiveBlocks.remove((*II)->getParent());
    II = Insts.erase(II);
    // ...
  }
  // Step backwards one instruction in every active block, in lockstep.
  for (auto *Inst : Insts) {
    if (Inst == &Inst->getParent()->front())
      ActiveBlocks.remove(Inst->getParent());
    else
      NewInsts.push_back(Inst->getPrevNode());
  }
  if (NewInsts.empty()) {
    // ...
  }
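The iterator's contract is easier to see in isolation. A minimal standalone
sketch of the same lockstep-reverse idea over plain vectors (toy code, not the
class above):

#include <vector>

// Walk several sequences backwards in parallel; a sequence drops out once it
// is exhausted, just as a block drops out of ActiveBlocks above.
template <typename T>
std::vector<std::vector<T>> lockstepReverse(std::vector<std::vector<T>> Seqs) {
  std::vector<std::vector<T>> Rounds;
  while (true) {
    std::vector<T> Round;
    for (auto &S : Seqs)
      if (!S.empty()) {
        Round.push_back(S.back()); // last unconsumed element of this sequence
        S.pop_back();
      }
    if (Round.empty())
      break; // every sequence exhausted: iteration ends
    Rounds.push_back(Round);
  }
  return Rounds;
}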
struct SinkingInstructionCandidate {
  unsigned NumBlocks;
  unsigned NumInstructions;
  unsigned NumPHIs;
  unsigned NumMemoryInsts;
  int Cost = -1;
  SmallVector<BasicBlock *, 4> Blocks;
  void calculateCost(unsigned NumOrigPHIs, unsigned NumOrigBlocks) {
    unsigned NumExtraPHIs = NumPHIs - NumOrigPHIs;
    unsigned SplitEdgeCost = (NumOrigBlocks > NumBlocks) ? 2 : 0;
    Cost = (NumInstructions * (NumBlocks - 1)) -
           (NumExtraPHIs * 3) // PHIs are expensive, so make sure they're worth it.
           - SplitEdgeCost;
  }

  bool operator>(const SinkingInstructionCandidate &Other) const {
    return Cost > Other.Cost;
  }
raw_ostream &operator<<(raw_ostream &OS, const SinkingInstructionCandidate &C) {
  OS << "<Candidate Cost=" << C.Cost << " #Blocks=" << C.NumBlocks
     << " #Insts=" << C.NumInstructions << " #PHIs=" << C.NumPHIs << ">";
  return OS;
}
  ModelledPHI() = default;

  ModelledPHI(const PHINode *PN) {
    // Sort the incoming (block, value) pairs so that equivalent PHIs model equal.
    // ...
    for (auto &P : Ops) {
      Blocks.push_back(P.first);
      Values.push_back(P.second);
    }
  }
  // Create a dummy ModelledPHI that will compare unequal to any real one.
  static ModelledPHI createDummy(size_t ID) {
    ModelledPHI M;
    M.Values.push_back(reinterpret_cast<Value *>(ID));
    return M;
  }
  template <typename VArray, typename BArray>
  ModelledPHI(const VArray &V, const BArray &B) {
  /// Create a PHI from the OpNum'th operand of each instruction in Insts.
  template <typename BArray>
  ModelledPHI(ArrayRef<Instruction *> Insts, unsigned OpNum, const BArray &B) {
    // ...
    for (auto *I : Insts)
      Values.push_back(I->getOperand(OpNum));
  }
  // Drop incoming (block, value) pairs whose block is no longer live.
  auto BI = Blocks.begin();
  auto VI = Values.begin();
  while (BI != Blocks.end()) {
    // ...
    BI = Blocks.erase(BI);
    VI = Values.erase(VI);
    // ...
  }
  bool areAllIncomingValuesSame() const {
    return llvm::all_of(Values, [&](Value *V) { return V == Values[0]; });
  }

  bool areAllIncomingValuesSameType() const {
    return llvm::all_of(
        Values, [&](Value *V) { return V->getType() == Values[0]->getType(); });
  }

  bool areAnyIncomingValuesConstant() const {
    return llvm::any_of(Values, [&](Value *V) { return isa<Constant>(V); });
  }

  unsigned hash() const {
    return (unsigned)hash_combine_range(Values.begin(), Values.end());
  }

  bool operator==(const ModelledPHI &Other) const {
    return Values == Other.Values && Blocks == Other.Blocks;
  }
template <> struct DenseMapInfo<ModelledPHI> {
  static inline ModelledPHI &getEmptyKey() {
    static ModelledPHI Dummy = ModelledPHI::createDummy(0);
    return Dummy;
  }

  static inline ModelledPHI &getTombstoneKey() {
    static ModelledPHI Dummy = ModelledPHI::createDummy(1);
    return Dummy;
  }

  static unsigned getHashValue(const ModelledPHI &V) { return V.hash(); }

  static bool isEqual(const ModelledPHI &LHS, const ModelledPHI &RHS) {
    return LHS == RHS;
  }
};
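DenseMap reserves two key values to mark never-used and erased buckets, which
is why createDummy manufactures ModelledPHIs around the fake Value pointers 0
and 1 that no real PHI can contain. A toy sketch of the same pattern, for a
hypothetical key type of my own invention:

#include "llvm/ADT/DenseMapInfo.h"

struct ToyKey {
  void *Ptr;
};

namespace llvm {
template <> struct DenseMapInfo<ToyKey> {
  // Two addresses no live object can have serve as the reserved sentinels.
  static ToyKey getEmptyKey() { return {reinterpret_cast<void *>(size_t(0))}; }
  static ToyKey getTombstoneKey() { return {reinterpret_cast<void *>(size_t(1))}; }
  static unsigned getHashValue(const ToyKey &K) {
    return DenseMapInfo<void *>::getHashValue(K.Ptr);
  }
  static bool isEqual(const ToyKey &A, const ToyKey &B) { return A.Ptr == B.Ptr; }
};
} // namespace llvm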
class InstructionUseExpr : public GVNExpression::BasicExpression {
  unsigned MemoryUseOrder = -1;
  bool Volatile = false;
  ArrayRef<int> ShuffleMask;

public:
  InstructionUseExpr(Instruction *I, ArrayRecycler<Value *> &R,
                     BumpPtrAllocator &A)
      : GVNExpression::BasicExpression(I->getNumUses()) {
    // ...
    if (auto *SVI = dyn_cast<ShuffleVectorInst>(I))
      ShuffleMask = SVI->getShuffleMask().copy(A);

    for (auto &U : I->uses())
      op_push_back(U.getUser());
    // ...
  }
  void setMemoryUseOrder(unsigned MUO) { MemoryUseOrder = MUO; }
  void setVolatile(bool V) { Volatile = V; }

  hash_code getHashValue() const override {
    return hash_combine(GVNExpression::BasicExpression::getHashValue(),
                        MemoryUseOrder, Volatile, ShuffleMask);
  }
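llvm::hash_combine folds heterogeneous fields into one hash_code, which is how
the memory-use order, volatility, and shuffle mask become part of an
expression's identity alongside the base expression hash. A toy sketch with
invented fields:

#include "llvm/ADT/Hashing.h"

// Two expressions hash alike only if every component matches.
llvm::hash_code toyExprHash(unsigned Opcode, unsigned MemoryUseOrder,
                            bool Volatile) {
  return llvm::hash_combine(Opcode, MemoryUseOrder, Volatile);
}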
// A table mapping values and expressions to value numbers.
class ValueTable {
  // ...
  BasicBlocksSet ReachableBBs;
  // ...

  InstructionUseExpr *createExpr(Instruction *I) {
    InstructionUseExpr *E =
        new (Allocator) InstructionUseExpr(I, Recycler, Allocator);
    if (isMemoryInst(I))
      E->setMemoryUseOrder(getMemoryUseOrder(I));
  // Create an expression for a load or store, folding in its volatility.
  template <class Inst> InstructionUseExpr *createMemoryExpr(Inst *I) {
    // ...
    InstructionUseExpr *E = createExpr(I);
    E->setVolatile(I->isVolatile());
    return E;
  }
  void setReachableBBs(const BasicBlocksSet &ReachableBBs) {
    this->ReachableBBs = ReachableBBs;
  }
  uint32_t lookupOrAdd(Value *V) {
    auto VI = ValueNumbering.find(V);
    if (VI != ValueNumbering.end())
      return VI->second;

    if (!isa<Instruction>(V)) {
      ValueNumbering[V] = nextValueNumber;
      return nextValueNumber++;
    }
    Instruction *I = cast<Instruction>(V);
    if (!ReachableBBs.contains(I->getParent()))
      return ~0U;

    InstructionUseExpr *exp = nullptr;
    switch (I->getOpcode()) {
    case Instruction::Load:
      exp = createMemoryExpr(cast<LoadInst>(I));
      break;
    case Instruction::Store:
      exp = createMemoryExpr(cast<StoreInst>(I));
      break;
    case Instruction::Call:
    case Instruction::Invoke:
    case Instruction::FNeg:
    case Instruction::Add:
    case Instruction::FAdd:
    case Instruction::Sub:
    case Instruction::FSub:
    case Instruction::Mul:
    case Instruction::FMul:
    case Instruction::UDiv:
    case Instruction::SDiv:
    case Instruction::FDiv:
    case Instruction::URem:
    case Instruction::SRem:
    case Instruction::FRem:
    case Instruction::Shl:
    case Instruction::LShr:
    case Instruction::AShr:
    case Instruction::And:
    case Instruction::Or:
    case Instruction::Xor:
    case Instruction::ICmp:
    case Instruction::FCmp:
    case Instruction::Trunc:
    case Instruction::ZExt:
    case Instruction::SExt:
    case Instruction::FPToUI:
    case Instruction::FPToSI:
    case Instruction::UIToFP:
    case Instruction::SIToFP:
    case Instruction::FPTrunc:
    case Instruction::FPExt:
    case Instruction::PtrToInt:
    case Instruction::IntToPtr:
    case Instruction::BitCast:
    case Instruction::AddrSpaceCast:
    case Instruction::Select:
    case Instruction::ExtractElement:
    case Instruction::InsertElement:
    case Instruction::ShuffleVector:
    case Instruction::InsertValue:
    case Instruction::GetElementPtr:
      exp = createExpr(I);
      break;
    default:
      break;
    }
    if (!exp) {
      ValueNumbering[V] = nextValueNumber;
      return nextValueNumber++;
    }
    uint32_t e = ExpressionNumbering[exp];
    if (!e) {
      hash_code H = exp->getHashValue([=](Value *V) { return lookupOrAdd(V); });
      auto I = HashNumbering.find(H);
      if (I != HashNumbering.end()) {
        e = I->second;
      } else {
        e = nextValueNumber++;
        HashNumbering[H] = e;
        ExpressionNumbering[exp] = e;
      }
    }
    ValueNumbering[V] = e;
    return e;
  }
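The shape of lookupOrAdd is classic hash-consing: a value's number is cached,
and otherwise the expression hash is consulted so that structurally identical
instructions in different blocks receive the same number. A self-contained
sketch of the idea (toy types, not this file's API):

#include <string>
#include <unordered_map>

class ToyValueTable {
  std::unordered_map<std::string, unsigned> Numbering; // "expression" -> VN
  unsigned NextValueNumber = 1;

public:
  unsigned lookupOrAdd(const std::string &Expr) {
    auto It = Numbering.find(Expr);
    if (It != Numbering.end())
      return It->second;                        // seen this shape before: reuse
    return Numbering[Expr] = NextValueNumber++; // otherwise mint a fresh VN
  }
};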
  // Look up a value's number, asserting that it has already been numbered.
  uint32_t lookup(Value *V) const {
    auto VI = ValueNumbering.find(V);
    assert(VI != ValueNumbering.end() && "Value not numbered?");
    return VI->second;
  }
  void clear() {
    ValueNumbering.clear();
    ExpressionNumbering.clear();
    HashNumbering.clear();
    // ...
  }
  // Memory-use numbering: walk the block's instructions so that two loads or
  // stores only compare equal when they observe the same memory state.
  for (/* ... */; I != E && !I->isTerminator(); ++I) {
    if (!isMemoryInst(&*I))
      continue;
    if (isa<LoadInst>(&*I))
      // ...
  }
  // ...
  return lookupOrAdd(&*I);
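The intent, as the surrounding fragments suggest, is that merging loads or
stores is only safe between instructions that sit at the same memory position
within their blocks, approximated by counting the memory instructions around
them. A toy counter illustrating the idea (hypothetical helper, not this
file's definition):

#include <vector>

// Position of instruction Idx among its block's memory instructions; merging
// is only considered between instructions at equal memory positions.
unsigned toyMemoryUseOrder(const std::vector<bool> &IsMemInst, unsigned Idx) {
  unsigned N = 0;
  for (unsigned I = 0; I < Idx; ++I)
    if (IsMemInst[I])
      ++N;
  return N;
}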
  unsigned NumSunk = 0;
  ReversePostOrderTraversal<Function *> RPOT(&F);
  VN.setReachableBBs(BasicBlocksSet(RPOT.begin(), RPOT.end()));
  for (auto *N : RPOT)
    NumSunk += sinkBB(N);
static bool shouldAvoidSinkingInstruction(Instruction *I) {
  // These instructions may change or break semantics if moved.
  if (isa<PHINode>(I) || I->isEHPad() || isa<AllocaInst>(I) ||
      I->getType()->isTokenTy())
    return true;
  return false;
}
  Optional<SinkingInstructionCandidate> analyzeInstructionForSinking(
      LockstepReverseIterator &LRI, unsigned &InstNum, unsigned &MemoryInstNum,
      ModelledPHISet &NeededPHIs, SmallPtrSetImpl<Value *> &PHIContents);
  void analyzeInitialPHIs(BasicBlock *BB, ModelledPHISet &PHIs,
                          SmallPtrSetImpl<Value *> &PHIContents) {
    // ...
    auto MPHI = ModelledPHI(&PN);
    PHIs.insert(MPHI);
    for (auto *V : MPHI.getValues())
      PHIContents.insert(V);
  // Fold away PHIs whose incoming values are all identical.
  void foldPointlessPHINodes(BasicBlock *BB) {
    auto I = BB->begin();
    while (PHINode *PN = dyn_cast<PHINode>(I++)) {
      if (!llvm::all_of(PN->incoming_values(), [&](const Value *V) {
            return V == PN->getIncomingValue(0);
          }))
        continue;
      // ...
    }
  }
Optional<SinkingInstructionCandidate> GVNSink::analyzeInstructionForSinking(
    LockstepReverseIterator &LRI, unsigned &InstNum, unsigned &MemoryInstNum,
    ModelledPHISet &NeededPHIs, SmallPtrSetImpl<Value *> &PHIContents) {
  auto Insts = *LRI;
  LLVM_DEBUG(dbgs() << " -- Analyzing instruction set: [\n";
             for (auto *I : Insts) { I->dump(); }
             dbgs() << " ]\n";);
  DenseMap<uint32_t, unsigned> VNums;
  for (auto *I : Insts) {
    uint32_t N = VN.lookupOrAdd(I);
    LLVM_DEBUG(dbgs() << " VN=" << Twine::utohexstr(N) << " for" << *I << "\n");
    if (N == ~0U)
      return None;
    VNums[N]++;
  }

  // Sink the value number that occurs in the most blocks.
  unsigned VNumToSink =
      std::max_element(VNums.begin(), VNums.end(), llvm::less_second())->first;

  if (VNums[VNumToSink] == 1)
    // Can't sink anything!
    return None;
  auto &ActivePreds = LRI.getActiveBlocks();
  unsigned InitialActivePredSize = ActivePreds.size();
  SmallVector<Instruction *, 4> NewInsts;
  for (auto *I : Insts) {
    if (VN.lookup(I) != VNumToSink)
      ActivePreds.remove(I->getParent());
    else
      NewInsts.push_back(I);
  }
  for (auto *I : NewInsts)
    if (shouldAvoidSinkingInstruction(I))
      return None;
  // If any predecessors were dropped, restrict the needed PHIs (and the
  // iterator) to the remaining active blocks.
  bool RecomputePHIContents = false;
  if (ActivePreds.size() != InitialActivePredSize) {
    ModelledPHISet NewNeededPHIs;
    for (auto P : NeededPHIs) {
      P.restrictToBlocks(ActivePreds);
      NewNeededPHIs.insert(P);
    }
    NeededPHIs = NewNeededPHIs;
    LRI.restrictToBlocks(ActivePreds);
    RecomputePHIContents = true;
  }
  // Model the PHI that the sunk instructions' results would form.
  ModelledPHI NewPHI(NewInsts, ActivePreds);

  // Does sinking this instruction render a previously needed PHI redundant?
  if (NeededPHIs.erase(NewPHI))
    RecomputePHIContents = true;

  if (RecomputePHIContents) {
    // The needed PHIs may have changed; recompute the set of all needed values.
    PHIContents.clear();
    for (auto &PHI : NeededPHIs)
      PHIContents.insert(PHI.getValues().begin(), PHI.getValues().end());
  }

  // If a sunk result still feeds a different PHI we must keep, we can't sink.
  for (auto *V : NewPHI.getValues())
    if (PHIContents.count(V))
      return None;
  if (any_of(NewInsts, hasDifferentNumOperands))
    return None;
  for (unsigned OpNum = 0, E = I0->getNumOperands(); OpNum != E; ++OpNum) {
    ModelledPHI PHI(NewInsts, OpNum, ActivePreds);
    if (PHI.areAllIncomingValuesSame())
      continue;
    // ...
    if (NeededPHIs.count(PHI))
      continue;
    if (!PHI.areAllIncomingValuesSameType())
      return None;
    // Don't create indirect calls! The called value is the final operand.
    if ((isa<CallInst>(I0) || isa<InvokeInst>(I0)) && OpNum == E - 1 &&
        PHI.areAnyIncomingValuesConstant())
      return None;

    NeededPHIs.reserve(NeededPHIs.size());
    NeededPHIs.insert(PHI);
    PHIContents.insert(PHI.getValues().begin(), PHI.getValues().end());
  }
  if (isMemoryInst(NewInsts[0]))
    ++MemoryInstNum;

  SinkingInstructionCandidate Cand;
  Cand.NumInstructions = ++InstNum;
  Cand.NumMemoryInsts = MemoryInstNum;
  Cand.NumBlocks = ActivePreds.size();
  Cand.NumPHIs = NeededPHIs.size();
  return Cand;
unsigned GVNSink::sinkBB(BasicBlock *BBEnd) {
  SmallVector<BasicBlock *, 4> Preds;
  for (auto *B : predecessors(BBEnd)) {
    auto *T = B->getTerminator();
    if (isa<BranchInst>(T) || isa<SwitchInst>(T))
      Preds.push_back(B);
    else
      return 0;
  }
  if (Preds.size() < 2)
    return 0;
  llvm::sort(Preds);

  unsigned NumOrigPreds = Preds.size();
  // We can only sink instructions through unconditional branches.
  llvm::erase_if(Preds, [](BasicBlock *BB) {
    return BB->getTerminator()->getNumSuccessors() != 1;
  });
  LockstepReverseIterator LRI(Preds);
  SmallVector<SinkingInstructionCandidate, 4> Candidates;
  unsigned InstNum = 0, MemoryInstNum = 0;
  ModelledPHISet NeededPHIs;
  SmallPtrSet<Value *, 4> PHIContents;
  analyzeInitialPHIs(BBEnd, NeededPHIs, PHIContents);
  unsigned NumOrigPHIs = NeededPHIs.size();

  while (LRI.isValid()) {
    auto Cand = analyzeInstructionForSinking(LRI, InstNum, MemoryInstNum,
                                             NeededPHIs, PHIContents);
    if (!Cand)
      break;
    Cand->calculateCost(NumOrigPHIs, Preds.size());
    Candidates.push_back(*Cand);
    --LRI;
  }
  // Pick the best candidate by cost.
  llvm::stable_sort(Candidates, std::greater<SinkingInstructionCandidate>());
  LLVM_DEBUG(dbgs() << " -- Sinking candidates:\n";
             for (auto &C : Candidates)
               dbgs() << "  " << C << "\n";);
  if (Candidates.empty() || Candidates.front().Cost <= 0)
    return 0;
  auto C = Candidates.front();
  // ...
  if (C.Blocks.size() < NumOrigPreds) {
    // Only a subset of predecessors participates: split their edges into the
    // successor first (via SplitBlockPredecessors) so sinking has a target.
    // ...
  }
  // Sink the chosen number of instructions, one lockstep round at a time.
  for (unsigned I = 0; I < C.NumInstructions; ++I)
    sinkLastInstruction(C.Blocks, BBEnd);

  return C.NumInstructions;
}
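To make the transformation concrete, a hedged before/after sketch (hypothetical
IR; names and exact placement are illustrative):

// Before: both predecessors of %end compute the same tail.
//
//   then:                        else:
//     %a = add i32 %x, 1           %b = add i32 %x, 1
//     store i32 %a, ptr %p         store i32 %b, ptr %p
//     br label %end                br label %end
//
// After gvn-sink, the tail lives once in %end:
//
//   end:
//     %b = add i32 %x, 1
//     store i32 %b, ptr %p
//
// Had the add operands differed between the two blocks, sinkLastInstruction
// (below) would have inserted a ".sink"-suffixed PHI to feed the sunk copy.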
void GVNSink::sinkLastInstruction(ArrayRef<BasicBlock *> Blocks,
                                  BasicBlock *BBEnd) {
  SmallVector<Instruction *, 4> Insts;
  for (BasicBlock *BB : Blocks)
    Insts.push_back(BB->getTerminator()->getPrevNode());
  Instruction *I0 = Insts.front();
  // Build a new PHI for every operand that differs between the instructions.
  // ...
  assert(!Op->getType()->isTokenTy() && "Can't PHI tokens!");
  auto *PN = PHINode::Create(Op->getType(), Insts.size(),
                             Op->getName() + ".sink", &BBEnd->front());
  for (auto *I : Insts)
    PN->addIncoming(I->getOperand(O), I->getParent());
  NewOperands.push_back(PN);
  // Arbitrarily keep I0 as the surviving copy: merge metadata and IR flags
  // from the others into it, then rewrite all their uses.
  for (auto *I : Insts)
    if (I != I0) {
      combineMetadataForCSE(I0, I, true);
      I0->andIRFlags(I);
    }

  for (auto *I : Insts)
    if (I != I0)
      I->replaceAllUsesWith(I0);
  foldPointlessPHINodes(BBEnd);
  // Finally nuke all instructions apart from the surviving copy.
  for (auto *I : Insts)
    if (I != I0)
      I->eraseFromParent();

  NumRemoved += Insts.size() - 1;
}
923 "Early GVN sinking of Expressions",
false,
false)
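A hedged usage note: the legacy wrapper is constructed through
createGVNSinkPass(), and the pass can also be exercised on its own through opt;
both shown as a sketch below.

// From C++ (legacy pass manager era):
//   legacy::PassManager PM;
//   PM.add(createGVNSinkPass());
//   PM.run(M);
//
// From the command line (new pass manager):
//   opt -passes=gvn-sink input.ll -S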