using namespace llvm::PatternMatch;

#define DEBUG_TYPE "early-cse"

STATISTIC(NumSimplify, "Number of instructions simplified or DCE'd");
STATISTIC(NumCSE,      "Number of instructions CSE'd");
STATISTIC(NumCSECVP,   "Number of compare instructions CVP'd");
STATISTIC(NumCSELoad,  "Number of load instructions CSE'd");
STATISTIC(NumCSECall,  "Number of call instructions CSE'd");
STATISTIC(NumDSE,      "Number of trivial dead stores removed");
SimpleValue(Instruction *I) : Inst(I) {
  assert((isSentinel() || canHandle(I)) && "Inst can't be handled!");
}

bool isSentinel() const {
  return Inst == DenseMapInfo<Instruction *>::getEmptyKey() ||
         Inst == DenseMapInfo<Instruction *>::getTombstoneKey();
}

static bool canHandle(Instruction *Inst) {
  // This can only handle non-void readnone functions.
  if (CallInst *CI = dyn_cast<CallInst>(Inst))
    return CI->doesNotAccessMemory() && !CI->getType()->isVoidTy();
  return isa<CastInst>(Inst) || isa<BinaryOperator>(Inst) ||
         isa<GetElementPtrInst>(Inst) || isa<CmpInst>(Inst) ||
         isa<SelectInst>(Inst) || isa<ExtractElementInst>(Inst) ||
         isa<InsertElementInst>(Inst) || isa<ShuffleVectorInst>(Inst) ||
         isa<ExtractValueInst>(Inst) || isa<InsertValueInst>(Inst);
}
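// Note: canHandle() accepts only instructions whose result is a pure function
// of their operands -- calls must be readnone and non-void -- so two
// syntactically identical instances are guaranteed to compute the same value.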
static unsigned getHashValue(SimpleValue Val);
static bool isEqual(SimpleValue LHS, SimpleValue RHS);
Value *LHS = BinOp->getOperand(0);
Value *RHS = BinOp->getOperand(1);
if (BinOp->isCommutative() && BinOp->getOperand(0) > BinOp->getOperand(1))
  std::swap(LHS, RHS);
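// Illustrative effect of the canonical operand order: `%x = add i32 %a, %b`
// and `%y = add i32 %b, %a` hash identically, so the later instruction can be
// CSE'd to the earlier one.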
if (CmpInst *CI = dyn_cast<CmpInst>(Inst)) {
  Value *LHS = CI->getOperand(0);
  Value *RHS = CI->getOperand(1);
  CmpInst::Predicate Pred = CI->getPredicate();
  if (Inst->getOperand(0) > Inst->getOperand(1)) {
    std::swap(LHS, RHS);
    Pred = CI->getSwappedPredicate();
  }
  return hash_combine(Inst->getOpcode(), Pred, LHS, RHS);
}
if (CastInst *CI = dyn_cast<CastInst>(Inst))
  return hash_combine(CI->getOpcode(), CI->getType(), CI->getOperand(0));

return hash_combine(EVI->getOpcode(), EVI->getOperand(0),
                    hash_combine_range(EVI->idx_begin(), EVI->idx_end()));

return hash_combine(IVI->getOpcode(), IVI->getOperand(0),
                    hash_combine_range(IVI->idx_begin(), IVI->idx_end()));
assert((isa<CallInst>(Inst) || isa<BinaryOperator>(Inst) ||
        isa<GetElementPtrInst>(Inst) || isa<SelectInst>(Inst) ||
        isa<ExtractElementInst>(Inst) || isa<InsertElementInst>(Inst) ||
        isa<ShuffleVectorInst>(Inst)) &&
       "Invalid/unknown instruction");
if (LHS.isSentinel() || RHS.isSentinel())
  return LHSI == RHSI;
if (LHSI->getOpcode() != RHSI->getOpcode())
  return false;

if (BinaryOperator *LHSBinOp = dyn_cast<BinaryOperator>(LHSI)) {
  if (!LHSBinOp->isCommutative())
    return false;
  assert(isa<BinaryOperator>(RHSI) &&
         "same opcode, but different instruction type?");
  BinaryOperator *RHSBinOp = cast<BinaryOperator>(RHSI);
  // Commuted equality: X op Y matches Y op X.
  return LHSBinOp->getOperand(0) == RHSBinOp->getOperand(1) &&
         LHSBinOp->getOperand(1) == RHSBinOp->getOperand(0);
}
if (CmpInst *LHSCmp = dyn_cast<CmpInst>(LHSI)) {
  assert(isa<CmpInst>(RHSI) &&
         "same opcode, but different instruction type?");
  CmpInst *RHSCmp = cast<CmpInst>(RHSI);
  // Commuted equality with a swapped predicate.
  return LHSCmp->getOperand(0) == RHSCmp->getOperand(1) &&
         LHSCmp->getOperand(1) == RHSCmp->getOperand(0) &&
         LHSCmp->getSwappedPredicate() == RHSCmp->getPredicate();
}
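// Illustrative example: `icmp ult %a, %b` and `icmp ugt %b, %a` compare equal
// here, mirroring the swapped-predicate canonicalization in getHashValue().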
CallValue(Instruction *I) : Inst(I) {
  assert((isSentinel() || canHandle(I)) && "Inst can't be handled!");
}

bool isSentinel() const {
  return Inst == DenseMapInfo<Instruction *>::getEmptyKey() ||
         Inst == DenseMapInfo<Instruction *>::getTombstoneKey();
}
static unsigned getHashValue(CallValue Val);
static bool isEqual(CallValue LHS, CallValue RHS);
if (LHS.isSentinel() || RHS.isSentinel())
  return LHSI == RHSI;

typedef ScopedHashTable<SimpleValue, Value *, DenseMapInfo<SimpleValue>,
                        AllocatorTy> ScopedHTType;
ScopedHTType AvailableValues;
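// AvailableValues maps a SimpleValue to its dominating definition. Entries
// are scoped: they are pushed when the dominator-tree walk enters a node and
// popped on exit, so a value is only "available" in blocks it dominates.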
LoadValue()
    : DefInst(nullptr), Generation(0), MatchingId(-1), IsAtomic(false),
      IsInvariant(false) {}
LoadValue(Instruction *Inst, unsigned Generation, unsigned MatchingId,
          bool IsAtomic, bool IsInvariant)
    : DefInst(Inst), Generation(Generation), MatchingId(MatchingId),
      IsAtomic(IsAtomic), IsInvariant(IsInvariant) {}
typedef ScopedHashTable<Value *, LoadValue, DenseMapInfo<Value *>,
                        LoadMapAllocator> LoadHTType;
LoadHTType AvailableLoads;

CallHTType AvailableCalls;

unsigned CurrentGeneration;
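// CurrentGeneration is a coarse clock for memory state: it is bumped whenever
// an instruction may write to memory. A cached load or call result is reused
// only if its generation still matches, or if MemorySSA can prove there was
// no intervening clobber (see isSameMemGeneration below).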
: TLI(TLI), TTI(TTI), DT(DT), AC(AC), MSSA(MSSA), CurrentGeneration(0) {}
NodeScope(ScopedHTType &AvailableValues, LoadHTType &AvailableLoads,
          CallHTType &AvailableCalls)
    : Scope(AvailableValues), LoadScope(AvailableLoads),
      CallScope(AvailableCalls) {}
NodeScope(const NodeScope &) = delete;
void operator=(const NodeScope &) = delete;

ScopedHTType::ScopeTy Scope;
LoadHTType::ScopeTy LoadScope;
CallHTType::ScopeTy CallScope;
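// The ScopeTy members are RAII objects: constructing a NodeScope opens a new
// scope in each hash table, and destroying it pops every entry inserted while
// that scope was active. This confines availability to dominated blocks.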
StackNode(ScopedHTType &AvailableValues, LoadHTType &AvailableLoads,
          CallHTType &AvailableCalls, unsigned cg, DomTreeNode *n,
          DomTreeNode::iterator child, DomTreeNode::iterator end)
    : CurrentGeneration(cg), ChildGeneration(cg), Node(n), ChildIter(child),
      EndIter(end), Scopes(AvailableValues, AvailableLoads, AvailableCalls),
      Processed(false) {}
unsigned currentGeneration() { return CurrentGeneration; }
unsigned childGeneration() { return ChildGeneration; }
bool isProcessed() { return Processed; }
void process() { Processed = true; }
StackNode(const StackNode &) = delete;
void operator=(const StackNode &) = delete;

unsigned CurrentGeneration;
unsigned ChildGeneration;
class ParseMemoryInst {
public:
  ParseMemoryInst(Instruction *Inst, const TargetTransformInfo &TTI)
      : IsTargetMemInst(false), Inst(Inst) {
    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst))
      if (TTI.getTgtMemIntrinsic(II, Info))
        IsTargetMemInst = true;
  }
bool isLoad() const {
  if (IsTargetMemInst) return Info.ReadMem;
  return isa<LoadInst>(Inst);
}

bool isStore() const {
  if (IsTargetMemInst) return Info.WriteMem;
  return isa<StoreInst>(Inst);
}
bool isAtomic() const {
  if (IsTargetMemInst) {
    assert(Info.IsSimple && "need to refine IsSimple in TTI");
    return false;
  }
  return Inst->isAtomic();
}

bool isUnordered() const {
  if (IsTargetMemInst) {
    assert(Info.IsSimple && "need to refine IsSimple in TTI");
    return true;
  }

  if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
    return LI->isUnordered();
  } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
    return SI->isUnordered();
  }
  // Conservative answer.
  return !Inst->isAtomic();
}
bool isVolatile() const {
  if (IsTargetMemInst) {
    assert(Info.IsSimple && "need to refine IsSimple in TTI");
    return false;
  }

  if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
    return LI->isVolatile();
  } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
    return SI->isVolatile();
  }
  // Conservative answer.
  return true;
}
bool isInvariantLoad() const {
  if (auto *LI = dyn_cast<LoadInst>(Inst))
    return LI->getMetadata(LLVMContext::MD_invariant_load) != nullptr;
  return false;
}

bool isMatchingMemLoc(const ParseMemoryInst &Inst) const {
  return (getPointerOperand() == Inst.getPointerOperand() &&
          getMatchingId() == Inst.getMatchingId());
}
int getMatchingId() const {
  if (IsTargetMemInst) return Info.MatchingId;
  return -1;
}

Value *getPointerOperand() const {
  if (IsTargetMemInst) return Info.PtrVal;
  if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
    return LI->getPointerOperand();
  } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
    return SI->getPointerOperand();
  }
  return nullptr;
}
bool mayReadFromMemory() const {
  if (IsTargetMemInst) return Info.ReadMem;
  return Inst->mayReadFromMemory();
}

bool mayWriteToMemory() const {
  if (IsTargetMemInst) return Info.WriteMem;
  return Inst->mayWriteToMemory();
}

private:
  bool IsTargetMemInst;
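// ParseMemoryInst presents plain loads/stores and target-specific memory
// intrinsics (recognized via TTI.getTgtMemIntrinsic) through one interface,
// so the CSE/DSE logic below need not special-case either form.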
if (auto *LI = dyn_cast<LoadInst>(Inst))
  return LI;
if (auto *SI = dyn_cast<StoreInst>(Inst))
  return SI->getValueOperand();
assert(isa<IntrinsicInst>(Inst) && "Instruction not supported");
bool isSameMemGeneration(unsigned EarlierGeneration, unsigned LaterGeneration,
                         Instruction *EarlierInst, Instruction *LaterInst);
for (unsigned I = 0; I < WorkQueue.size(); ++I) {

  // Collect the MemoryPhis that use this access before removing it.
  for (auto *U : WI->users())
    if (MemoryPhi *MP = dyn_cast<MemoryPhi>(U))
      PhisToCheck.push_back(MP);

  MSSA->removeMemoryAccess(WI);

  // A phi whose incoming values are all FirstIn is trivial.
  if (all_of(MP->incoming_values(),
             [=](Use &In) { return In == FirstIn; }))
    WorkQueue.push_back(MP);
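// A MemoryPhi whose incoming values are all identical carries no information:
// it can be replaced by that single value and queued for removal as well.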
bool EarlyCSE::isSameMemGeneration(unsigned EarlierGeneration,
                                   unsigned LaterGeneration,
                                   Instruction *EarlierInst,
                                   Instruction *LaterInst) {
  // Check the simple memory generation tracking first.
  if (EarlierGeneration == LaterGeneration)
    return true;

  MemoryAccess *LaterDef =
      MSSA->getWalker()->getClobberingMemoryAccess(LaterInst);
  return MSSA->dominates(LaterDef, MSSA->getMemoryAccess(EarlierInst));
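// The MemorySSA fallback: LaterDef is the access that clobbers the later
// instruction. If LaterDef dominates the earlier instruction, no write
// occurred between the two, so the cached result is still valid even though
// the generation counter has moved on.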
bool Changed = false;
if (auto *BI = dyn_cast<BranchInst>(Pred->getTerminator()))
  if (BI->isConditional())
    if (auto *CondInst = dyn_cast<Instruction>(BI->getCondition()))
      if (SimpleValue::canHandle(CondInst)) {
        assert(BI->getSuccessor(0) == BB || BI->getSuccessor(1) == BB);
        auto *ConditionalConstant = (BI->getSuccessor(0) == BB)
                                        ? ConstantInt::getTrue(BB->getContext())
                                        : ConstantInt::getFalse(BB->getContext());
        AvailableValues.insert(CondInst, ConditionalConstant);
        DEBUG(dbgs() << "EarlyCSE CVP: Add conditional value for '"
                     << CondInst->getName() << "' as " << *ConditionalConstant
                     << " in " << BB->getName() << "\n");

NumCSECVP = NumCSECVP + Count;
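// Illustrative effect: after `br i1 %cmp, label %then, label %else`, %cmp is
// known true in %then and false in %else (when the block's single predecessor
// is that branch), so dominated uses of %cmp fold to a constant.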
DEBUG(dbgs() << "EarlyCSE DCE: " << *Inst << '\n');
if (match(Inst, m_Intrinsic<Intrinsic::assume>())) {
  DEBUG(dbgs() << "EarlyCSE skipping assumption: " << *Inst << '\n');
if (match(Inst, m_Intrinsic<Intrinsic::invariant_start>()))
  continue;

if (match(Inst, m_Intrinsic<Intrinsic::experimental_guard>())) {
  if (auto *CondI =
          dyn_cast<Instruction>(cast<CallInst>(Inst)->getArgOperand(0))) {
    // The guarded condition is known true in all dominated blocks.
    if (SimpleValue::canHandle(CondI))
DEBUG(dbgs() << "EarlyCSE Simplify: " << *Inst << " to: " << *V << '\n');
if (SimpleValue::canHandle(Inst)) {
  // See if the instruction has an available value. If so, use it.
  if (Value *V = AvailableValues.lookup(Inst)) {
    DEBUG(dbgs() << "EarlyCSE CSE: " << *Inst << " to: " << *V << '\n');
    if (auto *I = dyn_cast<Instruction>(V))
      I->andIRFlags(Inst);

  // Otherwise, just remember that this value is available.
  AvailableValues.insert(Inst, Inst);
ParseMemoryInst MemInst(Inst, TTI);
// If this is a non-volatile load, process it.
if (MemInst.isValid() && MemInst.isLoad()) {
  // Ignore volatile or ordered loads; they act as barriers.
  if (MemInst.isVolatile() || !MemInst.isUnordered()) {

  // If we have an available version of this load, and if it is the
  // right generation, replace this instruction.
  LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand());
  if (InVal.DefInst != nullptr &&
      InVal.MatchingId == MemInst.getMatchingId() &&
      // We don't yet handle removing loads with ordering of any kind.
      !MemInst.isVolatile() && MemInst.isUnordered() &&
      // We can't replace an atomic load with one which isn't also atomic.
      InVal.IsAtomic >= MemInst.isAtomic() &&
      (InVal.IsInvariant || MemInst.isInvariantLoad() ||
       isSameMemGeneration(InVal.Generation, CurrentGeneration,
                           InVal.DefInst, Inst))) {
    DEBUG(dbgs() << "EarlyCSE CSE LOAD: " << *Inst
                 << " to: " << *InVal.DefInst << '\n');
AvailableLoads.insert(
    MemInst.getPointerOperand(),
    LoadValue(Inst, CurrentGeneration, MemInst.getMatchingId(),
              MemInst.isAtomic(), MemInst.isInvariantLoad()));
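// Illustrative effect: given `%v1 = load i32, i32* %p` ... `%v2 = load i32,
// i32* %p` with no may-write instruction in between, %v2 is replaced by %v1
// and deleted.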
if (Inst->mayReadFromMemory() &&
    !(MemInst.isValid() && !MemInst.mayReadFromMemory()))
  LastStore = nullptr;
if (CallValue::canHandle(Inst)) {
  // If we have an available version of this call, and if it is the right
  // generation, replace this instruction.
  std::pair<Instruction *, unsigned> InVal = AvailableCalls.lookup(Inst);
  if (InVal.first != nullptr &&
      isSameMemGeneration(InVal.second, CurrentGeneration, InVal.first,
                          Inst)) {
    DEBUG(dbgs() << "EarlyCSE CSE CALL: " << *Inst
                 << " to: " << *InVal.first << '\n');

  // Otherwise, remember that we have this instruction.
  AvailableCalls.insert(
      Inst, std::pair<Instruction *, unsigned>(Inst, CurrentGeneration));
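// Only read-only calls reach this point (CallValue::canHandle rejects
// everything else), so reusing the earlier result is safe exactly when
// memory is unchanged -- hence the generation check above.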
if (FenceInst *FI = dyn_cast<FenceInst>(Inst))
if (MemInst.isValid() && MemInst.isStore()) {
  // If we are storing back the value we just loaded from the same pointer
  // and the memory state has not changed, the store is a no-op.
  LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand());
  if (InVal.DefInst &&
      InVal.DefInst == getOrCreateResult(Inst, InVal.DefInst->getType()) &&
      InVal.MatchingId == MemInst.getMatchingId() &&
      // We don't yet handle removing stores with ordering of any kind.
      !MemInst.isVolatile() && MemInst.isUnordered() &&
      isSameMemGeneration(InVal.Generation, CurrentGeneration,
                          InVal.DefInst, Inst)) {
    assert((!LastStore ||
            ParseMemoryInst(LastStore, TTI).getPointerOperand() ==
                MemInst.getPointerOperand() ||
            MSSA) &&
           "can't have an intervening store if not using MemorySSA!");
    DEBUG(dbgs() << "EarlyCSE DSE (writeback): " << *Inst << '\n');
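// Illustrative effect: in `%v = load i32, i32* %p` followed by
// `store i32 %v, i32* %p` with no intervening write, the store writes back
// the value memory already holds and is removed.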
if (MemInst.isValid() && MemInst.isStore()) {
  // Overwriting the last store to the same location with no intervening
  // read makes the earlier store trivially dead.
  if (LastStore) {
    ParseMemoryInst LastStoreMemInst(LastStore, TTI);
    assert(LastStoreMemInst.isUnordered() &&
           !LastStoreMemInst.isVolatile() && "Violated invariant");
    if (LastStoreMemInst.isMatchingMemLoc(MemInst)) {
      DEBUG(dbgs() << "EarlyCSE DEAD STORE: " << *LastStore
                   << " due to: " << *Inst << '\n');
      removeMSSA(LastStore);
AvailableLoads.insert(
    MemInst.getPointerOperand(),
    LoadValue(Inst, CurrentGeneration, MemInst.getMatchingId(),
              MemInst.isAtomic(), /*IsInvariant=*/false));

if (MemInst.isUnordered() && !MemInst.isVolatile())
  LastStore = Inst;
bool EarlyCSE::run() {
  // Note: a deque is used here because it performs significantly better
  // than a vector once the container grows large.
  std::deque<StackNode *> nodesToProcess;

  bool Changed = false;

  // Process the root node.
  nodesToProcess.push_back(new StackNode(
      AvailableValues, AvailableLoads, AvailableCalls, CurrentGeneration,
      DT.getRootNode(), DT.getRootNode()->begin(), DT.getRootNode()->end()));
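// The dominator tree is walked with an explicit stack of StackNodes instead
// of recursion, which avoids call-stack overflow on very deep trees.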
// Save the current generation.
unsigned LiveOutGeneration = CurrentGeneration;

// Process the stack.
while (!nodesToProcess.empty()) {
  // Grab the node on top of the stack.
  StackNode *NodeToProcess = nodesToProcess.back();

  // Initialize class members.
  CurrentGeneration = NodeToProcess->currentGeneration();

  // Check if the node needs to be processed.
  if (!NodeToProcess->isProcessed()) {
    // Process the node.
    Changed |= processNode(NodeToProcess->node());
    NodeToProcess->childGeneration(CurrentGeneration);
    NodeToProcess->process();
  } else if (NodeToProcess->childIter() != NodeToProcess->end()) {
    // Push the next child onto the stack.
    DomTreeNode *child = NodeToProcess->nextChild();
    nodesToProcess.push_back(
        new StackNode(AvailableValues, AvailableLoads, AvailableCalls,
                      NodeToProcess->childGeneration(), child, child->begin(),
                      child->end()));
  } else {
    // No more children: pop the node off the stack.
    delete NodeToProcess;
    nodesToProcess.pop_back();
  }
}

// Restore the live-out generation.
CurrentGeneration = LiveOutGeneration;
EarlyCSE CSE(TLI, TTI, DT, AC, MSSA);
template <bool UseMemorySSA>

bool runOnFunction(Function &F) override {
if (skipFunction(F))
  return false;

auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
auto *MSSA =
    UseMemorySSA ? &getAnalysis<MemorySSAWrapperPass>().getMSSA() : nullptr;
EarlyCSE CSE(TLI, TTI, DT, AC, MSSA);

using EarlyCSEMemSSALegacyPass = EarlyCSELegacyCommonPass<true>;

char EarlyCSEMemSSALegacyPass::ID = 0;

return new EarlyCSEMemSSALegacyPass();

"Early CSE w/ MemorySSA", false, false)