91#define DEBUG_TYPE "mldst-motion"
97class MergedLoadStoreMotion {
104 const int MagicCompileTimeControl = 250;
106 const bool SplitFooterBB;
108 MergedLoadStoreMotion(
bool SplitFooterBB) : SplitFooterBB(SplitFooterBB) {}
117 bool isStoreSinkBarrierInRange(
const Instruction &Start,
130 assert(isDiamondHead(BB) &&
"Basic block is not head of a diamond");
137bool MergedLoadStoreMotion::isDiamondHead(
BasicBlock *BB) {
141 if (!BI || !BI->isConditional())
155 if (!Succ0Succ || !Succ1Succ || Succ0Succ != Succ1Succ)
169bool MergedLoadStoreMotion::isStoreSinkBarrierInRange(
const Instruction &Start,
176 return AA->canInstructionRangeModRef(Start,
End, Loc, ModRefInfo::ModRef);
189 auto *Store1 = dyn_cast<StoreInst>(&Inst);
196 if (AA->isMustAlias(Loc0, Loc1) &&
197 !isStoreSinkBarrierInRange(*Store1->getNextNode(), BB1->
back(), Loc1) &&
198 !isStoreSinkBarrierInRange(*Store0->
getNextNode(), BB0->back(), Loc0) &&
202 Store1->getValueOperand()->getType(),
216 Value *Opd2 =
S1->getValueOperand();
221 NewPN->insertBefore(BB->
begin());
222 NewPN->applyMergedLocation(S0->
getDebugLoc(),
S1->getDebugLoc());
223 NewPN->addIncoming(Opd1, S0->
getParent());
224 NewPN->addIncoming(Opd2,
S1->getParent());
231bool MergedLoadStoreMotion::canSinkStoresAndGEPs(
StoreInst *S0,
236 auto *GEP1 = dyn_cast<GetElementPtrInst>(
S1->getPointerOperand());
237 return GEP0 && GEP1 && GEP0->isIdenticalTo(GEP1) && GEP0->hasOneUse() &&
238 (GEP0->getParent() == S0->
getParent()) && GEP1->hasOneUse() &&
239 (GEP1->getParent() ==
S1->getParent());
250 Value *Ptr1 =
S1->getPointerOperand();
253 dbgs() <<
"Instruction Left\n"; S0->
dump();
dbgs() <<
"\n";
268 S1->getValueOperand()->getType());
275 if (
PHINode *NewPN = getPHIOperand(BB, S0,
S1))
278 S1->eraseFromParent();
281 auto *GEP0 = cast<GetElementPtrInst>(Ptr0);
282 auto *GEP1 = cast<GetElementPtrInst>(Ptr1);
287 GEP0->replaceAllUsesWith(GEPNew);
288 GEP0->eraseFromParent();
289 GEP1->replaceAllUsesWith(GEPNew);
290 GEP1->eraseFromParent();
300bool MergedLoadStoreMotion::mergeStores(
BasicBlock *HeadBB) {
302 bool MergedStores =
false;
305 assert(SinkBB &&
"Footer of a diamond cannot be empty");
308 assert(SI !=
succ_end(HeadBB) &&
"Diamond head cannot have zero successors");
311 assert(SI !=
succ_end(HeadBB) &&
"Diamond head cannot have single successor");
321 int Size1 = std::distance(InstsNoDbg.begin(), InstsNoDbg.end());
331 auto *S0 = dyn_cast<StoreInst>(
I);
336 if (NStores * Size1 >= MagicCompileTimeControl)
339 if (!canSinkStoresAndGEPs(S0,
S1))
355 sinkStoresAndGEPs(SinkBB, S0,
S1);
367 bool Changed =
false;
377 if (isDiamondHead(&BB))
378 Changed |= mergeStores(&BB);
386 if (!Impl.run(
F, AA))
398 OS, MapClassName2PassName);
This is the interface for a simple mod/ref and alias analysis over globals.
This pass performs merges of loads and stores on both sides of a.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
A manager for alias analyses.
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
LLVM Basic Block Representation.
iterator begin()
Instruction iterator methods.
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
reverse_iterator rbegin()
iterator_range< filter_iterator< BasicBlock::const_iterator, std::function< bool(const Instruction &)> > > instructionsWithoutDebug(bool SkipPseudoOp=true) const
Return a const iterator range over the instructions in the block, skipping any debug instructions.
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
InstListType::reverse_iterator reverse_iterator
const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
InstListType::iterator iterator
Instruction iterators...
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
bool hasNPredecessorsOrMore(unsigned N) const
Return true if this block has N predecessors or more.
const Instruction & back() const
Represents analyses that only rely on functions' control flow.
static bool isBitOrNoopPointerCastable(Type *SrcTy, Type *DestTy, const DataLayout &DL)
Check whether a bitcast, inttoptr, or ptrtoint cast between these types is valid and a no-op.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
void mergeDIAssignID(ArrayRef< const Instruction * > SourceInstructions)
Merge the DIAssignID metadata from this instruction and those attached to instructions in SourceInstr...
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
void andIRFlags(const Value *V)
Logical 'and' of any supported wrapping, exact, and fast-math flags of V and this instruction.
bool hasSameSpecialState(const Instruction *I2, bool IgnoreAlignment=false, bool IntersectAttrs=false) const LLVM_READONLY
This function determines if the speficied instruction has the same "special" characteristics as the c...
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
void applyMergedLocation(DILocation *LocA, DILocation *LocB)
Merge 2 debug locations and apply it to the Instruction.
const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
LLVM_DUMP_METHOD void dump() const
Representation for a specific memory location.
static MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName)
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
void preserveSet()
Mark an analysis set as preserved.
An instruction for storing to memory.
Value * getValueOperand()
Value * getPointerOperand()
StringRef - Represent a constant reference to a string, i.e.
void setOperand(unsigned i, Value *Val)
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
StringRef getName() const
Return a constant reference to the value's name.
void dump() const
Support for debugging, callable in GDB: V->dump()
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
This class implements an extremely fast bulk output stream that can only output to a stream.
This is an optimization pass for GlobalISel generic memory operations.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
auto reverse(ContainerTy &&C)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
RNSuccIterator< NodeRef, BlockT, RegionT > succ_begin(NodeRef Node)
void combineMetadataForCSE(Instruction *K, const Instruction *J, bool DoesKMove)
Combine the metadata of two instructions so that K can replace J.
BasicBlock * SplitBlockPredecessors(BasicBlock *BB, ArrayRef< BasicBlock * > Preds, const char *Suffix, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method introduces at least one new basic block into the function and moves some of the predecess...
RNSuccIterator< NodeRef, BlockT, RegionT > succ_end(NodeRef Node)
A CRTP mix-in to automatically provide informational APIs needed for passes.