90#define DEBUG_TYPE "mldst-motion"
96class MergedLoadStoreMotion {
103 const int MagicCompileTimeControl = 250;
105 const bool SplitFooterBB;
107 MergedLoadStoreMotion(
bool SplitFooterBB) : SplitFooterBB(SplitFooterBB) {}
116 bool isStoreSinkBarrierInRange(
const Instruction &Start,
129 assert(isDiamondHead(BB) &&
"Basic block is not head of a diamond");
136bool MergedLoadStoreMotion::isDiamondHead(
BasicBlock *BB) {
140 if (!BI || !BI->isConditional())
154 if (!Succ0Succ || !Succ1Succ || Succ0Succ != Succ1Succ)
168bool MergedLoadStoreMotion::isStoreSinkBarrierInRange(
const Instruction &Start,
175 return AA->canInstructionRangeModRef(Start,
End, Loc, ModRefInfo::ModRef);
188 auto *Store1 = dyn_cast<StoreInst>(&Inst);
195 if (AA->isMustAlias(Loc0, Loc1) &&
196 !isStoreSinkBarrierInRange(*Store1->getNextNode(), BB1->
back(), Loc1) &&
197 !isStoreSinkBarrierInRange(*Store0->
getNextNode(), BB0->back(), Loc0) &&
201 Store1->getValueOperand()->getType(),
215 Value *Opd2 =
S1->getValueOperand();
220 NewPN->insertBefore(BB->
begin());
221 NewPN->applyMergedLocation(S0->
getDebugLoc(),
S1->getDebugLoc());
222 NewPN->addIncoming(Opd1, S0->
getParent());
223 NewPN->addIncoming(Opd2,
S1->getParent());
230bool MergedLoadStoreMotion::canSinkStoresAndGEPs(
StoreInst *S0,
235 auto *GEP1 = dyn_cast<GetElementPtrInst>(
S1->getPointerOperand());
236 return GEP0 && GEP1 && GEP0->isIdenticalTo(GEP1) && GEP0->hasOneUse() &&
237 (GEP0->getParent() == S0->
getParent()) && GEP1->hasOneUse() &&
238 (GEP1->getParent() ==
S1->getParent());
249 Value *Ptr1 =
S1->getPointerOperand();
252 dbgs() <<
"Instruction Left\n"; S0->
dump();
dbgs() <<
"\n";
266 S1->getValueOperand()->getType());
273 if (
PHINode *NewPN = getPHIOperand(BB, S0,
S1))
276 S1->eraseFromParent();
279 auto *GEP0 = cast<GetElementPtrInst>(Ptr0);
280 auto *GEP1 = cast<GetElementPtrInst>(Ptr1);
285 GEP0->replaceAllUsesWith(GEPNew);
286 GEP0->eraseFromParent();
287 GEP1->replaceAllUsesWith(GEPNew);
288 GEP1->eraseFromParent();
298bool MergedLoadStoreMotion::mergeStores(
BasicBlock *HeadBB) {
300 bool MergedStores =
false;
303 assert(SinkBB &&
"Footer of a diamond cannot be empty");
306 assert(SI !=
succ_end(HeadBB) &&
"Diamond head cannot have zero successors");
309 assert(SI !=
succ_end(HeadBB) &&
"Diamond head cannot have single successor");
319 int Size1 = std::distance(InstsNoDbg.begin(), InstsNoDbg.end());
329 auto *S0 = dyn_cast<StoreInst>(
I);
334 if (NStores * Size1 >= MagicCompileTimeControl)
337 if (!canSinkStoresAndGEPs(S0,
S1))
353 sinkStoresAndGEPs(SinkBB, S0,
S1);
365 bool Changed =
false;
375 if (isDiamondHead(&BB))
376 Changed |= mergeStores(&BB);
384 if (!Impl.run(
F, AA))
396 OS, MapClassName2PassName);
This is the interface for a simple mod/ref and alias analysis over globals.
This pass performs merges of loads and stores on both sides of a.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
A manager for alias analyses.
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
LLVM Basic Block Representation.
iterator begin()
Instruction iterator methods.
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
reverse_iterator rbegin()
iterator_range< filter_iterator< BasicBlock::const_iterator, std::function< bool(const Instruction &)> > > instructionsWithoutDebug(bool SkipPseudoOp=true) const
Return a const iterator range over the instructions in the block, skipping any debug instructions.
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
InstListType::reverse_iterator reverse_iterator
const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
InstListType::iterator iterator
Instruction iterators...
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
bool hasNPredecessorsOrMore(unsigned N) const
Return true if this block has N predecessors or more.
const Instruction & back() const
Represents analyses that only rely on functions' control flow.
static bool isBitOrNoopPointerCastable(Type *SrcTy, Type *DestTy, const DataLayout &DL)
Check whether a bitcast, inttoptr, or ptrtoint cast between these types is valid and a no-op.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
void mergeDIAssignID(ArrayRef< const Instruction * > SourceInstructions)
Merge the DIAssignID metadata from this instruction and those attached to instructions in SourceInstr...
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
void andIRFlags(const Value *V)
Logical 'and' of any supported wrapping, exact, and fast-math flags of V and this instruction.
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
bool hasSameSpecialState(const Instruction *I2, bool IgnoreAlignment=false) const LLVM_READONLY
This function determines if the speficied instruction has the same "special" characteristics as the c...
void dropUnknownNonDebugMetadata(ArrayRef< unsigned > KnownIDs=std::nullopt)
Drop all unknown metadata except for debug locations.
void applyMergedLocation(DILocation *LocA, DILocation *LocB)
Merge 2 debug locations and apply it to the Instruction.
const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
LLVM_DUMP_METHOD void dump() const
Representation for a specific memory location.
static MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName)
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
void preserveSet()
Mark an analysis set as preserved.
An instruction for storing to memory.
Value * getValueOperand()
Value * getPointerOperand()
StringRef - Represent a constant reference to a string, i.e.
void setOperand(unsigned i, Value *Val)
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
StringRef getName() const
Return a constant reference to the value's name.
void dump() const
Support for debugging, callable in GDB: V->dump()
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
This class implements an extremely fast bulk output stream that can only output to a stream.
This is an optimization pass for GlobalISel generic memory operations.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
auto reverse(ContainerTy &&C)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
RNSuccIterator< NodeRef, BlockT, RegionT > succ_begin(NodeRef Node)
BasicBlock * SplitBlockPredecessors(BasicBlock *BB, ArrayRef< BasicBlock * > Preds, const char *Suffix, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method introduces at least one new basic block into the function and moves some of the predecess...
RNSuccIterator< NodeRef, BlockT, RegionT > succ_end(NodeRef Node)
A CRTP mix-in to automatically provide informational APIs needed for passes.