40 #define LDIST_NAME "loop-distribute"
41 #define DEBUG_TYPE LDIST_NAME
47 cl::desc(
"Turn on DominatorTree and LoopInfo verification "
48 "after Loop Distribution"),
52 "loop-distribute-non-if-convertible",
cl::Hidden,
53 cl::desc(
"Whether to distribute into a loop that may not be "
54 "if-convertible by the loop vectorizer"),
57 STATISTIC(NumLoopsDistributed,
"Number of loops distributed");
67 : DepCycle(DepCycle), OrigLoop(L), ClonedLoop(nullptr) {
/// Returns the current value of this partition's DepCycle flag.
72 bool hasDepCycle()
const {
return DepCycle; }
/// Returns Set.begin(): a mutable iterator to the start of this partition's
/// instruction set.
78 InstructionSet::iterator
begin() {
return Set.
begin(); }
/// Returns Set.end(): the mutable end iterator of this partition's
/// instruction set.
79 InstructionSet::iterator
end() {
return Set.
end(); }
/// Const overload: returns Set.begin() as a const_iterator so that a const
/// partition can be iterated.
80 InstructionSet::const_iterator
begin()
const {
return Set.
begin(); }
/// Const overload: returns Set.end() as a const_iterator.
81 InstructionSet::const_iterator
end()
const {
return Set.
end(); }
/// Returns true when this partition's instruction set (Set) holds no
/// elements.
82 bool empty()
const {
return Set.empty(); }
86 void moveTo(InstPartition &
Other) {
87 Other.Set.insert(Set.begin(), Set.end());
89 Other.DepCycle |= DepCycle;
94 void populateUsedSet() {
98 for (
auto *B : OrigLoop->getBlocks())
99 Set.insert(B->getTerminator());
104 while (!Worklist.empty()) {
109 if (I && OrigLoop->contains(I->
getParent()) && Set.insert(I).second)
110 Worklist.push_back(I);
124 LI, DT, ClonedLoopBlocks);
/// Returns the ClonedLoop pointer. It can be null: the constructor
/// initializes it to nullptr.
130 const Loop *getClonedLoop()
const {
return ClonedLoop; }
135 const Loop *getDistributedLoop()
const {
136 return ClonedLoop ? ClonedLoop : OrigLoop;
144 void remapInstructions() {
150 void removeUnusedInsts() {
153 for (
auto *
Block : OrigLoop->getBlocks())
154 for (
auto &Inst : *
Block)
155 if (!Set.count(&Inst)) {
158 NewInst = cast<Instruction>(VMap[NewInst]);
160 assert(!isa<BranchInst>(NewInst) &&
161 "Branches are marked used early on");
167 for (
auto I = Unused.
rbegin(), E = Unused.
rend(); I != E; ++
I) {
170 if (!Inst->use_empty())
172 Inst->eraseFromParent();
178 dbgs() <<
" (cycle)\n";
184 void printBlocks()
const {
185 for (
auto *BB : getDistributedLoop()->getBlocks())
215 class InstPartitionContainer {
220 : L(L), LI(LI), DT(DT) {}
/// Returns the number of elements currently stored in PartitionContainer,
/// converted to unsigned.
223 unsigned getSize()
const {
return PartitionContainer.size(); }
229 if (PartitionContainer.empty() || !PartitionContainer.back().hasDepCycle())
230 PartitionContainer.emplace_back(Inst, L,
true);
232 PartitionContainer.back().add(Inst);
240 void addToNewNonCyclicPartition(
Instruction *Inst) {
241 PartitionContainer.emplace_back(Inst, L);
249 void mergeAdjacentNonCyclic() {
250 mergeAdjacentPartitionsIf(
251 [](
const InstPartition *
P) {
return !P->hasDepCycle(); });
256 void mergeNonIfConvertible() {
257 mergeAdjacentPartitionsIf([&](
const InstPartition *Partition) {
258 if (Partition->hasDepCycle())
262 bool seenStore =
false;
264 for (
auto *Inst : *Partition)
265 if (isa<StoreInst>(Inst)) {
275 void mergeBeforePopulating() {
276 mergeAdjacentNonCyclic();
278 mergeNonIfConvertible();
288 bool mergeToAvoidDuplicatedLoads() {
292 LoadToPartitionT LoadToPartition;
293 ToBeMergedT ToBeMerged;
298 for (PartitionContainerT::iterator I = PartitionContainer.begin(),
299 E = PartitionContainer.end();
306 if (isa<LoadInst>(Inst)) {
308 LoadToPartitionT::iterator LoadToPart;
310 std::tie(LoadToPart, NewElt) =
311 LoadToPartition.insert(std::make_pair(Inst, PartI));
313 DEBUG(
dbgs() <<
"Merging partitions due to this load in multiple "
314 <<
"partitions: " << PartI <<
", "
315 << LoadToPart->second <<
"\n" << *Inst <<
"\n");
320 ToBeMerged.unionSets(PartI, &*PartJ);
321 }
while (&*PartJ != LoadToPart->second);
325 if (ToBeMerged.empty())
330 for (ToBeMergedT::iterator I = ToBeMerged.begin(), E = ToBeMerged.end();
335 auto PartI = I->getData();
336 for (
auto PartJ :
make_range(std::next(ToBeMerged.member_begin(I)),
337 ToBeMerged.member_end())) {
338 PartJ->moveTo(*PartI);
343 PartitionContainer.remove_if(
344 [](
const InstPartition &P) {
return P.empty(); });
351 void setupPartitionIdOnInstructions() {
353 for (
const auto &Partition : PartitionContainer) {
356 InstToPartitionIdT::iterator Iter;
358 std::tie(Iter, NewElt) =
359 InstToPartitionId.insert(std::make_pair(Inst, PartitionID));
369 void populateUsedSet() {
370 for (
auto &P : PartitionContainer)
376 void cloneLoops(
Pass *P) {
381 assert(Pred &&
"Preheader does not have a single predecessor");
383 assert(ExitBlock &&
"No single exit block");
386 assert(!PartitionContainer.empty() &&
"at least two partitions expected");
390 "preheader not empty");
396 unsigned Index = getSize() - 1;
397 for (
auto I = std::next(PartitionContainer.rbegin()),
398 E = PartitionContainer.rend();
402 NewLoop = Part->cloneLoopWithPreheader(TopPH, Pred, Index, LI, DT);
404 Part->getVMap()[ExitBlock] = TopPH;
405 Part->remapInstructions();
412 for (
auto Curr = PartitionContainer.cbegin(),
413 Next = std::next(PartitionContainer.cbegin()),
414 E = PartitionContainer.cend();
415 Next != E; ++Curr, ++Next)
417 Next->getDistributedLoop()->getLoopPreheader(),
418 Curr->getDistributedLoop()->getExitingBlock());
422 void removeUnusedInsts() {
423 for (
auto &Partition : PartitionContainer)
424 Partition.removeUnusedInsts();
437 unsigned N = RtPtrCheck->
Pointers.size();
439 for (
unsigned I = 0; I <
N; ++
I) {
444 int &Partition = PtrToPartitions[
I];
450 int ThisPartition = this->InstToPartitionId[Inst];
452 Partition = ThisPartition;
454 else if (Partition == -1)
456 else if (Partition != (
int)ThisPartition)
459 assert(Partition != -2 &&
"Pointer not belonging to any partition");
462 return PtrToPartitions;
467 for (
const auto &P : PartitionContainer) {
468 OS <<
"Partition " << Index++ <<
" (" << &P <<
"):\n";
/// Debugging helper: forwards to print() with the dbgs() stream as the
/// output target.
473 void dump()
const { print(
dbgs()); }
477 const InstPartitionContainer &Partitions) {
478 Partitions.print(OS);
483 void printBlocks()
const {
485 for (
const auto &P : PartitionContainer) {
486 dbgs() <<
"\nPartition " << Index++ <<
" (" << &P <<
"):\n";
492 typedef std::list<InstPartition> PartitionContainerT;
495 PartitionContainerT PartitionContainer;
499 InstToPartitionIdT InstToPartitionId;
507 template <
class UnaryPredicate>
508 void mergeAdjacentPartitionsIf(UnaryPredicate
Predicate) {
509 InstPartition *PrevMatch =
nullptr;
510 for (
auto I = PartitionContainer.begin(); I != PartitionContainer.end();) {
512 if (PrevMatch ==
nullptr && DoesMatch) {
515 }
else if (PrevMatch !=
nullptr && DoesMatch) {
516 I->moveTo(*PrevMatch);
517 I = PartitionContainer.erase(I);
532 class MemoryInstructionDependences {
538 unsigned NumUnsafeDependencesStartOrEnd;
/// Builds an entry for \p Inst with NumUnsafeDependencesStartOrEnd
/// starting at zero.
540 Entry(
Instruction *Inst) : Inst(Inst), NumUnsafeDependencesStartOrEnd(0) {}
/// Returns Accesses.begin(): a const iterator to the first recorded access
/// entry.
545 AccessesType::const_iterator
begin()
const {
return Accesses.
begin(); }
/// Returns Accesses.end(): the const end iterator of the recorded access
/// entries.
546 AccessesType::const_iterator
end()
const {
return Accesses.
end(); }
548 MemoryInstructionDependences(
551 Accesses.append(Instructions.
begin(), Instructions.
end());
553 DEBUG(
dbgs() <<
"Backward dependences:\n");
554 for (
auto &Dep : InterestingDependences)
555 if (Dep.isPossiblyBackward()) {
559 ++Accesses[Dep.Source].NumUnsafeDependencesStartOrEnd;
560 --Accesses[Dep.Destination].NumUnsafeDependencesStartOrEnd;
562 DEBUG(Dep.print(
dbgs(), 2, Instructions));
567 AccessesType Accesses;
577 for (
auto &Inst : *Block) {
580 auto *
Use = cast<Instruction>(U);
596 bool runOnFunction(
Function &
F)
override {
597 LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
598 LAA = &getAnalysis<LoopAccessAnalysis>();
599 DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
606 for (
Loop *TopLevelLoop : *LI)
613 bool Changed =
false;
614 for (
Loop *L : Worklist)
615 Changed |= processLoop(L);
633 bool processLoop(
Loop *L) {
634 assert(L->
empty() &&
"Only process inner loops.");
637 <<
"\" checking " << *L <<
"\n");
641 DEBUG(
dbgs() <<
"Skipping; no preheader");
645 DEBUG(
dbgs() <<
"Skipping; multiple exit blocks");
655 DEBUG(
dbgs() <<
"Skipping; memory operations are safe for vectorization");
658 auto *InterestingDependences =
660 if (!InterestingDependences || InterestingDependences->empty()) {
661 DEBUG(
dbgs() <<
"Skipping; No unsafe dependences to isolate");
665 InstPartitionContainer Partitions(L, LI, DT);
688 *InterestingDependences);
690 int NumUnsafeDependencesActive = 0;
691 for (
auto &InstDep : MID) {
695 if (NumUnsafeDependencesActive ||
696 InstDep.NumUnsafeDependencesStartOrEnd > 0)
697 Partitions.addToCyclicPartition(I);
699 Partitions.addToNewNonCyclicPartition(I);
700 NumUnsafeDependencesActive += InstDep.NumUnsafeDependencesStartOrEnd;
701 assert(NumUnsafeDependencesActive >= 0 &&
702 "Negative number of dependences active");
710 auto DefsUsedOutside = findDefsUsedOutsideOfLoop(L);
711 for (
auto *Inst : DefsUsedOutside)
712 Partitions.addToNewNonCyclicPartition(Inst);
714 DEBUG(
dbgs() <<
"Seeded partitions:\n" << Partitions);
715 if (Partitions.getSize() < 2)
720 Partitions.mergeBeforePopulating();
721 DEBUG(
dbgs() <<
"\nMerged partitions:\n" << Partitions);
722 if (Partitions.getSize() < 2)
726 Partitions.populateUsedSet();
727 DEBUG(
dbgs() <<
"\nPopulated partitions:\n" << Partitions);
731 if (Partitions.mergeToAvoidDuplicatedLoads()) {
732 DEBUG(
dbgs() <<
"\nPartitions merged to ensure unique loads:\n"
734 if (Partitions.getSize() < 2)
738 DEBUG(
dbgs() <<
"\nDistributing loop: " << *L <<
"\n");
741 Partitions.setupPartitionIdOnInstructions();
746 if (!PH->getSinglePredecessor() || &*PH->begin() != PH->getTerminator())
751 auto PtrToPartition = Partitions.computePartitionSetForPointers(LAI);
753 if (LVer.needsRuntimeChecks()) {
756 LVer.versionLoop(
this);
757 LVer.addPHINodes(DefsUsedOutside);
762 Partitions.cloneLoops(
this);
766 Partitions.removeUnusedInsts();
767 DEBUG(
dbgs() <<
"\nAfter removing unused Instrs:\n");
768 DEBUG(Partitions.printBlocks());
775 ++NumLoopsDistributed;
Pass interface - Implemented by all 'passes'.
void push_back(const T &Elt)
const_iterator end(StringRef path)
Get end iterator over path.
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
STATISTIC(NumFunctions,"Total number of functions")
BasicBlock * SplitBlock(BasicBlock *Old, Instruction *SplitPt, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr)
SplitBlock - Split the specified block at the specified instruction - every thing before SplitPt stay...
const SmallVectorImpl< Instruction * > & getMemoryInstructions() const
The vector of memory access instructions.
static cl::opt< bool > DistributeNonIfConvertible("loop-distribute-non-if-convertible", cl::Hidden, cl::desc("Whether to distribute into a loop that may not be ""if-convertible by the loop vectorizer"), cl::init(false))
const_iterator begin(StringRef path)
Get begin iterator over path.
Checks memory dependences among accesses to the same underlying object to determine whether there vec...
iv Induction Variable Users
BlockT * getExitBlock() const
getExitBlock - If getExitBlocks would return exactly one block, return that block.
const std::vector< BlockT * > & getBlocks() const
getBlocks - Get a list of the basic blocks which make up this loop.
BlockT * getHeader() const
void remapInstructionsInBlocks(const SmallVectorImpl< BasicBlock * > &Blocks, ValueToValueMapTy &VMap)
Remaps instructions in Blocks using the mapping in VMap.
DenseMap< const Value *, Value * > ValueToValueMap
StringRef getName() const
Return a constant reference to the value's name.
iterator begin()
Instruction iterator methods.
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
A Use represents the edge between a Value definition and its users.
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
const SmallVectorImpl< Dependence > * getInterestingDependences() const
Returns the interesting dependences.
FunctionPass * createLoopDistributePass()
void print(raw_ostream &OS, unsigned Depth=0, const SmallVectorImpl< int > *PtrPartition=nullptr) const
Print the list run-time memory checks necessary.
const RuntimePointerChecking * getRuntimePointerChecking() const
static bool add(uint64_t *dest, const uint64_t *x, const uint64_t *y, unsigned len)
This function adds the integer array x to the integer array Y and places the result in dest...
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
void replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
initializer< Ty > init(const Ty &Val)
friend const_iterator end(StringRef path)
Get end iterator over path.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
BlockT * getLoopPreheader() const
getLoopPreheader - If there is a preheader for this loop, return it.
LLVM Basic Block Representation.
SmallVector< Instruction *, 4 > getInstructionsForAccess(Value *Ptr, bool isWrite) const
Return the list of instructions that use Ptr to read or write memory.
EquivalenceClasses - This represents a collection of equivalence classes and supports three efficient...
Represent the analysis usage information of a pass.
bool contains(const LoopT *L) const
contains - Return true if the specified loop is contained within this loop.
FunctionPass class - This class is used to implement most global optimizations.
void initializeLoopDistributePass(PassRegistry &)
static UndefValue * get(Type *T)
get() - Static factory methods - Return an 'undef' object of the specified type.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
friend const_iterator begin(StringRef path)
Get begin iterator over path.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
const MemoryDepChecker & getDepChecker() const
the Memory Dependence Checker which can determine the loop-independent and loop-carried dependences b...
Drive the analysis of memory accesses in the loop.
This class emits a version of the loop where run-time checks ensure that may-alias pointers can't ove...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
iterator_range< user_iterator > users()
BasicBlock * getSinglePredecessor()
Return the predecessor of this block if it has a single predecessor block.
static const char ldist_name[]
LLVM_ATTRIBUTE_UNUSED_RESULT std::enable_if< !is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Loop * cloneLoopWithPreheader(BasicBlock *Before, BasicBlock *LoopDomBB, Loop *OrigLoop, ValueToValueMapTy &VMap, const Twine &NameSuffix, LoopInfo *LI, DominatorTree *DT, SmallVectorImpl< BasicBlock * > &Blocks)
Clones a loop OrigLoop.
Holds information about the memory runtime legality checks to verify that a group of pointers do not ...
This analysis provides dependence information for the memory accesses of a loop.
Dependence between memory access instructions.
TerminatorInst * getTerminator()
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
SmallVector< PointerInfo, 2 > Pointers
Information about the pointers that may require checking.
iterator_range< value_op_iterator > operand_values()
raw_ostream & operator<<(raw_ostream &OS, const APInt &I)
void changeImmediateDominator(DomTreeNodeBase< NodeT > *N, DomTreeNodeBase< NodeT > *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node's...
iterator_range< df_iterator< T > > depth_first(const T &G)
static cl::opt< bool > LDistVerify("loop-distribute-verify", cl::Hidden, cl::desc("Turn on DominatorTree and LoopInfo verification ""after Loop Distribution"), cl::init(false))
reverse_iterator rbegin()
LLVM Value Representation.
static bool blockNeedsPredication(BasicBlock *BB, Loop *TheLoop, DominatorTree *DT)
Return true if the block BB needs to be predicated in order for the loop to be vectorized.
bool canVectorizeMemory() const
Return true if we can analyze the memory accesses in the loop and there are no memory dependence cycles...
This class implements an extremely fast bulk output stream that can only output to a stream...
The legacy pass manager's analysis pass to compute loop information.
virtual void print(raw_ostream &O, const Module *M) const
print - Print out the internal state of the pass.
Legacy analysis pass which computes a DominatorTree.
Dependence - This class represents a dependence between two memory references in a function...
const BasicBlock * getParent() const