LLVM  14.0.0git
LoopDistribute.cpp
Go to the documentation of this file.
1 //===- LoopDistribute.cpp - Loop Distribution Pass ------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the Loop Distribution Pass. Its main focus is to
10 // distribute loops that cannot be vectorized due to dependence cycles. It
11 // tries to isolate the offending dependences into a new loop allowing
12 // vectorization of the remaining parts.
13 //
14 // For dependence analysis, the pass uses the LoopVectorizer's
15 // LoopAccessAnalysis. Because this analysis presumes no change in the order of
16 // memory operations, special care is taken to preserve the lexical order of
17 // these operations.
18 //
19 // Similarly to the Vectorizer, the pass also supports loop versioning to
20 // run-time disambiguate potentially overlapping arrays.
21 //
22 //===----------------------------------------------------------------------===//
23 
25 #include "llvm/ADT/DenseMap.h"
28 #include "llvm/ADT/Optional.h"
29 #include "llvm/ADT/STLExtras.h"
30 #include "llvm/ADT/SmallPtrSet.h"
31 #include "llvm/ADT/SmallVector.h"
32 #include "llvm/ADT/Statistic.h"
33 #include "llvm/ADT/StringRef.h"
34 #include "llvm/ADT/Twine.h"
40 #include "llvm/Analysis/LoopInfo.h"
45 #include "llvm/IR/BasicBlock.h"
46 #include "llvm/IR/Constants.h"
47 #include "llvm/IR/DiagnosticInfo.h"
48 #include "llvm/IR/Dominators.h"
49 #include "llvm/IR/Function.h"
50 #include "llvm/IR/InstrTypes.h"
51 #include "llvm/IR/Instruction.h"
52 #include "llvm/IR/Instructions.h"
53 #include "llvm/IR/LLVMContext.h"
54 #include "llvm/IR/Metadata.h"
55 #include "llvm/IR/PassManager.h"
56 #include "llvm/IR/Value.h"
57 #include "llvm/InitializePasses.h"
58 #include "llvm/Pass.h"
59 #include "llvm/Support/Casting.h"
61 #include "llvm/Support/Debug.h"
63 #include "llvm/Transforms/Scalar.h"
69 #include <cassert>
70 #include <functional>
71 #include <list>
72 #include <tuple>
73 #include <utility>
74 
75 using namespace llvm;
76 
77 #define LDIST_NAME "loop-distribute"
78 #define DEBUG_TYPE LDIST_NAME
79 
80 /// @{
81 /// Metadata attribute names
82 static const char *const LLVMLoopDistributeFollowupAll =
83  "llvm.loop.distribute.followup_all";
84 static const char *const LLVMLoopDistributeFollowupCoincident =
85  "llvm.loop.distribute.followup_coincident";
86 static const char *const LLVMLoopDistributeFollowupSequential =
87  "llvm.loop.distribute.followup_sequential";
88 static const char *const LLVMLoopDistributeFollowupFallback =
89  "llvm.loop.distribute.followup_fallback";
90 /// @}
91 
92 static cl::opt<bool>
93  LDistVerify("loop-distribute-verify", cl::Hidden,
94  cl::desc("Turn on DominatorTree and LoopInfo verification "
95  "after Loop Distribution"),
96  cl::init(false));
97 
99  "loop-distribute-non-if-convertible", cl::Hidden,
100  cl::desc("Whether to distribute into a loop that may not be "
101  "if-convertible by the loop vectorizer"),
102  cl::init(false));
103 
105  "loop-distribute-scev-check-threshold", cl::init(8), cl::Hidden,
106  cl::desc("The maximum number of SCEV checks allowed for Loop "
107  "Distribution"));
108 
110  "loop-distribute-scev-check-threshold-with-pragma", cl::init(128),
111  cl::Hidden,
112  cl::desc(
113  "The maximum number of SCEV checks allowed for Loop "
114  "Distribution for loop marked with #pragma loop distribute(enable)"));
115 
117  "enable-loop-distribute", cl::Hidden,
118  cl::desc("Enable the new, experimental LoopDistribution Pass"),
119  cl::init(false));
120 
121 STATISTIC(NumLoopsDistributed, "Number of loops distributed");
122 
123 namespace {
124 
125 /// Maintains the set of instructions of the loop for a partition before
126 /// cloning. After cloning, it hosts the new loop.
127 class InstPartition {
128  using InstructionSet = SmallPtrSet<Instruction *, 8>;
129 
130 public:
131  InstPartition(Instruction *I, Loop *L, bool DepCycle = false)
132  : DepCycle(DepCycle), OrigLoop(L) {
133  Set.insert(I);
134  }
135 
136  /// Returns whether this partition contains a dependence cycle.
137  bool hasDepCycle() const { return DepCycle; }
138 
139  /// Adds an instruction to this partition.
140  void add(Instruction *I) { Set.insert(I); }
141 
142  /// Collection accessors.
143  InstructionSet::iterator begin() { return Set.begin(); }
144  InstructionSet::iterator end() { return Set.end(); }
145  InstructionSet::const_iterator begin() const { return Set.begin(); }
146  InstructionSet::const_iterator end() const { return Set.end(); }
147  bool empty() const { return Set.empty(); }
148 
149  /// Moves this partition into \p Other. This partition becomes empty
150  /// after this.
151  void moveTo(InstPartition &Other) {
152  Other.Set.insert(Set.begin(), Set.end());
153  Set.clear();
154  Other.DepCycle |= DepCycle;
155  }
156 
157  /// Populates the partition with a transitive closure of all the
158  /// instructions that the seeded instructions dependent on.
159  void populateUsedSet() {
160  // FIXME: We currently don't use control-dependence but simply include all
161  // blocks (possibly empty at the end) and let simplifycfg mostly clean this
162  // up.
163  for (auto *B : OrigLoop->getBlocks())
164  Set.insert(B->getTerminator());
165 
166  // Follow the use-def chains to form a transitive closure of all the
167  // instructions that the originally seeded instructions depend on.
168  SmallVector<Instruction *, 8> Worklist(Set.begin(), Set.end());
169  while (!Worklist.empty()) {
170  Instruction *I = Worklist.pop_back_val();
171  // Insert instructions from the loop that we depend on.
172  for (Value *V : I->operand_values()) {
173  auto *I = dyn_cast<Instruction>(V);
174  if (I && OrigLoop->contains(I->getParent()) && Set.insert(I).second)
175  Worklist.push_back(I);
176  }
177  }
178  }
179 
180  /// Clones the original loop.
181  ///
182  /// Updates LoopInfo and DominatorTree using the information that block \p
183  /// LoopDomBB dominates the loop.
184  Loop *cloneLoopWithPreheader(BasicBlock *InsertBefore, BasicBlock *LoopDomBB,
185  unsigned Index, LoopInfo *LI,
186  DominatorTree *DT) {
187  ClonedLoop = ::cloneLoopWithPreheader(InsertBefore, LoopDomBB, OrigLoop,
188  VMap, Twine(".ldist") + Twine(Index),
189  LI, DT, ClonedLoopBlocks);
190  return ClonedLoop;
191  }
192 
193  /// The cloned loop. If this partition is mapped to the original loop,
194  /// this is null.
195  const Loop *getClonedLoop() const { return ClonedLoop; }
196 
197  /// Returns the loop where this partition ends up after distribution.
198  /// If this partition is mapped to the original loop then use the block from
199  /// the loop.
200  Loop *getDistributedLoop() const {
201  return ClonedLoop ? ClonedLoop : OrigLoop;
202  }
203 
204  /// The VMap that is populated by cloning and then used in
205  /// remapinstruction to remap the cloned instructions.
206  ValueToValueMapTy &getVMap() { return VMap; }
207 
208  /// Remaps the cloned instructions using VMap.
209  void remapInstructions() {
210  remapInstructionsInBlocks(ClonedLoopBlocks, VMap);
211  }
212 
213  /// Based on the set of instructions selected for this partition,
214  /// removes the unnecessary ones.
215  void removeUnusedInsts() {
217 
218  for (auto *Block : OrigLoop->getBlocks())
219  for (auto &Inst : *Block)
220  if (!Set.count(&Inst)) {
221  Instruction *NewInst = &Inst;
222  if (!VMap.empty())
223  NewInst = cast<Instruction>(VMap[NewInst]);
224 
225  assert(!isa<BranchInst>(NewInst) &&
226  "Branches are marked used early on");
227  Unused.push_back(NewInst);
228  }
229 
230  // Delete the instructions backwards, as it has a reduced likelihood of
231  // having to update as many def-use and use-def chains.
232  for (auto *Inst : reverse(Unused)) {
233  if (!Inst->use_empty())
234  Inst->replaceAllUsesWith(UndefValue::get(Inst->getType()));
235  Inst->eraseFromParent();
236  }
237  }
238 
239  void print() const {
240  if (DepCycle)
241  dbgs() << " (cycle)\n";
242  for (auto *I : Set)
243  // Prefix with the block name.
244  dbgs() << " " << I->getParent()->getName() << ":" << *I << "\n";
245  }
246 
247  void printBlocks() const {
248  for (auto *BB : getDistributedLoop()->getBlocks())
249  dbgs() << *BB;
250  }
251 
252 private:
253  /// Instructions from OrigLoop selected for this partition.
254  InstructionSet Set;
255 
256  /// Whether this partition contains a dependence cycle.
257  bool DepCycle;
258 
259  /// The original loop.
260  Loop *OrigLoop;
261 
262  /// The cloned loop. If this partition is mapped to the original loop,
263  /// this is null.
264  Loop *ClonedLoop = nullptr;
265 
266  /// The blocks of ClonedLoop including the preheader. If this
267  /// partition is mapped to the original loop, this is empty.
268  SmallVector<BasicBlock *, 8> ClonedLoopBlocks;
269 
270  /// These gets populated once the set of instructions have been
271  /// finalized. If this partition is mapped to the original loop, these are not
272  /// set.
273  ValueToValueMapTy VMap;
274 };
275 
276 /// Holds the set of Partitions. It populates them, merges them and then
277 /// clones the loops.
278 class InstPartitionContainer {
279  using InstToPartitionIdT = DenseMap<Instruction *, int>;
280 
281 public:
282  InstPartitionContainer(Loop *L, LoopInfo *LI, DominatorTree *DT)
283  : L(L), LI(LI), DT(DT) {}
284 
285  /// Returns the number of partitions.
286  unsigned getSize() const { return PartitionContainer.size(); }
287 
288  /// Adds \p Inst into the current partition if that is marked to
289  /// contain cycles. Otherwise start a new partition for it.
290  void addToCyclicPartition(Instruction *Inst) {
291  // If the current partition is non-cyclic. Start a new one.
292  if (PartitionContainer.empty() || !PartitionContainer.back().hasDepCycle())
293  PartitionContainer.emplace_back(Inst, L, /*DepCycle=*/true);
294  else
295  PartitionContainer.back().add(Inst);
296  }
297 
298  /// Adds \p Inst into a partition that is not marked to contain
299  /// dependence cycles.
300  ///
301  // Initially we isolate memory instructions into as many partitions as
302  // possible, then later we may merge them back together.
303  void addToNewNonCyclicPartition(Instruction *Inst) {
304  PartitionContainer.emplace_back(Inst, L);
305  }
306 
307  /// Merges adjacent non-cyclic partitions.
308  ///
309  /// The idea is that we currently only want to isolate the non-vectorizable
310  /// partition. We could later allow more distribution among these partition
311  /// too.
312  void mergeAdjacentNonCyclic() {
313  mergeAdjacentPartitionsIf(
314  [](const InstPartition *P) { return !P->hasDepCycle(); });
315  }
316 
317  /// If a partition contains only conditional stores, we won't vectorize
318  /// it. Try to merge it with a previous cyclic partition.
319  void mergeNonIfConvertible() {
320  mergeAdjacentPartitionsIf([&](const InstPartition *Partition) {
321  if (Partition->hasDepCycle())
322  return true;
323 
324  // Now, check if all stores are conditional in this partition.
325  bool seenStore = false;
326 
327  for (auto *Inst : *Partition)
328  if (isa<StoreInst>(Inst)) {
329  seenStore = true;
331  return false;
332  }
333  return seenStore;
334  });
335  }
336 
337  /// Merges the partitions according to various heuristics.
338  void mergeBeforePopulating() {
339  mergeAdjacentNonCyclic();
341  mergeNonIfConvertible();
342  }
343 
344  /// Merges partitions in order to ensure that no loads are duplicated.
345  ///
346  /// We can't duplicate loads because that could potentially reorder them.
347  /// LoopAccessAnalysis provides dependency information with the context that
348  /// the order of memory operation is preserved.
349  ///
350  /// Return if any partitions were merged.
351  bool mergeToAvoidDuplicatedLoads() {
352  using LoadToPartitionT = DenseMap<Instruction *, InstPartition *>;
353  using ToBeMergedT = EquivalenceClasses<InstPartition *>;
354 
355  LoadToPartitionT LoadToPartition;
356  ToBeMergedT ToBeMerged;
357 
358  // Step through the partitions and create equivalence between partitions
359  // that contain the same load. Also put partitions in between them in the
360  // same equivalence class to avoid reordering of memory operations.
361  for (PartitionContainerT::iterator I = PartitionContainer.begin(),
362  E = PartitionContainer.end();
363  I != E; ++I) {
364  auto *PartI = &*I;
365 
366  // If a load occurs in two partitions PartI and PartJ, merge all
367  // partitions (PartI, PartJ] into PartI.
368  for (Instruction *Inst : *PartI)
369  if (isa<LoadInst>(Inst)) {
370  bool NewElt;
371  LoadToPartitionT::iterator LoadToPart;
372 
373  std::tie(LoadToPart, NewElt) =
374  LoadToPartition.insert(std::make_pair(Inst, PartI));
375  if (!NewElt) {
376  LLVM_DEBUG(dbgs()
377  << "Merging partitions due to this load in multiple "
378  << "partitions: " << PartI << ", " << LoadToPart->second
379  << "\n"
380  << *Inst << "\n");
381 
382  auto PartJ = I;
383  do {
384  --PartJ;
385  ToBeMerged.unionSets(PartI, &*PartJ);
386  } while (&*PartJ != LoadToPart->second);
387  }
388  }
389  }
390  if (ToBeMerged.empty())
391  return false;
392 
393  // Merge the member of an equivalence class into its class leader. This
394  // makes the members empty.
395  for (ToBeMergedT::iterator I = ToBeMerged.begin(), E = ToBeMerged.end();
396  I != E; ++I) {
397  if (!I->isLeader())
398  continue;
399 
400  auto PartI = I->getData();
401  for (auto PartJ : make_range(std::next(ToBeMerged.member_begin(I)),
402  ToBeMerged.member_end())) {
403  PartJ->moveTo(*PartI);
404  }
405  }
406 
407  // Remove the empty partitions.
408  PartitionContainer.remove_if(
409  [](const InstPartition &P) { return P.empty(); });
410 
411  return true;
412  }
413 
414  /// Sets up the mapping between instructions to partitions. If the
415  /// instruction is duplicated across multiple partitions, set the entry to -1.
416  void setupPartitionIdOnInstructions() {
417  int PartitionID = 0;
418  for (const auto &Partition : PartitionContainer) {
419  for (Instruction *Inst : Partition) {
420  bool NewElt;
421  InstToPartitionIdT::iterator Iter;
422 
423  std::tie(Iter, NewElt) =
424  InstToPartitionId.insert(std::make_pair(Inst, PartitionID));
425  if (!NewElt)
426  Iter->second = -1;
427  }
428  ++PartitionID;
429  }
430  }
431 
432  /// Populates the partition with everything that the seeding
433  /// instructions require.
434  void populateUsedSet() {
435  for (auto &P : PartitionContainer)
436  P.populateUsedSet();
437  }
438 
439  /// This performs the main chunk of the work of cloning the loops for
440  /// the partitions.
441  void cloneLoops() {
442  BasicBlock *OrigPH = L->getLoopPreheader();
443  // At this point the predecessor of the preheader is either the memcheck
444  // block or the top part of the original preheader.
445  BasicBlock *Pred = OrigPH->getSinglePredecessor();
446  assert(Pred && "Preheader does not have a single predecessor");
447  BasicBlock *ExitBlock = L->getExitBlock();
448  assert(ExitBlock && "No single exit block");
449  Loop *NewLoop;
450 
451  assert(!PartitionContainer.empty() && "at least two partitions expected");
452  // We're cloning the preheader along with the loop so we already made sure
453  // it was empty.
454  assert(&*OrigPH->begin() == OrigPH->getTerminator() &&
455  "preheader not empty");
456 
457  // Preserve the original loop ID for use after the transformation.
458  MDNode *OrigLoopID = L->getLoopID();
459 
460  // Create a loop for each partition except the last. Clone the original
461  // loop before PH along with adding a preheader for the cloned loop. Then
462  // update PH to point to the newly added preheader.
463  BasicBlock *TopPH = OrigPH;
464  unsigned Index = getSize() - 1;
465  for (auto I = std::next(PartitionContainer.rbegin()),
466  E = PartitionContainer.rend();
467  I != E; ++I, --Index, TopPH = NewLoop->getLoopPreheader()) {
468  auto *Part = &*I;
469 
470  NewLoop = Part->cloneLoopWithPreheader(TopPH, Pred, Index, LI, DT);
471 
472  Part->getVMap()[ExitBlock] = TopPH;
473  Part->remapInstructions();
474  setNewLoopID(OrigLoopID, Part);
475  }
476  Pred->getTerminator()->replaceUsesOfWith(OrigPH, TopPH);
477 
478  // Also set a new loop ID for the last loop.
479  setNewLoopID(OrigLoopID, &PartitionContainer.back());
480 
481  // Now go in forward order and update the immediate dominator for the
482  // preheaders with the exiting block of the previous loop. Dominance
483  // within the loop is updated in cloneLoopWithPreheader.
484  for (auto Curr = PartitionContainer.cbegin(),
485  Next = std::next(PartitionContainer.cbegin()),
486  E = PartitionContainer.cend();
487  Next != E; ++Curr, ++Next)
489  Next->getDistributedLoop()->getLoopPreheader(),
490  Curr->getDistributedLoop()->getExitingBlock());
491  }
492 
493  /// Removes the dead instructions from the cloned loops.
494  void removeUnusedInsts() {
495  for (auto &Partition : PartitionContainer)
496  Partition.removeUnusedInsts();
497  }
498 
499  /// For each memory pointer, it computes the partitionId the pointer is
500  /// used in.
501  ///
502  /// This returns an array of int where the I-th entry corresponds to I-th
503  /// entry in LAI.getRuntimePointerCheck(). If the pointer is used in multiple
504  /// partitions its entry is set to -1.
506  computePartitionSetForPointers(const LoopAccessInfo &LAI) {
507  const RuntimePointerChecking *RtPtrCheck = LAI.getRuntimePointerChecking();
508 
509  unsigned N = RtPtrCheck->Pointers.size();
510  SmallVector<int, 8> PtrToPartitions(N);
511  for (unsigned I = 0; I < N; ++I) {
512  Value *Ptr = RtPtrCheck->Pointers[I].PointerValue;
513  auto Instructions =
514  LAI.getInstructionsForAccess(Ptr, RtPtrCheck->Pointers[I].IsWritePtr);
515 
516  int &Partition = PtrToPartitions[I];
517  // First set it to uninitialized.
518  Partition = -2;
519  for (Instruction *Inst : Instructions) {
520  // Note that this could be -1 if Inst is duplicated across multiple
521  // partitions.
522  int ThisPartition = this->InstToPartitionId[Inst];
523  if (Partition == -2)
524  Partition = ThisPartition;
525  // -1 means belonging to multiple partitions.
526  else if (Partition == -1)
527  break;
528  else if (Partition != (int)ThisPartition)
529  Partition = -1;
530  }
531  assert(Partition != -2 && "Pointer not belonging to any partition");
532  }
533 
534  return PtrToPartitions;
535  }
536 
537  void print(raw_ostream &OS) const {
538  unsigned Index = 0;
539  for (const auto &P : PartitionContainer) {
540  OS << "Partition " << Index++ << " (" << &P << "):\n";
541  P.print();
542  }
543  }
544 
545  void dump() const { print(dbgs()); }
546 
547 #ifndef NDEBUG
548  friend raw_ostream &operator<<(raw_ostream &OS,
549  const InstPartitionContainer &Partitions) {
550  Partitions.print(OS);
551  return OS;
552  }
553 #endif
554 
555  void printBlocks() const {
556  unsigned Index = 0;
557  for (const auto &P : PartitionContainer) {
558  dbgs() << "\nPartition " << Index++ << " (" << &P << "):\n";
559  P.printBlocks();
560  }
561  }
562 
563 private:
564  using PartitionContainerT = std::list<InstPartition>;
565 
566  /// List of partitions.
567  PartitionContainerT PartitionContainer;
568 
569  /// Mapping from Instruction to partition Id. If the instruction
570  /// belongs to multiple partitions the entry contains -1.
571  InstToPartitionIdT InstToPartitionId;
572 
573  Loop *L;
574  LoopInfo *LI;
575  DominatorTree *DT;
576 
577  /// The control structure to merge adjacent partitions if both satisfy
578  /// the \p Predicate.
579  template <class UnaryPredicate>
580  void mergeAdjacentPartitionsIf(UnaryPredicate Predicate) {
581  InstPartition *PrevMatch = nullptr;
582  for (auto I = PartitionContainer.begin(); I != PartitionContainer.end();) {
583  auto DoesMatch = Predicate(&*I);
584  if (PrevMatch == nullptr && DoesMatch) {
585  PrevMatch = &*I;
586  ++I;
587  } else if (PrevMatch != nullptr && DoesMatch) {
588  I->moveTo(*PrevMatch);
589  I = PartitionContainer.erase(I);
590  } else {
591  PrevMatch = nullptr;
592  ++I;
593  }
594  }
595  }
596 
597  /// Assign new LoopIDs for the partition's cloned loop.
598  void setNewLoopID(MDNode *OrigLoopID, InstPartition *Part) {
600  OrigLoopID,
602  Part->hasDepCycle() ? LLVMLoopDistributeFollowupSequential
604  if (PartitionID.hasValue()) {
605  Loop *NewLoop = Part->getDistributedLoop();
606  NewLoop->setLoopID(PartitionID.getValue());
607  }
608  }
609 };
610 
611 /// For each memory instruction, this class maintains difference of the
612 /// number of unsafe dependences that start out from this instruction minus
613 /// those that end here.
614 ///
615 /// By traversing the memory instructions in program order and accumulating this
616 /// number, we know whether any unsafe dependence crosses over a program point.
617 class MemoryInstructionDependences {
619 
620 public:
621  struct Entry {
622  Instruction *Inst;
623  unsigned NumUnsafeDependencesStartOrEnd = 0;
624 
625  Entry(Instruction *Inst) : Inst(Inst) {}
626  };
627 
628  using AccessesType = SmallVector<Entry, 8>;
629 
630  AccessesType::const_iterator begin() const { return Accesses.begin(); }
631  AccessesType::const_iterator end() const { return Accesses.end(); }
632 
633  MemoryInstructionDependences(
635  const SmallVectorImpl<Dependence> &Dependences) {
636  Accesses.append(Instructions.begin(), Instructions.end());
637 
638  LLVM_DEBUG(dbgs() << "Backward dependences:\n");
639  for (auto &Dep : Dependences)
640  if (Dep.isPossiblyBackward()) {
641  // Note that the designations source and destination follow the program
642  // order, i.e. source is always first. (The direction is given by the
643  // DepType.)
644  ++Accesses[Dep.Source].NumUnsafeDependencesStartOrEnd;
645  --Accesses[Dep.Destination].NumUnsafeDependencesStartOrEnd;
646 
647  LLVM_DEBUG(Dep.print(dbgs(), 2, Instructions));
648  }
649  }
650 
651 private:
652  AccessesType Accesses;
653 };
654 
655 /// The actual class performing the per-loop work.
656 class LoopDistributeForLoop {
657 public:
658  LoopDistributeForLoop(Loop *L, Function *F, LoopInfo *LI, DominatorTree *DT,
660  : L(L), F(F), LI(LI), DT(DT), SE(SE), ORE(ORE) {
661  setForced();
662  }
663 
664  /// Try to distribute an inner-most loop.
665  bool processLoop(std::function<const LoopAccessInfo &(Loop &)> &GetLAA) {
666  assert(L->isInnermost() && "Only process inner loops.");
667 
668  LLVM_DEBUG(dbgs() << "\nLDist: In \""
669  << L->getHeader()->getParent()->getName()
670  << "\" checking " << *L << "\n");
671 
672  // Having a single exit block implies there's also one exiting block.
673  if (!L->getExitBlock())
674  return fail("MultipleExitBlocks", "multiple exit blocks");
675  if (!L->isLoopSimplifyForm())
676  return fail("NotLoopSimplifyForm",
677  "loop is not in loop-simplify form");
678  if (!L->isRotatedForm())
679  return fail("NotBottomTested", "loop is not bottom tested");
680 
681  BasicBlock *PH = L->getLoopPreheader();
682 
683  LAI = &GetLAA(*L);
684 
685  // Currently, we only distribute to isolate the part of the loop with
686  // dependence cycles to enable partial vectorization.
687  if (LAI->canVectorizeMemory())
688  return fail("MemOpsCanBeVectorized",
689  "memory operations are safe for vectorization");
690 
691  auto *Dependences = LAI->getDepChecker().getDependences();
692  if (!Dependences || Dependences->empty())
693  return fail("NoUnsafeDeps", "no unsafe dependences to isolate");
694 
695  InstPartitionContainer Partitions(L, LI, DT);
696 
697  // First, go through each memory operation and assign them to consecutive
698  // partitions (the order of partitions follows program order). Put those
699  // with unsafe dependences into "cyclic" partition otherwise put each store
700  // in its own "non-cyclic" partition (we'll merge these later).
701  //
702  // Note that a memory operation (e.g. Load2 below) at a program point that
703  // has an unsafe dependence (Store3->Load1) spanning over it must be
704  // included in the same cyclic partition as the dependent operations. This
705  // is to preserve the original program order after distribution. E.g.:
706  //
707  // NumUnsafeDependencesStartOrEnd NumUnsafeDependencesActive
708  // Load1 -. 1 0->1
709  // Load2 | /Unsafe/ 0 1
710  // Store3 -' -1 1->0
711  // Load4 0 0
712  //
713  // NumUnsafeDependencesActive > 0 indicates this situation and in this case
714  // we just keep assigning to the same cyclic partition until
715  // NumUnsafeDependencesActive reaches 0.
716  const MemoryDepChecker &DepChecker = LAI->getDepChecker();
717  MemoryInstructionDependences MID(DepChecker.getMemoryInstructions(),
718  *Dependences);
719 
720  int NumUnsafeDependencesActive = 0;
721  for (auto &InstDep : MID) {
722  Instruction *I = InstDep.Inst;
723  // We update NumUnsafeDependencesActive post-instruction, catch the
724  // start of a dependence directly via NumUnsafeDependencesStartOrEnd.
725  if (NumUnsafeDependencesActive ||
726  InstDep.NumUnsafeDependencesStartOrEnd > 0)
727  Partitions.addToCyclicPartition(I);
728  else
729  Partitions.addToNewNonCyclicPartition(I);
730  NumUnsafeDependencesActive += InstDep.NumUnsafeDependencesStartOrEnd;
731  assert(NumUnsafeDependencesActive >= 0 &&
732  "Negative number of dependences active");
733  }
734 
735  // Add partitions for values used outside. These partitions can be out of
736  // order from the original program order. This is OK because if the
737  // partition uses a load we will merge this partition with the original
738  // partition of the load that we set up in the previous loop (see
739  // mergeToAvoidDuplicatedLoads).
740  auto DefsUsedOutside = findDefsUsedOutsideOfLoop(L);
741  for (auto *Inst : DefsUsedOutside)
742  Partitions.addToNewNonCyclicPartition(Inst);
743 
744  LLVM_DEBUG(dbgs() << "Seeded partitions:\n" << Partitions);
745  if (Partitions.getSize() < 2)
746  return fail("CantIsolateUnsafeDeps",
747  "cannot isolate unsafe dependencies");
748 
749  // Run the merge heuristics: Merge non-cyclic adjacent partitions since we
750  // should be able to vectorize these together.
751  Partitions.mergeBeforePopulating();
752  LLVM_DEBUG(dbgs() << "\nMerged partitions:\n" << Partitions);
753  if (Partitions.getSize() < 2)
754  return fail("CantIsolateUnsafeDeps",
755  "cannot isolate unsafe dependencies");
756 
757  // Now, populate the partitions with non-memory operations.
758  Partitions.populateUsedSet();
759  LLVM_DEBUG(dbgs() << "\nPopulated partitions:\n" << Partitions);
760 
761  // In order to preserve original lexical order for loads, keep them in the
762  // partition that we set up in the MemoryInstructionDependences loop.
763  if (Partitions.mergeToAvoidDuplicatedLoads()) {
764  LLVM_DEBUG(dbgs() << "\nPartitions merged to ensure unique loads:\n"
765  << Partitions);
766  if (Partitions.getSize() < 2)
767  return fail("CantIsolateUnsafeDeps",
768  "cannot isolate unsafe dependencies");
769  }
770 
771  // Don't distribute the loop if we need too many SCEV run-time checks, or
772  // any if it's illegal.
773  const SCEVUnionPredicate &Pred = LAI->getPSE().getUnionPredicate();
774  if (LAI->hasConvergentOp() && !Pred.isAlwaysTrue()) {
775  return fail("RuntimeCheckWithConvergent",
776  "may not insert runtime check with convergent operation");
777  }
778 
779  if (Pred.getComplexity() > (IsForced.getValueOr(false)
782  return fail("TooManySCEVRuntimeChecks",
783  "too many SCEV run-time checks needed.\n");
784 
785  if (!IsForced.getValueOr(false) && hasDisableAllTransformsHint(L))
786  return fail("HeuristicDisabled", "distribution heuristic disabled");
787 
788  LLVM_DEBUG(dbgs() << "\nDistributing loop: " << *L << "\n");
789  // We're done forming the partitions set up the reverse mapping from
790  // instructions to partitions.
791  Partitions.setupPartitionIdOnInstructions();
792 
793  // If we need run-time checks, version the loop now.
794  auto PtrToPartition = Partitions.computePartitionSetForPointers(*LAI);
795  const auto *RtPtrChecking = LAI->getRuntimePointerChecking();
796  const auto &AllChecks = RtPtrChecking->getChecks();
797  auto Checks = includeOnlyCrossPartitionChecks(AllChecks, PtrToPartition,
798  RtPtrChecking);
799 
800  if (LAI->hasConvergentOp() && !Checks.empty()) {
801  return fail("RuntimeCheckWithConvergent",
802  "may not insert runtime check with convergent operation");
803  }
804 
805  // To keep things simple have an empty preheader before we version or clone
806  // the loop. (Also split if this has no predecessor, i.e. entry, because we
807  // rely on PH having a predecessor.)
808  if (!PH->getSinglePredecessor() || &*PH->begin() != PH->getTerminator())
809  SplitBlock(PH, PH->getTerminator(), DT, LI);
810 
811  if (!Pred.isAlwaysTrue() || !Checks.empty()) {
812  assert(!LAI->hasConvergentOp() && "inserting illegal loop versioning");
813 
814  MDNode *OrigLoopID = L->getLoopID();
815 
816  LLVM_DEBUG(dbgs() << "\nPointers:\n");
818  LoopVersioning LVer(*LAI, Checks, L, LI, DT, SE);
819  LVer.versionLoop(DefsUsedOutside);
820  LVer.annotateLoopWithNoAlias();
821 
822  // The unversioned loop will not be changed, so we inherit all attributes
823  // from the original loop, but remove the loop distribution metadata to
824  // avoid to distribute it again.
825  MDNode *UnversionedLoopID =
826  makeFollowupLoopID(OrigLoopID,
829  "llvm.loop.distribute.", true)
830  .getValue();
831  LVer.getNonVersionedLoop()->setLoopID(UnversionedLoopID);
832  }
833 
834  // Create identical copies of the original loop for each partition and hook
835  // them up sequentially.
836  Partitions.cloneLoops();
837 
838  // Now, we remove the instruction from each loop that don't belong to that
839  // partition.
840  Partitions.removeUnusedInsts();
841  LLVM_DEBUG(dbgs() << "\nAfter removing unused Instrs:\n");
842  LLVM_DEBUG(Partitions.printBlocks());
843 
844  if (LDistVerify) {
845  LI->verify(*DT);
847  }
848 
849  ++NumLoopsDistributed;
850  // Report the success.
851  ORE->emit([&]() {
852  return OptimizationRemark(LDIST_NAME, "Distribute", L->getStartLoc(),
853  L->getHeader())
854  << "distributed loop";
855  });
856  return true;
857  }
858 
859  /// Provide diagnostics then \return with false.
860  bool fail(StringRef RemarkName, StringRef Message) {
861  LLVMContext &Ctx = F->getContext();
862  bool Forced = isForced().getValueOr(false);
863 
864  LLVM_DEBUG(dbgs() << "Skipping; " << Message << "\n");
865 
866  // With Rpass-missed report that distribution failed.
867  ORE->emit([&]() {
868  return OptimizationRemarkMissed(LDIST_NAME, "NotDistributed",
869  L->getStartLoc(), L->getHeader())
870  << "loop not distributed: use -Rpass-analysis=loop-distribute for "
871  "more "
872  "info";
873  });
874 
875  // With Rpass-analysis report why. This is on by default if distribution
876  // was requested explicitly.
877  ORE->emit(OptimizationRemarkAnalysis(
879  RemarkName, L->getStartLoc(), L->getHeader())
880  << "loop not distributed: " << Message);
881 
882  // Also issue a warning if distribution was requested explicitly but it
883  // failed.
884  if (Forced)
886  *F, L->getStartLoc(), "loop not distributed: failed "
887  "explicitly specified loop distribution"));
888 
889  return false;
890  }
891 
892  /// Return if distribution forced to be enabled/disabled for the loop.
893  ///
894  /// If the optional has a value, it indicates whether distribution was forced
895  /// to be enabled (true) or disabled (false). If the optional has no value
896  /// distribution was not forced either way.
897  const Optional<bool> &isForced() const { return IsForced; }
898 
899 private:
900  /// Filter out checks between pointers from the same partition.
901  ///
902  /// \p PtrToPartition contains the partition number for pointers. Partition
903  /// number -1 means that the pointer is used in multiple partitions. In this
904  /// case we can't safely omit the check.
905  SmallVector<RuntimePointerCheck, 4> includeOnlyCrossPartitionChecks(
906  const SmallVectorImpl<RuntimePointerCheck> &AllChecks,
907  const SmallVectorImpl<int> &PtrToPartition,
908  const RuntimePointerChecking *RtPtrChecking) {
910 
911  copy_if(AllChecks, std::back_inserter(Checks),
912  [&](const RuntimePointerCheck &Check) {
913  for (unsigned PtrIdx1 : Check.first->Members)
914  for (unsigned PtrIdx2 : Check.second->Members)
915  // Only include this check if there is a pair of pointers
916  // that require checking and the pointers fall into
917  // separate partitions.
918  //
919  // (Note that we already know at this point that the two
920  // pointer groups need checking but it doesn't follow
921  // that each pair of pointers within the two groups need
922  // checking as well.
923  //
924  // In other words we don't want to include a check just
925  // because there is a pair of pointers between the two
926  // pointer groups that require checks and a different
927  // pair whose pointers fall into different partitions.)
928  if (RtPtrChecking->needsChecking(PtrIdx1, PtrIdx2) &&
930  PtrToPartition, PtrIdx1, PtrIdx2))
931  return true;
932  return false;
933  });
934 
935  return Checks;
936  }
937 
938  /// Check whether the loop metadata is forcing distribution to be
939  /// enabled/disabled.
940  void setForced() {
942  findStringMetadataForLoop(L, "llvm.loop.distribute.enable");
943  if (!Value)
944  return;
945 
946  const MDOperand *Op = *Value;
947  assert(Op && mdconst::hasa<ConstantInt>(*Op) && "invalid metadata");
948  IsForced = mdconst::extract<ConstantInt>(*Op)->getZExtValue();
949  }
950 
951  Loop *L;
952  Function *F;
953 
954  // Analyses used.
955  LoopInfo *LI;
956  const LoopAccessInfo *LAI = nullptr;
957  DominatorTree *DT;
958  ScalarEvolution *SE;
960 
961  /// Indicates whether distribution is forced to be enabled/disabled for
962  /// the loop.
963  ///
964  /// If the optional has a value, it indicates whether distribution was forced
965  /// to be enabled (true) or disabled (false). If the optional has no value
966  /// distribution was not forced either way.
967  Optional<bool> IsForced;
968 };
969 
970 } // end anonymous namespace
971 
972 /// Shared implementation between new and old PMs.
973 static bool runImpl(Function &F, LoopInfo *LI, DominatorTree *DT,
975  std::function<const LoopAccessInfo &(Loop &)> &GetLAA) {
976  // Build up a worklist of inner-loops to vectorize. This is necessary as the
977  // act of distributing a loop creates new loops and can invalidate iterators
978  // across the loops.
979  SmallVector<Loop *, 8> Worklist;
980 
981  for (Loop *TopLevelLoop : *LI)
982  for (Loop *L : depth_first(TopLevelLoop))
983  // We only handle inner-most loops.
984  if (L->isInnermost())
985  Worklist.push_back(L);
986 
987  // Now walk the identified inner loops.
988  bool Changed = false;
989  for (Loop *L : Worklist) {
990  LoopDistributeForLoop LDL(L, &F, LI, DT, SE, ORE);
991 
992  // If distribution was forced for the specific loop to be
993  // enabled/disabled, follow that. Otherwise use the global flag.
994  if (LDL.isForced().getValueOr(EnableLoopDistribute))
995  Changed |= LDL.processLoop(GetLAA);
996  }
997 
998  // Process each loop nest in the function.
999  return Changed;
1000 }
1001 
1002 namespace {
1003 
1004 /// The pass class.
1005 class LoopDistributeLegacy : public FunctionPass {
1006 public:
1007  static char ID;
1008 
1009  LoopDistributeLegacy() : FunctionPass(ID) {
1010  // The default is set by the caller.
1012  }
1013 
1014  bool runOnFunction(Function &F) override {
1015  if (skipFunction(F))
1016  return false;
1017 
1018  auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
1019  auto *LAA = &getAnalysis<LoopAccessLegacyAnalysis>();
1020  auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1021  auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
1022  auto *ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
1023  std::function<const LoopAccessInfo &(Loop &)> GetLAA =
1024  [&](Loop &L) -> const LoopAccessInfo & { return LAA->getInfo(&L); };
1025 
1026  return runImpl(F, LI, DT, SE, ORE, GetLAA);
1027  }
1028 
1029  void getAnalysisUsage(AnalysisUsage &AU) const override {
1038  }
1039 };
1040 
1041 } // end anonymous namespace
1042 
1045  auto &LI = AM.getResult<LoopAnalysis>(F);
1046  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
1047  auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
1049 
1050  // We don't directly need these analyses but they're required for loop
1051  // analyses so provide them below.
1052  auto &AA = AM.getResult<AAManager>(F);
1053  auto &AC = AM.getResult<AssumptionAnalysis>(F);
1054  auto &TTI = AM.getResult<TargetIRAnalysis>(F);
1055  auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
1056 
1057  auto &LAM = AM.getResult<LoopAnalysisManagerFunctionProxy>(F).getManager();
1058  std::function<const LoopAccessInfo &(Loop &)> GetLAA =
1059  [&](Loop &L) -> const LoopAccessInfo & {
1060  LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE,
1061  TLI, TTI, nullptr, nullptr, nullptr};
1062  return LAM.getResult<LoopAccessAnalysis>(L, AR);
1063  };
1064 
1065  bool Changed = runImpl(F, &LI, &DT, &SE, &ORE, GetLAA);
1066  if (!Changed)
1067  return PreservedAnalyses::all();
1068  PreservedAnalyses PA;
1069  PA.preserve<LoopAnalysis>();
1071  return PA;
1072 }
1073 
1075 
1076 static const char ldist_name[] = "Loop Distribution";
1077 
1078 INITIALIZE_PASS_BEGIN(LoopDistributeLegacy, LDIST_NAME, ldist_name, false,
1079  false)
1085 INITIALIZE_PASS_END(LoopDistributeLegacy, LDIST_NAME, ldist_name, false, false)
1086 
1087 FunctionPass *llvm::createLoopDistributePass() { return new LoopDistributeLegacy(); }
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:155
AssumptionCache.h
llvm::AAManager
A manager for alias analyses.
Definition: AliasAnalysis.h:1288
llvm::MemoryDepChecker
Checks memory dependences among accesses to the same underlying object to determine whether there vec...
Definition: LoopAccessAnalysis.h:87
llvm::TargetIRAnalysis
Analysis pass providing the TargetTransformInfo.
Definition: TargetTransformInfo.h:2372
llvm::OptimizationRemarkMissed
Diagnostic information for missed-optimization remarks.
Definition: DiagnosticInfo.h:730
DistributeSCEVCheckThreshold
static cl::opt< unsigned > DistributeSCEVCheckThreshold("loop-distribute-scev-check-threshold", cl::init(8), cl::Hidden, cl::desc("The maximum number of SCEV checks allowed for Loop " "Distribution"))
llvm::ScalarEvolutionAnalysis
Analysis pass that exposes the ScalarEvolution for a function.
Definition: ScalarEvolution.h:2093
llvm::LoopAccessLegacyAnalysis
This analysis provides dependence information for the memory accesses of a loop.
Definition: LoopAccessAnalysis.h:714
llvm::minidump::StreamType::Unused
@ Unused
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AllocatorList.h:23
llvm::sys::path::const_iterator::end
friend const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:235
llvm::RuntimePointerCheck
std::pair< const RuntimeCheckingPtrGroup *, const RuntimeCheckingPtrGroup * > RuntimePointerCheck
A memcheck which made up of a pair of grouped pointers.
Definition: LoopAccessAnalysis.h:364
llvm::make_range
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
Definition: iterator_range.h:53
llvm::LoopBase::getExitBlock
BlockT * getExitBlock() const
If getExitBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:82
Optional.h
ValueMapper.h
print
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
Definition: ArchiveWriter.cpp:147
Metadata.h
llvm::cloneLoopWithPreheader
Loop * cloneLoopWithPreheader(BasicBlock *Before, BasicBlock *LoopDomBB, Loop *OrigLoop, ValueToValueMapTy &VMap, const Twine &NameSuffix, LoopInfo *LI, DominatorTree *DT, SmallVectorImpl< BasicBlock * > &Blocks)
Clones a loop OrigLoop.
Definition: CloneFunction.cpp:801
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:107
llvm::AnalysisManager::getResult
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:783
Scalar.h
llvm::hasDisableAllTransformsHint
bool hasDisableAllTransformsHint(const Loop *L)
Look for the loop attribute that disables all transformation heuristic.
Definition: LoopUtils.cpp:352
llvm::Function
Definition: Function.h:62
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:530
StringRef.h
P
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
Definition: README-SSE.txt:411
Pass.h
llvm::MipsISD::LDL
@ LDL
Definition: MipsISelLowering.h:249
llvm::SmallVector< Instruction *, 8 >
llvm::LoopInfoBase::verify
void verify(const DominatorTreeBase< BlockT, false > &DomTree) const
Definition: LoopInfoImpl.h:690
Statistic.h
llvm::EquivalenceClasses
EquivalenceClasses - This represents a collection of equivalence classes and supports three efficient...
Definition: EquivalenceClasses.h:59
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:169
LoopAccessAnalysis.h
llvm::Loop::getStartLoc
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
Definition: LoopInfo.cpp:634
llvm::PredicatedScalarEvolution::getUnionPredicate
const SCEVUnionPredicate & getUnionPredicate() const
Definition: ScalarEvolution.cpp:13614
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:460
llvm::makeFollowupLoopID
Optional< MDNode * > makeFollowupLoopID(MDNode *OrigLoopID, ArrayRef< StringRef > FollowupAttrs, const char *InheritOptionsAttrsPrefix="", bool AlwaysNew=false)
Create a new loop identifier for a loop created from a loop transformation.
Definition: LoopUtils.cpp:271
llvm::Dependence
Dependence - This class represents a dependence between two memory memory references in a function.
Definition: DependenceAnalysis.h:71
OptimizationRemarkEmitter.h
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:151
GlobalsModRef.h
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:143
llvm::LoopAccessAnalysis
This analysis provides dependence information for the memory accesses of a loop.
Definition: LoopAccessAnalysis.h:756
ScalarEvolution.h
llvm::findDefsUsedOutsideOfLoop
SmallVector< Instruction *, 8 > findDefsUsedOutsideOfLoop(Loop *L)
Returns the instructions that use values defined in the loop.
Definition: LoopUtils.cpp:131
DenseMap.h
Instructions
Code Generation Notes for reduce the size of the ISel and reduce repetition in the implementation In a small number of this can cause even when no optimisation has taken place Instructions
Definition: MSA.txt:11
llvm::RuntimePointerChecking::needsChecking
bool needsChecking(const RuntimeCheckingPtrGroup &M, const RuntimeCheckingPtrGroup &N) const
Decide if we need to add a check between two groups of pointers, according to needsChecking.
Definition: LoopAccessAnalysis.cpp:260
llvm::reverse
auto reverse(ContainerTy &&C, std::enable_if_t< has_rbegin< ContainerTy >::value > *=nullptr)
Definition: STLExtras.h:359
llvm::sys::path::end
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:235
llvm::sys::path::begin
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
Definition: Path.cpp:226
PragmaDistributeSCEVCheckThreshold
static cl::opt< unsigned > PragmaDistributeSCEVCheckThreshold("loop-distribute-scev-check-threshold-with-pragma", cl::init(128), cl::Hidden, cl::desc("The maximum number of SCEV checks allowed for Loop " "Distribution for loop marked with #pragma loop distribute(enable)"))
llvm::LoopInfoWrapperPass
The legacy pass manager's analysis pass to compute loop information.
Definition: LoopInfo.h:1268
llvm::LoopStandardAnalysisResults
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
Definition: LoopAnalysisManager.h:52
llvm::Optional
Definition: APInt.h:33
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:449
LLVMLoopDistributeFollowupFallback
static const char *const LLVMLoopDistributeFollowupFallback
Definition: LoopDistribute.cpp:88
llvm::dump
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
Definition: SparseBitVector.h:876
llvm::BasicBlock::getSinglePredecessor
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:268
STLExtras.h
LLVMLoopDistributeFollowupAll
static const char *const LLVMLoopDistributeFollowupAll
Definition: LoopDistribute.cpp:82
llvm::Sched::Fast
@ Fast
Definition: TargetLowering.h:105
llvm::initializeLoopDistributeLegacyPass
void initializeLoopDistributeLegacyPass(PassRegistry &)
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
DepthFirstIterator.h
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
LoopAnalysisManager.h
llvm::Optional::hasValue
constexpr bool hasValue() const
Definition: Optional.h:288
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
Instruction.h
CommandLine.h
ldist_name
static const char ldist_name[]
Definition: LoopDistribute.cpp:1076
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
Constants.h
EnableLoopDistribute
static cl::opt< bool > EnableLoopDistribute("enable-loop-distribute", cl::Hidden, cl::desc("Enable the new, experimental LoopDistribution Pass"), cl::init(false))
llvm::LoopAccessInfo::blockNeedsPredication
static bool blockNeedsPredication(BasicBlock *BB, Loop *TheLoop, DominatorTree *DT)
Return true if the block BB needs to be predicated in order for the loop to be vectorized.
Definition: LoopAccessAnalysis.cpp:2145
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Twine.h
InstrTypes.h
llvm::BasicBlock::begin
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:296
llvm::remapInstructionsInBlocks
void remapInstructionsInBlocks(const SmallVectorImpl< BasicBlock * > &Blocks, ValueToValueMapTy &VMap)
Remaps instructions in Blocks using the mapping in VMap.
Definition: CloneFunction.cpp:787
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
TargetLibraryInfo.h
false
Definition: StackSlotColoring.cpp:142
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
llvm::Instruction
Definition: Instruction.h:45
runImpl
static bool runImpl(Function &F, LoopInfo *LI, DominatorTree *DT, ScalarEvolution *SE, OptimizationRemarkEmitter *ORE, std::function< const LoopAccessInfo &(Loop &)> &GetLAA)
Shared implementation between new and old PMs.
Definition: LoopDistribute.cpp:973
llvm::DominatorTreeWrapperPass
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:287
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::raw_ostream
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:53
llvm::operator<<
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Definition: APFixedPoint.h:230
llvm::UndefValue::get
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1796
LoopUtils.h
llvm::ScalarEvolutionWrapperPass
Definition: ScalarEvolution.h:2123
SmallPtrSet.h
llvm::createLoopDistributePass
FunctionPass * createLoopDistributePass()
llvm::DominatorTreeBase::changeImmediateDominator
void changeImmediateDominator(DomTreeNodeBase< NodeT > *N, DomTreeNodeBase< NodeT > *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node's...
Definition: GenericDomTree.h:655
llvm::MemoryDepChecker::getDependences
const SmallVectorImpl< Dependence > * getDependences() const
Returns the memory dependences.
Definition: LoopAccessAnalysis.h:224
llvm::LoopAccessInfo::hasConvergentOp
bool hasConvergentOp() const
Return true if there is a convergent operation in the loop.
Definition: LoopAccessAnalysis.h:527
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
LoopInfo.h
DistributeNonIfConvertible
static cl::opt< bool > DistributeNonIfConvertible("loop-distribute-non-if-convertible", cl::Hidden, cl::desc("Whether to distribute into a loop that may not be " "if-convertible by the loop vectorizer"), cl::init(false))
LDIST_NAME
#define LDIST_NAME
Definition: LoopDistribute.cpp:77
Check
static bool Check(DecodeStatus &Out, DecodeStatus In)
Definition: AArch64Disassembler.cpp:242
llvm::findStringMetadataForLoop
Optional< const MDOperand * > findStringMetadataForLoop(const Loop *TheLoop, StringRef Name)
Find string metadata for loop.
Definition: LoopInfo.cpp:1055
llvm::Loop::isLoopSimplifyForm
bool isLoopSimplifyForm() const
Return true if the Loop is in the form that the LoopSimplify form transforms loops to,...
Definition: LoopInfo.cpp:479
BasicBlock.h
llvm::cl::opt< bool >
llvm::PPC::Predicate
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
Definition: PPCPredicates.h:26
llvm::SCEVUnionPredicate::getComplexity
unsigned getComplexity() const override
We estimate the complexity of a union predicate as the size number of predicates in the union.
Definition: ScalarEvolution.h:449
llvm::RuntimePointerChecking
Holds information about the memory runtime legality checks to verify that a group of pointers do not ...
Definition: LoopAccessAnalysis.h:368
Index
uint32_t Index
Definition: ELFObjHandler.cpp:84
llvm::MemoryDepChecker::Dependence
Dependece between memory access instructions.
Definition: LoopAccessAnalysis.h:108
llvm::AssumptionAnalysis
A function analysis which provides an AssumptionCache.
Definition: AssumptionCache.h:173
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
llvm::PreservedAnalyses::preserve
void preserve()
Mark an analysis as preserved.
Definition: PassManager.h:176
llvm::LoopVersioning
This class emits a version of the loop where run-time checks ensure that may-alias pointers can't ove...
Definition: LoopVersioning.h:40
llvm::ARM_AM::add
@ add
Definition: ARMAddressingModes.h:39
llvm::LoopAccessInfo::getInstructionsForAccess
SmallVector< Instruction *, 4 > getInstructionsForAccess(Value *Ptr, bool isWrite) const
Return the list of instructions that use Ptr to read or write memory.
Definition: LoopAccessAnalysis.h:561
llvm::RuntimePointerChecking::getChecks
const SmallVectorImpl< RuntimePointerCheck > & getChecks() const
Returns the checks that generateChecks created.
Definition: LoopAccessAnalysis.h:426
llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
llvm::DenseMap
Definition: DenseMap.h:714
fail
static void fail(const SDLoc &DL, SelectionDAG &DAG, const Twine &Msg)
Definition: BPFISelLowering.cpp:38
llvm::LoopAccessInfo::getRuntimePointerChecking
const RuntimePointerChecking * getRuntimePointerChecking() const
Definition: LoopAccessAnalysis.h:529
I
#define I(x, y, z)
Definition: MD5.cpp:59
Cloning.h
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:441
llvm::LoopAccessInfo
Drive the analysis of memory accesses in the loop.
Definition: LoopAccessAnalysis.h:515
llvm::LoopBase::getLoopPreheader
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:167
llvm::sys::path::const_iterator::begin
friend const_iterator begin(StringRef path, Style style)
Get begin iterator over path.
Definition: Path.cpp:226
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::MemoryDepChecker::getMemoryInstructions
const SmallVectorImpl< Instruction * > & getMemoryInstructions() const
The vector of memory access instructions.
Definition: LoopAccessAnalysis.h:232
llvm::LoopDistributePass::run
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Definition: LoopDistribute.cpp:1043
llvm::User::replaceUsesOfWith
void replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Definition: User.cpp:21
llvm::LoopAccessInfo::canVectorizeMemory
bool canVectorizeMemory() const
Return true we can analyze the memory accesses in the loop and there are no memory dependence cycles.
Definition: LoopAccessAnalysis.h:522
iterator_range.h
llvm::TTI
TargetTransformInfo TTI
Definition: TargetTransformInfo.h:164
function
print Print MemDeps of function
Definition: MemDepPrinter.cpp:83
llvm::MDNode
Metadata node.
Definition: Metadata.h:906
llvm::SCEVUnionPredicate::isAlwaysTrue
bool isAlwaysTrue() const override
Implementation of the SCEVPredicate interface.
Definition: ScalarEvolution.cpp:13513
llvm::LoopAccessInfo::getDepChecker
const MemoryDepChecker & getDepChecker() const
the Memory Dependence Checker which can determine the loop-independent and loop-carried dependences b...
Definition: LoopAccessAnalysis.h:557
LLVMLoopDistributeFollowupSequential
static const char *const LLVMLoopDistributeFollowupSequential
Definition: LoopDistribute.cpp:86
llvm::LoopInfo
Definition: LoopInfo.h:1083
llvm::OptimizationRemarkEmitter
The optimization diagnostic interface.
Definition: OptimizationRemarkEmitter.h:33
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:57
if
if(llvm_vc STREQUAL "") set(fake_version_inc "$
Definition: CMakeLists.txt:14
llvm::AnalysisUsage::addPreserved
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Definition: PassAnalysisSupport.h:98
llvm::OptimizationRemarkAnalysis
Diagnostic information for optimization analysis remarks.
Definition: DiagnosticInfo.h:776
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
llvm::ValueMap< const Value *, WeakTrackingVH >
for
this could be done in SelectionDAGISel along with other special for
Definition: README.txt:104
llvm::BasicBlock::getTerminator
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:152
LAM
LoopAnalysisManager LAM
Definition: PassBuilderBindings.cpp:58
llvm::depth_first
iterator_range< df_iterator< T > > depth_first(const T &G)
Definition: DepthFirstIterator.h:229
runOnFunction
static bool runOnFunction(Function &F, bool PostInlining)
Definition: EntryExitInstrumenter.cpp:69
llvm::RuntimePointerChecking::Pointers
SmallVector< PointerInfo, 2 > Pointers
Information about the pointers that may require checking.
Definition: LoopAccessAnalysis.h:451
llvm::LoopBase::isInnermost
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
Definition: LoopInfo.h:165
llvm::RuntimePointerChecking::arePointersInSamePartition
static bool arePointersInSamePartition(const SmallVectorImpl< int > &PtrToPartition, unsigned PtrIdx1, unsigned PtrIdx2)
Check if pointers are in the same partition.
Definition: LoopAccessAnalysis.cpp:443
llvm::LLVMContext::diagnose
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
Definition: LLVMContext.cpp:228
llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:83
llvm::OptimizationRemarkEmitterWrapperPass
OptimizationRemarkEmitter legacy analysis pass.
Definition: OptimizationRemarkEmitter.h:146
llvm::empty
constexpr bool empty(const T &RangeOrContainer)
Test whether RangeOrContainer is empty. Similar to C++17 std::empty.
Definition: STLExtras.h:254
llvm::copy_if
OutputIt copy_if(R &&Range, OutputIt Out, UnaryPredicate P)
Provide wrappers to std::copy_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1646
llvm::Loop::setLoopID
void setLoopID(MDNode *LoopID) const
Set the llvm.loop loop id metadata for this loop.
Definition: LoopInfo.cpp:528
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:325
llvm::SCEVUnionPredicate
This class represents a composition of other SCEV predicates, and is the class that most clients will...
Definition: ScalarEvolution.h:416
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:161
LDistVerify
static cl::opt< bool > LDistVerify("loop-distribute-verify", cl::Hidden, cl::desc("Turn on DominatorTree and LoopInfo verification " "after Loop Distribution"), cl::init(false))
llvm::RuntimePointerChecking::printChecks
void printChecks(raw_ostream &OS, const SmallVectorImpl< RuntimePointerCheck > &Checks, unsigned Depth=0) const
Print Checks.
Definition: LoopAccessAnalysis.cpp:469
llvm::Loop::getLoopID
MDNode * getLoopID() const
Return the llvm.loop loop id metadata node for this loop if it is present.
Definition: LoopInfo.cpp:504
Casting.h
DiagnosticInfo.h
Function.h
llvm::LoopBase::getHeader
BlockT * getHeader() const
Definition: LoopInfo.h:104
llvm::TargetStackID::Value
Value
Definition: TargetFrameLowering.h:27
PassManager.h
EquivalenceClasses.h
llvm::DiagnosticInfoOptimizationFailure
Diagnostic information for optimization failures.
Definition: DiagnosticInfo.h:972
llvm::OptimizationRemarkAnalysis::AlwaysPrint
static const char * AlwaysPrint
Definition: DiagnosticInfo.h:816
LoopVersioning.h
LoopDistribute.h
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(LoopDistributeLegacy, LDIST_NAME, ldist_name, false, false) FunctionPass *llvm
Definition: LoopDistribute.cpp:1078
llvm::DominatorTreeAnalysis
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:252
llvm::OptimizationRemark
Diagnostic information for applied optimization remarks.
Definition: DiagnosticInfo.h:685
Instructions.h
SmallVector.h
Dominators.h
LLVMLoopDistributeFollowupCoincident
static const char *const LLVMLoopDistributeFollowupCoincident
Definition: LoopDistribute.cpp:84
N
#define N
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:94
llvm::GlobalsAAWrapperPass
Legacy wrapper pass to provide the GlobalsAAResult object.
Definition: GlobalsModRef.h:143
llvm::DominatorTreeBase::verify
bool verify(VerificationLevel VL=VerificationLevel::Full) const
verify - checks if the tree is correct.
Definition: GenericDomTree.h:802
TargetTransformInfo.h
llvm::SmallVectorImpl< Instruction * >
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:44
llvm::InnerAnalysisManagerProxy
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Definition: PassManager.h:940
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:298
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
LLVMContext.h
llvm::Loop::isRotatedForm
bool isRotatedForm() const
Return true if the loop is in rotated form.
Definition: LoopInfo.h:788
llvm::cl::desc
Definition: CommandLine.h:412
llvm::LoopAccessInfo::getPSE
const PredicatedScalarEvolution & getPSE() const
Used to add runtime SCEV checks.
Definition: LoopAccessAnalysis.h:587
raw_ostream.h
BasicBlockUtils.h
llvm::SplitBlock
BasicBlock * SplitBlock(BasicBlock *Old, Instruction *SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
Definition: BasicBlockUtils.cpp:839
Value.h
InitializePasses.h
llvm::OptimizationRemarkEmitterAnalysis
Definition: OptimizationRemarkEmitter.h:164
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
Debug.h
llvm::TargetLibraryAnalysis
Analysis pass providing the TargetLibraryInfo.
Definition: TargetLibraryInfo.h:440
llvm::MDOperand
Tracking metadata reference owned by Metadata.
Definition: Metadata.h:753
llvm::Optional::getValue
constexpr const T & getValue() const LLVM_LVALUE_FUNCTION
Definition: Optional.h:282
llvm::LoopAnalysis
Analysis pass that exposes the LoopInfo for a function.
Definition: LoopInfo.h:1243
Other
Optional< std::vector< StOtherPiece > > Other
Definition: ELFYAML.cpp:1195
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38