LLVM  4.0.0
LoopDistribute.cpp
Go to the documentation of this file.
1 //===- LoopDistribute.cpp - Loop Distribution Pass ------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the Loop Distribution Pass. Its main focus is to
11 // distribute loops that cannot be vectorized due to dependence cycles. It
12 // tries to isolate the offending dependences into a new loop allowing
13 // vectorization of the remaining parts.
14 //
15 // For dependence analysis, the pass uses the LoopVectorizer's
16 // LoopAccessAnalysis. Because this analysis presumes no change in the order of
17 // memory operations, special care is taken to preserve the lexical order of
18 // these operations.
19 //
20 // Similarly to the Vectorizer, the pass also supports loop versioning to
21 // run-time disambiguate potentially overlapping arrays.
22 //
23 //===----------------------------------------------------------------------===//
24 
28 #include "llvm/ADT/STLExtras.h"
29 #include "llvm/ADT/Statistic.h"
33 #include "llvm/Analysis/LoopInfo.h"
35 #include "llvm/IR/DiagnosticInfo.h"
36 #include "llvm/IR/Dominators.h"
37 #include "llvm/Pass.h"
39 #include "llvm/Support/Debug.h"
45 #include <list>
46 
47 #define LDIST_NAME "loop-distribute"
48 #define DEBUG_TYPE LDIST_NAME
49 
50 using namespace llvm;
51 
52 static cl::opt<bool>
53  LDistVerify("loop-distribute-verify", cl::Hidden,
54  cl::desc("Turn on DominatorTree and LoopInfo verification "
55  "after Loop Distribution"),
56  cl::init(false));
57 
59  "loop-distribute-non-if-convertible", cl::Hidden,
60  cl::desc("Whether to distribute into a loop that may not be "
61  "if-convertible by the loop vectorizer"),
62  cl::init(false));
63 
65  "loop-distribute-scev-check-threshold", cl::init(8), cl::Hidden,
66  cl::desc("The maximum number of SCEV checks allowed for Loop "
67  "Distribution"));
68 
70  "loop-distribute-scev-check-threshold-with-pragma", cl::init(128),
71  cl::Hidden,
72  cl::desc(
73  "The maximum number of SCEV checks allowed for Loop "
74  "Distribution for loop marked with #pragma loop distribute(enable)"));
75 
77  "enable-loop-distribute", cl::Hidden,
78  cl::desc("Enable the new, experimental LoopDistribution Pass"),
79  cl::init(false));
80 
81 STATISTIC(NumLoopsDistributed, "Number of loops distributed");
82 
83 namespace {
84 /// \brief Maintains the set of instructions of the loop for a partition before
85 /// cloning. After cloning, it hosts the new loop.
86 class InstPartition {
87  typedef SmallPtrSet<Instruction *, 8> InstructionSet;
88 
89 public:
90  InstPartition(Instruction *I, Loop *L, bool DepCycle = false)
91  : DepCycle(DepCycle), OrigLoop(L), ClonedLoop(nullptr) {
92  Set.insert(I);
93  }
94 
95  /// \brief Returns whether this partition contains a dependence cycle.
96  bool hasDepCycle() const { return DepCycle; }
97 
98  /// \brief Adds an instruction to this partition.
99  void add(Instruction *I) { Set.insert(I); }
100 
101  /// \brief Collection accessors.
102  InstructionSet::iterator begin() { return Set.begin(); }
103  InstructionSet::iterator end() { return Set.end(); }
104  InstructionSet::const_iterator begin() const { return Set.begin(); }
105  InstructionSet::const_iterator end() const { return Set.end(); }
106  bool empty() const { return Set.empty(); }
107 
108  /// \brief Moves this partition into \p Other. This partition becomes empty
109  /// after this.
110  void moveTo(InstPartition &Other) {
111  Other.Set.insert(Set.begin(), Set.end());
112  Set.clear();
113  Other.DepCycle |= DepCycle;
114  }
115 
116  /// \brief Populates the partition with a transitive closure of all the
117  /// instructions that the seeded instructions dependent on.
118  void populateUsedSet() {
119  // FIXME: We currently don't use control-dependence but simply include all
120  // blocks (possibly empty at the end) and let simplifycfg mostly clean this
121  // up.
122  for (auto *B : OrigLoop->getBlocks())
123  Set.insert(B->getTerminator());
124 
125  // Follow the use-def chains to form a transitive closure of all the
126  // instructions that the originally seeded instructions depend on.
127  SmallVector<Instruction *, 8> Worklist(Set.begin(), Set.end());
128  while (!Worklist.empty()) {
129  Instruction *I = Worklist.pop_back_val();
130  // Insert instructions from the loop that we depend on.
131  for (Value *V : I->operand_values()) {
132  auto *I = dyn_cast<Instruction>(V);
133  if (I && OrigLoop->contains(I->getParent()) && Set.insert(I).second)
134  Worklist.push_back(I);
135  }
136  }
137  }
138 
139  /// \brief Clones the original loop.
140  ///
141  /// Updates LoopInfo and DominatorTree using the information that block \p
142  /// LoopDomBB dominates the loop.
143  Loop *cloneLoopWithPreheader(BasicBlock *InsertBefore, BasicBlock *LoopDomBB,
144  unsigned Index, LoopInfo *LI,
145  DominatorTree *DT) {
146  ClonedLoop = ::cloneLoopWithPreheader(InsertBefore, LoopDomBB, OrigLoop,
147  VMap, Twine(".ldist") + Twine(Index),
148  LI, DT, ClonedLoopBlocks);
149  return ClonedLoop;
150  }
151 
152  /// \brief The cloned loop. If this partition is mapped to the original loop,
153  /// this is null.
154  const Loop *getClonedLoop() const { return ClonedLoop; }
155 
156  /// \brief Returns the loop where this partition ends up after distribution.
157  /// If this partition is mapped to the original loop then use the block from
158  /// the loop.
159  const Loop *getDistributedLoop() const {
160  return ClonedLoop ? ClonedLoop : OrigLoop;
161  }
162 
163  /// \brief The VMap that is populated by cloning and then used in
164  /// remapinstruction to remap the cloned instructions.
165  ValueToValueMapTy &getVMap() { return VMap; }
166 
167  /// \brief Remaps the cloned instructions using VMap.
168  void remapInstructions() {
169  remapInstructionsInBlocks(ClonedLoopBlocks, VMap);
170  }
171 
172  /// \brief Based on the set of instructions selected for this partition,
173  /// removes the unnecessary ones.
174  void removeUnusedInsts() {
176 
177  for (auto *Block : OrigLoop->getBlocks())
178  for (auto &Inst : *Block)
179  if (!Set.count(&Inst)) {
180  Instruction *NewInst = &Inst;
181  if (!VMap.empty())
182  NewInst = cast<Instruction>(VMap[NewInst]);
183 
184  assert(!isa<BranchInst>(NewInst) &&
185  "Branches are marked used early on");
186  Unused.push_back(NewInst);
187  }
188 
189  // Delete the instructions backwards, as it has a reduced likelihood of
190  // having to update as many def-use and use-def chains.
191  for (auto *Inst : reverse(Unused)) {
192  if (!Inst->use_empty())
193  Inst->replaceAllUsesWith(UndefValue::get(Inst->getType()));
194  Inst->eraseFromParent();
195  }
196  }
197 
198  void print() const {
199  if (DepCycle)
200  dbgs() << " (cycle)\n";
201  for (auto *I : Set)
202  // Prefix with the block name.
203  dbgs() << " " << I->getParent()->getName() << ":" << *I << "\n";
204  }
205 
206  void printBlocks() const {
207  for (auto *BB : getDistributedLoop()->getBlocks())
208  dbgs() << *BB;
209  }
210 
211 private:
212  /// \brief Instructions from OrigLoop selected for this partition.
213  InstructionSet Set;
214 
215  /// \brief Whether this partition contains a dependence cycle.
216  bool DepCycle;
217 
218  /// \brief The original loop.
219  Loop *OrigLoop;
220 
221  /// \brief The cloned loop. If this partition is mapped to the original loop,
222  /// this is null.
223  Loop *ClonedLoop;
224 
225  /// \brief The blocks of ClonedLoop including the preheader. If this
226  /// partition is mapped to the original loop, this is empty.
227  SmallVector<BasicBlock *, 8> ClonedLoopBlocks;
228 
229  /// \brief These gets populated once the set of instructions have been
230  /// finalized. If this partition is mapped to the original loop, these are not
231  /// set.
232  ValueToValueMapTy VMap;
233 };
234 
235 /// \brief Holds the set of Partitions. It populates them, merges them and then
236 /// clones the loops.
237 class InstPartitionContainer {
238  typedef DenseMap<Instruction *, int> InstToPartitionIdT;
239 
240 public:
241  InstPartitionContainer(Loop *L, LoopInfo *LI, DominatorTree *DT)
242  : L(L), LI(LI), DT(DT) {}
243 
244  /// \brief Returns the number of partitions.
245  unsigned getSize() const { return PartitionContainer.size(); }
246 
247  /// \brief Adds \p Inst into the current partition if that is marked to
248  /// contain cycles. Otherwise start a new partition for it.
249  void addToCyclicPartition(Instruction *Inst) {
250  // If the current partition is non-cyclic. Start a new one.
251  if (PartitionContainer.empty() || !PartitionContainer.back().hasDepCycle())
252  PartitionContainer.emplace_back(Inst, L, /*DepCycle=*/true);
253  else
254  PartitionContainer.back().add(Inst);
255  }
256 
257  /// \brief Adds \p Inst into a partition that is not marked to contain
258  /// dependence cycles.
259  ///
260  // Initially we isolate memory instructions into as many partitions as
261  // possible, then later we may merge them back together.
262  void addToNewNonCyclicPartition(Instruction *Inst) {
263  PartitionContainer.emplace_back(Inst, L);
264  }
265 
266  /// \brief Merges adjacent non-cyclic partitions.
267  ///
268  /// The idea is that we currently only want to isolate the non-vectorizable
269  /// partition. We could later allow more distribution among these partition
270  /// too.
271  void mergeAdjacentNonCyclic() {
272  mergeAdjacentPartitionsIf(
273  [](const InstPartition *P) { return !P->hasDepCycle(); });
274  }
275 
276  /// \brief If a partition contains only conditional stores, we won't vectorize
277  /// it. Try to merge it with a previous cyclic partition.
278  void mergeNonIfConvertible() {
279  mergeAdjacentPartitionsIf([&](const InstPartition *Partition) {
280  if (Partition->hasDepCycle())
281  return true;
282 
283  // Now, check if all stores are conditional in this partition.
284  bool seenStore = false;
285 
286  for (auto *Inst : *Partition)
287  if (isa<StoreInst>(Inst)) {
288  seenStore = true;
290  return false;
291  }
292  return seenStore;
293  });
294  }
295 
296  /// \brief Merges the partitions according to various heuristics.
297  void mergeBeforePopulating() {
298  mergeAdjacentNonCyclic();
300  mergeNonIfConvertible();
301  }
302 
303  /// \brief Merges partitions in order to ensure that no loads are duplicated.
304  ///
305  /// We can't duplicate loads because that could potentially reorder them.
306  /// LoopAccessAnalysis provides dependency information with the context that
307  /// the order of memory operation is preserved.
308  ///
309  /// Return if any partitions were merged.
310  bool mergeToAvoidDuplicatedLoads() {
311  typedef DenseMap<Instruction *, InstPartition *> LoadToPartitionT;
312  typedef EquivalenceClasses<InstPartition *> ToBeMergedT;
313 
314  LoadToPartitionT LoadToPartition;
315  ToBeMergedT ToBeMerged;
316 
317  // Step through the partitions and create equivalence between partitions
318  // that contain the same load. Also put partitions in between them in the
319  // same equivalence class to avoid reordering of memory operations.
320  for (PartitionContainerT::iterator I = PartitionContainer.begin(),
321  E = PartitionContainer.end();
322  I != E; ++I) {
323  auto *PartI = &*I;
324 
325  // If a load occurs in two partitions PartI and PartJ, merge all
326  // partitions (PartI, PartJ] into PartI.
327  for (Instruction *Inst : *PartI)
328  if (isa<LoadInst>(Inst)) {
329  bool NewElt;
330  LoadToPartitionT::iterator LoadToPart;
331 
332  std::tie(LoadToPart, NewElt) =
333  LoadToPartition.insert(std::make_pair(Inst, PartI));
334  if (!NewElt) {
335  DEBUG(dbgs() << "Merging partitions due to this load in multiple "
336  << "partitions: " << PartI << ", "
337  << LoadToPart->second << "\n" << *Inst << "\n");
338 
339  auto PartJ = I;
340  do {
341  --PartJ;
342  ToBeMerged.unionSets(PartI, &*PartJ);
343  } while (&*PartJ != LoadToPart->second);
344  }
345  }
346  }
347  if (ToBeMerged.empty())
348  return false;
349 
350  // Merge the member of an equivalence class into its class leader. This
351  // makes the members empty.
352  for (ToBeMergedT::iterator I = ToBeMerged.begin(), E = ToBeMerged.end();
353  I != E; ++I) {
354  if (!I->isLeader())
355  continue;
356 
357  auto PartI = I->getData();
358  for (auto PartJ : make_range(std::next(ToBeMerged.member_begin(I)),
359  ToBeMerged.member_end())) {
360  PartJ->moveTo(*PartI);
361  }
362  }
363 
364  // Remove the empty partitions.
365  PartitionContainer.remove_if(
366  [](const InstPartition &P) { return P.empty(); });
367 
368  return true;
369  }
370 
371  /// \brief Sets up the mapping between instructions to partitions. If the
372  /// instruction is duplicated across multiple partitions, set the entry to -1.
373  void setupPartitionIdOnInstructions() {
374  int PartitionID = 0;
375  for (const auto &Partition : PartitionContainer) {
376  for (Instruction *Inst : Partition) {
377  bool NewElt;
378  InstToPartitionIdT::iterator Iter;
379 
380  std::tie(Iter, NewElt) =
381  InstToPartitionId.insert(std::make_pair(Inst, PartitionID));
382  if (!NewElt)
383  Iter->second = -1;
384  }
385  ++PartitionID;
386  }
387  }
388 
389  /// \brief Populates the partition with everything that the seeding
390  /// instructions require.
391  void populateUsedSet() {
392  for (auto &P : PartitionContainer)
393  P.populateUsedSet();
394  }
395 
396  /// \brief This performs the main chunk of the work of cloning the loops for
397  /// the partitions.
398  void cloneLoops() {
399  BasicBlock *OrigPH = L->getLoopPreheader();
400  // At this point the predecessor of the preheader is either the memcheck
401  // block or the top part of the original preheader.
402  BasicBlock *Pred = OrigPH->getSinglePredecessor();
403  assert(Pred && "Preheader does not have a single predecessor");
404  BasicBlock *ExitBlock = L->getExitBlock();
405  assert(ExitBlock && "No single exit block");
406  Loop *NewLoop;
407 
408  assert(!PartitionContainer.empty() && "at least two partitions expected");
409  // We're cloning the preheader along with the loop so we already made sure
410  // it was empty.
411  assert(&*OrigPH->begin() == OrigPH->getTerminator() &&
412  "preheader not empty");
413 
414  // Create a loop for each partition except the last. Clone the original
415  // loop before PH along with adding a preheader for the cloned loop. Then
416  // update PH to point to the newly added preheader.
417  BasicBlock *TopPH = OrigPH;
418  unsigned Index = getSize() - 1;
419  for (auto I = std::next(PartitionContainer.rbegin()),
420  E = PartitionContainer.rend();
421  I != E; ++I, --Index, TopPH = NewLoop->getLoopPreheader()) {
422  auto *Part = &*I;
423 
424  NewLoop = Part->cloneLoopWithPreheader(TopPH, Pred, Index, LI, DT);
425 
426  Part->getVMap()[ExitBlock] = TopPH;
427  Part->remapInstructions();
428  }
429  Pred->getTerminator()->replaceUsesOfWith(OrigPH, TopPH);
430 
431  // Now go in forward order and update the immediate dominator for the
432  // preheaders with the exiting block of the previous loop. Dominance
433  // within the loop is updated in cloneLoopWithPreheader.
434  for (auto Curr = PartitionContainer.cbegin(),
435  Next = std::next(PartitionContainer.cbegin()),
436  E = PartitionContainer.cend();
437  Next != E; ++Curr, ++Next)
439  Next->getDistributedLoop()->getLoopPreheader(),
440  Curr->getDistributedLoop()->getExitingBlock());
441  }
442 
443  /// \brief Removes the dead instructions from the cloned loops.
444  void removeUnusedInsts() {
445  for (auto &Partition : PartitionContainer)
446  Partition.removeUnusedInsts();
447  }
448 
449  /// \brief For each memory pointer, it computes the partitionId the pointer is
450  /// used in.
451  ///
452  /// This returns an array of int where the I-th entry corresponds to I-th
453  /// entry in LAI.getRuntimePointerCheck(). If the pointer is used in multiple
454  /// partitions its entry is set to -1.
456  computePartitionSetForPointers(const LoopAccessInfo &LAI) {
457  const RuntimePointerChecking *RtPtrCheck = LAI.getRuntimePointerChecking();
458 
459  unsigned N = RtPtrCheck->Pointers.size();
460  SmallVector<int, 8> PtrToPartitions(N);
461  for (unsigned I = 0; I < N; ++I) {
462  Value *Ptr = RtPtrCheck->Pointers[I].PointerValue;
463  auto Instructions =
464  LAI.getInstructionsForAccess(Ptr, RtPtrCheck->Pointers[I].IsWritePtr);
465 
466  int &Partition = PtrToPartitions[I];
467  // First set it to uninitialized.
468  Partition = -2;
469  for (Instruction *Inst : Instructions) {
470  // Note that this could be -1 if Inst is duplicated across multiple
471  // partitions.
472  int ThisPartition = this->InstToPartitionId[Inst];
473  if (Partition == -2)
474  Partition = ThisPartition;
475  // -1 means belonging to multiple partitions.
476  else if (Partition == -1)
477  break;
478  else if (Partition != (int)ThisPartition)
479  Partition = -1;
480  }
481  assert(Partition != -2 && "Pointer not belonging to any partition");
482  }
483 
484  return PtrToPartitions;
485  }
486 
487  void print(raw_ostream &OS) const {
488  unsigned Index = 0;
489  for (const auto &P : PartitionContainer) {
490  OS << "Partition " << Index++ << " (" << &P << "):\n";
491  P.print();
492  }
493  }
494 
495  void dump() const { print(dbgs()); }
496 
497 #ifndef NDEBUG
498  friend raw_ostream &operator<<(raw_ostream &OS,
499  const InstPartitionContainer &Partitions) {
500  Partitions.print(OS);
501  return OS;
502  }
503 #endif
504 
505  void printBlocks() const {
506  unsigned Index = 0;
507  for (const auto &P : PartitionContainer) {
508  dbgs() << "\nPartition " << Index++ << " (" << &P << "):\n";
509  P.printBlocks();
510  }
511  }
512 
513 private:
514  typedef std::list<InstPartition> PartitionContainerT;
515 
516  /// \brief List of partitions.
517  PartitionContainerT PartitionContainer;
518 
519  /// \brief Mapping from Instruction to partition Id. If the instruction
520  /// belongs to multiple partitions the entry contains -1.
521  InstToPartitionIdT InstToPartitionId;
522 
523  Loop *L;
524  LoopInfo *LI;
525  DominatorTree *DT;
526 
527  /// \brief The control structure to merge adjacent partitions if both satisfy
528  /// the \p Predicate.
529  template <class UnaryPredicate>
530  void mergeAdjacentPartitionsIf(UnaryPredicate Predicate) {
531  InstPartition *PrevMatch = nullptr;
532  for (auto I = PartitionContainer.begin(); I != PartitionContainer.end();) {
533  auto DoesMatch = Predicate(&*I);
534  if (PrevMatch == nullptr && DoesMatch) {
535  PrevMatch = &*I;
536  ++I;
537  } else if (PrevMatch != nullptr && DoesMatch) {
538  I->moveTo(*PrevMatch);
539  I = PartitionContainer.erase(I);
540  } else {
541  PrevMatch = nullptr;
542  ++I;
543  }
544  }
545  }
546 };
547 
548 /// \brief For each memory instruction, this class maintains difference of the
549 /// number of unsafe dependences that start out from this instruction minus
550 /// those that end here.
551 ///
552 /// By traversing the memory instructions in program order and accumulating this
553 /// number, we know whether any unsafe dependence crosses over a program point.
554 class MemoryInstructionDependences {
556 
557 public:
558  struct Entry {
559  Instruction *Inst;
560  unsigned NumUnsafeDependencesStartOrEnd;
561 
562  Entry(Instruction *Inst) : Inst(Inst), NumUnsafeDependencesStartOrEnd(0) {}
563  };
564 
565  typedef SmallVector<Entry, 8> AccessesType;
566 
567  AccessesType::const_iterator begin() const { return Accesses.begin(); }
568  AccessesType::const_iterator end() const { return Accesses.end(); }
569 
570  MemoryInstructionDependences(
571  const SmallVectorImpl<Instruction *> &Instructions,
572  const SmallVectorImpl<Dependence> &Dependences) {
573  Accesses.append(Instructions.begin(), Instructions.end());
574 
575  DEBUG(dbgs() << "Backward dependences:\n");
576  for (auto &Dep : Dependences)
577  if (Dep.isPossiblyBackward()) {
578  // Note that the designations source and destination follow the program
579  // order, i.e. source is always first. (The direction is given by the
580  // DepType.)
581  ++Accesses[Dep.Source].NumUnsafeDependencesStartOrEnd;
582  --Accesses[Dep.Destination].NumUnsafeDependencesStartOrEnd;
583 
584  DEBUG(Dep.print(dbgs(), 2, Instructions));
585  }
586  }
587 
588 private:
589  AccessesType Accesses;
590 };
591 
592 /// \brief The actual class performing the per-loop work.
593 class LoopDistributeForLoop {
594 public:
595  LoopDistributeForLoop(Loop *L, Function *F, LoopInfo *LI, DominatorTree *DT,
597  : L(L), F(F), LI(LI), LAI(nullptr), DT(DT), SE(SE), ORE(ORE) {
598  setForced();
599  }
600 
601  /// \brief Try to distribute an inner-most loop.
602  bool processLoop(std::function<const LoopAccessInfo &(Loop &)> &GetLAA) {
603  assert(L->empty() && "Only process inner loops.");
604 
605  DEBUG(dbgs() << "\nLDist: In \"" << L->getHeader()->getParent()->getName()
606  << "\" checking " << *L << "\n");
607 
608  if (!L->getExitBlock())
609  return fail("MultipleExitBlocks", "multiple exit blocks");
610  if (!L->isLoopSimplifyForm())
611  return fail("NotLoopSimplifyForm",
612  "loop is not in loop-simplify form");
613 
614  BasicBlock *PH = L->getLoopPreheader();
615 
616  // LAA will check that we only have a single exiting block.
617  LAI = &GetLAA(*L);
618 
619  // Currently, we only distribute to isolate the part of the loop with
620  // dependence cycles to enable partial vectorization.
621  if (LAI->canVectorizeMemory())
622  return fail("MemOpsCanBeVectorized",
623  "memory operations are safe for vectorization");
624 
625  auto *Dependences = LAI->getDepChecker().getDependences();
626  if (!Dependences || Dependences->empty())
627  return fail("NoUnsafeDeps", "no unsafe dependences to isolate");
628 
629  InstPartitionContainer Partitions(L, LI, DT);
630 
631  // First, go through each memory operation and assign them to consecutive
632  // partitions (the order of partitions follows program order). Put those
633  // with unsafe dependences into "cyclic" partition otherwise put each store
634  // in its own "non-cyclic" partition (we'll merge these later).
635  //
636  // Note that a memory operation (e.g. Load2 below) at a program point that
637  // has an unsafe dependence (Store3->Load1) spanning over it must be
638  // included in the same cyclic partition as the dependent operations. This
639  // is to preserve the original program order after distribution. E.g.:
640  //
641  // NumUnsafeDependencesStartOrEnd NumUnsafeDependencesActive
642  // Load1 -. 1 0->1
643  // Load2 | /Unsafe/ 0 1
644  // Store3 -' -1 1->0
645  // Load4 0 0
646  //
647  // NumUnsafeDependencesActive > 0 indicates this situation and in this case
648  // we just keep assigning to the same cyclic partition until
649  // NumUnsafeDependencesActive reaches 0.
650  const MemoryDepChecker &DepChecker = LAI->getDepChecker();
651  MemoryInstructionDependences MID(DepChecker.getMemoryInstructions(),
652  *Dependences);
653 
654  int NumUnsafeDependencesActive = 0;
655  for (auto &InstDep : MID) {
656  Instruction *I = InstDep.Inst;
657  // We update NumUnsafeDependencesActive post-instruction, catch the
658  // start of a dependence directly via NumUnsafeDependencesStartOrEnd.
659  if (NumUnsafeDependencesActive ||
660  InstDep.NumUnsafeDependencesStartOrEnd > 0)
661  Partitions.addToCyclicPartition(I);
662  else
663  Partitions.addToNewNonCyclicPartition(I);
664  NumUnsafeDependencesActive += InstDep.NumUnsafeDependencesStartOrEnd;
665  assert(NumUnsafeDependencesActive >= 0 &&
666  "Negative number of dependences active");
667  }
668 
669  // Add partitions for values used outside. These partitions can be out of
670  // order from the original program order. This is OK because if the
671  // partition uses a load we will merge this partition with the original
672  // partition of the load that we set up in the previous loop (see
673  // mergeToAvoidDuplicatedLoads).
674  auto DefsUsedOutside = findDefsUsedOutsideOfLoop(L);
675  for (auto *Inst : DefsUsedOutside)
676  Partitions.addToNewNonCyclicPartition(Inst);
677 
678  DEBUG(dbgs() << "Seeded partitions:\n" << Partitions);
679  if (Partitions.getSize() < 2)
680  return fail("CantIsolateUnsafeDeps",
681  "cannot isolate unsafe dependencies");
682 
683  // Run the merge heuristics: Merge non-cyclic adjacent partitions since we
684  // should be able to vectorize these together.
685  Partitions.mergeBeforePopulating();
686  DEBUG(dbgs() << "\nMerged partitions:\n" << Partitions);
687  if (Partitions.getSize() < 2)
688  return fail("CantIsolateUnsafeDeps",
689  "cannot isolate unsafe dependencies");
690 
691  // Now, populate the partitions with non-memory operations.
692  Partitions.populateUsedSet();
693  DEBUG(dbgs() << "\nPopulated partitions:\n" << Partitions);
694 
695  // In order to preserve original lexical order for loads, keep them in the
696  // partition that we set up in the MemoryInstructionDependences loop.
697  if (Partitions.mergeToAvoidDuplicatedLoads()) {
698  DEBUG(dbgs() << "\nPartitions merged to ensure unique loads:\n"
699  << Partitions);
700  if (Partitions.getSize() < 2)
701  return fail("CantIsolateUnsafeDeps",
702  "cannot isolate unsafe dependencies");
703  }
704 
705  // Don't distribute the loop if we need too many SCEV run-time checks.
706  const SCEVUnionPredicate &Pred = LAI->getPSE().getUnionPredicate();
707  if (Pred.getComplexity() > (IsForced.getValueOr(false)
710  return fail("TooManySCEVRuntimeChecks",
711  "too many SCEV run-time checks needed.\n");
712 
713  DEBUG(dbgs() << "\nDistributing loop: " << *L << "\n");
714  // We're done forming the partitions set up the reverse mapping from
715  // instructions to partitions.
716  Partitions.setupPartitionIdOnInstructions();
717 
718  // To keep things simple have an empty preheader before we version or clone
719  // the loop. (Also split if this has no predecessor, i.e. entry, because we
720  // rely on PH having a predecessor.)
721  if (!PH->getSinglePredecessor() || &*PH->begin() != PH->getTerminator())
722  SplitBlock(PH, PH->getTerminator(), DT, LI);
723 
724  // If we need run-time checks, version the loop now.
725  auto PtrToPartition = Partitions.computePartitionSetForPointers(*LAI);
726  const auto *RtPtrChecking = LAI->getRuntimePointerChecking();
727  const auto &AllChecks = RtPtrChecking->getChecks();
728  auto Checks = includeOnlyCrossPartitionChecks(AllChecks, PtrToPartition,
729  RtPtrChecking);
730 
731  if (!Pred.isAlwaysTrue() || !Checks.empty()) {
732  DEBUG(dbgs() << "\nPointers:\n");
733  DEBUG(LAI->getRuntimePointerChecking()->printChecks(dbgs(), Checks));
734  LoopVersioning LVer(*LAI, L, LI, DT, SE, false);
735  LVer.setAliasChecks(std::move(Checks));
736  LVer.setSCEVChecks(LAI->getPSE().getUnionPredicate());
737  LVer.versionLoop(DefsUsedOutside);
738  LVer.annotateLoopWithNoAlias();
739  }
740 
741  // Create identical copies of the original loop for each partition and hook
742  // them up sequentially.
743  Partitions.cloneLoops();
744 
745  // Now, we remove the instruction from each loop that don't belong to that
746  // partition.
747  Partitions.removeUnusedInsts();
748  DEBUG(dbgs() << "\nAfter removing unused Instrs:\n");
749  DEBUG(Partitions.printBlocks());
750 
751  if (LDistVerify) {
752  LI->verify(*DT);
753  DT->verifyDomTree();
754  }
755 
756  ++NumLoopsDistributed;
757  // Report the success.
758  ORE->emit(OptimizationRemark(LDIST_NAME, "Distribute", L->getStartLoc(),
759  L->getHeader())
760  << "distributed loop");
761  return true;
762  }
763 
764  /// \brief Provide diagnostics then \return with false.
765  bool fail(StringRef RemarkName, StringRef Message) {
766  LLVMContext &Ctx = F->getContext();
767  bool Forced = isForced().getValueOr(false);
768 
769  DEBUG(dbgs() << "Skipping; " << Message << "\n");
770 
771  // With Rpass-missed report that distribution failed.
772  ORE->emit(
773  OptimizationRemarkMissed(LDIST_NAME, "NotDistributed", L->getStartLoc(),
774  L->getHeader())
775  << "loop not distributed: use -Rpass-analysis=loop-distribute for more "
776  "info");
777 
778  // With Rpass-analysis report why. This is on by default if distribution
779  // was requested explicitly.
780  ORE->emit(OptimizationRemarkAnalysis(
782  RemarkName, L->getStartLoc(), L->getHeader())
783  << "loop not distributed: " << Message);
784 
785  // Also issue a warning if distribution was requested explicitly but it
786  // failed.
787  if (Forced)
789  *F, L->getStartLoc(), "loop not distributed: failed "
790  "explicitly specified loop distribution"));
791 
792  return false;
793  }
794 
795  /// \brief Return if distribution forced to be enabled/disabled for the loop.
796  ///
797  /// If the optional has a value, it indicates whether distribution was forced
798  /// to be enabled (true) or disabled (false). If the optional has no value
799  /// distribution was not forced either way.
800  const Optional<bool> &isForced() const { return IsForced; }
801 
802 private:
803  /// \brief Filter out checks between pointers from the same partition.
804  ///
805  /// \p PtrToPartition contains the partition number for pointers. Partition
806  /// number -1 means that the pointer is used in multiple partitions. In this
807  /// case we can't safely omit the check.
809  includeOnlyCrossPartitionChecks(
811  const SmallVectorImpl<int> &PtrToPartition,
812  const RuntimePointerChecking *RtPtrChecking) {
814 
815  std::copy_if(AllChecks.begin(), AllChecks.end(), std::back_inserter(Checks),
817  for (unsigned PtrIdx1 : Check.first->Members)
818  for (unsigned PtrIdx2 : Check.second->Members)
819  // Only include this check if there is a pair of pointers
820  // that require checking and the pointers fall into
821  // separate partitions.
822  //
823  // (Note that we already know at this point that the two
824  // pointer groups need checking but it doesn't follow
825  // that each pair of pointers within the two groups need
826  // checking as well.
827  //
828  // In other words we don't want to include a check just
829  // because there is a pair of pointers between the two
830  // pointer groups that require checks and a different
831  // pair whose pointers fall into different partitions.)
832  if (RtPtrChecking->needsChecking(PtrIdx1, PtrIdx2) &&
834  PtrToPartition, PtrIdx1, PtrIdx2))
835  return true;
836  return false;
837  });
838 
839  return Checks;
840  }
841 
842  /// \brief Check whether the loop metadata is forcing distribution to be
843  /// enabled/disabled.
844  void setForced() {
846  findStringMetadataForLoop(L, "llvm.loop.distribute.enable");
847  if (!Value)
848  return;
849 
850  const MDOperand *Op = *Value;
851  assert(Op && mdconst::hasa<ConstantInt>(*Op) && "invalid metadata");
852  IsForced = mdconst::extract<ConstantInt>(*Op)->getZExtValue();
853  }
854 
855  Loop *L;
856  Function *F;
857 
858  // Analyses used.
859  LoopInfo *LI;
860  const LoopAccessInfo *LAI;
861  DominatorTree *DT;
862  ScalarEvolution *SE;
864 
865  /// \brief Indicates whether distribution is forced to be enabled/disabled for
866  /// the loop.
867  ///
868  /// If the optional has a value, it indicates whether distribution was forced
869  /// to be enabled (true) or disabled (false). If the optional has no value
870  /// distribution was not forced either way.
871  Optional<bool> IsForced;
872 };
873 
874 /// Shared implementation between new and old PMs.
875 static bool runImpl(Function &F, LoopInfo *LI, DominatorTree *DT,
877  std::function<const LoopAccessInfo &(Loop &)> &GetLAA) {
878  // Build up a worklist of inner-loops to vectorize. This is necessary as the
879  // act of distributing a loop creates new loops and can invalidate iterators
880  // across the loops.
881  SmallVector<Loop *, 8> Worklist;
882 
883  for (Loop *TopLevelLoop : *LI)
884  for (Loop *L : depth_first(TopLevelLoop))
885  // We only handle inner-most loops.
886  if (L->empty())
887  Worklist.push_back(L);
888 
889  // Now walk the identified inner loops.
890  bool Changed = false;
891  for (Loop *L : Worklist) {
892  LoopDistributeForLoop LDL(L, &F, LI, DT, SE, ORE);
893 
894  // If distribution was forced for the specific loop to be
895  // enabled/disabled, follow that. Otherwise use the global flag.
896  if (LDL.isForced().getValueOr(EnableLoopDistribute))
897  Changed |= LDL.processLoop(GetLAA);
898  }
899 
900  // Process each loop nest in the function.
901  return Changed;
902 }
903 
904 /// \brief The pass class.
905 class LoopDistributeLegacy : public FunctionPass {
906 public:
907  LoopDistributeLegacy() : FunctionPass(ID) {
908  // The default is set by the caller.
910  }
911 
912  bool runOnFunction(Function &F) override {
913  if (skipFunction(F))
914  return false;
915 
916  auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
917  auto *LAA = &getAnalysis<LoopAccessLegacyAnalysis>();
918  auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
919  auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
920  auto *ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
921  std::function<const LoopAccessInfo &(Loop &)> GetLAA =
922  [&](Loop &L) -> const LoopAccessInfo & { return LAA->getInfo(&L); };
923 
924  return runImpl(F, LI, DT, SE, ORE, GetLAA);
925  }
926 
927  void getAnalysisUsage(AnalysisUsage &AU) const override {
936  }
937 
938  static char ID;
939 };
940 } // anonymous namespace
941 
944  auto &LI = AM.getResult<LoopAnalysis>(F);
945  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
946  auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
948 
949  // We don't directly need these analyses but they're required for loop
950  // analyses so provide them below.
951  auto &AA = AM.getResult<AAManager>(F);
952  auto &AC = AM.getResult<AssumptionAnalysis>(F);
953  auto &TTI = AM.getResult<TargetIRAnalysis>(F);
954  auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
955 
956  auto &LAM = AM.getResult<LoopAnalysisManagerFunctionProxy>(F).getManager();
957  std::function<const LoopAccessInfo &(Loop &)> GetLAA =
958  [&](Loop &L) -> const LoopAccessInfo & {
959  LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE, TLI, TTI};
960  return LAM.getResult<LoopAccessAnalysis>(L, AR);
961  };
962 
963  bool Changed = runImpl(F, &LI, &DT, &SE, &ORE, GetLAA);
964  if (!Changed)
965  return PreservedAnalyses::all();
967  PA.preserve<LoopAnalysis>();
969  PA.preserve<GlobalsAA>();
970  return PA;
971 }
972 
974 static const char ldist_name[] = "Loop Distribution";
975 
976 INITIALIZE_PASS_BEGIN(LoopDistributeLegacy, LDIST_NAME, ldist_name, false,
977  false)
983 INITIALIZE_PASS_END(LoopDistributeLegacy, LDIST_NAME, ldist_name, false, false)
984 
985 namespace llvm {
986 FunctionPass *createLoopDistributePass() { return new LoopDistributeLegacy(); }
987 }
Legacy wrapper pass to provide the GlobalsAAResult object.
MachineLoop * L
static bool Check(DecodeStatus &Out, DecodeStatus In)
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:241
Tracking metadata reference owned by Metadata.
Definition: Metadata.h:679
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Diagnostic information for missed-optimization remarks.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
#define LDIST_NAME
STATISTIC(NumFunctions,"Total number of functions")
BasicBlock * SplitBlock(BasicBlock *Old, Instruction *SplitPt, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr)
Split the specified block at the specified instruction - everything before SplitPt stays in Old and e...
This header provides classes for managing a pipeline of passes over loops in LLVM IR...
This is the interface for a simple mod/ref and alias analysis over globals.
const SmallVectorImpl< Instruction * > & getMemoryInstructions() const
The vector of memory access instructions.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
Definition: LoopInfo.cpp:307
static cl::opt< bool > DistributeNonIfConvertible("loop-distribute-non-if-convertible", cl::Hidden, cl::desc("Whether to distribute into a loop that may not be ""if-convertible by the loop vectorizer"), cl::init(false))
The main scalar evolution driver.
static void fail(const SDLoc &DL, SelectionDAG &DAG, const char *Msg)
Analysis pass providing the TargetTransformInfo.
const_iterator begin(StringRef path)
Get begin iterator over path.
Definition: Path.cpp:233
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:189
Checks memory dependences among accesses to the same underlying object to determine whether there vec...
BlockT * getExitBlock() const
If getExitBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:79
BlockT * getHeader() const
Definition: LoopInfo.h:102
void remapInstructionsInBlocks(const SmallVectorImpl< BasicBlock * > &Blocks, ValueToValueMapTy &VMap)
Remaps instructions in Blocks using the mapping in VMap.
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:191
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:228
void printChecks(raw_ostream &OS, const SmallVectorImpl< PointerCheck > &Checks, unsigned Depth=0) const
Print Checks.
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:53
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
Diagnostic information for optimization failures.
static bool arePointersInSamePartition(const SmallVectorImpl< int > &PtrToPartition, unsigned PtrIdx1, unsigned PtrIdx2)
Check if pointers are in the same partition.
Diagnostic information for optimization analysis remarks.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:32
const PredicatedScalarEvolution & getPSE() const
Used to add runtime SCEV checks.
Analysis pass that exposes the LoopInfo for a function.
Definition: LoopInfo.h:806
SmallVector< Instruction *, 8 > findDefsUsedOutsideOfLoop(Loop *L)
Returns the instructions that use values defined in the loop.
Definition: LoopUtils.cpp:920
bool isLoopSimplifyForm() const
Return true if the Loop is in the form that the LoopSimplify form transforms loops to...
Definition: LoopInfo.cpp:190
auto reverse(ContainerTy &&C, typename std::enable_if< has_rbegin< ContainerTy >::value >::type *=nullptr) -> decltype(make_range(C.rbegin(), C.rend()))
Definition: STLExtras.h:241
#define F(x, y, z)
Definition: MD5.cpp:51
static cl::opt< unsigned > DistributeSCEVCheckThreshold("loop-distribute-scev-check-threshold", cl::init(8), cl::Hidden, cl::desc("The maximum number of SCEV checks allowed for Loop ""Distribution"))
const RuntimePointerChecking * getRuntimePointerChecking() const
Function Alias Analysis false
static bool add(uint64_t *dest, const uint64_t *x, const uint64_t *y, unsigned len)
This function adds the integer array x to the integer array Y and places the result in dest...
Definition: APInt.cpp:239
static GCRegistry::Add< OcamlGC > B("ocaml","ocaml 3.10-compatible GC")
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:96
static GCRegistry::Add< CoreCLRGC > E("coreclr","CoreCLR-compatible GC")
void replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Definition: User.cpp:24
#define P(N)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:395
friend const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:241
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:107
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:109
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs...ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:653
LLVM Basic Block Representation.
Definition: BasicBlock.h:51
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:48
This analysis provides dependence information for the memory accesses of a loop.
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator begin()
Definition: SmallVector.h:115
SmallVector< Instruction *, 4 > getInstructionsForAccess(Value *Ptr, bool isWrite) const
Return the list of instructions that use Ptr to read or write memory.
A manager for alias analyses.
Diagnostic information for applied optimization remarks.
EquivalenceClasses - This represents a collection of equivalence classes and supports three efficient...
Represent the analysis usage information of a pass.
const SmallVector< PointerCheck, 4 > & getChecks() const
Returns the checks that generateChecks created.
Analysis pass providing a never-invalidated alias analysis result.
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE,"Assign register bank of generic virtual registers", false, false) RegBankSelect
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:298
void verifyDomTree() const
Verify the correctness of the domtree by re-computing it.
Definition: Dominators.cpp:284
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1337
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:113
std::pair< const CheckingPtrGroup *, const CheckingPtrGroup * > PointerCheck
A memcheck which made up of a pair of grouped pointers.
Optional< const MDOperand * > findStringMetadataForLoop(Loop *TheLoop, StringRef Name)
Find string metadata for loop.
Definition: LoopUtils.cpp:1001
A function analysis which provides an AssumptionCache.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
bool isAlwaysTrue() const override
Implementation of the SCEVPredicate interface.
friend const_iterator begin(StringRef path)
Get begin iterator over path.
Definition: Path.cpp:233
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:843
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
Definition: PPCPredicates.h:27
void initializeLoopDistributeLegacyPass(PassRegistry &)
const MemoryDepChecker & getDepChecker() const
the Memory Dependence Checker which can determine the loop-independent and loop-carried dependences b...
Drive the analysis of memory accesses in the loop.
INITIALIZE_PASS_BEGIN(LoopDistributeLegacy, LDIST_NAME, ldist_name, false, false) namespace llvm
This class emits a version of the loop where run-time checks ensure that may-alias pointers can't ove...
const SmallVectorImpl< Dependence > * getDependences() const
Returns the memory dependences.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
BasicBlock * getSinglePredecessor()
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:226
static const char ldist_name[]
Loop * cloneLoopWithPreheader(BasicBlock *Before, BasicBlock *LoopDomBB, Loop *OrigLoop, ValueToValueMapTy &VMap, const Twine &NameSuffix, LoopInfo *LI, DominatorTree *DT, SmallVectorImpl< BasicBlock * > &Blocks)
Clones a loop OrigLoop.
Holds information about the memory runtime legality checks to verify that a group of pointers do not ...
Analysis pass that exposes the ScalarEvolution for a function.
unsigned getComplexity() const override
We estimate the complexity of a union predicate as the size number of predicates in the union...
const SCEVUnionPredicate & getUnionPredicate() const
This analysis provides dependence information for the memory accesses of a loop.
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator end()
Definition: SmallVector.h:119
Dependece between memory access instructions.
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:368
static bool runImpl(CallGraphSCC &SCC, CallGraph &CG, function_ref< AAResults &(Function &F)> AARGetter, unsigned MaxElements)
#define I(x, y, z)
Definition: MD5.cpp:54
#define N
TerminatorInst * getTerminator()
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:124
SmallVector< PointerInfo, 2 > Pointers
Information about the pointers that may require checking.
iterator_range< value_op_iterator > operand_values()
Definition: User.h:237
static cl::opt< unsigned > PragmaDistributeSCEVCheckThreshold("loop-distribute-scev-check-threshold-with-pragma", cl::init(128), cl::Hidden, cl::desc("The maximum number of SCEV checks allowed for Loop ""Distribution for loop marked with #pragma loop distribute(enable)"))
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:287
void preserve()
Mark an analysis as preserved.
Definition: PassManager.h:120
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
raw_ostream & operator<<(raw_ostream &OS, const APInt &I)
Definition: APInt.h:1726
void changeImmediateDominator(DomTreeNodeBase< NodeT > *N, DomTreeNodeBase< NodeT > *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node's...
void verify(const DominatorTreeBase< BlockT > &DomTree) const
Definition: LoopInfoImpl.h:541
Analysis pass providing the TargetLibraryInfo.
iterator_range< df_iterator< T > > depth_first(const T &G)
static cl::opt< bool > LDistVerify("loop-distribute-verify", cl::Hidden, cl::desc("Turn on DominatorTree and LoopInfo verification ""after Loop Distribution"), cl::init(false))
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This class represents a composition of other SCEV predicates, and is the class that most clients will...
static cl::opt< bool > EnableLoopDistribute("enable-loop-distribute", cl::Hidden, cl::desc("Enable the new, experimental LoopDistribution Pass"), cl::init(false))
LLVM Value Representation.
Definition: Value.h:71
OptimizationRemarkEmitter legacy analysis pass.
static bool blockNeedsPredication(BasicBlock *BB, Loop *TheLoop, DominatorTree *DT)
Return true if the block BB needs to be predicated in order for the loop to be vectorized.
bool canVectorizeMemory() const
Return true we can analyze the memory accesses in the loop and there are no memory dependence cycles...
bool needsChecking(const CheckingPtrGroup &M, const CheckingPtrGroup &N) const
Decide if we need to add a check between two groups of pointers, according to needsChecking.
This class implements an extremely fast bulk output stream that can only output to a stream...
Definition: raw_ostream.h:44
bool empty() const
Definition: LoopInfo.h:136
#define DEBUG(X)
Definition: Debug.h:100
The legacy pass manager's analysis pass to compute loop information.
Definition: LoopInfo.h:831
print Print MemDeps of function
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:47
A container for analyses that lazily runs them and caches their results.
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:217
int * Ptr
Dependence - This class represents a dependence between two memory memory references in a function...
The optimization diagnostic interface.
const BasicBlock * getParent() const
Definition: Instruction.h:62
FunctionPass * createLoopDistributePass()
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Definition: PassManager.h:905