LLVM  7.0.0svn
LoopDistribute.cpp
Go to the documentation of this file.
1 //===- LoopDistribute.cpp - Loop Distribution Pass ------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the Loop Distribution Pass. Its main focus is to
11 // distribute loops that cannot be vectorized due to dependence cycles. It
12 // tries to isolate the offending dependences into a new loop allowing
13 // vectorization of the remaining parts.
14 //
15 // For dependence analysis, the pass uses the LoopVectorizer's
16 // LoopAccessAnalysis. Because this analysis presumes no change in the order of
17 // memory operations, special care is taken to preserve the lexical order of
18 // these operations.
19 //
20 // Similarly to the Vectorizer, the pass also supports loop versioning to
21 // run-time disambiguate potentially overlapping arrays.
22 //
23 //===----------------------------------------------------------------------===//
24 
26 #include "llvm/ADT/DenseMap.h"
29 #include "llvm/ADT/Optional.h"
30 #include "llvm/ADT/STLExtras.h"
31 #include "llvm/ADT/SmallPtrSet.h"
32 #include "llvm/ADT/SmallVector.h"
33 #include "llvm/ADT/Statistic.h"
34 #include "llvm/ADT/StringRef.h"
35 #include "llvm/ADT/Twine.h"
42 #include "llvm/Analysis/LoopInfo.h"
47 #include "llvm/IR/BasicBlock.h"
48 #include "llvm/IR/Constants.h"
49 #include "llvm/IR/DiagnosticInfo.h"
50 #include "llvm/IR/Dominators.h"
51 #include "llvm/IR/Function.h"
52 #include "llvm/IR/InstrTypes.h"
53 #include "llvm/IR/Instruction.h"
54 #include "llvm/IR/Instructions.h"
55 #include "llvm/IR/LLVMContext.h"
56 #include "llvm/IR/Metadata.h"
57 #include "llvm/IR/PassManager.h"
58 #include "llvm/IR/Value.h"
59 #include "llvm/Pass.h"
60 #include "llvm/Support/Casting.h"
62 #include "llvm/Support/Debug.h"
64 #include "llvm/Transforms/Scalar.h"
70 #include <cassert>
71 #include <functional>
72 #include <list>
73 #include <tuple>
74 #include <utility>
75 
76 using namespace llvm;
77 
78 #define LDIST_NAME "loop-distribute"
79 #define DEBUG_TYPE LDIST_NAME
80 
81 static cl::opt<bool>
82  LDistVerify("loop-distribute-verify", cl::Hidden,
83  cl::desc("Turn on DominatorTree and LoopInfo verification "
84  "after Loop Distribution"),
85  cl::init(false));
86 
88  "loop-distribute-non-if-convertible", cl::Hidden,
89  cl::desc("Whether to distribute into a loop that may not be "
90  "if-convertible by the loop vectorizer"),
91  cl::init(false));
92 
94  "loop-distribute-scev-check-threshold", cl::init(8), cl::Hidden,
95  cl::desc("The maximum number of SCEV checks allowed for Loop "
96  "Distribution"));
97 
99  "loop-distribute-scev-check-threshold-with-pragma", cl::init(128),
100  cl::Hidden,
101  cl::desc(
102  "The maximum number of SCEV checks allowed for Loop "
103  "Distribution for loop marked with #pragma loop distribute(enable)"));
104 
106  "enable-loop-distribute", cl::Hidden,
107  cl::desc("Enable the new, experimental LoopDistribution Pass"),
108  cl::init(false));
109 
110 STATISTIC(NumLoopsDistributed, "Number of loops distributed");
111 
112 namespace {
113 
114 /// Maintains the set of instructions of the loop for a partition before
115 /// cloning. After cloning, it hosts the new loop.
116 class InstPartition {
117  using InstructionSet = SmallPtrSet<Instruction *, 8>;
118 
119 public:
120  InstPartition(Instruction *I, Loop *L, bool DepCycle = false)
121  : DepCycle(DepCycle), OrigLoop(L) {
122  Set.insert(I);
123  }
124 
125  /// Returns whether this partition contains a dependence cycle.
126  bool hasDepCycle() const { return DepCycle; }
127 
128  /// Adds an instruction to this partition.
129  void add(Instruction *I) { Set.insert(I); }
130 
131  /// Collection accessors.
132  InstructionSet::iterator begin() { return Set.begin(); }
133  InstructionSet::iterator end() { return Set.end(); }
134  InstructionSet::const_iterator begin() const { return Set.begin(); }
135  InstructionSet::const_iterator end() const { return Set.end(); }
136  bool empty() const { return Set.empty(); }
137 
138  /// Moves this partition into \p Other. This partition becomes empty
139  /// after this.
140  void moveTo(InstPartition &Other) {
141  Other.Set.insert(Set.begin(), Set.end());
142  Set.clear();
143  Other.DepCycle |= DepCycle;
144  }
145 
146  /// Populates the partition with a transitive closure of all the
147  /// instructions that the seeded instructions dependent on.
148  void populateUsedSet() {
149  // FIXME: We currently don't use control-dependence but simply include all
150  // blocks (possibly empty at the end) and let simplifycfg mostly clean this
151  // up.
152  for (auto *B : OrigLoop->getBlocks())
153  Set.insert(B->getTerminator());
154 
155  // Follow the use-def chains to form a transitive closure of all the
156  // instructions that the originally seeded instructions depend on.
157  SmallVector<Instruction *, 8> Worklist(Set.begin(), Set.end());
158  while (!Worklist.empty()) {
159  Instruction *I = Worklist.pop_back_val();
160  // Insert instructions from the loop that we depend on.
161  for (Value *V : I->operand_values()) {
162  auto *I = dyn_cast<Instruction>(V);
163  if (I && OrigLoop->contains(I->getParent()) && Set.insert(I).second)
164  Worklist.push_back(I);
165  }
166  }
167  }
168 
169  /// Clones the original loop.
170  ///
171  /// Updates LoopInfo and DominatorTree using the information that block \p
172  /// LoopDomBB dominates the loop.
173  Loop *cloneLoopWithPreheader(BasicBlock *InsertBefore, BasicBlock *LoopDomBB,
174  unsigned Index, LoopInfo *LI,
175  DominatorTree *DT) {
176  ClonedLoop = ::cloneLoopWithPreheader(InsertBefore, LoopDomBB, OrigLoop,
177  VMap, Twine(".ldist") + Twine(Index),
178  LI, DT, ClonedLoopBlocks);
179  return ClonedLoop;
180  }
181 
182  /// The cloned loop. If this partition is mapped to the original loop,
183  /// this is null.
184  const Loop *getClonedLoop() const { return ClonedLoop; }
185 
186  /// Returns the loop where this partition ends up after distribution.
187  /// If this partition is mapped to the original loop then use the block from
188  /// the loop.
189  const Loop *getDistributedLoop() const {
190  return ClonedLoop ? ClonedLoop : OrigLoop;
191  }
192 
193  /// The VMap that is populated by cloning and then used in
194  /// remapinstruction to remap the cloned instructions.
195  ValueToValueMapTy &getVMap() { return VMap; }
196 
197  /// Remaps the cloned instructions using VMap.
198  void remapInstructions() {
199  remapInstructionsInBlocks(ClonedLoopBlocks, VMap);
200  }
201 
202  /// Based on the set of instructions selected for this partition,
203  /// removes the unnecessary ones.
204  void removeUnusedInsts() {
206 
207  for (auto *Block : OrigLoop->getBlocks())
208  for (auto &Inst : *Block)
209  if (!Set.count(&Inst)) {
210  Instruction *NewInst = &Inst;
211  if (!VMap.empty())
212  NewInst = cast<Instruction>(VMap[NewInst]);
213 
214  assert(!isa<BranchInst>(NewInst) &&
215  "Branches are marked used early on");
216  Unused.push_back(NewInst);
217  }
218 
219  // Delete the instructions backwards, as it has a reduced likelihood of
220  // having to update as many def-use and use-def chains.
221  for (auto *Inst : reverse(Unused)) {
222  if (!Inst->use_empty())
223  Inst->replaceAllUsesWith(UndefValue::get(Inst->getType()));
224  Inst->eraseFromParent();
225  }
226  }
227 
228  void print() const {
229  if (DepCycle)
230  dbgs() << " (cycle)\n";
231  for (auto *I : Set)
232  // Prefix with the block name.
233  dbgs() << " " << I->getParent()->getName() << ":" << *I << "\n";
234  }
235 
236  void printBlocks() const {
237  for (auto *BB : getDistributedLoop()->getBlocks())
238  dbgs() << *BB;
239  }
240 
241 private:
242  /// Instructions from OrigLoop selected for this partition.
243  InstructionSet Set;
244 
245  /// Whether this partition contains a dependence cycle.
246  bool DepCycle;
247 
248  /// The original loop.
249  Loop *OrigLoop;
250 
251  /// The cloned loop. If this partition is mapped to the original loop,
252  /// this is null.
253  Loop *ClonedLoop = nullptr;
254 
255  /// The blocks of ClonedLoop including the preheader. If this
256  /// partition is mapped to the original loop, this is empty.
257  SmallVector<BasicBlock *, 8> ClonedLoopBlocks;
258 
259  /// These gets populated once the set of instructions have been
260  /// finalized. If this partition is mapped to the original loop, these are not
261  /// set.
262  ValueToValueMapTy VMap;
263 };
264 
265 /// Holds the set of Partitions. It populates them, merges them and then
266 /// clones the loops.
267 class InstPartitionContainer {
268  using InstToPartitionIdT = DenseMap<Instruction *, int>;
269 
270 public:
271  InstPartitionContainer(Loop *L, LoopInfo *LI, DominatorTree *DT)
272  : L(L), LI(LI), DT(DT) {}
273 
274  /// Returns the number of partitions.
275  unsigned getSize() const { return PartitionContainer.size(); }
276 
277  /// Adds \p Inst into the current partition if that is marked to
278  /// contain cycles. Otherwise start a new partition for it.
279  void addToCyclicPartition(Instruction *Inst) {
280  // If the current partition is non-cyclic. Start a new one.
281  if (PartitionContainer.empty() || !PartitionContainer.back().hasDepCycle())
282  PartitionContainer.emplace_back(Inst, L, /*DepCycle=*/true);
283  else
284  PartitionContainer.back().add(Inst);
285  }
286 
287  /// Adds \p Inst into a partition that is not marked to contain
288  /// dependence cycles.
289  ///
290  // Initially we isolate memory instructions into as many partitions as
291  // possible, then later we may merge them back together.
292  void addToNewNonCyclicPartition(Instruction *Inst) {
293  PartitionContainer.emplace_back(Inst, L);
294  }
295 
296  /// Merges adjacent non-cyclic partitions.
297  ///
298  /// The idea is that we currently only want to isolate the non-vectorizable
299  /// partition. We could later allow more distribution among these partition
300  /// too.
301  void mergeAdjacentNonCyclic() {
302  mergeAdjacentPartitionsIf(
303  [](const InstPartition *P) { return !P->hasDepCycle(); });
304  }
305 
306  /// If a partition contains only conditional stores, we won't vectorize
307  /// it. Try to merge it with a previous cyclic partition.
308  void mergeNonIfConvertible() {
309  mergeAdjacentPartitionsIf([&](const InstPartition *Partition) {
310  if (Partition->hasDepCycle())
311  return true;
312 
313  // Now, check if all stores are conditional in this partition.
314  bool seenStore = false;
315 
316  for (auto *Inst : *Partition)
317  if (isa<StoreInst>(Inst)) {
318  seenStore = true;
320  return false;
321  }
322  return seenStore;
323  });
324  }
325 
326  /// Merges the partitions according to various heuristics.
327  void mergeBeforePopulating() {
328  mergeAdjacentNonCyclic();
330  mergeNonIfConvertible();
331  }
332 
333  /// Merges partitions in order to ensure that no loads are duplicated.
334  ///
335  /// We can't duplicate loads because that could potentially reorder them.
336  /// LoopAccessAnalysis provides dependency information with the context that
337  /// the order of memory operation is preserved.
338  ///
339  /// Return if any partitions were merged.
340  bool mergeToAvoidDuplicatedLoads() {
341  using LoadToPartitionT = DenseMap<Instruction *, InstPartition *>;
342  using ToBeMergedT = EquivalenceClasses<InstPartition *>;
343 
344  LoadToPartitionT LoadToPartition;
345  ToBeMergedT ToBeMerged;
346 
347  // Step through the partitions and create equivalence between partitions
348  // that contain the same load. Also put partitions in between them in the
349  // same equivalence class to avoid reordering of memory operations.
350  for (PartitionContainerT::iterator I = PartitionContainer.begin(),
351  E = PartitionContainer.end();
352  I != E; ++I) {
353  auto *PartI = &*I;
354 
355  // If a load occurs in two partitions PartI and PartJ, merge all
356  // partitions (PartI, PartJ] into PartI.
357  for (Instruction *Inst : *PartI)
358  if (isa<LoadInst>(Inst)) {
359  bool NewElt;
360  LoadToPartitionT::iterator LoadToPart;
361 
362  std::tie(LoadToPart, NewElt) =
363  LoadToPartition.insert(std::make_pair(Inst, PartI));
364  if (!NewElt) {
365  LLVM_DEBUG(dbgs()
366  << "Merging partitions due to this load in multiple "
367  << "partitions: " << PartI << ", " << LoadToPart->second
368  << "\n"
369  << *Inst << "\n");
370 
371  auto PartJ = I;
372  do {
373  --PartJ;
374  ToBeMerged.unionSets(PartI, &*PartJ);
375  } while (&*PartJ != LoadToPart->second);
376  }
377  }
378  }
379  if (ToBeMerged.empty())
380  return false;
381 
382  // Merge the member of an equivalence class into its class leader. This
383  // makes the members empty.
384  for (ToBeMergedT::iterator I = ToBeMerged.begin(), E = ToBeMerged.end();
385  I != E; ++I) {
386  if (!I->isLeader())
387  continue;
388 
389  auto PartI = I->getData();
390  for (auto PartJ : make_range(std::next(ToBeMerged.member_begin(I)),
391  ToBeMerged.member_end())) {
392  PartJ->moveTo(*PartI);
393  }
394  }
395 
396  // Remove the empty partitions.
397  PartitionContainer.remove_if(
398  [](const InstPartition &P) { return P.empty(); });
399 
400  return true;
401  }
402 
403  /// Sets up the mapping between instructions to partitions. If the
404  /// instruction is duplicated across multiple partitions, set the entry to -1.
405  void setupPartitionIdOnInstructions() {
406  int PartitionID = 0;
407  for (const auto &Partition : PartitionContainer) {
408  for (Instruction *Inst : Partition) {
409  bool NewElt;
410  InstToPartitionIdT::iterator Iter;
411 
412  std::tie(Iter, NewElt) =
413  InstToPartitionId.insert(std::make_pair(Inst, PartitionID));
414  if (!NewElt)
415  Iter->second = -1;
416  }
417  ++PartitionID;
418  }
419  }
420 
421  /// Populates the partition with everything that the seeding
422  /// instructions require.
423  void populateUsedSet() {
424  for (auto &P : PartitionContainer)
425  P.populateUsedSet();
426  }
427 
428  /// This performs the main chunk of the work of cloning the loops for
429  /// the partitions.
430  void cloneLoops() {
431  BasicBlock *OrigPH = L->getLoopPreheader();
432  // At this point the predecessor of the preheader is either the memcheck
433  // block or the top part of the original preheader.
434  BasicBlock *Pred = OrigPH->getSinglePredecessor();
435  assert(Pred && "Preheader does not have a single predecessor");
436  BasicBlock *ExitBlock = L->getExitBlock();
437  assert(ExitBlock && "No single exit block");
438  Loop *NewLoop;
439 
440  assert(!PartitionContainer.empty() && "at least two partitions expected");
441  // We're cloning the preheader along with the loop so we already made sure
442  // it was empty.
443  assert(&*OrigPH->begin() == OrigPH->getTerminator() &&
444  "preheader not empty");
445 
446  // Create a loop for each partition except the last. Clone the original
447  // loop before PH along with adding a preheader for the cloned loop. Then
448  // update PH to point to the newly added preheader.
449  BasicBlock *TopPH = OrigPH;
450  unsigned Index = getSize() - 1;
451  for (auto I = std::next(PartitionContainer.rbegin()),
452  E = PartitionContainer.rend();
453  I != E; ++I, --Index, TopPH = NewLoop->getLoopPreheader()) {
454  auto *Part = &*I;
455 
456  NewLoop = Part->cloneLoopWithPreheader(TopPH, Pred, Index, LI, DT);
457 
458  Part->getVMap()[ExitBlock] = TopPH;
459  Part->remapInstructions();
460  }
461  Pred->getTerminator()->replaceUsesOfWith(OrigPH, TopPH);
462 
463  // Now go in forward order and update the immediate dominator for the
464  // preheaders with the exiting block of the previous loop. Dominance
465  // within the loop is updated in cloneLoopWithPreheader.
466  for (auto Curr = PartitionContainer.cbegin(),
467  Next = std::next(PartitionContainer.cbegin()),
468  E = PartitionContainer.cend();
469  Next != E; ++Curr, ++Next)
471  Next->getDistributedLoop()->getLoopPreheader(),
472  Curr->getDistributedLoop()->getExitingBlock());
473  }
474 
475  /// Removes the dead instructions from the cloned loops.
476  void removeUnusedInsts() {
477  for (auto &Partition : PartitionContainer)
478  Partition.removeUnusedInsts();
479  }
480 
481  /// For each memory pointer, it computes the partitionId the pointer is
482  /// used in.
483  ///
484  /// This returns an array of int where the I-th entry corresponds to I-th
485  /// entry in LAI.getRuntimePointerCheck(). If the pointer is used in multiple
486  /// partitions its entry is set to -1.
488  computePartitionSetForPointers(const LoopAccessInfo &LAI) {
489  const RuntimePointerChecking *RtPtrCheck = LAI.getRuntimePointerChecking();
490 
491  unsigned N = RtPtrCheck->Pointers.size();
492  SmallVector<int, 8> PtrToPartitions(N);
493  for (unsigned I = 0; I < N; ++I) {
494  Value *Ptr = RtPtrCheck->Pointers[I].PointerValue;
495  auto Instructions =
496  LAI.getInstructionsForAccess(Ptr, RtPtrCheck->Pointers[I].IsWritePtr);
497 
498  int &Partition = PtrToPartitions[I];
499  // First set it to uninitialized.
500  Partition = -2;
501  for (Instruction *Inst : Instructions) {
502  // Note that this could be -1 if Inst is duplicated across multiple
503  // partitions.
504  int ThisPartition = this->InstToPartitionId[Inst];
505  if (Partition == -2)
506  Partition = ThisPartition;
507  // -1 means belonging to multiple partitions.
508  else if (Partition == -1)
509  break;
510  else if (Partition != (int)ThisPartition)
511  Partition = -1;
512  }
513  assert(Partition != -2 && "Pointer not belonging to any partition");
514  }
515 
516  return PtrToPartitions;
517  }
518 
519  void print(raw_ostream &OS) const {
520  unsigned Index = 0;
521  for (const auto &P : PartitionContainer) {
522  OS << "Partition " << Index++ << " (" << &P << "):\n";
523  P.print();
524  }
525  }
526 
527  void dump() const { print(dbgs()); }
528 
529 #ifndef NDEBUG
530  friend raw_ostream &operator<<(raw_ostream &OS,
531  const InstPartitionContainer &Partitions) {
532  Partitions.print(OS);
533  return OS;
534  }
535 #endif
536 
537  void printBlocks() const {
538  unsigned Index = 0;
539  for (const auto &P : PartitionContainer) {
540  dbgs() << "\nPartition " << Index++ << " (" << &P << "):\n";
541  P.printBlocks();
542  }
543  }
544 
545 private:
546  using PartitionContainerT = std::list<InstPartition>;
547 
548  /// List of partitions.
549  PartitionContainerT PartitionContainer;
550 
551  /// Mapping from Instruction to partition Id. If the instruction
552  /// belongs to multiple partitions the entry contains -1.
553  InstToPartitionIdT InstToPartitionId;
554 
555  Loop *L;
556  LoopInfo *LI;
557  DominatorTree *DT;
558 
559  /// The control structure to merge adjacent partitions if both satisfy
560  /// the \p Predicate.
561  template <class UnaryPredicate>
562  void mergeAdjacentPartitionsIf(UnaryPredicate Predicate) {
563  InstPartition *PrevMatch = nullptr;
564  for (auto I = PartitionContainer.begin(); I != PartitionContainer.end();) {
565  auto DoesMatch = Predicate(&*I);
566  if (PrevMatch == nullptr && DoesMatch) {
567  PrevMatch = &*I;
568  ++I;
569  } else if (PrevMatch != nullptr && DoesMatch) {
570  I->moveTo(*PrevMatch);
571  I = PartitionContainer.erase(I);
572  } else {
573  PrevMatch = nullptr;
574  ++I;
575  }
576  }
577  }
578 };
579 
580 /// For each memory instruction, this class maintains difference of the
581 /// number of unsafe dependences that start out from this instruction minus
582 /// those that end here.
583 ///
584 /// By traversing the memory instructions in program order and accumulating this
585 /// number, we know whether any unsafe dependence crosses over a program point.
586 class MemoryInstructionDependences {
588 
589 public:
590  struct Entry {
591  Instruction *Inst;
592  unsigned NumUnsafeDependencesStartOrEnd = 0;
593 
594  Entry(Instruction *Inst) : Inst(Inst) {}
595  };
596 
597  using AccessesType = SmallVector<Entry, 8>;
598 
599  AccessesType::const_iterator begin() const { return Accesses.begin(); }
600  AccessesType::const_iterator end() const { return Accesses.end(); }
601 
602  MemoryInstructionDependences(
603  const SmallVectorImpl<Instruction *> &Instructions,
604  const SmallVectorImpl<Dependence> &Dependences) {
605  Accesses.append(Instructions.begin(), Instructions.end());
606 
607  LLVM_DEBUG(dbgs() << "Backward dependences:\n");
608  for (auto &Dep : Dependences)
609  if (Dep.isPossiblyBackward()) {
610  // Note that the designations source and destination follow the program
611  // order, i.e. source is always first. (The direction is given by the
612  // DepType.)
613  ++Accesses[Dep.Source].NumUnsafeDependencesStartOrEnd;
614  --Accesses[Dep.Destination].NumUnsafeDependencesStartOrEnd;
615 
616  LLVM_DEBUG(Dep.print(dbgs(), 2, Instructions));
617  }
618  }
619 
620 private:
621  AccessesType Accesses;
622 };
623 
624 /// The actual class performing the per-loop work.
625 class LoopDistributeForLoop {
626 public:
627  LoopDistributeForLoop(Loop *L, Function *F, LoopInfo *LI, DominatorTree *DT,
629  : L(L), F(F), LI(LI), DT(DT), SE(SE), ORE(ORE) {
630  setForced();
631  }
632 
633  /// Try to distribute an inner-most loop.
634  bool processLoop(std::function<const LoopAccessInfo &(Loop &)> &GetLAA) {
635  assert(L->empty() && "Only process inner loops.");
636 
637  LLVM_DEBUG(dbgs() << "\nLDist: In \""
638  << L->getHeader()->getParent()->getName()
639  << "\" checking " << *L << "\n");
640 
641  if (!L->getExitBlock())
642  return fail("MultipleExitBlocks", "multiple exit blocks");
643  if (!L->isLoopSimplifyForm())
644  return fail("NotLoopSimplifyForm",
645  "loop is not in loop-simplify form");
646 
647  BasicBlock *PH = L->getLoopPreheader();
648 
649  // LAA will check that we only have a single exiting block.
650  LAI = &GetLAA(*L);
651 
652  // Currently, we only distribute to isolate the part of the loop with
653  // dependence cycles to enable partial vectorization.
654  if (LAI->canVectorizeMemory())
655  return fail("MemOpsCanBeVectorized",
656  "memory operations are safe for vectorization");
657 
658  auto *Dependences = LAI->getDepChecker().getDependences();
659  if (!Dependences || Dependences->empty())
660  return fail("NoUnsafeDeps", "no unsafe dependences to isolate");
661 
662  InstPartitionContainer Partitions(L, LI, DT);
663 
664  // First, go through each memory operation and assign them to consecutive
665  // partitions (the order of partitions follows program order). Put those
666  // with unsafe dependences into "cyclic" partition otherwise put each store
667  // in its own "non-cyclic" partition (we'll merge these later).
668  //
669  // Note that a memory operation (e.g. Load2 below) at a program point that
670  // has an unsafe dependence (Store3->Load1) spanning over it must be
671  // included in the same cyclic partition as the dependent operations. This
672  // is to preserve the original program order after distribution. E.g.:
673  //
674  // NumUnsafeDependencesStartOrEnd NumUnsafeDependencesActive
675  // Load1 -. 1 0->1
676  // Load2 | /Unsafe/ 0 1
677  // Store3 -' -1 1->0
678  // Load4 0 0
679  //
680  // NumUnsafeDependencesActive > 0 indicates this situation and in this case
681  // we just keep assigning to the same cyclic partition until
682  // NumUnsafeDependencesActive reaches 0.
683  const MemoryDepChecker &DepChecker = LAI->getDepChecker();
684  MemoryInstructionDependences MID(DepChecker.getMemoryInstructions(),
685  *Dependences);
686 
687  int NumUnsafeDependencesActive = 0;
688  for (auto &InstDep : MID) {
689  Instruction *I = InstDep.Inst;
690  // We update NumUnsafeDependencesActive post-instruction, catch the
691  // start of a dependence directly via NumUnsafeDependencesStartOrEnd.
692  if (NumUnsafeDependencesActive ||
693  InstDep.NumUnsafeDependencesStartOrEnd > 0)
694  Partitions.addToCyclicPartition(I);
695  else
696  Partitions.addToNewNonCyclicPartition(I);
697  NumUnsafeDependencesActive += InstDep.NumUnsafeDependencesStartOrEnd;
698  assert(NumUnsafeDependencesActive >= 0 &&
699  "Negative number of dependences active");
700  }
701 
702  // Add partitions for values used outside. These partitions can be out of
703  // order from the original program order. This is OK because if the
704  // partition uses a load we will merge this partition with the original
705  // partition of the load that we set up in the previous loop (see
706  // mergeToAvoidDuplicatedLoads).
707  auto DefsUsedOutside = findDefsUsedOutsideOfLoop(L);
708  for (auto *Inst : DefsUsedOutside)
709  Partitions.addToNewNonCyclicPartition(Inst);
710 
711  LLVM_DEBUG(dbgs() << "Seeded partitions:\n" << Partitions);
712  if (Partitions.getSize() < 2)
713  return fail("CantIsolateUnsafeDeps",
714  "cannot isolate unsafe dependencies");
715 
716  // Run the merge heuristics: Merge non-cyclic adjacent partitions since we
717  // should be able to vectorize these together.
718  Partitions.mergeBeforePopulating();
719  LLVM_DEBUG(dbgs() << "\nMerged partitions:\n" << Partitions);
720  if (Partitions.getSize() < 2)
721  return fail("CantIsolateUnsafeDeps",
722  "cannot isolate unsafe dependencies");
723 
724  // Now, populate the partitions with non-memory operations.
725  Partitions.populateUsedSet();
726  LLVM_DEBUG(dbgs() << "\nPopulated partitions:\n" << Partitions);
727 
728  // In order to preserve original lexical order for loads, keep them in the
729  // partition that we set up in the MemoryInstructionDependences loop.
730  if (Partitions.mergeToAvoidDuplicatedLoads()) {
731  LLVM_DEBUG(dbgs() << "\nPartitions merged to ensure unique loads:\n"
732  << Partitions);
733  if (Partitions.getSize() < 2)
734  return fail("CantIsolateUnsafeDeps",
735  "cannot isolate unsafe dependencies");
736  }
737 
738  // Don't distribute the loop if we need too many SCEV run-time checks.
739  const SCEVUnionPredicate &Pred = LAI->getPSE().getUnionPredicate();
740  if (Pred.getComplexity() > (IsForced.getValueOr(false)
743  return fail("TooManySCEVRuntimeChecks",
744  "too many SCEV run-time checks needed.\n");
745 
746  LLVM_DEBUG(dbgs() << "\nDistributing loop: " << *L << "\n");
747  // We're done forming the partitions set up the reverse mapping from
748  // instructions to partitions.
749  Partitions.setupPartitionIdOnInstructions();
750 
751  // To keep things simple have an empty preheader before we version or clone
752  // the loop. (Also split if this has no predecessor, i.e. entry, because we
753  // rely on PH having a predecessor.)
754  if (!PH->getSinglePredecessor() || &*PH->begin() != PH->getTerminator())
755  SplitBlock(PH, PH->getTerminator(), DT, LI);
756 
757  // If we need run-time checks, version the loop now.
758  auto PtrToPartition = Partitions.computePartitionSetForPointers(*LAI);
759  const auto *RtPtrChecking = LAI->getRuntimePointerChecking();
760  const auto &AllChecks = RtPtrChecking->getChecks();
761  auto Checks = includeOnlyCrossPartitionChecks(AllChecks, PtrToPartition,
762  RtPtrChecking);
763 
764  if (!Pred.isAlwaysTrue() || !Checks.empty()) {
765  LLVM_DEBUG(dbgs() << "\nPointers:\n");
767  LoopVersioning LVer(*LAI, L, LI, DT, SE, false);
768  LVer.setAliasChecks(std::move(Checks));
769  LVer.setSCEVChecks(LAI->getPSE().getUnionPredicate());
770  LVer.versionLoop(DefsUsedOutside);
771  LVer.annotateLoopWithNoAlias();
772  }
773 
774  // Create identical copies of the original loop for each partition and hook
775  // them up sequentially.
776  Partitions.cloneLoops();
777 
778  // Now, we remove the instruction from each loop that don't belong to that
779  // partition.
780  Partitions.removeUnusedInsts();
781  LLVM_DEBUG(dbgs() << "\nAfter removing unused Instrs:\n");
782  LLVM_DEBUG(Partitions.printBlocks());
783 
784  if (LDistVerify) {
785  LI->verify(*DT);
787  }
788 
789  ++NumLoopsDistributed;
790  // Report the success.
791  ORE->emit([&]() {
792  return OptimizationRemark(LDIST_NAME, "Distribute", L->getStartLoc(),
793  L->getHeader())
794  << "distributed loop";
795  });
796  return true;
797  }
798 
799  /// Provide diagnostics then \return with false.
800  bool fail(StringRef RemarkName, StringRef Message) {
801  LLVMContext &Ctx = F->getContext();
802  bool Forced = isForced().getValueOr(false);
803 
804  LLVM_DEBUG(dbgs() << "Skipping; " << Message << "\n");
805 
806  // With Rpass-missed report that distribution failed.
807  ORE->emit([&]() {
808  return OptimizationRemarkMissed(LDIST_NAME, "NotDistributed",
809  L->getStartLoc(), L->getHeader())
810  << "loop not distributed: use -Rpass-analysis=loop-distribute for "
811  "more "
812  "info";
813  });
814 
815  // With Rpass-analysis report why. This is on by default if distribution
816  // was requested explicitly.
817  ORE->emit(OptimizationRemarkAnalysis(
819  RemarkName, L->getStartLoc(), L->getHeader())
820  << "loop not distributed: " << Message);
821 
822  // Also issue a warning if distribution was requested explicitly but it
823  // failed.
824  if (Forced)
826  *F, L->getStartLoc(), "loop not distributed: failed "
827  "explicitly specified loop distribution"));
828 
829  return false;
830  }
831 
832  /// Return if distribution forced to be enabled/disabled for the loop.
833  ///
834  /// If the optional has a value, it indicates whether distribution was forced
835  /// to be enabled (true) or disabled (false). If the optional has no value
836  /// distribution was not forced either way.
837  const Optional<bool> &isForced() const { return IsForced; }
838 
839 private:
840  /// Filter out checks between pointers from the same partition.
841  ///
842  /// \p PtrToPartition contains the partition number for pointers. Partition
843  /// number -1 means that the pointer is used in multiple partitions. In this
844  /// case we can't safely omit the check.
846  includeOnlyCrossPartitionChecks(
848  const SmallVectorImpl<int> &PtrToPartition,
849  const RuntimePointerChecking *RtPtrChecking) {
851 
852  copy_if(AllChecks, std::back_inserter(Checks),
854  for (unsigned PtrIdx1 : Check.first->Members)
855  for (unsigned PtrIdx2 : Check.second->Members)
856  // Only include this check if there is a pair of pointers
857  // that require checking and the pointers fall into
858  // separate partitions.
859  //
860  // (Note that we already know at this point that the two
861  // pointer groups need checking but it doesn't follow
862  // that each pair of pointers within the two groups need
863  // checking as well.
864  //
865  // In other words we don't want to include a check just
866  // because there is a pair of pointers between the two
867  // pointer groups that require checks and a different
868  // pair whose pointers fall into different partitions.)
869  if (RtPtrChecking->needsChecking(PtrIdx1, PtrIdx2) &&
871  PtrToPartition, PtrIdx1, PtrIdx2))
872  return true;
873  return false;
874  });
875 
876  return Checks;
877  }
878 
879  /// Check whether the loop metadata is forcing distribution to be
880  /// enabled/disabled.
881  void setForced() {
883  findStringMetadataForLoop(L, "llvm.loop.distribute.enable");
884  if (!Value)
885  return;
886 
887  const MDOperand *Op = *Value;
888  assert(Op && mdconst::hasa<ConstantInt>(*Op) && "invalid metadata");
889  IsForced = mdconst::extract<ConstantInt>(*Op)->getZExtValue();
890  }
891 
892  Loop *L;
893  Function *F;
894 
895  // Analyses used.
896  LoopInfo *LI;
897  const LoopAccessInfo *LAI = nullptr;
898  DominatorTree *DT;
899  ScalarEvolution *SE;
901 
902  /// Indicates whether distribution is forced to be enabled/disabled for
903  /// the loop.
904  ///
905  /// If the optional has a value, it indicates whether distribution was forced
906  /// to be enabled (true) or disabled (false). If the optional has no value
907  /// distribution was not forced either way.
908  Optional<bool> IsForced;
909 };
910 
911 } // end anonymous namespace
912 
913 /// Shared implementation between new and old PMs.
914 static bool runImpl(Function &F, LoopInfo *LI, DominatorTree *DT,
916  std::function<const LoopAccessInfo &(Loop &)> &GetLAA) {
917  // Build up a worklist of inner-loops to vectorize. This is necessary as the
918  // act of distributing a loop creates new loops and can invalidate iterators
919  // across the loops.
920  SmallVector<Loop *, 8> Worklist;
921 
922  for (Loop *TopLevelLoop : *LI)
923  for (Loop *L : depth_first(TopLevelLoop))
924  // We only handle inner-most loops.
925  if (L->empty())
926  Worklist.push_back(L);
927 
928  // Now walk the identified inner loops.
929  bool Changed = false;
930  for (Loop *L : Worklist) {
931  LoopDistributeForLoop LDL(L, &F, LI, DT, SE, ORE);
932 
933  // If distribution was forced for the specific loop to be
934  // enabled/disabled, follow that. Otherwise use the global flag.
935  if (LDL.isForced().getValueOr(EnableLoopDistribute))
936  Changed |= LDL.processLoop(GetLAA);
937  }
938 
939  // Process each loop nest in the function.
940  return Changed;
941 }
942 
943 namespace {
944 
945 /// The pass class.
946 class LoopDistributeLegacy : public FunctionPass {
947 public:
948  static char ID;
949 
950  LoopDistributeLegacy() : FunctionPass(ID) {
951  // The default is set by the caller.
953  }
954 
955  bool runOnFunction(Function &F) override {
956  if (skipFunction(F))
957  return false;
958 
959  auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
960  auto *LAA = &getAnalysis<LoopAccessLegacyAnalysis>();
961  auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
962  auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
963  auto *ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
964  std::function<const LoopAccessInfo &(Loop &)> GetLAA =
965  [&](Loop &L) -> const LoopAccessInfo & { return LAA->getInfo(&L); };
966 
967  return runImpl(F, LI, DT, SE, ORE, GetLAA);
968  }
969 
970  void getAnalysisUsage(AnalysisUsage &AU) const override {
979  }
980 };
981 
982 } // end anonymous namespace
983 
986  auto &LI = AM.getResult<LoopAnalysis>(F);
987  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
988  auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
990 
991  // We don't directly need these analyses but they're required for loop
992  // analyses so provide them below.
993  auto &AA = AM.getResult<AAManager>(F);
994  auto &AC = AM.getResult<AssumptionAnalysis>(F);
995  auto &TTI = AM.getResult<TargetIRAnalysis>(F);
996  auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
997 
998  auto &LAM = AM.getResult<LoopAnalysisManagerFunctionProxy>(F).getManager();
999  std::function<const LoopAccessInfo &(Loop &)> GetLAA =
1000  [&](Loop &L) -> const LoopAccessInfo & {
1001  LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE, TLI, TTI, nullptr};
1002  return LAM.getResult<LoopAccessAnalysis>(L, AR);
1003  };
1004 
1005  bool Changed = runImpl(F, &LI, &DT, &SE, &ORE, GetLAA);
1006  if (!Changed)
1007  return PreservedAnalyses::all();
1008  PreservedAnalyses PA;
1009  PA.preserve<LoopAnalysis>();
1011  PA.preserve<GlobalsAA>();
1012  return PA;
1013 }
1014 
1016 
1017 static const char ldist_name[] = "Loop Distribution";
1018 
1019 INITIALIZE_PASS_BEGIN(LoopDistributeLegacy, LDIST_NAME, ldist_name, false,
1020  false)
1026 INITIALIZE_PASS_END(LoopDistributeLegacy, LDIST_NAME, ldist_name, false, false)
1027 
1028 FunctionPass *llvm::createLoopDistributePass() { return new LoopDistributeLegacy(); }
Legacy wrapper pass to provide the GlobalsAAResult object.
static bool Check(DecodeStatus &Out, DecodeStatus In)
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:250
void printChecks(raw_ostream &OS, const SmallVectorImpl< PointerCheck > &Checks, unsigned Depth=0) const
Print Checks.
Tracking metadata reference owned by Metadata.
Definition: Metadata.h:709
const MemoryDepChecker & getDepChecker() const
the Memory Dependence Checker which can determine the loop-independent and loop-carried dependences b...
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Diagnostic information for missed-optimization remarks.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
#define LDIST_NAME
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
Definition: Path.cpp:241
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:687
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
BasicBlock * SplitBlock(BasicBlock *Old, Instruction *SplitPt, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr)
Split the specified block at the specified instruction - everything before SplitPt stays in Old and e...
This is the interface for a simple mod/ref and alias analysis over globals.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
OutputIt copy_if(R &&Range, OutputIt Out, UnaryPredicate P)
Provide wrappers to std::copy_if which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:955
static cl::opt< unsigned > PragmaDistributeSCEVCheckThreshold("loop-distribute-scev-check-threshold-with-pragma", cl::init(128), cl::Hidden, cl::desc("The maximum number of SCEV checks allowed for Loop " "Distribution for loop marked with #pragma loop distribute(enable)"))
const RuntimePointerChecking * getRuntimePointerChecking() const
This provides a very simple, boring adaptor for a begin and end iterator into a range type...
The main scalar evolution driver.
This file contains the declarations for metadata subclasses.
static cl::opt< bool > LDistVerify("loop-distribute-verify", cl::Hidden, cl::desc("Turn on DominatorTree and LoopInfo verification " "after Loop Distribution"), cl::init(false))
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:106
Analysis pass providing the TargetTransformInfo.
STATISTIC(NumFunctions, "Total number of functions")
SmallVector< Instruction *, 4 > getInstructionsForAccess(Value *Ptr, bool isWrite) const
Return the list of instructions that use Ptr to read or write memory.
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:225
F(f)
Checks memory dependences among accesses to the same underlying object to determine whether there vec...
void verify(const DominatorTreeBase< BlockT, false > &DomTree) const
Definition: LoopInfoImpl.h:633
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:264
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:51
bool verify(VerificationLevel VL=VerificationLevel::Full) const
verify - checks if the tree is correct.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
Diagnostic information for optimization failures.
static bool arePointersInSamePartition(const SmallVectorImpl< int > &PtrToPartition, unsigned PtrIdx1, unsigned PtrIdx2)
Check if pointers are in the same partition.
Diagnostic information for optimization analysis remarks.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:42
friend const_iterator begin(StringRef path, Style style)
Get begin iterator over path.
Definition: Path.cpp:241
Analysis pass that exposes the LoopInfo for a function.
Definition: LoopInfo.h:939
SmallVector< Instruction *, 8 > findDefsUsedOutsideOfLoop(Loop *L)
Returns the instructions that use values defined in the loop.
Definition: LoopUtils.cpp:1208
BlockT * getHeader() const
Definition: LoopInfo.h:100
auto reverse(ContainerTy &&C, typename std::enable_if< has_rbegin< ContainerTy >::value >::type *=nullptr) -> decltype(make_range(C.rbegin(), C.rend()))
Definition: STLExtras.h:237
const SmallVectorImpl< Instruction * > & getMemoryInstructions() const
The vector of memory access instructions.
static cl::opt< bool > DistributeNonIfConvertible("loop-distribute-non-if-convertible", cl::Hidden, cl::desc("Whether to distribute into a loop that may not be " "if-convertible by the loop vectorizer"), cl::init(false))
This header provides classes for managing per-loop analyses.
Fast - This calling convention attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:43
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:142
void replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Definition: User.cpp:21
static bool runImpl(Function &F, LoopInfo *LI, DominatorTree *DT, ScalarEvolution *SE, OptimizationRemarkEmitter *ORE, std::function< const LoopAccessInfo &(Loop &)> &GetLAA)
Shared implementation between new and old PMs.
static bool runOnFunction(Function &F, bool PostInlining)
#define P(N)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:410
friend const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:250
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:153
* if(!EatIfPresent(lltok::kw_thread_local)) return false
ParseOptionalThreadLocal := /*empty.
bool needsChecking(const CheckingPtrGroup &M, const CheckingPtrGroup &N) const
Decide if we need to add a check between two groups of pointers, according to needsChecking.
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:235
LLVM Basic Block Representation.
Definition: BasicBlock.h:59
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:69
void changeImmediateDominator(DomTreeNodeBase< NodeT > *N, DomTreeNodeBase< NodeT > *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node&#39;s...
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This analysis provides dependence information for the memory accesses of a loop.
INITIALIZE_PASS_BEGIN(LoopDistributeLegacy, LDIST_NAME, ldist_name, false, false) FunctionPass *llvm
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator begin()
Definition: SmallVector.h:116
This file contains the declarations for the subclasses of Constant, which represent the different fla...
A manager for alias analyses.
Diagnostic information for applied optimization remarks.
EquivalenceClasses - This represents a collection of equivalence classes and supports three efficient...
Represent the analysis usage information of a pass.
Analysis pass providing a never-invalidated alias analysis result.
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:285
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
const PredicatedScalarEvolution & getPSE() const
Used to add runtime SCEV checks.
BlockT * getExitBlock() const
If getExitBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:76
static UndefValue * get(Type *T)
Static factory methods - Return an &#39;undef&#39; object of the specified type.
Definition: Constants.cpp:1382
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:159
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
Definition: LoopInfo.cpp:346
std::pair< const CheckingPtrGroup *, const CheckingPtrGroup * > PointerCheck
A memcheck which made up of a pair of grouped pointers.
Optional< const MDOperand * > findStringMetadataForLoop(Loop *TheLoop, StringRef Name)
Find string metadata for loop.
Definition: LoopUtils.cpp:1289
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
static cl::opt< unsigned > DistributeSCEVCheckThreshold("loop-distribute-scev-check-threshold", cl::init(8), cl::Hidden, cl::desc("The maximum number of SCEV checks allowed for Loop " "Distribution"))
A function analysis which provides an AssumptionCache.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
const SmallVectorImpl< Dependence > * getDependences() const
Returns the memory dependences.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:418
bool isAlwaysTrue() const override
Implementation of the SCEVPredicate interface.
const SCEVUnionPredicate & getUnionPredicate() const
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:862
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
Definition: PPCPredicates.h:27
void initializeLoopDistributeLegacyPass(PassRegistry &)
Drive the analysis of memory accesses in the loop.
This class emits a version of the loop where run-time checks ensure that may-alias pointers can&#39;t ove...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:133
static const char ldist_name[]
Holds information about the memory runtime legality checks to verify that a group of pointers do not ...
Analysis pass that exposes the ScalarEvolution for a function.
unsigned getComplexity() const override
We estimate the complexity of a union predicate as the size number of predicates in the union...
bool isLoopSimplifyForm() const
Return true if the Loop is in the form that the LoopSimplify form transforms loops to...
Definition: LoopInfo.cpp:192
This analysis provides dependence information for the memory accesses of a loop.
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator end()
Definition: SmallVector.h:120
Dependece between memory access instructions.
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:445
StringRef getName() const
Return a constant reference to the value&#39;s name.
Definition: Value.cpp:224
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:108
#define I(x, y, z)
Definition: MD5.cpp:58
#define N
SmallVector< PointerInfo, 2 > Pointers
Information about the pointers that may require checking.
iterator_range< value_op_iterator > operand_values()
Definition: User.h:262
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
void preserve()
Mark an analysis as preserved.
Definition: PassManager.h:174
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
raw_ostream & operator<<(raw_ostream &OS, const APInt &I)
Definition: APInt.h:2023
bool empty() const
Definition: LoopInfo.h:146
Analysis pass providing the TargetLibraryInfo.
iterator_range< df_iterator< T > > depth_first(const T &G)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This class represents a composition of other SCEV predicates, and is the class that most clients will...
static cl::opt< bool > EnableLoopDistribute("enable-loop-distribute", cl::Hidden, cl::desc("Enable the new, experimental LoopDistribution Pass"), cl::init(false))
LLVM Value Representation.
Definition: Value.h:73
OptimizationRemarkEmitter legacy analysis pass.
static bool blockNeedsPredication(BasicBlock *BB, Loop *TheLoop, DominatorTree *DT)
Return true if the block BB needs to be predicated in order for the loop to be vectorized.
Loop * cloneLoopWithPreheader(BasicBlock *Before, BasicBlock *LoopDomBB, Loop *OrigLoop, ValueToValueMapTy &VMap, const Twine &NameSuffix, LoopInfo *LI, DominatorTree *DT, SmallVectorImpl< BasicBlock *> &Blocks)
Clones a loop OrigLoop.
This class implements an extremely fast bulk output stream that can only output to a stream...
Definition: raw_ostream.h:44
The legacy pass manager&#39;s analysis pass to compute loop information.
Definition: LoopInfo.h:964
print Print MemDeps of function
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:49
A container for analyses that lazily runs them and caches their results.
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:254
This pass exposes codegen information to IR-level passes.
const TerminatorInst * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:138
This header defines various interfaces for pass management in LLVM.
const SmallVector< PointerCheck, 4 > & getChecks() const
Returns the checks that generateChecks created.
Dependence - This class represents a dependence between two memory memory references in a function...
#define LLVM_DEBUG(X)
Definition: Debug.h:119
for(unsigned i=Desc.getNumOperands(), e=OldMI.getNumOperands();i !=e;++i)
The optimization diagnostic interface.
bool canVectorizeMemory() const
Return true we can analyze the memory accesses in the loop and there are no memory dependence cycles...
static void fail(const SDLoc &DL, SelectionDAG &DAG, const Twine &Msg)
FunctionPass * createLoopDistributePass()
const BasicBlock * getParent() const
Definition: Instruction.h:67
void remapInstructionsInBlocks(const SmallVectorImpl< BasicBlock *> &Blocks, ValueToValueMapTy &VMap)
Remaps instructions in Blocks using the mapping in VMap.
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Definition: PassManager.h:946