LLVM  6.0.0svn
LoopDistribute.cpp
Go to the documentation of this file.
1 //===- LoopDistribute.cpp - Loop Distribution Pass ------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the Loop Distribution Pass. Its main focus is to
11 // distribute loops that cannot be vectorized due to dependence cycles. It
12 // tries to isolate the offending dependences into a new loop allowing
13 // vectorization of the remaining parts.
14 //
15 // For dependence analysis, the pass uses the LoopVectorizer's
16 // LoopAccessAnalysis. Because this analysis presumes no change in the order of
17 // memory operations, special care is taken to preserve the lexical order of
18 // these operations.
19 //
20 // Similarly to the Vectorizer, the pass also supports loop versioning to
21 // run-time disambiguate potentially overlapping arrays.
22 //
23 //===----------------------------------------------------------------------===//
24 
26 #include "llvm/ADT/DenseMap.h"
29 #include "llvm/ADT/Optional.h"
30 #include "llvm/ADT/STLExtras.h"
31 #include "llvm/ADT/SmallPtrSet.h"
32 #include "llvm/ADT/SmallVector.h"
33 #include "llvm/ADT/Statistic.h"
34 #include "llvm/ADT/StringRef.h"
35 #include "llvm/ADT/Twine.h"
42 #include "llvm/Analysis/LoopInfo.h"
47 #include "llvm/IR/BasicBlock.h"
48 #include "llvm/IR/Constants.h"
49 #include "llvm/IR/DiagnosticInfo.h"
50 #include "llvm/IR/Dominators.h"
51 #include "llvm/IR/Function.h"
52 #include "llvm/IR/InstrTypes.h"
53 #include "llvm/IR/Instruction.h"
54 #include "llvm/IR/Instructions.h"
55 #include "llvm/IR/LLVMContext.h"
56 #include "llvm/IR/Metadata.h"
57 #include "llvm/IR/PassManager.h"
58 #include "llvm/IR/Value.h"
59 #include "llvm/Pass.h"
60 #include "llvm/Support/Casting.h"
62 #include "llvm/Support/Debug.h"
64 #include "llvm/Transforms/Scalar.h"
70 #include <cassert>
71 #include <functional>
72 #include <list>
73 #include <tuple>
74 #include <utility>
75 
76 using namespace llvm;
77 
78 #define LDIST_NAME "loop-distribute"
79 #define DEBUG_TYPE LDIST_NAME
80 
81 static cl::opt<bool>
82  LDistVerify("loop-distribute-verify", cl::Hidden,
83  cl::desc("Turn on DominatorTree and LoopInfo verification "
84  "after Loop Distribution"),
85  cl::init(false));
86 
88  "loop-distribute-non-if-convertible", cl::Hidden,
89  cl::desc("Whether to distribute into a loop that may not be "
90  "if-convertible by the loop vectorizer"),
91  cl::init(false));
92 
94  "loop-distribute-scev-check-threshold", cl::init(8), cl::Hidden,
95  cl::desc("The maximum number of SCEV checks allowed for Loop "
96  "Distribution"));
97 
99  "loop-distribute-scev-check-threshold-with-pragma", cl::init(128),
100  cl::Hidden,
101  cl::desc(
102  "The maximum number of SCEV checks allowed for Loop "
103  "Distribution for loop marked with #pragma loop distribute(enable)"));
104 
106  "enable-loop-distribute", cl::Hidden,
107  cl::desc("Enable the new, experimental LoopDistribution Pass"),
108  cl::init(false));
109 
110 STATISTIC(NumLoopsDistributed, "Number of loops distributed");
111 
112 namespace {
113 
114 /// \brief Maintains the set of instructions of the loop for a partition before
115 /// cloning. After cloning, it hosts the new loop.
116 class InstPartition {
117  using InstructionSet = SmallPtrSet<Instruction *, 8>;
118 
119 public:
120  InstPartition(Instruction *I, Loop *L, bool DepCycle = false)
121  : DepCycle(DepCycle), OrigLoop(L) {
122  Set.insert(I);
123  }
124 
125  /// \brief Returns whether this partition contains a dependence cycle.
126  bool hasDepCycle() const { return DepCycle; }
127 
128  /// \brief Adds an instruction to this partition.
129  void add(Instruction *I) { Set.insert(I); }
130 
131  /// \brief Collection accessors.
132  InstructionSet::iterator begin() { return Set.begin(); }
133  InstructionSet::iterator end() { return Set.end(); }
134  InstructionSet::const_iterator begin() const { return Set.begin(); }
135  InstructionSet::const_iterator end() const { return Set.end(); }
136  bool empty() const { return Set.empty(); }
137 
138  /// \brief Moves this partition into \p Other. This partition becomes empty
139  /// after this.
140  void moveTo(InstPartition &Other) {
141  Other.Set.insert(Set.begin(), Set.end());
142  Set.clear();
143  Other.DepCycle |= DepCycle;
144  }
145 
146  /// \brief Populates the partition with a transitive closure of all the
147  /// instructions that the seeded instructions dependent on.
148  void populateUsedSet() {
149  // FIXME: We currently don't use control-dependence but simply include all
150  // blocks (possibly empty at the end) and let simplifycfg mostly clean this
151  // up.
152  for (auto *B : OrigLoop->getBlocks())
153  Set.insert(B->getTerminator());
154 
155  // Follow the use-def chains to form a transitive closure of all the
156  // instructions that the originally seeded instructions depend on.
157  SmallVector<Instruction *, 8> Worklist(Set.begin(), Set.end());
158  while (!Worklist.empty()) {
159  Instruction *I = Worklist.pop_back_val();
160  // Insert instructions from the loop that we depend on.
161  for (Value *V : I->operand_values()) {
162  auto *I = dyn_cast<Instruction>(V);
163  if (I && OrigLoop->contains(I->getParent()) && Set.insert(I).second)
164  Worklist.push_back(I);
165  }
166  }
167  }
168 
169  /// \brief Clones the original loop.
170  ///
171  /// Updates LoopInfo and DominatorTree using the information that block \p
172  /// LoopDomBB dominates the loop.
173  Loop *cloneLoopWithPreheader(BasicBlock *InsertBefore, BasicBlock *LoopDomBB,
174  unsigned Index, LoopInfo *LI,
175  DominatorTree *DT) {
176  ClonedLoop = ::cloneLoopWithPreheader(InsertBefore, LoopDomBB, OrigLoop,
177  VMap, Twine(".ldist") + Twine(Index),
178  LI, DT, ClonedLoopBlocks);
179  return ClonedLoop;
180  }
181 
182  /// \brief The cloned loop. If this partition is mapped to the original loop,
183  /// this is null.
184  const Loop *getClonedLoop() const { return ClonedLoop; }
185 
186  /// \brief Returns the loop where this partition ends up after distribution.
187  /// If this partition is mapped to the original loop then use the block from
188  /// the loop.
189  const Loop *getDistributedLoop() const {
190  return ClonedLoop ? ClonedLoop : OrigLoop;
191  }
192 
193  /// \brief The VMap that is populated by cloning and then used in
194  /// remapinstruction to remap the cloned instructions.
195  ValueToValueMapTy &getVMap() { return VMap; }
196 
197  /// \brief Remaps the cloned instructions using VMap.
198  void remapInstructions() {
199  remapInstructionsInBlocks(ClonedLoopBlocks, VMap);
200  }
201 
202  /// \brief Based on the set of instructions selected for this partition,
203  /// removes the unnecessary ones.
204  void removeUnusedInsts() {
206 
207  for (auto *Block : OrigLoop->getBlocks())
208  for (auto &Inst : *Block)
209  if (!Set.count(&Inst)) {
210  Instruction *NewInst = &Inst;
211  if (!VMap.empty())
212  NewInst = cast<Instruction>(VMap[NewInst]);
213 
214  assert(!isa<BranchInst>(NewInst) &&
215  "Branches are marked used early on");
216  Unused.push_back(NewInst);
217  }
218 
219  // Delete the instructions backwards, as it has a reduced likelihood of
220  // having to update as many def-use and use-def chains.
221  for (auto *Inst : reverse(Unused)) {
222  if (!Inst->use_empty())
223  Inst->replaceAllUsesWith(UndefValue::get(Inst->getType()));
224  Inst->eraseFromParent();
225  }
226  }
227 
228  void print() const {
229  if (DepCycle)
230  dbgs() << " (cycle)\n";
231  for (auto *I : Set)
232  // Prefix with the block name.
233  dbgs() << " " << I->getParent()->getName() << ":" << *I << "\n";
234  }
235 
236  void printBlocks() const {
237  for (auto *BB : getDistributedLoop()->getBlocks())
238  dbgs() << *BB;
239  }
240 
241 private:
242  /// \brief Instructions from OrigLoop selected for this partition.
243  InstructionSet Set;
244 
245  /// \brief Whether this partition contains a dependence cycle.
246  bool DepCycle;
247 
248  /// \brief The original loop.
249  Loop *OrigLoop;
250 
251  /// \brief The cloned loop. If this partition is mapped to the original loop,
252  /// this is null.
253  Loop *ClonedLoop = nullptr;
254 
255  /// \brief The blocks of ClonedLoop including the preheader. If this
256  /// partition is mapped to the original loop, this is empty.
257  SmallVector<BasicBlock *, 8> ClonedLoopBlocks;
258 
259  /// \brief These gets populated once the set of instructions have been
260  /// finalized. If this partition is mapped to the original loop, these are not
261  /// set.
262  ValueToValueMapTy VMap;
263 };
264 
265 /// \brief Holds the set of Partitions. It populates them, merges them and then
266 /// clones the loops.
267 class InstPartitionContainer {
268  using InstToPartitionIdT = DenseMap<Instruction *, int>;
269 
270 public:
271  InstPartitionContainer(Loop *L, LoopInfo *LI, DominatorTree *DT)
272  : L(L), LI(LI), DT(DT) {}
273 
274  /// \brief Returns the number of partitions.
275  unsigned getSize() const { return PartitionContainer.size(); }
276 
277  /// \brief Adds \p Inst into the current partition if that is marked to
278  /// contain cycles. Otherwise start a new partition for it.
279  void addToCyclicPartition(Instruction *Inst) {
280  // If the current partition is non-cyclic. Start a new one.
281  if (PartitionContainer.empty() || !PartitionContainer.back().hasDepCycle())
282  PartitionContainer.emplace_back(Inst, L, /*DepCycle=*/true);
283  else
284  PartitionContainer.back().add(Inst);
285  }
286 
287  /// \brief Adds \p Inst into a partition that is not marked to contain
288  /// dependence cycles.
289  ///
290  // Initially we isolate memory instructions into as many partitions as
291  // possible, then later we may merge them back together.
292  void addToNewNonCyclicPartition(Instruction *Inst) {
293  PartitionContainer.emplace_back(Inst, L);
294  }
295 
296  /// \brief Merges adjacent non-cyclic partitions.
297  ///
298  /// The idea is that we currently only want to isolate the non-vectorizable
299  /// partition. We could later allow more distribution among these partition
300  /// too.
301  void mergeAdjacentNonCyclic() {
302  mergeAdjacentPartitionsIf(
303  [](const InstPartition *P) { return !P->hasDepCycle(); });
304  }
305 
306  /// \brief If a partition contains only conditional stores, we won't vectorize
307  /// it. Try to merge it with a previous cyclic partition.
308  void mergeNonIfConvertible() {
309  mergeAdjacentPartitionsIf([&](const InstPartition *Partition) {
310  if (Partition->hasDepCycle())
311  return true;
312 
313  // Now, check if all stores are conditional in this partition.
314  bool seenStore = false;
315 
316  for (auto *Inst : *Partition)
317  if (isa<StoreInst>(Inst)) {
318  seenStore = true;
320  return false;
321  }
322  return seenStore;
323  });
324  }
325 
326  /// \brief Merges the partitions according to various heuristics.
327  void mergeBeforePopulating() {
328  mergeAdjacentNonCyclic();
330  mergeNonIfConvertible();
331  }
332 
333  /// \brief Merges partitions in order to ensure that no loads are duplicated.
334  ///
335  /// We can't duplicate loads because that could potentially reorder them.
336  /// LoopAccessAnalysis provides dependency information with the context that
337  /// the order of memory operation is preserved.
338  ///
339  /// Return if any partitions were merged.
340  bool mergeToAvoidDuplicatedLoads() {
341  using LoadToPartitionT = DenseMap<Instruction *, InstPartition *>;
342  using ToBeMergedT = EquivalenceClasses<InstPartition *>;
343 
344  LoadToPartitionT LoadToPartition;
345  ToBeMergedT ToBeMerged;
346 
347  // Step through the partitions and create equivalence between partitions
348  // that contain the same load. Also put partitions in between them in the
349  // same equivalence class to avoid reordering of memory operations.
350  for (PartitionContainerT::iterator I = PartitionContainer.begin(),
351  E = PartitionContainer.end();
352  I != E; ++I) {
353  auto *PartI = &*I;
354 
355  // If a load occurs in two partitions PartI and PartJ, merge all
356  // partitions (PartI, PartJ] into PartI.
357  for (Instruction *Inst : *PartI)
358  if (isa<LoadInst>(Inst)) {
359  bool NewElt;
360  LoadToPartitionT::iterator LoadToPart;
361 
362  std::tie(LoadToPart, NewElt) =
363  LoadToPartition.insert(std::make_pair(Inst, PartI));
364  if (!NewElt) {
365  DEBUG(dbgs() << "Merging partitions due to this load in multiple "
366  << "partitions: " << PartI << ", "
367  << LoadToPart->second << "\n" << *Inst << "\n");
368 
369  auto PartJ = I;
370  do {
371  --PartJ;
372  ToBeMerged.unionSets(PartI, &*PartJ);
373  } while (&*PartJ != LoadToPart->second);
374  }
375  }
376  }
377  if (ToBeMerged.empty())
378  return false;
379 
380  // Merge the member of an equivalence class into its class leader. This
381  // makes the members empty.
382  for (ToBeMergedT::iterator I = ToBeMerged.begin(), E = ToBeMerged.end();
383  I != E; ++I) {
384  if (!I->isLeader())
385  continue;
386 
387  auto PartI = I->getData();
388  for (auto PartJ : make_range(std::next(ToBeMerged.member_begin(I)),
389  ToBeMerged.member_end())) {
390  PartJ->moveTo(*PartI);
391  }
392  }
393 
394  // Remove the empty partitions.
395  PartitionContainer.remove_if(
396  [](const InstPartition &P) { return P.empty(); });
397 
398  return true;
399  }
400 
401  /// \brief Sets up the mapping between instructions to partitions. If the
402  /// instruction is duplicated across multiple partitions, set the entry to -1.
403  void setupPartitionIdOnInstructions() {
404  int PartitionID = 0;
405  for (const auto &Partition : PartitionContainer) {
406  for (Instruction *Inst : Partition) {
407  bool NewElt;
408  InstToPartitionIdT::iterator Iter;
409 
410  std::tie(Iter, NewElt) =
411  InstToPartitionId.insert(std::make_pair(Inst, PartitionID));
412  if (!NewElt)
413  Iter->second = -1;
414  }
415  ++PartitionID;
416  }
417  }
418 
419  /// \brief Populates the partition with everything that the seeding
420  /// instructions require.
421  void populateUsedSet() {
422  for (auto &P : PartitionContainer)
423  P.populateUsedSet();
424  }
425 
426  /// \brief This performs the main chunk of the work of cloning the loops for
427  /// the partitions.
428  void cloneLoops() {
429  BasicBlock *OrigPH = L->getLoopPreheader();
430  // At this point the predecessor of the preheader is either the memcheck
431  // block or the top part of the original preheader.
432  BasicBlock *Pred = OrigPH->getSinglePredecessor();
433  assert(Pred && "Preheader does not have a single predecessor");
434  BasicBlock *ExitBlock = L->getExitBlock();
435  assert(ExitBlock && "No single exit block");
436  Loop *NewLoop;
437 
438  assert(!PartitionContainer.empty() && "at least two partitions expected");
439  // We're cloning the preheader along with the loop so we already made sure
440  // it was empty.
441  assert(&*OrigPH->begin() == OrigPH->getTerminator() &&
442  "preheader not empty");
443 
444  // Create a loop for each partition except the last. Clone the original
445  // loop before PH along with adding a preheader for the cloned loop. Then
446  // update PH to point to the newly added preheader.
447  BasicBlock *TopPH = OrigPH;
448  unsigned Index = getSize() - 1;
449  for (auto I = std::next(PartitionContainer.rbegin()),
450  E = PartitionContainer.rend();
451  I != E; ++I, --Index, TopPH = NewLoop->getLoopPreheader()) {
452  auto *Part = &*I;
453 
454  NewLoop = Part->cloneLoopWithPreheader(TopPH, Pred, Index, LI, DT);
455 
456  Part->getVMap()[ExitBlock] = TopPH;
457  Part->remapInstructions();
458  }
459  Pred->getTerminator()->replaceUsesOfWith(OrigPH, TopPH);
460 
461  // Now go in forward order and update the immediate dominator for the
462  // preheaders with the exiting block of the previous loop. Dominance
463  // within the loop is updated in cloneLoopWithPreheader.
464  for (auto Curr = PartitionContainer.cbegin(),
465  Next = std::next(PartitionContainer.cbegin()),
466  E = PartitionContainer.cend();
467  Next != E; ++Curr, ++Next)
469  Next->getDistributedLoop()->getLoopPreheader(),
470  Curr->getDistributedLoop()->getExitingBlock());
471  }
472 
473  /// \brief Removes the dead instructions from the cloned loops.
474  void removeUnusedInsts() {
475  for (auto &Partition : PartitionContainer)
476  Partition.removeUnusedInsts();
477  }
478 
479  /// \brief For each memory pointer, it computes the partitionId the pointer is
480  /// used in.
481  ///
482  /// This returns an array of int where the I-th entry corresponds to I-th
483  /// entry in LAI.getRuntimePointerCheck(). If the pointer is used in multiple
484  /// partitions its entry is set to -1.
486  computePartitionSetForPointers(const LoopAccessInfo &LAI) {
487  const RuntimePointerChecking *RtPtrCheck = LAI.getRuntimePointerChecking();
488 
489  unsigned N = RtPtrCheck->Pointers.size();
490  SmallVector<int, 8> PtrToPartitions(N);
491  for (unsigned I = 0; I < N; ++I) {
492  Value *Ptr = RtPtrCheck->Pointers[I].PointerValue;
493  auto Instructions =
494  LAI.getInstructionsForAccess(Ptr, RtPtrCheck->Pointers[I].IsWritePtr);
495 
496  int &Partition = PtrToPartitions[I];
497  // First set it to uninitialized.
498  Partition = -2;
499  for (Instruction *Inst : Instructions) {
500  // Note that this could be -1 if Inst is duplicated across multiple
501  // partitions.
502  int ThisPartition = this->InstToPartitionId[Inst];
503  if (Partition == -2)
504  Partition = ThisPartition;
505  // -1 means belonging to multiple partitions.
506  else if (Partition == -1)
507  break;
508  else if (Partition != (int)ThisPartition)
509  Partition = -1;
510  }
511  assert(Partition != -2 && "Pointer not belonging to any partition");
512  }
513 
514  return PtrToPartitions;
515  }
516 
517  void print(raw_ostream &OS) const {
518  unsigned Index = 0;
519  for (const auto &P : PartitionContainer) {
520  OS << "Partition " << Index++ << " (" << &P << "):\n";
521  P.print();
522  }
523  }
524 
525  void dump() const { print(dbgs()); }
526 
527 #ifndef NDEBUG
528  friend raw_ostream &operator<<(raw_ostream &OS,
529  const InstPartitionContainer &Partitions) {
530  Partitions.print(OS);
531  return OS;
532  }
533 #endif
534 
535  void printBlocks() const {
536  unsigned Index = 0;
537  for (const auto &P : PartitionContainer) {
538  dbgs() << "\nPartition " << Index++ << " (" << &P << "):\n";
539  P.printBlocks();
540  }
541  }
542 
543 private:
544  using PartitionContainerT = std::list<InstPartition>;
545 
546  /// \brief List of partitions.
547  PartitionContainerT PartitionContainer;
548 
549  /// \brief Mapping from Instruction to partition Id. If the instruction
550  /// belongs to multiple partitions the entry contains -1.
551  InstToPartitionIdT InstToPartitionId;
552 
553  Loop *L;
554  LoopInfo *LI;
555  DominatorTree *DT;
556 
557  /// \brief The control structure to merge adjacent partitions if both satisfy
558  /// the \p Predicate.
559  template <class UnaryPredicate>
560  void mergeAdjacentPartitionsIf(UnaryPredicate Predicate) {
561  InstPartition *PrevMatch = nullptr;
562  for (auto I = PartitionContainer.begin(); I != PartitionContainer.end();) {
563  auto DoesMatch = Predicate(&*I);
564  if (PrevMatch == nullptr && DoesMatch) {
565  PrevMatch = &*I;
566  ++I;
567  } else if (PrevMatch != nullptr && DoesMatch) {
568  I->moveTo(*PrevMatch);
569  I = PartitionContainer.erase(I);
570  } else {
571  PrevMatch = nullptr;
572  ++I;
573  }
574  }
575  }
576 };
577 
578 /// \brief For each memory instruction, this class maintains difference of the
579 /// number of unsafe dependences that start out from this instruction minus
580 /// those that end here.
581 ///
582 /// By traversing the memory instructions in program order and accumulating this
583 /// number, we know whether any unsafe dependence crosses over a program point.
584 class MemoryInstructionDependences {
586 
587 public:
588  struct Entry {
589  Instruction *Inst;
590  unsigned NumUnsafeDependencesStartOrEnd = 0;
591 
592  Entry(Instruction *Inst) : Inst(Inst) {}
593  };
594 
595  using AccessesType = SmallVector<Entry, 8>;
596 
597  AccessesType::const_iterator begin() const { return Accesses.begin(); }
598  AccessesType::const_iterator end() const { return Accesses.end(); }
599 
600  MemoryInstructionDependences(
601  const SmallVectorImpl<Instruction *> &Instructions,
602  const SmallVectorImpl<Dependence> &Dependences) {
603  Accesses.append(Instructions.begin(), Instructions.end());
604 
605  DEBUG(dbgs() << "Backward dependences:\n");
606  for (auto &Dep : Dependences)
607  if (Dep.isPossiblyBackward()) {
608  // Note that the designations source and destination follow the program
609  // order, i.e. source is always first. (The direction is given by the
610  // DepType.)
611  ++Accesses[Dep.Source].NumUnsafeDependencesStartOrEnd;
612  --Accesses[Dep.Destination].NumUnsafeDependencesStartOrEnd;
613 
614  DEBUG(Dep.print(dbgs(), 2, Instructions));
615  }
616  }
617 
618 private:
619  AccessesType Accesses;
620 };
621 
622 /// \brief The actual class performing the per-loop work.
623 class LoopDistributeForLoop {
624 public:
625  LoopDistributeForLoop(Loop *L, Function *F, LoopInfo *LI, DominatorTree *DT,
627  : L(L), F(F), LI(LI), DT(DT), SE(SE), ORE(ORE) {
628  setForced();
629  }
630 
631  /// \brief Try to distribute an inner-most loop.
632  bool processLoop(std::function<const LoopAccessInfo &(Loop &)> &GetLAA) {
633  assert(L->empty() && "Only process inner loops.");
634 
635  DEBUG(dbgs() << "\nLDist: In \"" << L->getHeader()->getParent()->getName()
636  << "\" checking " << *L << "\n");
637 
638  if (!L->getExitBlock())
639  return fail("MultipleExitBlocks", "multiple exit blocks");
640  if (!L->isLoopSimplifyForm())
641  return fail("NotLoopSimplifyForm",
642  "loop is not in loop-simplify form");
643 
644  BasicBlock *PH = L->getLoopPreheader();
645 
646  // LAA will check that we only have a single exiting block.
647  LAI = &GetLAA(*L);
648 
649  // Currently, we only distribute to isolate the part of the loop with
650  // dependence cycles to enable partial vectorization.
651  if (LAI->canVectorizeMemory())
652  return fail("MemOpsCanBeVectorized",
653  "memory operations are safe for vectorization");
654 
655  auto *Dependences = LAI->getDepChecker().getDependences();
656  if (!Dependences || Dependences->empty())
657  return fail("NoUnsafeDeps", "no unsafe dependences to isolate");
658 
659  InstPartitionContainer Partitions(L, LI, DT);
660 
661  // First, go through each memory operation and assign them to consecutive
662  // partitions (the order of partitions follows program order). Put those
663  // with unsafe dependences into "cyclic" partition otherwise put each store
664  // in its own "non-cyclic" partition (we'll merge these later).
665  //
666  // Note that a memory operation (e.g. Load2 below) at a program point that
667  // has an unsafe dependence (Store3->Load1) spanning over it must be
668  // included in the same cyclic partition as the dependent operations. This
669  // is to preserve the original program order after distribution. E.g.:
670  //
671  // NumUnsafeDependencesStartOrEnd NumUnsafeDependencesActive
672  // Load1 -. 1 0->1
673  // Load2 | /Unsafe/ 0 1
674  // Store3 -' -1 1->0
675  // Load4 0 0
676  //
677  // NumUnsafeDependencesActive > 0 indicates this situation and in this case
678  // we just keep assigning to the same cyclic partition until
679  // NumUnsafeDependencesActive reaches 0.
680  const MemoryDepChecker &DepChecker = LAI->getDepChecker();
681  MemoryInstructionDependences MID(DepChecker.getMemoryInstructions(),
682  *Dependences);
683 
684  int NumUnsafeDependencesActive = 0;
685  for (auto &InstDep : MID) {
686  Instruction *I = InstDep.Inst;
687  // We update NumUnsafeDependencesActive post-instruction, catch the
688  // start of a dependence directly via NumUnsafeDependencesStartOrEnd.
689  if (NumUnsafeDependencesActive ||
690  InstDep.NumUnsafeDependencesStartOrEnd > 0)
691  Partitions.addToCyclicPartition(I);
692  else
693  Partitions.addToNewNonCyclicPartition(I);
694  NumUnsafeDependencesActive += InstDep.NumUnsafeDependencesStartOrEnd;
695  assert(NumUnsafeDependencesActive >= 0 &&
696  "Negative number of dependences active");
697  }
698 
699  // Add partitions for values used outside. These partitions can be out of
700  // order from the original program order. This is OK because if the
701  // partition uses a load we will merge this partition with the original
702  // partition of the load that we set up in the previous loop (see
703  // mergeToAvoidDuplicatedLoads).
704  auto DefsUsedOutside = findDefsUsedOutsideOfLoop(L);
705  for (auto *Inst : DefsUsedOutside)
706  Partitions.addToNewNonCyclicPartition(Inst);
707 
708  DEBUG(dbgs() << "Seeded partitions:\n" << Partitions);
709  if (Partitions.getSize() < 2)
710  return fail("CantIsolateUnsafeDeps",
711  "cannot isolate unsafe dependencies");
712 
713  // Run the merge heuristics: Merge non-cyclic adjacent partitions since we
714  // should be able to vectorize these together.
715  Partitions.mergeBeforePopulating();
716  DEBUG(dbgs() << "\nMerged partitions:\n" << Partitions);
717  if (Partitions.getSize() < 2)
718  return fail("CantIsolateUnsafeDeps",
719  "cannot isolate unsafe dependencies");
720 
721  // Now, populate the partitions with non-memory operations.
722  Partitions.populateUsedSet();
723  DEBUG(dbgs() << "\nPopulated partitions:\n" << Partitions);
724 
725  // In order to preserve original lexical order for loads, keep them in the
726  // partition that we set up in the MemoryInstructionDependences loop.
727  if (Partitions.mergeToAvoidDuplicatedLoads()) {
728  DEBUG(dbgs() << "\nPartitions merged to ensure unique loads:\n"
729  << Partitions);
730  if (Partitions.getSize() < 2)
731  return fail("CantIsolateUnsafeDeps",
732  "cannot isolate unsafe dependencies");
733  }
734 
735  // Don't distribute the loop if we need too many SCEV run-time checks.
736  const SCEVUnionPredicate &Pred = LAI->getPSE().getUnionPredicate();
737  if (Pred.getComplexity() > (IsForced.getValueOr(false)
740  return fail("TooManySCEVRuntimeChecks",
741  "too many SCEV run-time checks needed.\n");
742 
743  DEBUG(dbgs() << "\nDistributing loop: " << *L << "\n");
744  // We're done forming the partitions set up the reverse mapping from
745  // instructions to partitions.
746  Partitions.setupPartitionIdOnInstructions();
747 
748  // To keep things simple have an empty preheader before we version or clone
749  // the loop. (Also split if this has no predecessor, i.e. entry, because we
750  // rely on PH having a predecessor.)
751  if (!PH->getSinglePredecessor() || &*PH->begin() != PH->getTerminator())
752  SplitBlock(PH, PH->getTerminator(), DT, LI);
753 
754  // If we need run-time checks, version the loop now.
755  auto PtrToPartition = Partitions.computePartitionSetForPointers(*LAI);
756  const auto *RtPtrChecking = LAI->getRuntimePointerChecking();
757  const auto &AllChecks = RtPtrChecking->getChecks();
758  auto Checks = includeOnlyCrossPartitionChecks(AllChecks, PtrToPartition,
759  RtPtrChecking);
760 
761  if (!Pred.isAlwaysTrue() || !Checks.empty()) {
762  DEBUG(dbgs() << "\nPointers:\n");
763  DEBUG(LAI->getRuntimePointerChecking()->printChecks(dbgs(), Checks));
764  LoopVersioning LVer(*LAI, L, LI, DT, SE, false);
765  LVer.setAliasChecks(std::move(Checks));
766  LVer.setSCEVChecks(LAI->getPSE().getUnionPredicate());
767  LVer.versionLoop(DefsUsedOutside);
768  LVer.annotateLoopWithNoAlias();
769  }
770 
771  // Create identical copies of the original loop for each partition and hook
772  // them up sequentially.
773  Partitions.cloneLoops();
774 
775  // Now, we remove the instruction from each loop that don't belong to that
776  // partition.
777  Partitions.removeUnusedInsts();
778  DEBUG(dbgs() << "\nAfter removing unused Instrs:\n");
779  DEBUG(Partitions.printBlocks());
780 
781  if (LDistVerify) {
782  LI->verify(*DT);
783  DT->verifyDomTree();
784  }
785 
786  ++NumLoopsDistributed;
787  // Report the success.
788  ORE->emit([&]() {
789  return OptimizationRemark(LDIST_NAME, "Distribute", L->getStartLoc(),
790  L->getHeader())
791  << "distributed loop";
792  });
793  return true;
794  }
795 
796  /// \brief Provide diagnostics then \return with false.
797  bool fail(StringRef RemarkName, StringRef Message) {
798  LLVMContext &Ctx = F->getContext();
799  bool Forced = isForced().getValueOr(false);
800 
801  DEBUG(dbgs() << "Skipping; " << Message << "\n");
802 
803  // With Rpass-missed report that distribution failed.
804  ORE->emit([&]() {
805  return OptimizationRemarkMissed(LDIST_NAME, "NotDistributed",
806  L->getStartLoc(), L->getHeader())
807  << "loop not distributed: use -Rpass-analysis=loop-distribute for "
808  "more "
809  "info";
810  });
811 
812  // With Rpass-analysis report why. This is on by default if distribution
813  // was requested explicitly.
814  ORE->emit(OptimizationRemarkAnalysis(
816  RemarkName, L->getStartLoc(), L->getHeader())
817  << "loop not distributed: " << Message);
818 
819  // Also issue a warning if distribution was requested explicitly but it
820  // failed.
821  if (Forced)
823  *F, L->getStartLoc(), "loop not distributed: failed "
824  "explicitly specified loop distribution"));
825 
826  return false;
827  }
828 
829  /// \brief Return if distribution forced to be enabled/disabled for the loop.
830  ///
831  /// If the optional has a value, it indicates whether distribution was forced
832  /// to be enabled (true) or disabled (false). If the optional has no value
833  /// distribution was not forced either way.
834  const Optional<bool> &isForced() const { return IsForced; }
835 
836 private:
837  /// \brief Filter out checks between pointers from the same partition.
838  ///
839  /// \p PtrToPartition contains the partition number for pointers. Partition
840  /// number -1 means that the pointer is used in multiple partitions. In this
841  /// case we can't safely omit the check.
843  includeOnlyCrossPartitionChecks(
845  const SmallVectorImpl<int> &PtrToPartition,
846  const RuntimePointerChecking *RtPtrChecking) {
848 
849  copy_if(AllChecks, std::back_inserter(Checks),
851  for (unsigned PtrIdx1 : Check.first->Members)
852  for (unsigned PtrIdx2 : Check.second->Members)
853  // Only include this check if there is a pair of pointers
854  // that require checking and the pointers fall into
855  // separate partitions.
856  //
857  // (Note that we already know at this point that the two
858  // pointer groups need checking but it doesn't follow
859  // that each pair of pointers within the two groups need
860  // checking as well.
861  //
862  // In other words we don't want to include a check just
863  // because there is a pair of pointers between the two
864  // pointer groups that require checks and a different
865  // pair whose pointers fall into different partitions.)
866  if (RtPtrChecking->needsChecking(PtrIdx1, PtrIdx2) &&
868  PtrToPartition, PtrIdx1, PtrIdx2))
869  return true;
870  return false;
871  });
872 
873  return Checks;
874  }
875 
876  /// \brief Check whether the loop metadata is forcing distribution to be
877  /// enabled/disabled.
878  void setForced() {
880  findStringMetadataForLoop(L, "llvm.loop.distribute.enable");
881  if (!Value)
882  return;
883 
884  const MDOperand *Op = *Value;
885  assert(Op && mdconst::hasa<ConstantInt>(*Op) && "invalid metadata");
886  IsForced = mdconst::extract<ConstantInt>(*Op)->getZExtValue();
887  }
888 
889  Loop *L;
890  Function *F;
891 
892  // Analyses used.
893  LoopInfo *LI;
894  const LoopAccessInfo *LAI = nullptr;
895  DominatorTree *DT;
896  ScalarEvolution *SE;
898 
899  /// \brief Indicates whether distribution is forced to be enabled/disabled for
900  /// the loop.
901  ///
902  /// If the optional has a value, it indicates whether distribution was forced
903  /// to be enabled (true) or disabled (false). If the optional has no value
904  /// distribution was not forced either way.
905  Optional<bool> IsForced;
906 };
907 
908 } // end anonymous namespace
909 
910 /// Shared implementation between new and old PMs.
911 static bool runImpl(Function &F, LoopInfo *LI, DominatorTree *DT,
913  std::function<const LoopAccessInfo &(Loop &)> &GetLAA) {
914  // Build up a worklist of inner-loops to vectorize. This is necessary as the
915  // act of distributing a loop creates new loops and can invalidate iterators
916  // across the loops.
917  SmallVector<Loop *, 8> Worklist;
918 
919  for (Loop *TopLevelLoop : *LI)
920  for (Loop *L : depth_first(TopLevelLoop))
921  // We only handle inner-most loops.
922  if (L->empty())
923  Worklist.push_back(L);
924 
925  // Now walk the identified inner loops.
926  bool Changed = false;
927  for (Loop *L : Worklist) {
928  LoopDistributeForLoop LDL(L, &F, LI, DT, SE, ORE);
929 
930  // If distribution was forced for the specific loop to be
931  // enabled/disabled, follow that. Otherwise use the global flag.
932  if (LDL.isForced().getValueOr(EnableLoopDistribute))
933  Changed |= LDL.processLoop(GetLAA);
934  }
935 
936  // Process each loop nest in the function.
937  return Changed;
938 }
939 
940 namespace {
941 
942 /// \brief The pass class.
943 class LoopDistributeLegacy : public FunctionPass {
944 public:
945  static char ID;
946 
947  LoopDistributeLegacy() : FunctionPass(ID) {
948  // The default is set by the caller.
950  }
951 
952  bool runOnFunction(Function &F) override {
953  if (skipFunction(F))
954  return false;
955 
956  auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
957  auto *LAA = &getAnalysis<LoopAccessLegacyAnalysis>();
958  auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
959  auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
960  auto *ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
961  std::function<const LoopAccessInfo &(Loop &)> GetLAA =
962  [&](Loop &L) -> const LoopAccessInfo & { return LAA->getInfo(&L); };
963 
964  return runImpl(F, LI, DT, SE, ORE, GetLAA);
965  }
966 
967  void getAnalysisUsage(AnalysisUsage &AU) const override {
976  }
977 };
978 
979 } // end anonymous namespace
980 
983  auto &LI = AM.getResult<LoopAnalysis>(F);
984  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
985  auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
987 
988  // We don't directly need these analyses but they're required for loop
989  // analyses so provide them below.
990  auto &AA = AM.getResult<AAManager>(F);
991  auto &AC = AM.getResult<AssumptionAnalysis>(F);
992  auto &TTI = AM.getResult<TargetIRAnalysis>(F);
993  auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
994 
995  auto &LAM = AM.getResult<LoopAnalysisManagerFunctionProxy>(F).getManager();
996  std::function<const LoopAccessInfo &(Loop &)> GetLAA =
997  [&](Loop &L) -> const LoopAccessInfo & {
998  LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE, TLI, TTI, nullptr};
999  return LAM.getResult<LoopAccessAnalysis>(L, AR);
1000  };
1001 
1002  bool Changed = runImpl(F, &LI, &DT, &SE, &ORE, GetLAA);
1003  if (!Changed)
1004  return PreservedAnalyses::all();
1005  PreservedAnalyses PA;
1006  PA.preserve<LoopAnalysis>();
1008  PA.preserve<GlobalsAA>();
1009  return PA;
1010 }
1011 
1013 
1014 static const char ldist_name[] = "Loop Distribution";
1015 
1016 INITIALIZE_PASS_BEGIN(LoopDistributeLegacy, LDIST_NAME, ldist_name, false,
1017  false)
1023 INITIALIZE_PASS_END(LoopDistributeLegacy, LDIST_NAME, ldist_name, false, false)
1024 
1025 FunctionPass *llvm::createLoopDistributePass() { return new LoopDistributeLegacy(); }
Legacy wrapper pass to provide the GlobalsAAResult object.
static bool Check(DecodeStatus &Out, DecodeStatus In)
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:244
void printChecks(raw_ostream &OS, const SmallVectorImpl< PointerCheck > &Checks, unsigned Depth=0) const
Print Checks.
Tracking metadata reference owned by Metadata.
Definition: Metadata.h:709
const MemoryDepChecker & getDepChecker() const
the Memory Dependence Checker which can determine the loop-independent and loop-carried dependences b...
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Diagnostic information for missed-optimization remarks.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
#define LDIST_NAME
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
Definition: Path.cpp:235
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:687
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
BasicBlock * SplitBlock(BasicBlock *Old, Instruction *SplitPt, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr)
Split the specified block at the specified instruction - everything before SplitPt stays in Old and e...
This is the interface for a simple mod/ref and alias analysis over globals.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
OutputIt copy_if(R &&Range, OutputIt Out, UnaryPredicate P)
Provide wrappers to std::copy_if which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:860
static cl::opt< unsigned > PragmaDistributeSCEVCheckThreshold("loop-distribute-scev-check-threshold-with-pragma", cl::init(128), cl::Hidden, cl::desc("The maximum number of SCEV checks allowed for Loop " "Distribution for loop marked with #pragma loop distribute(enable)"))
const RuntimePointerChecking * getRuntimePointerChecking() const
This provides a very simple, boring adaptor for a begin and end iterator into a range type...
The main scalar evolution driver.
This file contains the declarations for metadata subclasses.
static cl::opt< bool > LDistVerify("loop-distribute-verify", cl::Hidden, cl::desc("Turn on DominatorTree and LoopInfo verification " "after Loop Distribution"), cl::init(false))
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:106
Analysis pass providing the TargetTransformInfo.
STATISTIC(NumFunctions, "Total number of functions")
SmallVector< Instruction *, 4 > getInstructionsForAccess(Value *Ptr, bool isWrite) const
Return the list of instructions that use Ptr to read or write memory.
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:238
F(f)
Checks memory dependences among accesses to the same underlying object to determine whether there vec...
void verify(const DominatorTreeBase< BlockT, false > &DomTree) const
Definition: LoopInfoImpl.h:624
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:252
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:51
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
Diagnostic information for optimization failures.
static bool arePointersInSamePartition(const SmallVectorImpl< int > &PtrToPartition, unsigned PtrIdx1, unsigned PtrIdx2)
Check if pointers are in the same partition.
Diagnostic information for optimization analysis remarks.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:42
friend const_iterator begin(StringRef path, Style style)
Get begin iterator over path.
Definition: Path.cpp:235
Analysis pass that exposes the LoopInfo for a function.
Definition: LoopInfo.h:933
SmallVector< Instruction *, 8 > findDefsUsedOutsideOfLoop(Loop *L)
Returns the instructions that use values defined in the loop.
Definition: LoopUtils.cpp:1005
BlockT * getHeader() const
Definition: LoopInfo.h:100
auto reverse(ContainerTy &&C, typename std::enable_if< has_rbegin< ContainerTy >::value >::type *=nullptr) -> decltype(make_range(C.rbegin(), C.rend()))
Definition: STLExtras.h:232
const SmallVectorImpl< Instruction * > & getMemoryInstructions() const
The vector of memory access instructions.
static cl::opt< bool > DistributeNonIfConvertible("loop-distribute-non-if-convertible", cl::Hidden, cl::desc("Whether to distribute into a loop that may not be " "if-convertible by the loop vectorizer"), cl::init(false))
This header provides classes for managing per-loop analyses.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:140
void replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Definition: User.cpp:22
static bool runImpl(Function &F, LoopInfo *LI, DominatorTree *DT, ScalarEvolution *SE, OptimizationRemarkEmitter *ORE, std::function< const LoopAccessInfo &(Loop &)> &GetLAA)
Shared implementation between new and old PMs.
static bool runOnFunction(Function &F, bool PostInlining)
#define P(N)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:406
friend const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:244
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
void verifyDomTree() const
Verify the correctness of the domtree by re-computing it.
Definition: Dominators.cpp:305
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:153
* if(!EatIfPresent(lltok::kw_thread_local)) return false
ParseOptionalThreadLocal := /*empty.
bool needsChecking(const CheckingPtrGroup &M, const CheckingPtrGroup &N) const
Decide if we need to add a check between two groups of pointers, according to needsChecking.
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:217
LLVM Basic Block Representation.
Definition: BasicBlock.h:59
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:69
void changeImmediateDominator(DomTreeNodeBase< NodeT > *N, DomTreeNodeBase< NodeT > *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node&#39;s...
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This analysis provides dependence information for the memory accesses of a loop.
INITIALIZE_PASS_BEGIN(LoopDistributeLegacy, LDIST_NAME, ldist_name, false, false) FunctionPass *llvm
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator begin()
Definition: SmallVector.h:116
This file contains the declarations for the subclasses of Constant, which represent the different fla...
A manager for alias analyses.
Diagnostic information for applied optimization remarks.
EquivalenceClasses - This represents a collection of equivalence classes and supports three efficient...
Represent the analysis usage information of a pass.
Analysis pass providing a never-invalidated alias analysis result.
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:285
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
const PredicatedScalarEvolution & getPSE() const
Used to add runtime SCEV checks.
BlockT * getExitBlock() const
If getExitBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:76
static UndefValue * get(Type *T)
Static factory methods - Return an &#39;undef&#39; object of the specified type.
Definition: Constants.cpp:1320
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:159
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
Definition: LoopInfo.cpp:345
std::pair< const CheckingPtrGroup *, const CheckingPtrGroup * > PointerCheck
A memcheck which made up of a pair of grouped pointers.
Optional< const MDOperand * > findStringMetadataForLoop(Loop *TheLoop, StringRef Name)
Find string metadata for loop.
Definition: LoopUtils.cpp:1086
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
static cl::opt< unsigned > DistributeSCEVCheckThreshold("loop-distribute-scev-check-threshold", cl::init(8), cl::Hidden, cl::desc("The maximum number of SCEV checks allowed for Loop " "Distribution"))
A function analysis which provides an AssumptionCache.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
const SmallVectorImpl< Dependence > * getDependences() const
Returns the memory dependences.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:418
bool isAlwaysTrue() const override
Implementation of the SCEVPredicate interface.
const SCEVUnionPredicate & getUnionPredicate() const
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:864
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
Definition: PPCPredicates.h:27
void initializeLoopDistributeLegacyPass(PassRegistry &)
Drive the analysis of memory accesses in the loop.
This class emits a version of the loop where run-time checks ensure that may-alias pointers can&#39;t ove...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
static const char ldist_name[]
Holds information about the memory runtime legality checks to verify that a group of pointers do not ...
Analysis pass that exposes the ScalarEvolution for a function.
unsigned getComplexity() const override
We estimate the complexity of a union predicate as the size number of predicates in the union...
bool isLoopSimplifyForm() const
Return true if the Loop is in the form that the LoopSimplify form transforms loops to...
Definition: LoopInfo.cpp:191
This analysis provides dependence information for the memory accesses of a loop.
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator end()
Definition: SmallVector.h:120
Dependece between memory access instructions.
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:439
StringRef getName() const
Return a constant reference to the value&#39;s name.
Definition: Value.cpp:220
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:108
#define I(x, y, z)
Definition: MD5.cpp:58
#define N
SmallVector< PointerInfo, 2 > Pointers
Information about the pointers that may require checking.
iterator_range< value_op_iterator > operand_values()
Definition: User.h:246
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
void preserve()
Mark an analysis as preserved.
Definition: PassManager.h:174
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
raw_ostream & operator<<(raw_ostream &OS, const APInt &I)
Definition: APInt.h:2018
bool empty() const
Definition: LoopInfo.h:146
Analysis pass providing the TargetLibraryInfo.
iterator_range< df_iterator< T > > depth_first(const T &G)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This class represents a composition of other SCEV predicates, and is the class that most clients will...
static cl::opt< bool > EnableLoopDistribute("enable-loop-distribute", cl::Hidden, cl::desc("Enable the new, experimental LoopDistribution Pass"), cl::init(false))
LLVM Value Representation.
Definition: Value.h:73
OptimizationRemarkEmitter legacy analysis pass.
static bool blockNeedsPredication(BasicBlock *BB, Loop *TheLoop, DominatorTree *DT)
Return true if the block BB needs to be predicated in order for the loop to be vectorized.
Loop * cloneLoopWithPreheader(BasicBlock *Before, BasicBlock *LoopDomBB, Loop *OrigLoop, ValueToValueMapTy &VMap, const Twine &NameSuffix, LoopInfo *LI, DominatorTree *DT, SmallVectorImpl< BasicBlock *> &Blocks)
Clones a loop OrigLoop.
This class implements an extremely fast bulk output stream that can only output to a stream...
Definition: raw_ostream.h:44
#define DEBUG(X)
Definition: Debug.h:118
The legacy pass manager&#39;s analysis pass to compute loop information.
Definition: LoopInfo.h:958
print Print MemDeps of function
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:49
A container for analyses that lazily runs them and caches their results.
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:267
This pass exposes codegen information to IR-level passes.
const TerminatorInst * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:120
This header defines various interfaces for pass management in LLVM.
const SmallVector< PointerCheck, 4 > & getChecks() const
Returns the checks that generateChecks created.
Dependence - This class represents a dependence between two memory memory references in a function...
for(unsigned i=Desc.getNumOperands(), e=OldMI.getNumOperands();i !=e;++i)
The optimization diagnostic interface.
bool canVectorizeMemory() const
Return true we can analyze the memory accesses in the loop and there are no memory dependence cycles...
static void fail(const SDLoc &DL, SelectionDAG &DAG, const Twine &Msg)
FunctionPass * createLoopDistributePass()
const BasicBlock * getParent() const
Definition: Instruction.h:66
void remapInstructionsInBlocks(const SmallVectorImpl< BasicBlock *> &Blocks, ValueToValueMapTy &VMap)
Remaps instructions in Blocks using the mapping in VMap.
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Definition: PassManager.h:946