LLVM  10.0.0svn
LoopDistribute.cpp
Go to the documentation of this file.
1 //===- LoopDistribute.cpp - Loop Distribution Pass ------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the Loop Distribution Pass. Its main focus is to
10 // distribute loops that cannot be vectorized due to dependence cycles. It
11 // tries to isolate the offending dependences into a new loop allowing
12 // vectorization of the remaining parts.
13 //
14 // For dependence analysis, the pass uses the LoopVectorizer's
15 // LoopAccessAnalysis. Because this analysis presumes no change in the order of
16 // memory operations, special care is taken to preserve the lexical order of
17 // these operations.
18 //
19 // Similarly to the Vectorizer, the pass also supports loop versioning to
20 // run-time disambiguate potentially overlapping arrays.
21 //
22 //===----------------------------------------------------------------------===//
23 
25 #include "llvm/ADT/DenseMap.h"
28 #include "llvm/ADT/Optional.h"
29 #include "llvm/ADT/STLExtras.h"
30 #include "llvm/ADT/SmallPtrSet.h"
31 #include "llvm/ADT/SmallVector.h"
32 #include "llvm/ADT/Statistic.h"
33 #include "llvm/ADT/StringRef.h"
34 #include "llvm/ADT/Twine.h"
41 #include "llvm/Analysis/LoopInfo.h"
46 #include "llvm/IR/BasicBlock.h"
47 #include "llvm/IR/Constants.h"
48 #include "llvm/IR/DiagnosticInfo.h"
49 #include "llvm/IR/Dominators.h"
50 #include "llvm/IR/Function.h"
51 #include "llvm/IR/InstrTypes.h"
52 #include "llvm/IR/Instruction.h"
53 #include "llvm/IR/Instructions.h"
54 #include "llvm/IR/LLVMContext.h"
55 #include "llvm/IR/Metadata.h"
56 #include "llvm/IR/PassManager.h"
57 #include "llvm/IR/Value.h"
58 #include "llvm/Pass.h"
59 #include "llvm/Support/Casting.h"
61 #include "llvm/Support/Debug.h"
63 #include "llvm/Transforms/Scalar.h"
69 #include <cassert>
70 #include <functional>
71 #include <list>
72 #include <tuple>
73 #include <utility>
74 
75 using namespace llvm;
76 
77 #define LDIST_NAME "loop-distribute"
78 #define DEBUG_TYPE LDIST_NAME
79 
80 /// @{
81 /// Metadata attribute names
82 static const char *const LLVMLoopDistributeFollowupAll =
83  "llvm.loop.distribute.followup_all";
84 static const char *const LLVMLoopDistributeFollowupCoincident =
85  "llvm.loop.distribute.followup_coincident";
86 static const char *const LLVMLoopDistributeFollowupSequential =
87  "llvm.loop.distribute.followup_sequential";
88 static const char *const LLVMLoopDistributeFollowupFallback =
89  "llvm.loop.distribute.followup_fallback";
90 /// @}
91 
92 static cl::opt<bool>
93  LDistVerify("loop-distribute-verify", cl::Hidden,
94  cl::desc("Turn on DominatorTree and LoopInfo verification "
95  "after Loop Distribution"),
96  cl::init(false));
97 
99  "loop-distribute-non-if-convertible", cl::Hidden,
100  cl::desc("Whether to distribute into a loop that may not be "
101  "if-convertible by the loop vectorizer"),
102  cl::init(false));
103 
105  "loop-distribute-scev-check-threshold", cl::init(8), cl::Hidden,
106  cl::desc("The maximum number of SCEV checks allowed for Loop "
107  "Distribution"));
108 
110  "loop-distribute-scev-check-threshold-with-pragma", cl::init(128),
111  cl::Hidden,
112  cl::desc(
113  "The maximum number of SCEV checks allowed for Loop "
114  "Distribution for loop marked with #pragma loop distribute(enable)"));
115 
117  "enable-loop-distribute", cl::Hidden,
118  cl::desc("Enable the new, experimental LoopDistribution Pass"),
119  cl::init(false));
120 
121 STATISTIC(NumLoopsDistributed, "Number of loops distributed");
122 
123 namespace {
124 
125 /// Maintains the set of instructions of the loop for a partition before
126 /// cloning. After cloning, it hosts the new loop.
127 class InstPartition {
128  using InstructionSet = SmallPtrSet<Instruction *, 8>;
129 
130 public:
131  InstPartition(Instruction *I, Loop *L, bool DepCycle = false)
132  : DepCycle(DepCycle), OrigLoop(L) {
133  Set.insert(I);
134  }
135 
136  /// Returns whether this partition contains a dependence cycle.
137  bool hasDepCycle() const { return DepCycle; }
138 
139  /// Adds an instruction to this partition.
140  void add(Instruction *I) { Set.insert(I); }
141 
142  /// Collection accessors.
143  InstructionSet::iterator begin() { return Set.begin(); }
144  InstructionSet::iterator end() { return Set.end(); }
145  InstructionSet::const_iterator begin() const { return Set.begin(); }
146  InstructionSet::const_iterator end() const { return Set.end(); }
147  bool empty() const { return Set.empty(); }
148 
149  /// Moves this partition into \p Other. This partition becomes empty
150  /// after this.
151  void moveTo(InstPartition &Other) {
152  Other.Set.insert(Set.begin(), Set.end());
153  Set.clear();
154  Other.DepCycle |= DepCycle;
155  }
156 
157  /// Populates the partition with a transitive closure of all the
158  /// instructions that the seeded instructions dependent on.
159  void populateUsedSet() {
160  // FIXME: We currently don't use control-dependence but simply include all
161  // blocks (possibly empty at the end) and let simplifycfg mostly clean this
162  // up.
163  for (auto *B : OrigLoop->getBlocks())
164  Set.insert(B->getTerminator());
165 
166  // Follow the use-def chains to form a transitive closure of all the
167  // instructions that the originally seeded instructions depend on.
168  SmallVector<Instruction *, 8> Worklist(Set.begin(), Set.end());
169  while (!Worklist.empty()) {
170  Instruction *I = Worklist.pop_back_val();
171  // Insert instructions from the loop that we depend on.
172  for (Value *V : I->operand_values()) {
173  auto *I = dyn_cast<Instruction>(V);
174  if (I && OrigLoop->contains(I->getParent()) && Set.insert(I).second)
175  Worklist.push_back(I);
176  }
177  }
178  }
179 
180  /// Clones the original loop.
181  ///
182  /// Updates LoopInfo and DominatorTree using the information that block \p
183  /// LoopDomBB dominates the loop.
184  Loop *cloneLoopWithPreheader(BasicBlock *InsertBefore, BasicBlock *LoopDomBB,
185  unsigned Index, LoopInfo *LI,
186  DominatorTree *DT) {
187  ClonedLoop = ::cloneLoopWithPreheader(InsertBefore, LoopDomBB, OrigLoop,
188  VMap, Twine(".ldist") + Twine(Index),
189  LI, DT, ClonedLoopBlocks);
190  return ClonedLoop;
191  }
192 
193  /// The cloned loop. If this partition is mapped to the original loop,
194  /// this is null.
195  const Loop *getClonedLoop() const { return ClonedLoop; }
196 
197  /// Returns the loop where this partition ends up after distribution.
198  /// If this partition is mapped to the original loop then use the block from
199  /// the loop.
200  Loop *getDistributedLoop() const {
201  return ClonedLoop ? ClonedLoop : OrigLoop;
202  }
203 
204  /// The VMap that is populated by cloning and then used in
205  /// remapinstruction to remap the cloned instructions.
206  ValueToValueMapTy &getVMap() { return VMap; }
207 
208  /// Remaps the cloned instructions using VMap.
209  void remapInstructions() {
210  remapInstructionsInBlocks(ClonedLoopBlocks, VMap);
211  }
212 
213  /// Based on the set of instructions selected for this partition,
214  /// removes the unnecessary ones.
215  void removeUnusedInsts() {
217 
218  for (auto *Block : OrigLoop->getBlocks())
219  for (auto &Inst : *Block)
220  if (!Set.count(&Inst)) {
221  Instruction *NewInst = &Inst;
222  if (!VMap.empty())
223  NewInst = cast<Instruction>(VMap[NewInst]);
224 
225  assert(!isa<BranchInst>(NewInst) &&
226  "Branches are marked used early on");
227  Unused.push_back(NewInst);
228  }
229 
230  // Delete the instructions backwards, as it has a reduced likelihood of
231  // having to update as many def-use and use-def chains.
232  for (auto *Inst : reverse(Unused)) {
233  if (!Inst->use_empty())
234  Inst->replaceAllUsesWith(UndefValue::get(Inst->getType()));
235  Inst->eraseFromParent();
236  }
237  }
238 
239  void print() const {
240  if (DepCycle)
241  dbgs() << " (cycle)\n";
242  for (auto *I : Set)
243  // Prefix with the block name.
244  dbgs() << " " << I->getParent()->getName() << ":" << *I << "\n";
245  }
246 
247  void printBlocks() const {
248  for (auto *BB : getDistributedLoop()->getBlocks())
249  dbgs() << *BB;
250  }
251 
252 private:
253  /// Instructions from OrigLoop selected for this partition.
254  InstructionSet Set;
255 
256  /// Whether this partition contains a dependence cycle.
257  bool DepCycle;
258 
259  /// The original loop.
260  Loop *OrigLoop;
261 
262  /// The cloned loop. If this partition is mapped to the original loop,
263  /// this is null.
264  Loop *ClonedLoop = nullptr;
265 
266  /// The blocks of ClonedLoop including the preheader. If this
267  /// partition is mapped to the original loop, this is empty.
268  SmallVector<BasicBlock *, 8> ClonedLoopBlocks;
269 
270  /// These gets populated once the set of instructions have been
271  /// finalized. If this partition is mapped to the original loop, these are not
272  /// set.
273  ValueToValueMapTy VMap;
274 };
275 
276 /// Holds the set of Partitions. It populates them, merges them and then
277 /// clones the loops.
278 class InstPartitionContainer {
279  using InstToPartitionIdT = DenseMap<Instruction *, int>;
280 
281 public:
282  InstPartitionContainer(Loop *L, LoopInfo *LI, DominatorTree *DT)
283  : L(L), LI(LI), DT(DT) {}
284 
285  /// Returns the number of partitions.
286  unsigned getSize() const { return PartitionContainer.size(); }
287 
288  /// Adds \p Inst into the current partition if that is marked to
289  /// contain cycles. Otherwise start a new partition for it.
290  void addToCyclicPartition(Instruction *Inst) {
291  // If the current partition is non-cyclic. Start a new one.
292  if (PartitionContainer.empty() || !PartitionContainer.back().hasDepCycle())
293  PartitionContainer.emplace_back(Inst, L, /*DepCycle=*/true);
294  else
295  PartitionContainer.back().add(Inst);
296  }
297 
298  /// Adds \p Inst into a partition that is not marked to contain
299  /// dependence cycles.
300  ///
301  // Initially we isolate memory instructions into as many partitions as
302  // possible, then later we may merge them back together.
303  void addToNewNonCyclicPartition(Instruction *Inst) {
304  PartitionContainer.emplace_back(Inst, L);
305  }
306 
307  /// Merges adjacent non-cyclic partitions.
308  ///
309  /// The idea is that we currently only want to isolate the non-vectorizable
310  /// partition. We could later allow more distribution among these partition
311  /// too.
312  void mergeAdjacentNonCyclic() {
313  mergeAdjacentPartitionsIf(
314  [](const InstPartition *P) { return !P->hasDepCycle(); });
315  }
316 
317  /// If a partition contains only conditional stores, we won't vectorize
318  /// it. Try to merge it with a previous cyclic partition.
319  void mergeNonIfConvertible() {
320  mergeAdjacentPartitionsIf([&](const InstPartition *Partition) {
321  if (Partition->hasDepCycle())
322  return true;
323 
324  // Now, check if all stores are conditional in this partition.
325  bool seenStore = false;
326 
327  for (auto *Inst : *Partition)
328  if (isa<StoreInst>(Inst)) {
329  seenStore = true;
331  return false;
332  }
333  return seenStore;
334  });
335  }
336 
337  /// Merges the partitions according to various heuristics.
338  void mergeBeforePopulating() {
339  mergeAdjacentNonCyclic();
341  mergeNonIfConvertible();
342  }
343 
344  /// Merges partitions in order to ensure that no loads are duplicated.
345  ///
346  /// We can't duplicate loads because that could potentially reorder them.
347  /// LoopAccessAnalysis provides dependency information with the context that
348  /// the order of memory operation is preserved.
349  ///
350  /// Return if any partitions were merged.
351  bool mergeToAvoidDuplicatedLoads() {
352  using LoadToPartitionT = DenseMap<Instruction *, InstPartition *>;
353  using ToBeMergedT = EquivalenceClasses<InstPartition *>;
354 
355  LoadToPartitionT LoadToPartition;
356  ToBeMergedT ToBeMerged;
357 
358  // Step through the partitions and create equivalence between partitions
359  // that contain the same load. Also put partitions in between them in the
360  // same equivalence class to avoid reordering of memory operations.
361  for (PartitionContainerT::iterator I = PartitionContainer.begin(),
362  E = PartitionContainer.end();
363  I != E; ++I) {
364  auto *PartI = &*I;
365 
366  // If a load occurs in two partitions PartI and PartJ, merge all
367  // partitions (PartI, PartJ] into PartI.
368  for (Instruction *Inst : *PartI)
369  if (isa<LoadInst>(Inst)) {
370  bool NewElt;
371  LoadToPartitionT::iterator LoadToPart;
372 
373  std::tie(LoadToPart, NewElt) =
374  LoadToPartition.insert(std::make_pair(Inst, PartI));
375  if (!NewElt) {
376  LLVM_DEBUG(dbgs()
377  << "Merging partitions due to this load in multiple "
378  << "partitions: " << PartI << ", " << LoadToPart->second
379  << "\n"
380  << *Inst << "\n");
381 
382  auto PartJ = I;
383  do {
384  --PartJ;
385  ToBeMerged.unionSets(PartI, &*PartJ);
386  } while (&*PartJ != LoadToPart->second);
387  }
388  }
389  }
390  if (ToBeMerged.empty())
391  return false;
392 
393  // Merge the member of an equivalence class into its class leader. This
394  // makes the members empty.
395  for (ToBeMergedT::iterator I = ToBeMerged.begin(), E = ToBeMerged.end();
396  I != E; ++I) {
397  if (!I->isLeader())
398  continue;
399 
400  auto PartI = I->getData();
401  for (auto PartJ : make_range(std::next(ToBeMerged.member_begin(I)),
402  ToBeMerged.member_end())) {
403  PartJ->moveTo(*PartI);
404  }
405  }
406 
407  // Remove the empty partitions.
408  PartitionContainer.remove_if(
409  [](const InstPartition &P) { return P.empty(); });
410 
411  return true;
412  }
413 
414  /// Sets up the mapping between instructions to partitions. If the
415  /// instruction is duplicated across multiple partitions, set the entry to -1.
416  void setupPartitionIdOnInstructions() {
417  int PartitionID = 0;
418  for (const auto &Partition : PartitionContainer) {
419  for (Instruction *Inst : Partition) {
420  bool NewElt;
421  InstToPartitionIdT::iterator Iter;
422 
423  std::tie(Iter, NewElt) =
424  InstToPartitionId.insert(std::make_pair(Inst, PartitionID));
425  if (!NewElt)
426  Iter->second = -1;
427  }
428  ++PartitionID;
429  }
430  }
431 
432  /// Populates the partition with everything that the seeding
433  /// instructions require.
434  void populateUsedSet() {
435  for (auto &P : PartitionContainer)
436  P.populateUsedSet();
437  }
438 
439  /// This performs the main chunk of the work of cloning the loops for
440  /// the partitions.
441  void cloneLoops() {
442  BasicBlock *OrigPH = L->getLoopPreheader();
443  // At this point the predecessor of the preheader is either the memcheck
444  // block or the top part of the original preheader.
445  BasicBlock *Pred = OrigPH->getSinglePredecessor();
446  assert(Pred && "Preheader does not have a single predecessor");
447  BasicBlock *ExitBlock = L->getExitBlock();
448  assert(ExitBlock && "No single exit block");
449  Loop *NewLoop;
450 
451  assert(!PartitionContainer.empty() && "at least two partitions expected");
452  // We're cloning the preheader along with the loop so we already made sure
453  // it was empty.
454  assert(&*OrigPH->begin() == OrigPH->getTerminator() &&
455  "preheader not empty");
456 
457  // Preserve the original loop ID for use after the transformation.
458  MDNode *OrigLoopID = L->getLoopID();
459 
460  // Create a loop for each partition except the last. Clone the original
461  // loop before PH along with adding a preheader for the cloned loop. Then
462  // update PH to point to the newly added preheader.
463  BasicBlock *TopPH = OrigPH;
464  unsigned Index = getSize() - 1;
465  for (auto I = std::next(PartitionContainer.rbegin()),
466  E = PartitionContainer.rend();
467  I != E; ++I, --Index, TopPH = NewLoop->getLoopPreheader()) {
468  auto *Part = &*I;
469 
470  NewLoop = Part->cloneLoopWithPreheader(TopPH, Pred, Index, LI, DT);
471 
472  Part->getVMap()[ExitBlock] = TopPH;
473  Part->remapInstructions();
474  setNewLoopID(OrigLoopID, Part);
475  }
476  Pred->getTerminator()->replaceUsesOfWith(OrigPH, TopPH);
477 
478  // Also set a new loop ID for the last loop.
479  setNewLoopID(OrigLoopID, &PartitionContainer.back());
480 
481  // Now go in forward order and update the immediate dominator for the
482  // preheaders with the exiting block of the previous loop. Dominance
483  // within the loop is updated in cloneLoopWithPreheader.
484  for (auto Curr = PartitionContainer.cbegin(),
485  Next = std::next(PartitionContainer.cbegin()),
486  E = PartitionContainer.cend();
487  Next != E; ++Curr, ++Next)
489  Next->getDistributedLoop()->getLoopPreheader(),
490  Curr->getDistributedLoop()->getExitingBlock());
491  }
492 
493  /// Removes the dead instructions from the cloned loops.
494  void removeUnusedInsts() {
495  for (auto &Partition : PartitionContainer)
496  Partition.removeUnusedInsts();
497  }
498 
499  /// For each memory pointer, it computes the partitionId the pointer is
500  /// used in.
501  ///
502  /// This returns an array of int where the I-th entry corresponds to I-th
503  /// entry in LAI.getRuntimePointerCheck(). If the pointer is used in multiple
504  /// partitions its entry is set to -1.
506  computePartitionSetForPointers(const LoopAccessInfo &LAI) {
507  const RuntimePointerChecking *RtPtrCheck = LAI.getRuntimePointerChecking();
508 
509  unsigned N = RtPtrCheck->Pointers.size();
510  SmallVector<int, 8> PtrToPartitions(N);
511  for (unsigned I = 0; I < N; ++I) {
512  Value *Ptr = RtPtrCheck->Pointers[I].PointerValue;
513  auto Instructions =
514  LAI.getInstructionsForAccess(Ptr, RtPtrCheck->Pointers[I].IsWritePtr);
515 
516  int &Partition = PtrToPartitions[I];
517  // First set it to uninitialized.
518  Partition = -2;
519  for (Instruction *Inst : Instructions) {
520  // Note that this could be -1 if Inst is duplicated across multiple
521  // partitions.
522  int ThisPartition = this->InstToPartitionId[Inst];
523  if (Partition == -2)
524  Partition = ThisPartition;
525  // -1 means belonging to multiple partitions.
526  else if (Partition == -1)
527  break;
528  else if (Partition != (int)ThisPartition)
529  Partition = -1;
530  }
531  assert(Partition != -2 && "Pointer not belonging to any partition");
532  }
533 
534  return PtrToPartitions;
535  }
536 
537  void print(raw_ostream &OS) const {
538  unsigned Index = 0;
539  for (const auto &P : PartitionContainer) {
540  OS << "Partition " << Index++ << " (" << &P << "):\n";
541  P.print();
542  }
543  }
544 
545  void dump() const { print(dbgs()); }
546 
547 #ifndef NDEBUG
548  friend raw_ostream &operator<<(raw_ostream &OS,
549  const InstPartitionContainer &Partitions) {
550  Partitions.print(OS);
551  return OS;
552  }
553 #endif
554 
555  void printBlocks() const {
556  unsigned Index = 0;
557  for (const auto &P : PartitionContainer) {
558  dbgs() << "\nPartition " << Index++ << " (" << &P << "):\n";
559  P.printBlocks();
560  }
561  }
562 
563 private:
564  using PartitionContainerT = std::list<InstPartition>;
565 
566  /// List of partitions.
567  PartitionContainerT PartitionContainer;
568 
569  /// Mapping from Instruction to partition Id. If the instruction
570  /// belongs to multiple partitions the entry contains -1.
571  InstToPartitionIdT InstToPartitionId;
572 
573  Loop *L;
574  LoopInfo *LI;
575  DominatorTree *DT;
576 
577  /// The control structure to merge adjacent partitions if both satisfy
578  /// the \p Predicate.
579  template <class UnaryPredicate>
580  void mergeAdjacentPartitionsIf(UnaryPredicate Predicate) {
581  InstPartition *PrevMatch = nullptr;
582  for (auto I = PartitionContainer.begin(); I != PartitionContainer.end();) {
583  auto DoesMatch = Predicate(&*I);
584  if (PrevMatch == nullptr && DoesMatch) {
585  PrevMatch = &*I;
586  ++I;
587  } else if (PrevMatch != nullptr && DoesMatch) {
588  I->moveTo(*PrevMatch);
589  I = PartitionContainer.erase(I);
590  } else {
591  PrevMatch = nullptr;
592  ++I;
593  }
594  }
595  }
596 
597  /// Assign new LoopIDs for the partition's cloned loop.
598  void setNewLoopID(MDNode *OrigLoopID, InstPartition *Part) {
600  OrigLoopID,
602  Part->hasDepCycle() ? LLVMLoopDistributeFollowupSequential
603  : LLVMLoopDistributeFollowupCoincident});
604  if (PartitionID.hasValue()) {
605  Loop *NewLoop = Part->getDistributedLoop();
606  NewLoop->setLoopID(PartitionID.getValue());
607  }
608  }
609 };
610 
611 /// For each memory instruction, this class maintains difference of the
612 /// number of unsafe dependences that start out from this instruction minus
613 /// those that end here.
614 ///
615 /// By traversing the memory instructions in program order and accumulating this
616 /// number, we know whether any unsafe dependence crosses over a program point.
617 class MemoryInstructionDependences {
619 
620 public:
621  struct Entry {
622  Instruction *Inst;
623  unsigned NumUnsafeDependencesStartOrEnd = 0;
624 
625  Entry(Instruction *Inst) : Inst(Inst) {}
626  };
627 
628  using AccessesType = SmallVector<Entry, 8>;
629 
630  AccessesType::const_iterator begin() const { return Accesses.begin(); }
631  AccessesType::const_iterator end() const { return Accesses.end(); }
632 
633  MemoryInstructionDependences(
634  const SmallVectorImpl<Instruction *> &Instructions,
635  const SmallVectorImpl<Dependence> &Dependences) {
636  Accesses.append(Instructions.begin(), Instructions.end());
637 
638  LLVM_DEBUG(dbgs() << "Backward dependences:\n");
639  for (auto &Dep : Dependences)
640  if (Dep.isPossiblyBackward()) {
641  // Note that the designations source and destination follow the program
642  // order, i.e. source is always first. (The direction is given by the
643  // DepType.)
644  ++Accesses[Dep.Source].NumUnsafeDependencesStartOrEnd;
645  --Accesses[Dep.Destination].NumUnsafeDependencesStartOrEnd;
646 
647  LLVM_DEBUG(Dep.print(dbgs(), 2, Instructions));
648  }
649  }
650 
651 private:
652  AccessesType Accesses;
653 };
654 
655 /// The actual class performing the per-loop work.
656 class LoopDistributeForLoop {
657 public:
658  LoopDistributeForLoop(Loop *L, Function *F, LoopInfo *LI, DominatorTree *DT,
660  : L(L), F(F), LI(LI), DT(DT), SE(SE), ORE(ORE) {
661  setForced();
662  }
663 
664  /// Try to distribute an inner-most loop.
665  bool processLoop(std::function<const LoopAccessInfo &(Loop &)> &GetLAA) {
666  assert(L->empty() && "Only process inner loops.");
667 
668  LLVM_DEBUG(dbgs() << "\nLDist: In \""
669  << L->getHeader()->getParent()->getName()
670  << "\" checking " << *L << "\n");
671 
672  if (!L->getExitBlock())
673  return fail("MultipleExitBlocks", "multiple exit blocks");
674  if (!L->isLoopSimplifyForm())
675  return fail("NotLoopSimplifyForm",
676  "loop is not in loop-simplify form");
677 
678  BasicBlock *PH = L->getLoopPreheader();
679 
680  // LAA will check that we only have a single exiting block.
681  LAI = &GetLAA(*L);
682 
683  // Currently, we only distribute to isolate the part of the loop with
684  // dependence cycles to enable partial vectorization.
685  if (LAI->canVectorizeMemory())
686  return fail("MemOpsCanBeVectorized",
687  "memory operations are safe for vectorization");
688 
689  auto *Dependences = LAI->getDepChecker().getDependences();
690  if (!Dependences || Dependences->empty())
691  return fail("NoUnsafeDeps", "no unsafe dependences to isolate");
692 
693  InstPartitionContainer Partitions(L, LI, DT);
694 
695  // First, go through each memory operation and assign them to consecutive
696  // partitions (the order of partitions follows program order). Put those
697  // with unsafe dependences into "cyclic" partition otherwise put each store
698  // in its own "non-cyclic" partition (we'll merge these later).
699  //
700  // Note that a memory operation (e.g. Load2 below) at a program point that
701  // has an unsafe dependence (Store3->Load1) spanning over it must be
702  // included in the same cyclic partition as the dependent operations. This
703  // is to preserve the original program order after distribution. E.g.:
704  //
705  // NumUnsafeDependencesStartOrEnd NumUnsafeDependencesActive
706  // Load1 -. 1 0->1
707  // Load2 | /Unsafe/ 0 1
708  // Store3 -' -1 1->0
709  // Load4 0 0
710  //
711  // NumUnsafeDependencesActive > 0 indicates this situation and in this case
712  // we just keep assigning to the same cyclic partition until
713  // NumUnsafeDependencesActive reaches 0.
714  const MemoryDepChecker &DepChecker = LAI->getDepChecker();
715  MemoryInstructionDependences MID(DepChecker.getMemoryInstructions(),
716  *Dependences);
717 
718  int NumUnsafeDependencesActive = 0;
719  for (auto &InstDep : MID) {
720  Instruction *I = InstDep.Inst;
721  // We update NumUnsafeDependencesActive post-instruction, catch the
722  // start of a dependence directly via NumUnsafeDependencesStartOrEnd.
723  if (NumUnsafeDependencesActive ||
724  InstDep.NumUnsafeDependencesStartOrEnd > 0)
725  Partitions.addToCyclicPartition(I);
726  else
727  Partitions.addToNewNonCyclicPartition(I);
728  NumUnsafeDependencesActive += InstDep.NumUnsafeDependencesStartOrEnd;
729  assert(NumUnsafeDependencesActive >= 0 &&
730  "Negative number of dependences active");
731  }
732 
733  // Add partitions for values used outside. These partitions can be out of
734  // order from the original program order. This is OK because if the
735  // partition uses a load we will merge this partition with the original
736  // partition of the load that we set up in the previous loop (see
737  // mergeToAvoidDuplicatedLoads).
738  auto DefsUsedOutside = findDefsUsedOutsideOfLoop(L);
739  for (auto *Inst : DefsUsedOutside)
740  Partitions.addToNewNonCyclicPartition(Inst);
741 
742  LLVM_DEBUG(dbgs() << "Seeded partitions:\n" << Partitions);
743  if (Partitions.getSize() < 2)
744  return fail("CantIsolateUnsafeDeps",
745  "cannot isolate unsafe dependencies");
746 
747  // Run the merge heuristics: Merge non-cyclic adjacent partitions since we
748  // should be able to vectorize these together.
749  Partitions.mergeBeforePopulating();
750  LLVM_DEBUG(dbgs() << "\nMerged partitions:\n" << Partitions);
751  if (Partitions.getSize() < 2)
752  return fail("CantIsolateUnsafeDeps",
753  "cannot isolate unsafe dependencies");
754 
755  // Now, populate the partitions with non-memory operations.
756  Partitions.populateUsedSet();
757  LLVM_DEBUG(dbgs() << "\nPopulated partitions:\n" << Partitions);
758 
759  // In order to preserve original lexical order for loads, keep them in the
760  // partition that we set up in the MemoryInstructionDependences loop.
761  if (Partitions.mergeToAvoidDuplicatedLoads()) {
762  LLVM_DEBUG(dbgs() << "\nPartitions merged to ensure unique loads:\n"
763  << Partitions);
764  if (Partitions.getSize() < 2)
765  return fail("CantIsolateUnsafeDeps",
766  "cannot isolate unsafe dependencies");
767  }
768 
769  // Don't distribute the loop if we need too many SCEV run-time checks, or
770  // any if it's illegal.
771  const SCEVUnionPredicate &Pred = LAI->getPSE().getUnionPredicate();
772  if (LAI->hasConvergentOp() && !Pred.isAlwaysTrue()) {
773  return fail("RuntimeCheckWithConvergent",
774  "may not insert runtime check with convergent operation");
775  }
776 
777  if (Pred.getComplexity() > (IsForced.getValueOr(false)
780  return fail("TooManySCEVRuntimeChecks",
781  "too many SCEV run-time checks needed.\n");
782 
783  if (!IsForced.getValueOr(false) && hasDisableAllTransformsHint(L))
784  return fail("HeuristicDisabled", "distribution heuristic disabled");
785 
786  LLVM_DEBUG(dbgs() << "\nDistributing loop: " << *L << "\n");
787  // We're done forming the partitions set up the reverse mapping from
788  // instructions to partitions.
789  Partitions.setupPartitionIdOnInstructions();
790 
791  // To keep things simple have an empty preheader before we version or clone
792  // the loop. (Also split if this has no predecessor, i.e. entry, because we
793  // rely on PH having a predecessor.)
794  if (!PH->getSinglePredecessor() || &*PH->begin() != PH->getTerminator())
795  SplitBlock(PH, PH->getTerminator(), DT, LI);
796 
797  // If we need run-time checks, version the loop now.
798  auto PtrToPartition = Partitions.computePartitionSetForPointers(*LAI);
799  const auto *RtPtrChecking = LAI->getRuntimePointerChecking();
800  const auto &AllChecks = RtPtrChecking->getChecks();
801  auto Checks = includeOnlyCrossPartitionChecks(AllChecks, PtrToPartition,
802  RtPtrChecking);
803 
804  if (LAI->hasConvergentOp() && !Checks.empty()) {
805  return fail("RuntimeCheckWithConvergent",
806  "may not insert runtime check with convergent operation");
807  }
808 
809  if (!Pred.isAlwaysTrue() || !Checks.empty()) {
810  assert(!LAI->hasConvergentOp() && "inserting illegal loop versioning");
811 
812  MDNode *OrigLoopID = L->getLoopID();
813 
814  LLVM_DEBUG(dbgs() << "\nPointers:\n");
816  LoopVersioning LVer(*LAI, L, LI, DT, SE, false);
817  LVer.setAliasChecks(std::move(Checks));
818  LVer.setSCEVChecks(LAI->getPSE().getUnionPredicate());
819  LVer.versionLoop(DefsUsedOutside);
820  LVer.annotateLoopWithNoAlias();
821 
822  // The unversioned loop will not be changed, so we inherit all attributes
823  // from the original loop, but remove the loop distribution metadata to
824  // avoid to distribute it again.
825  MDNode *UnversionedLoopID =
826  makeFollowupLoopID(OrigLoopID,
828  LLVMLoopDistributeFollowupFallback},
829  "llvm.loop.distribute.", true)
830  .getValue();
831  LVer.getNonVersionedLoop()->setLoopID(UnversionedLoopID);
832  }
833 
834  // Create identical copies of the original loop for each partition and hook
835  // them up sequentially.
836  Partitions.cloneLoops();
837 
838  // Now, we remove the instruction from each loop that don't belong to that
839  // partition.
840  Partitions.removeUnusedInsts();
841  LLVM_DEBUG(dbgs() << "\nAfter removing unused Instrs:\n");
842  LLVM_DEBUG(Partitions.printBlocks());
843 
844  if (LDistVerify) {
845  LI->verify(*DT);
847  }
848 
849  ++NumLoopsDistributed;
850  // Report the success.
851  ORE->emit([&]() {
852  return OptimizationRemark(LDIST_NAME, "Distribute", L->getStartLoc(),
853  L->getHeader())
854  << "distributed loop";
855  });
856  return true;
857  }
858 
859  /// Provide diagnostics then \return with false.
860  bool fail(StringRef RemarkName, StringRef Message) {
861  LLVMContext &Ctx = F->getContext();
862  bool Forced = isForced().getValueOr(false);
863 
864  LLVM_DEBUG(dbgs() << "Skipping; " << Message << "\n");
865 
866  // With Rpass-missed report that distribution failed.
867  ORE->emit([&]() {
868  return OptimizationRemarkMissed(LDIST_NAME, "NotDistributed",
869  L->getStartLoc(), L->getHeader())
870  << "loop not distributed: use -Rpass-analysis=loop-distribute for "
871  "more "
872  "info";
873  });
874 
875  // With Rpass-analysis report why. This is on by default if distribution
876  // was requested explicitly.
877  ORE->emit(OptimizationRemarkAnalysis(
879  RemarkName, L->getStartLoc(), L->getHeader())
880  << "loop not distributed: " << Message);
881 
882  // Also issue a warning if distribution was requested explicitly but it
883  // failed.
884  if (Forced)
886  *F, L->getStartLoc(), "loop not distributed: failed "
887  "explicitly specified loop distribution"));
888 
889  return false;
890  }
891 
892  /// Return if distribution forced to be enabled/disabled for the loop.
893  ///
894  /// If the optional has a value, it indicates whether distribution was forced
895  /// to be enabled (true) or disabled (false). If the optional has no value
896  /// distribution was not forced either way.
897  const Optional<bool> &isForced() const { return IsForced; }
898 
899 private:
900  /// Filter out checks between pointers from the same partition.
901  ///
902  /// \p PtrToPartition contains the partition number for pointers. Partition
903  /// number -1 means that the pointer is used in multiple partitions. In this
904  /// case we can't safely omit the check.
906  includeOnlyCrossPartitionChecks(
908  const SmallVectorImpl<int> &PtrToPartition,
909  const RuntimePointerChecking *RtPtrChecking) {
911 
912  copy_if(AllChecks, std::back_inserter(Checks),
914  for (unsigned PtrIdx1 : Check.first->Members)
915  for (unsigned PtrIdx2 : Check.second->Members)
916  // Only include this check if there is a pair of pointers
917  // that require checking and the pointers fall into
918  // separate partitions.
919  //
920  // (Note that we already know at this point that the two
921  // pointer groups need checking but it doesn't follow
922  // that each pair of pointers within the two groups need
923  // checking as well.
924  //
925  // In other words we don't want to include a check just
926  // because there is a pair of pointers between the two
927  // pointer groups that require checks and a different
928  // pair whose pointers fall into different partitions.)
929  if (RtPtrChecking->needsChecking(PtrIdx1, PtrIdx2) &&
931  PtrToPartition, PtrIdx1, PtrIdx2))
932  return true;
933  return false;
934  });
935 
936  return Checks;
937  }
938 
939  /// Check whether the loop metadata is forcing distribution to be
940  /// enabled/disabled.
941  void setForced() {
943  findStringMetadataForLoop(L, "llvm.loop.distribute.enable");
944  if (!Value)
945  return;
946 
947  const MDOperand *Op = *Value;
948  assert(Op && mdconst::hasa<ConstantInt>(*Op) && "invalid metadata");
949  IsForced = mdconst::extract<ConstantInt>(*Op)->getZExtValue();
950  }
951 
952  Loop *L;
953  Function *F;
954 
955  // Analyses used.
956  LoopInfo *LI;
957  const LoopAccessInfo *LAI = nullptr;
958  DominatorTree *DT;
959  ScalarEvolution *SE;
961 
962  /// Indicates whether distribution is forced to be enabled/disabled for
963  /// the loop.
964  ///
965  /// If the optional has a value, it indicates whether distribution was forced
966  /// to be enabled (true) or disabled (false). If the optional has no value
967  /// distribution was not forced either way.
968  Optional<bool> IsForced;
969 };
970 
971 } // end anonymous namespace
972 
973 /// Shared implementation between new and old PMs.
974 static bool runImpl(Function &F, LoopInfo *LI, DominatorTree *DT,
976  std::function<const LoopAccessInfo &(Loop &)> &GetLAA) {
977  // Build up a worklist of inner-loops to vectorize. This is necessary as the
978  // act of distributing a loop creates new loops and can invalidate iterators
979  // across the loops.
980  SmallVector<Loop *, 8> Worklist;
981 
982  for (Loop *TopLevelLoop : *LI)
983  for (Loop *L : depth_first(TopLevelLoop))
984  // We only handle inner-most loops.
985  if (L->empty())
986  Worklist.push_back(L);
987 
988  // Now walk the identified inner loops.
989  bool Changed = false;
990  for (Loop *L : Worklist) {
991  LoopDistributeForLoop LDL(L, &F, LI, DT, SE, ORE);
992 
993  // If distribution was forced for the specific loop to be
994  // enabled/disabled, follow that. Otherwise use the global flag.
995  if (LDL.isForced().getValueOr(EnableLoopDistribute))
996  Changed |= LDL.processLoop(GetLAA);
997  }
998 
999  // Process each loop nest in the function.
1000  return Changed;
1001 }
1002 
1003 namespace {
1004 
1005 /// The pass class.
1006 class LoopDistributeLegacy : public FunctionPass {
1007 public:
1008  static char ID;
1009 
1010  LoopDistributeLegacy() : FunctionPass(ID) {
1011  // The default is set by the caller.
1013  }
1014 
1015  bool runOnFunction(Function &F) override {
1016  if (skipFunction(F))
1017  return false;
1018 
1019  auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
1020  auto *LAA = &getAnalysis<LoopAccessLegacyAnalysis>();
1021  auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1022  auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
1023  auto *ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
1024  std::function<const LoopAccessInfo &(Loop &)> GetLAA =
1025  [&](Loop &L) -> const LoopAccessInfo & { return LAA->getInfo(&L); };
1026 
1027  return runImpl(F, LI, DT, SE, ORE, GetLAA);
1028  }
1029 
1030  void getAnalysisUsage(AnalysisUsage &AU) const override {
1039  }
1040 };
1041 
1042 } // end anonymous namespace
1043 
1046  auto &LI = AM.getResult<LoopAnalysis>(F);
1047  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
1048  auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
1050 
1051  // We don't directly need these analyses but they're required for loop
1052  // analyses so provide them below.
1053  auto &AA = AM.getResult<AAManager>(F);
1054  auto &AC = AM.getResult<AssumptionAnalysis>(F);
1055  auto &TTI = AM.getResult<TargetIRAnalysis>(F);
1056  auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
1057 
1058  auto &LAM = AM.getResult<LoopAnalysisManagerFunctionProxy>(F).getManager();
1059  std::function<const LoopAccessInfo &(Loop &)> GetLAA =
1060  [&](Loop &L) -> const LoopAccessInfo & {
1061  LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE, TLI, TTI, nullptr};
1062  return LAM.getResult<LoopAccessAnalysis>(L, AR);
1063  };
1064 
1065  bool Changed = runImpl(F, &LI, &DT, &SE, &ORE, GetLAA);
1066  if (!Changed)
1067  return PreservedAnalyses::all();
1068  PreservedAnalyses PA;
1069  PA.preserve<LoopAnalysis>();
1071  PA.preserve<GlobalsAA>();
1072  return PA;
1073 }
1074 
1076 
1077 static const char ldist_name[] = "Loop Distribution";
1078 
1079 INITIALIZE_PASS_BEGIN(LoopDistributeLegacy, LDIST_NAME, ldist_name, false,
1080  false)
1086 INITIALIZE_PASS_END(LoopDistributeLegacy, LDIST_NAME, ldist_name, false, false)
1087 
1088 FunctionPass *llvm::createLoopDistributePass() { return new LoopDistributeLegacy(); }
Legacy wrapper pass to provide the GlobalsAAResult object.
static bool Check(DecodeStatus &Out, DecodeStatus In)
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:233
void printChecks(raw_ostream &OS, const SmallVectorImpl< PointerCheck > &Checks, unsigned Depth=0) const
Print Checks.
Tracking metadata reference owned by Metadata.
Definition: Metadata.h:710
const MemoryDepChecker & getDepChecker() const
the Memory Dependence Checker which can determine the loop-independent and loop-carried dependences b...
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Diagnostic information for missed-optimization remarks.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
#define LDIST_NAME
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
Definition: Path.cpp:224
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:777
This class represents lattice values for constants.
Definition: AllocatorList.h:23
This is the interface for a simple mod/ref and alias analysis over globals.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
OutputIt copy_if(R &&Range, OutputIt Out, UnaryPredicate P)
Provide wrappers to std::copy_if which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1212
static cl::opt< unsigned > PragmaDistributeSCEVCheckThreshold("loop-distribute-scev-check-threshold-with-pragma", cl::init(128), cl::Hidden, cl::desc("The maximum number of SCEV checks allowed for Loop " "Distribution for loop marked with #pragma loop distribute(enable)"))
const RuntimePointerChecking * getRuntimePointerChecking() const
This provides a very simple, boring adaptor for a begin and end iterator into a range type...
The main scalar evolution driver.
This file contains the declarations for metadata subclasses.
static cl::opt< bool > LDistVerify("loop-distribute-verify", cl::Hidden, cl::desc("Turn on DominatorTree and LoopInfo verification " "after Loop Distribution"), cl::init(false))
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:160
Analysis pass providing the TargetTransformInfo.
STATISTIC(NumFunctions, "Total number of functions")
Metadata node.
Definition: Metadata.h:863
SmallVector< Instruction *, 4 > getInstructionsForAccess(Value *Ptr, bool isWrite) const
Return the list of instructions that use Ptr to read or write memory.
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:230
F(f)
Checks memory dependences among accesses to the same underlying object to determine whether there vec...
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:144
BasicBlock * SplitBlock(BasicBlock *Old, Instruction *SplitPt, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the specified block at the specified instruction - everything before SplitPt stays in Old and e...
void verify(const DominatorTreeBase< BlockT, false > &DomTree) const
Definition: LoopInfoImpl.h:681
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:273
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:50
bool verify(VerificationLevel VL=VerificationLevel::Full) const
verify - checks if the tree is correct.
static const char *const LLVMLoopDistributeFollowupSequential
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:80
Diagnostic information for optimization failures.
static bool arePointersInSamePartition(const SmallVectorImpl< int > &PtrToPartition, unsigned PtrIdx1, unsigned PtrIdx2)
Check if pointers are in the same partition.
Diagnostic information for optimization analysis remarks.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:41
friend const_iterator begin(StringRef path, Style style)
Get begin iterator over path.
Definition: Path.cpp:224
Analysis pass that exposes the LoopInfo for a function.
Definition: LoopInfo.h:1183
SmallVector< Instruction *, 8 > findDefsUsedOutsideOfLoop(Loop *L)
Returns the instructions that use values defined in the loop.
Definition: LoopUtils.cpp:120
BlockT * getHeader() const
Definition: LoopInfo.h:105
auto reverse(ContainerTy &&C, typename std::enable_if< has_rbegin< ContainerTy >::value >::type *=nullptr) -> decltype(make_range(C.rbegin(), C.rend()))
Definition: STLExtras.h:261
const SmallVectorImpl< Instruction * > & getMemoryInstructions() const
The vector of memory access instructions.
static const char *const LLVMLoopDistributeFollowupCoincident
static const char *const LLVMLoopDistributeFollowupFallback
void setLoopID(MDNode *LoopID) const
Set the llvm.loop loop id metadata for this loop.
Definition: LoopInfo.cpp:513
const T & getValue() const LLVM_LVALUE_FUNCTION
Definition: Optional.h:255
static cl::opt< bool > DistributeNonIfConvertible("loop-distribute-non-if-convertible", cl::Hidden, cl::desc("Whether to distribute into a loop that may not be " "if-convertible by the loop vectorizer"), cl::init(false))
This header provides classes for managing per-loop analyses.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:144
void replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Definition: User.cpp:20
static bool runImpl(Function &F, LoopInfo *LI, DominatorTree *DT, ScalarEvolution *SE, OptimizationRemarkEmitter *ORE, std::function< const LoopAccessInfo &(Loop &)> &GetLAA)
Shared implementation between new and old PMs.
static bool runOnFunction(Function &F, bool PostInlining)
#define P(N)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
friend const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:233
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:154
* if(!EatIfPresent(lltok::kw_thread_local)) return false
ParseOptionalThreadLocal := /*empty.
bool needsChecking(const CheckingPtrGroup &M, const CheckingPtrGroup &N) const
Decide if we need to add a check between two groups of pointers, according to needsChecking.
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:240
LLVM Basic Block Representation.
Definition: BasicBlock.h:57
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:64
void changeImmediateDominator(DomTreeNodeBase< NodeT > *N, DomTreeNodeBase< NodeT > *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node&#39;s...
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This analysis provides dependence information for the memory accesses of a loop.
INITIALIZE_PASS_BEGIN(LoopDistributeLegacy, LDIST_NAME, ldist_name, false, false) FunctionPass *llvm
This file contains the declarations for the subclasses of Constant, which represent the different fla...
A manager for alias analyses.
Diagnostic information for applied optimization remarks.
EquivalenceClasses - This represents a collection of equivalence classes and supports three efficient...
Represent the analysis usage information of a pass.
Fast - This calling convention attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:42
Analysis pass providing a never-invalidated alias analysis result.
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:284
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
const PredicatedScalarEvolution & getPSE() const
Used to add runtime SCEV checks.
BlockT * getExitBlock() const
If getExitBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:75
static UndefValue * get(Type *T)
Static factory methods - Return an &#39;undef&#39; object of the specified type.
Definition: Constants.cpp:1446
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:160
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
Definition: LoopInfo.cpp:611
std::pair< const CheckingPtrGroup *, const CheckingPtrGroup * > PointerCheck
A memcheck which made up of a pair of grouped pointers.
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
static cl::opt< unsigned > DistributeSCEVCheckThreshold("loop-distribute-scev-check-threshold", cl::init(8), cl::Hidden, cl::desc("The maximum number of SCEV checks allowed for Loop " "Distribution"))
constexpr bool empty(const T &RangeOrContainer)
Test whether RangeOrContainer is empty. Similar to C++17 std::empty.
Definition: STLExtras.h:197
A function analysis which provides an AssumptionCache.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
const SmallVectorImpl< Dependence > * getDependences() const
Returns the memory dependences.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:417
bool isAlwaysTrue() const override
Implementation of the SCEVPredicate interface.
const SCEVUnionPredicate & getUnionPredicate() const
static uint64_t add(uint64_t LeftOp, uint64_t RightOp)
Definition: FileCheck.cpp:215
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:837
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
Definition: PPCPredicates.h:26
void initializeLoopDistributeLegacyPass(PassRegistry &)
Drive the analysis of memory accesses in the loop.
This class emits a version of the loop where run-time checks ensure that may-alias pointers can&#39;t ove...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
Optional< MDNode * > makeFollowupLoopID(MDNode *OrigLoopID, ArrayRef< StringRef > FollowupAttrs, const char *InheritOptionsAttrsPrefix="", bool AlwaysNew=false)
Create a new loop identifier for a loop created from a loop transformation.
Definition: LoopUtils.cpp:302
static const char *const LLVMLoopDistributeFollowupAll
static const char ldist_name[]
Holds information about the memory runtime legality checks to verify that a group of pointers do not ...
Analysis pass that exposes the ScalarEvolution for a function.
unsigned getComplexity() const override
We estimate the complexity of a union predicate as the size number of predicates in the union...
bool hasValue() const
Definition: Optional.h:259
bool isLoopSimplifyForm() const
Return true if the Loop is in the form that the LoopSimplify form transforms loops to...
Definition: LoopInfo.cpp:464
MDNode * getLoopID() const
Return the llvm.loop loop id metadata node for this loop if it is present.
Definition: LoopInfo.cpp:489
This analysis provides dependence information for the memory accesses of a loop.
Dependece between memory access instructions.
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:509
StringRef getName() const
Return a constant reference to the value&#39;s name.
Definition: Value.cpp:214
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:106
#define I(x, y, z)
Definition: MD5.cpp:58
#define N
Optional< const MDOperand * > findStringMetadataForLoop(const Loop *TheLoop, StringRef Name)
Find string metadata for loop.
Definition: LoopUtils.cpp:251
bool hasConvergentOp() const
Return true if there is a convergent operation in the loop.
SmallVector< PointerInfo, 2 > Pointers
Information about the pointers that may require checking.
iterator_range< value_op_iterator > operand_values()
Definition: User.h:261
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:332
void preserve()
Mark an analysis as preserved.
Definition: PassManager.h:175
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
raw_ostream & operator<<(raw_ostream &OS, const APInt &I)
Definition: APInt.h:2047
bool empty() const
Definition: LoopInfo.h:151
Analysis pass providing the TargetLibraryInfo.
iterator_range< df_iterator< T > > depth_first(const T &G)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This class represents a composition of other SCEV predicates, and is the class that most clients will...
static cl::opt< bool > EnableLoopDistribute("enable-loop-distribute", cl::Hidden, cl::desc("Enable the new, experimental LoopDistribution Pass"), cl::init(false))
LLVM Value Representation.
Definition: Value.h:74
OptimizationRemarkEmitter legacy analysis pass.
static bool blockNeedsPredication(BasicBlock *BB, Loop *TheLoop, DominatorTree *DT)
Return true if the block BB needs to be predicated in order for the loop to be vectorized.
Loop * cloneLoopWithPreheader(BasicBlock *Before, BasicBlock *LoopDomBB, Loop *OrigLoop, ValueToValueMapTy &VMap, const Twine &NameSuffix, LoopInfo *LI, DominatorTree *DT, SmallVectorImpl< BasicBlock *> &Blocks)
Clones a loop OrigLoop.
This class implements an extremely fast bulk output stream that can only output to a stream...
Definition: raw_ostream.h:45
The legacy pass manager&#39;s analysis pass to compute loop information.
Definition: LoopInfo.h:1208
print Print MemDeps of function
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:48
A container for analyses that lazily runs them and caches their results.
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:259
This pass exposes codegen information to IR-level passes.
This header defines various interfaces for pass management in LLVM.
bool hasDisableAllTransformsHint(const Loop *L)
Look for the loop attribute that disables all transformation heuristic.
Definition: LoopUtils.cpp:383
const SmallVector< PointerCheck, 4 > & getChecks() const
Returns the checks that generateChecks created.
Dependence - This class represents a dependence between two memory memory references in a function...
#define LLVM_DEBUG(X)
Definition: Debug.h:122
for(unsigned i=Desc.getNumOperands(), e=OldMI.getNumOperands();i !=e;++i)
The optimization diagnostic interface.
bool canVectorizeMemory() const
Return true we can analyze the memory accesses in the loop and there are no memory dependence cycles...
static void fail(const SDLoc &DL, SelectionDAG &DAG, const Twine &Msg)
FunctionPass * createLoopDistributePass()
const BasicBlock * getParent() const
Definition: Instruction.h:66
void remapInstructionsInBlocks(const SmallVectorImpl< BasicBlock *> &Blocks, ValueToValueMapTy &VMap)
Remaps instructions in Blocks using the mapping in VMap.
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Definition: PassManager.h:1045