LLVM  12.0.0git
LoopDistribute.cpp
Go to the documentation of this file.
1 //===- LoopDistribute.cpp - Loop Distribution Pass ------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the Loop Distribution Pass. Its main focus is to
10 // distribute loops that cannot be vectorized due to dependence cycles. It
11 // tries to isolate the offending dependences into a new loop allowing
12 // vectorization of the remaining parts.
13 //
14 // For dependence analysis, the pass uses the LoopVectorizer's
15 // LoopAccessAnalysis. Because this analysis presumes no change in the order of
16 // memory operations, special care is taken to preserve the lexical order of
17 // these operations.
18 //
19 // Similarly to the Vectorizer, the pass also supports loop versioning to
20 // run-time disambiguate potentially overlapping arrays.
21 //
22 //===----------------------------------------------------------------------===//
23 
25 #include "llvm/ADT/DenseMap.h"
28 #include "llvm/ADT/Optional.h"
29 #include "llvm/ADT/STLExtras.h"
30 #include "llvm/ADT/SmallPtrSet.h"
31 #include "llvm/ADT/SmallVector.h"
32 #include "llvm/ADT/Statistic.h"
33 #include "llvm/ADT/StringRef.h"
34 #include "llvm/ADT/Twine.h"
41 #include "llvm/Analysis/LoopInfo.h"
46 #include "llvm/IR/BasicBlock.h"
47 #include "llvm/IR/Constants.h"
48 #include "llvm/IR/DiagnosticInfo.h"
49 #include "llvm/IR/Dominators.h"
50 #include "llvm/IR/Function.h"
51 #include "llvm/IR/InstrTypes.h"
52 #include "llvm/IR/Instruction.h"
53 #include "llvm/IR/Instructions.h"
54 #include "llvm/IR/LLVMContext.h"
55 #include "llvm/IR/Metadata.h"
56 #include "llvm/IR/PassManager.h"
57 #include "llvm/IR/Value.h"
58 #include "llvm/InitializePasses.h"
59 #include "llvm/Pass.h"
60 #include "llvm/Support/Casting.h"
62 #include "llvm/Support/Debug.h"
64 #include "llvm/Transforms/Scalar.h"
70 #include <cassert>
71 #include <functional>
72 #include <list>
73 #include <tuple>
74 #include <utility>
75 
76 using namespace llvm;
77 
78 #define LDIST_NAME "loop-distribute"
79 #define DEBUG_TYPE LDIST_NAME
80 
81 /// @{
82 /// Metadata attribute names
83 static const char *const LLVMLoopDistributeFollowupAll =
84  "llvm.loop.distribute.followup_all";
85 static const char *const LLVMLoopDistributeFollowupCoincident =
86  "llvm.loop.distribute.followup_coincident";
87 static const char *const LLVMLoopDistributeFollowupSequential =
88  "llvm.loop.distribute.followup_sequential";
89 static const char *const LLVMLoopDistributeFollowupFallback =
90  "llvm.loop.distribute.followup_fallback";
91 /// @}
92 
93 static cl::opt<bool>
94  LDistVerify("loop-distribute-verify", cl::Hidden,
95  cl::desc("Turn on DominatorTree and LoopInfo verification "
96  "after Loop Distribution"),
97  cl::init(false));
98 
100  "loop-distribute-non-if-convertible", cl::Hidden,
101  cl::desc("Whether to distribute into a loop that may not be "
102  "if-convertible by the loop vectorizer"),
103  cl::init(false));
104 
106  "loop-distribute-scev-check-threshold", cl::init(8), cl::Hidden,
107  cl::desc("The maximum number of SCEV checks allowed for Loop "
108  "Distribution"));
109 
111  "loop-distribute-scev-check-threshold-with-pragma", cl::init(128),
112  cl::Hidden,
113  cl::desc(
114  "The maximum number of SCEV checks allowed for Loop "
115  "Distribution for loop marked with #pragma loop distribute(enable)"));
116 
118  "enable-loop-distribute", cl::Hidden,
119  cl::desc("Enable the new, experimental LoopDistribution Pass"),
120  cl::init(false));
121 
122 STATISTIC(NumLoopsDistributed, "Number of loops distributed");
123 
124 namespace {
125 
126 /// Maintains the set of instructions of the loop for a partition before
127 /// cloning. After cloning, it hosts the new loop.
128 class InstPartition {
129  using InstructionSet = SmallPtrSet<Instruction *, 8>;
130 
131 public:
132  InstPartition(Instruction *I, Loop *L, bool DepCycle = false)
133  : DepCycle(DepCycle), OrigLoop(L) {
134  Set.insert(I);
135  }
136 
137  /// Returns whether this partition contains a dependence cycle.
138  bool hasDepCycle() const { return DepCycle; }
139 
140  /// Adds an instruction to this partition.
141  void add(Instruction *I) { Set.insert(I); }
142 
143  /// Collection accessors.
144  InstructionSet::iterator begin() { return Set.begin(); }
145  InstructionSet::iterator end() { return Set.end(); }
146  InstructionSet::const_iterator begin() const { return Set.begin(); }
147  InstructionSet::const_iterator end() const { return Set.end(); }
148  bool empty() const { return Set.empty(); }
149 
150  /// Moves this partition into \p Other. This partition becomes empty
151  /// after this.
152  void moveTo(InstPartition &Other) {
153  Other.Set.insert(Set.begin(), Set.end());
154  Set.clear();
155  Other.DepCycle |= DepCycle;
156  }
157 
158  /// Populates the partition with a transitive closure of all the
159  /// instructions that the seeded instructions dependent on.
160  void populateUsedSet() {
161  // FIXME: We currently don't use control-dependence but simply include all
162  // blocks (possibly empty at the end) and let simplifycfg mostly clean this
163  // up.
164  for (auto *B : OrigLoop->getBlocks())
165  Set.insert(B->getTerminator());
166 
167  // Follow the use-def chains to form a transitive closure of all the
168  // instructions that the originally seeded instructions depend on.
169  SmallVector<Instruction *, 8> Worklist(Set.begin(), Set.end());
170  while (!Worklist.empty()) {
171  Instruction *I = Worklist.pop_back_val();
172  // Insert instructions from the loop that we depend on.
173  for (Value *V : I->operand_values()) {
174  auto *I = dyn_cast<Instruction>(V);
175  if (I && OrigLoop->contains(I->getParent()) && Set.insert(I).second)
176  Worklist.push_back(I);
177  }
178  }
179  }
180 
181  /// Clones the original loop.
182  ///
183  /// Updates LoopInfo and DominatorTree using the information that block \p
184  /// LoopDomBB dominates the loop.
185  Loop *cloneLoopWithPreheader(BasicBlock *InsertBefore, BasicBlock *LoopDomBB,
186  unsigned Index, LoopInfo *LI,
187  DominatorTree *DT) {
188  ClonedLoop = ::cloneLoopWithPreheader(InsertBefore, LoopDomBB, OrigLoop,
189  VMap, Twine(".ldist") + Twine(Index),
190  LI, DT, ClonedLoopBlocks);
191  return ClonedLoop;
192  }
193 
194  /// The cloned loop. If this partition is mapped to the original loop,
195  /// this is null.
196  const Loop *getClonedLoop() const { return ClonedLoop; }
197 
198  /// Returns the loop where this partition ends up after distribution.
199  /// If this partition is mapped to the original loop then use the block from
200  /// the loop.
201  Loop *getDistributedLoop() const {
202  return ClonedLoop ? ClonedLoop : OrigLoop;
203  }
204 
205  /// The VMap that is populated by cloning and then used in
206  /// remapinstruction to remap the cloned instructions.
207  ValueToValueMapTy &getVMap() { return VMap; }
208 
209  /// Remaps the cloned instructions using VMap.
210  void remapInstructions() {
211  remapInstructionsInBlocks(ClonedLoopBlocks, VMap);
212  }
213 
214  /// Based on the set of instructions selected for this partition,
215  /// removes the unnecessary ones.
216  void removeUnusedInsts() {
218 
219  for (auto *Block : OrigLoop->getBlocks())
220  for (auto &Inst : *Block)
221  if (!Set.count(&Inst)) {
222  Instruction *NewInst = &Inst;
223  if (!VMap.empty())
224  NewInst = cast<Instruction>(VMap[NewInst]);
225 
226  assert(!isa<BranchInst>(NewInst) &&
227  "Branches are marked used early on");
228  Unused.push_back(NewInst);
229  }
230 
231  // Delete the instructions backwards, as it has a reduced likelihood of
232  // having to update as many def-use and use-def chains.
233  for (auto *Inst : reverse(Unused)) {
234  if (!Inst->use_empty())
235  Inst->replaceAllUsesWith(UndefValue::get(Inst->getType()));
236  Inst->eraseFromParent();
237  }
238  }
239 
240  void print() const {
241  if (DepCycle)
242  dbgs() << " (cycle)\n";
243  for (auto *I : Set)
244  // Prefix with the block name.
245  dbgs() << " " << I->getParent()->getName() << ":" << *I << "\n";
246  }
247 
248  void printBlocks() const {
249  for (auto *BB : getDistributedLoop()->getBlocks())
250  dbgs() << *BB;
251  }
252 
253 private:
254  /// Instructions from OrigLoop selected for this partition.
255  InstructionSet Set;
256 
257  /// Whether this partition contains a dependence cycle.
258  bool DepCycle;
259 
260  /// The original loop.
261  Loop *OrigLoop;
262 
263  /// The cloned loop. If this partition is mapped to the original loop,
264  /// this is null.
265  Loop *ClonedLoop = nullptr;
266 
267  /// The blocks of ClonedLoop including the preheader. If this
268  /// partition is mapped to the original loop, this is empty.
269  SmallVector<BasicBlock *, 8> ClonedLoopBlocks;
270 
271  /// These gets populated once the set of instructions have been
272  /// finalized. If this partition is mapped to the original loop, these are not
273  /// set.
274  ValueToValueMapTy VMap;
275 };
276 
277 /// Holds the set of Partitions. It populates them, merges them and then
278 /// clones the loops.
279 class InstPartitionContainer {
280  using InstToPartitionIdT = DenseMap<Instruction *, int>;
281 
282 public:
283  InstPartitionContainer(Loop *L, LoopInfo *LI, DominatorTree *DT)
284  : L(L), LI(LI), DT(DT) {}
285 
286  /// Returns the number of partitions.
287  unsigned getSize() const { return PartitionContainer.size(); }
288 
289  /// Adds \p Inst into the current partition if that is marked to
290  /// contain cycles. Otherwise start a new partition for it.
291  void addToCyclicPartition(Instruction *Inst) {
292  // If the current partition is non-cyclic. Start a new one.
293  if (PartitionContainer.empty() || !PartitionContainer.back().hasDepCycle())
294  PartitionContainer.emplace_back(Inst, L, /*DepCycle=*/true);
295  else
296  PartitionContainer.back().add(Inst);
297  }
298 
299  /// Adds \p Inst into a partition that is not marked to contain
300  /// dependence cycles.
301  ///
302  // Initially we isolate memory instructions into as many partitions as
303  // possible, then later we may merge them back together.
304  void addToNewNonCyclicPartition(Instruction *Inst) {
305  PartitionContainer.emplace_back(Inst, L);
306  }
307 
308  /// Merges adjacent non-cyclic partitions.
309  ///
310  /// The idea is that we currently only want to isolate the non-vectorizable
311  /// partition. We could later allow more distribution among these partition
312  /// too.
313  void mergeAdjacentNonCyclic() {
314  mergeAdjacentPartitionsIf(
315  [](const InstPartition *P) { return !P->hasDepCycle(); });
316  }
317 
318  /// If a partition contains only conditional stores, we won't vectorize
319  /// it. Try to merge it with a previous cyclic partition.
320  void mergeNonIfConvertible() {
321  mergeAdjacentPartitionsIf([&](const InstPartition *Partition) {
322  if (Partition->hasDepCycle())
323  return true;
324 
325  // Now, check if all stores are conditional in this partition.
326  bool seenStore = false;
327 
328  for (auto *Inst : *Partition)
329  if (isa<StoreInst>(Inst)) {
330  seenStore = true;
332  return false;
333  }
334  return seenStore;
335  });
336  }
337 
338  /// Merges the partitions according to various heuristics.
339  void mergeBeforePopulating() {
340  mergeAdjacentNonCyclic();
342  mergeNonIfConvertible();
343  }
344 
345  /// Merges partitions in order to ensure that no loads are duplicated.
346  ///
347  /// We can't duplicate loads because that could potentially reorder them.
348  /// LoopAccessAnalysis provides dependency information with the context that
349  /// the order of memory operation is preserved.
350  ///
351  /// Return if any partitions were merged.
352  bool mergeToAvoidDuplicatedLoads() {
353  using LoadToPartitionT = DenseMap<Instruction *, InstPartition *>;
354  using ToBeMergedT = EquivalenceClasses<InstPartition *>;
355 
356  LoadToPartitionT LoadToPartition;
357  ToBeMergedT ToBeMerged;
358 
359  // Step through the partitions and create equivalence between partitions
360  // that contain the same load. Also put partitions in between them in the
361  // same equivalence class to avoid reordering of memory operations.
362  for (PartitionContainerT::iterator I = PartitionContainer.begin(),
363  E = PartitionContainer.end();
364  I != E; ++I) {
365  auto *PartI = &*I;
366 
367  // If a load occurs in two partitions PartI and PartJ, merge all
368  // partitions (PartI, PartJ] into PartI.
369  for (Instruction *Inst : *PartI)
370  if (isa<LoadInst>(Inst)) {
371  bool NewElt;
372  LoadToPartitionT::iterator LoadToPart;
373 
374  std::tie(LoadToPart, NewElt) =
375  LoadToPartition.insert(std::make_pair(Inst, PartI));
376  if (!NewElt) {
377  LLVM_DEBUG(dbgs()
378  << "Merging partitions due to this load in multiple "
379  << "partitions: " << PartI << ", " << LoadToPart->second
380  << "\n"
381  << *Inst << "\n");
382 
383  auto PartJ = I;
384  do {
385  --PartJ;
386  ToBeMerged.unionSets(PartI, &*PartJ);
387  } while (&*PartJ != LoadToPart->second);
388  }
389  }
390  }
391  if (ToBeMerged.empty())
392  return false;
393 
394  // Merge the member of an equivalence class into its class leader. This
395  // makes the members empty.
396  for (ToBeMergedT::iterator I = ToBeMerged.begin(), E = ToBeMerged.end();
397  I != E; ++I) {
398  if (!I->isLeader())
399  continue;
400 
401  auto PartI = I->getData();
402  for (auto PartJ : make_range(std::next(ToBeMerged.member_begin(I)),
403  ToBeMerged.member_end())) {
404  PartJ->moveTo(*PartI);
405  }
406  }
407 
408  // Remove the empty partitions.
409  PartitionContainer.remove_if(
410  [](const InstPartition &P) { return P.empty(); });
411 
412  return true;
413  }
414 
415  /// Sets up the mapping between instructions to partitions. If the
416  /// instruction is duplicated across multiple partitions, set the entry to -1.
417  void setupPartitionIdOnInstructions() {
418  int PartitionID = 0;
419  for (const auto &Partition : PartitionContainer) {
420  for (Instruction *Inst : Partition) {
421  bool NewElt;
422  InstToPartitionIdT::iterator Iter;
423 
424  std::tie(Iter, NewElt) =
425  InstToPartitionId.insert(std::make_pair(Inst, PartitionID));
426  if (!NewElt)
427  Iter->second = -1;
428  }
429  ++PartitionID;
430  }
431  }
432 
433  /// Populates the partition with everything that the seeding
434  /// instructions require.
435  void populateUsedSet() {
436  for (auto &P : PartitionContainer)
437  P.populateUsedSet();
438  }
439 
440  /// This performs the main chunk of the work of cloning the loops for
441  /// the partitions.
442  void cloneLoops() {
443  BasicBlock *OrigPH = L->getLoopPreheader();
444  // At this point the predecessor of the preheader is either the memcheck
445  // block or the top part of the original preheader.
446  BasicBlock *Pred = OrigPH->getSinglePredecessor();
447  assert(Pred && "Preheader does not have a single predecessor");
448  BasicBlock *ExitBlock = L->getExitBlock();
449  assert(ExitBlock && "No single exit block");
450  Loop *NewLoop;
451 
452  assert(!PartitionContainer.empty() && "at least two partitions expected");
453  // We're cloning the preheader along with the loop so we already made sure
454  // it was empty.
455  assert(&*OrigPH->begin() == OrigPH->getTerminator() &&
456  "preheader not empty");
457 
458  // Preserve the original loop ID for use after the transformation.
459  MDNode *OrigLoopID = L->getLoopID();
460 
461  // Create a loop for each partition except the last. Clone the original
462  // loop before PH along with adding a preheader for the cloned loop. Then
463  // update PH to point to the newly added preheader.
464  BasicBlock *TopPH = OrigPH;
465  unsigned Index = getSize() - 1;
466  for (auto I = std::next(PartitionContainer.rbegin()),
467  E = PartitionContainer.rend();
468  I != E; ++I, --Index, TopPH = NewLoop->getLoopPreheader()) {
469  auto *Part = &*I;
470 
471  NewLoop = Part->cloneLoopWithPreheader(TopPH, Pred, Index, LI, DT);
472 
473  Part->getVMap()[ExitBlock] = TopPH;
474  Part->remapInstructions();
475  setNewLoopID(OrigLoopID, Part);
476  }
477  Pred->getTerminator()->replaceUsesOfWith(OrigPH, TopPH);
478 
479  // Also set a new loop ID for the last loop.
480  setNewLoopID(OrigLoopID, &PartitionContainer.back());
481 
482  // Now go in forward order and update the immediate dominator for the
483  // preheaders with the exiting block of the previous loop. Dominance
484  // within the loop is updated in cloneLoopWithPreheader.
485  for (auto Curr = PartitionContainer.cbegin(),
486  Next = std::next(PartitionContainer.cbegin()),
487  E = PartitionContainer.cend();
488  Next != E; ++Curr, ++Next)
490  Next->getDistributedLoop()->getLoopPreheader(),
491  Curr->getDistributedLoop()->getExitingBlock());
492  }
493 
494  /// Removes the dead instructions from the cloned loops.
495  void removeUnusedInsts() {
496  for (auto &Partition : PartitionContainer)
497  Partition.removeUnusedInsts();
498  }
499 
500  /// For each memory pointer, it computes the partitionId the pointer is
501  /// used in.
502  ///
503  /// This returns an array of int where the I-th entry corresponds to I-th
504  /// entry in LAI.getRuntimePointerCheck(). If the pointer is used in multiple
505  /// partitions its entry is set to -1.
507  computePartitionSetForPointers(const LoopAccessInfo &LAI) {
508  const RuntimePointerChecking *RtPtrCheck = LAI.getRuntimePointerChecking();
509 
510  unsigned N = RtPtrCheck->Pointers.size();
511  SmallVector<int, 8> PtrToPartitions(N);
512  for (unsigned I = 0; I < N; ++I) {
513  Value *Ptr = RtPtrCheck->Pointers[I].PointerValue;
514  auto Instructions =
515  LAI.getInstructionsForAccess(Ptr, RtPtrCheck->Pointers[I].IsWritePtr);
516 
517  int &Partition = PtrToPartitions[I];
518  // First set it to uninitialized.
519  Partition = -2;
520  for (Instruction *Inst : Instructions) {
521  // Note that this could be -1 if Inst is duplicated across multiple
522  // partitions.
523  int ThisPartition = this->InstToPartitionId[Inst];
524  if (Partition == -2)
525  Partition = ThisPartition;
526  // -1 means belonging to multiple partitions.
527  else if (Partition == -1)
528  break;
529  else if (Partition != (int)ThisPartition)
530  Partition = -1;
531  }
532  assert(Partition != -2 && "Pointer not belonging to any partition");
533  }
534 
535  return PtrToPartitions;
536  }
537 
538  void print(raw_ostream &OS) const {
539  unsigned Index = 0;
540  for (const auto &P : PartitionContainer) {
541  OS << "Partition " << Index++ << " (" << &P << "):\n";
542  P.print();
543  }
544  }
545 
546  void dump() const { print(dbgs()); }
547 
548 #ifndef NDEBUG
549  friend raw_ostream &operator<<(raw_ostream &OS,
550  const InstPartitionContainer &Partitions) {
551  Partitions.print(OS);
552  return OS;
553  }
554 #endif
555 
556  void printBlocks() const {
557  unsigned Index = 0;
558  for (const auto &P : PartitionContainer) {
559  dbgs() << "\nPartition " << Index++ << " (" << &P << "):\n";
560  P.printBlocks();
561  }
562  }
563 
564 private:
565  using PartitionContainerT = std::list<InstPartition>;
566 
567  /// List of partitions.
568  PartitionContainerT PartitionContainer;
569 
570  /// Mapping from Instruction to partition Id. If the instruction
571  /// belongs to multiple partitions the entry contains -1.
572  InstToPartitionIdT InstToPartitionId;
573 
574  Loop *L;
575  LoopInfo *LI;
576  DominatorTree *DT;
577 
578  /// The control structure to merge adjacent partitions if both satisfy
579  /// the \p Predicate.
580  template <class UnaryPredicate>
581  void mergeAdjacentPartitionsIf(UnaryPredicate Predicate) {
582  InstPartition *PrevMatch = nullptr;
583  for (auto I = PartitionContainer.begin(); I != PartitionContainer.end();) {
584  auto DoesMatch = Predicate(&*I);
585  if (PrevMatch == nullptr && DoesMatch) {
586  PrevMatch = &*I;
587  ++I;
588  } else if (PrevMatch != nullptr && DoesMatch) {
589  I->moveTo(*PrevMatch);
590  I = PartitionContainer.erase(I);
591  } else {
592  PrevMatch = nullptr;
593  ++I;
594  }
595  }
596  }
597 
598  /// Assign new LoopIDs for the partition's cloned loop.
599  void setNewLoopID(MDNode *OrigLoopID, InstPartition *Part) {
601  OrigLoopID,
603  Part->hasDepCycle() ? LLVMLoopDistributeFollowupSequential
604  : LLVMLoopDistributeFollowupCoincident});
605  if (PartitionID.hasValue()) {
606  Loop *NewLoop = Part->getDistributedLoop();
607  NewLoop->setLoopID(PartitionID.getValue());
608  }
609  }
610 };
611 
612 /// For each memory instruction, this class maintains difference of the
613 /// number of unsafe dependences that start out from this instruction minus
614 /// those that end here.
615 ///
616 /// By traversing the memory instructions in program order and accumulating this
617 /// number, we know whether any unsafe dependence crosses over a program point.
618 class MemoryInstructionDependences {
620 
621 public:
622  struct Entry {
623  Instruction *Inst;
624  unsigned NumUnsafeDependencesStartOrEnd = 0;
625 
626  Entry(Instruction *Inst) : Inst(Inst) {}
627  };
628 
629  using AccessesType = SmallVector<Entry, 8>;
630 
631  AccessesType::const_iterator begin() const { return Accesses.begin(); }
632  AccessesType::const_iterator end() const { return Accesses.end(); }
633 
634  MemoryInstructionDependences(
635  const SmallVectorImpl<Instruction *> &Instructions,
636  const SmallVectorImpl<Dependence> &Dependences) {
637  Accesses.append(Instructions.begin(), Instructions.end());
638 
639  LLVM_DEBUG(dbgs() << "Backward dependences:\n");
640  for (auto &Dep : Dependences)
641  if (Dep.isPossiblyBackward()) {
642  // Note that the designations source and destination follow the program
643  // order, i.e. source is always first. (The direction is given by the
644  // DepType.)
645  ++Accesses[Dep.Source].NumUnsafeDependencesStartOrEnd;
646  --Accesses[Dep.Destination].NumUnsafeDependencesStartOrEnd;
647 
648  LLVM_DEBUG(Dep.print(dbgs(), 2, Instructions));
649  }
650  }
651 
652 private:
653  AccessesType Accesses;
654 };
655 
656 /// The actual class performing the per-loop work.
657 class LoopDistributeForLoop {
658 public:
659  LoopDistributeForLoop(Loop *L, Function *F, LoopInfo *LI, DominatorTree *DT,
661  : L(L), F(F), LI(LI), DT(DT), SE(SE), ORE(ORE) {
662  setForced();
663  }
664 
665  /// Try to distribute an inner-most loop.
666  bool processLoop(std::function<const LoopAccessInfo &(Loop &)> &GetLAA) {
667  assert(L->empty() && "Only process inner loops.");
668 
669  LLVM_DEBUG(dbgs() << "\nLDist: In \""
670  << L->getHeader()->getParent()->getName()
671  << "\" checking " << *L << "\n");
672 
673  if (!L->getExitBlock())
674  return fail("MultipleExitBlocks", "multiple exit blocks");
675  if (!L->isLoopSimplifyForm())
676  return fail("NotLoopSimplifyForm",
677  "loop is not in loop-simplify form");
678 
679  BasicBlock *PH = L->getLoopPreheader();
680 
681  // LAA will check that we only have a single exiting block.
682  LAI = &GetLAA(*L);
683 
684  // Currently, we only distribute to isolate the part of the loop with
685  // dependence cycles to enable partial vectorization.
686  if (LAI->canVectorizeMemory())
687  return fail("MemOpsCanBeVectorized",
688  "memory operations are safe for vectorization");
689 
690  auto *Dependences = LAI->getDepChecker().getDependences();
691  if (!Dependences || Dependences->empty())
692  return fail("NoUnsafeDeps", "no unsafe dependences to isolate");
693 
694  InstPartitionContainer Partitions(L, LI, DT);
695 
696  // First, go through each memory operation and assign them to consecutive
697  // partitions (the order of partitions follows program order). Put those
698  // with unsafe dependences into "cyclic" partition otherwise put each store
699  // in its own "non-cyclic" partition (we'll merge these later).
700  //
701  // Note that a memory operation (e.g. Load2 below) at a program point that
702  // has an unsafe dependence (Store3->Load1) spanning over it must be
703  // included in the same cyclic partition as the dependent operations. This
704  // is to preserve the original program order after distribution. E.g.:
705  //
706  // NumUnsafeDependencesStartOrEnd NumUnsafeDependencesActive
707  // Load1 -. 1 0->1
708  // Load2 | /Unsafe/ 0 1
709  // Store3 -' -1 1->0
710  // Load4 0 0
711  //
712  // NumUnsafeDependencesActive > 0 indicates this situation and in this case
713  // we just keep assigning to the same cyclic partition until
714  // NumUnsafeDependencesActive reaches 0.
715  const MemoryDepChecker &DepChecker = LAI->getDepChecker();
716  MemoryInstructionDependences MID(DepChecker.getMemoryInstructions(),
717  *Dependences);
718 
719  int NumUnsafeDependencesActive = 0;
720  for (auto &InstDep : MID) {
721  Instruction *I = InstDep.Inst;
722  // We update NumUnsafeDependencesActive post-instruction, catch the
723  // start of a dependence directly via NumUnsafeDependencesStartOrEnd.
724  if (NumUnsafeDependencesActive ||
725  InstDep.NumUnsafeDependencesStartOrEnd > 0)
726  Partitions.addToCyclicPartition(I);
727  else
728  Partitions.addToNewNonCyclicPartition(I);
729  NumUnsafeDependencesActive += InstDep.NumUnsafeDependencesStartOrEnd;
730  assert(NumUnsafeDependencesActive >= 0 &&
731  "Negative number of dependences active");
732  }
733 
734  // Add partitions for values used outside. These partitions can be out of
735  // order from the original program order. This is OK because if the
736  // partition uses a load we will merge this partition with the original
737  // partition of the load that we set up in the previous loop (see
738  // mergeToAvoidDuplicatedLoads).
739  auto DefsUsedOutside = findDefsUsedOutsideOfLoop(L);
740  for (auto *Inst : DefsUsedOutside)
741  Partitions.addToNewNonCyclicPartition(Inst);
742 
743  LLVM_DEBUG(dbgs() << "Seeded partitions:\n" << Partitions);
744  if (Partitions.getSize() < 2)
745  return fail("CantIsolateUnsafeDeps",
746  "cannot isolate unsafe dependencies");
747 
748  // Run the merge heuristics: Merge non-cyclic adjacent partitions since we
749  // should be able to vectorize these together.
750  Partitions.mergeBeforePopulating();
751  LLVM_DEBUG(dbgs() << "\nMerged partitions:\n" << Partitions);
752  if (Partitions.getSize() < 2)
753  return fail("CantIsolateUnsafeDeps",
754  "cannot isolate unsafe dependencies");
755 
756  // Now, populate the partitions with non-memory operations.
757  Partitions.populateUsedSet();
758  LLVM_DEBUG(dbgs() << "\nPopulated partitions:\n" << Partitions);
759 
760  // In order to preserve original lexical order for loads, keep them in the
761  // partition that we set up in the MemoryInstructionDependences loop.
762  if (Partitions.mergeToAvoidDuplicatedLoads()) {
763  LLVM_DEBUG(dbgs() << "\nPartitions merged to ensure unique loads:\n"
764  << Partitions);
765  if (Partitions.getSize() < 2)
766  return fail("CantIsolateUnsafeDeps",
767  "cannot isolate unsafe dependencies");
768  }
769 
770  // Don't distribute the loop if we need too many SCEV run-time checks, or
771  // any if it's illegal.
772  const SCEVUnionPredicate &Pred = LAI->getPSE().getUnionPredicate();
773  if (LAI->hasConvergentOp() && !Pred.isAlwaysTrue()) {
774  return fail("RuntimeCheckWithConvergent",
775  "may not insert runtime check with convergent operation");
776  }
777 
778  if (Pred.getComplexity() > (IsForced.getValueOr(false)
781  return fail("TooManySCEVRuntimeChecks",
782  "too many SCEV run-time checks needed.\n");
783 
784  if (!IsForced.getValueOr(false) && hasDisableAllTransformsHint(L))
785  return fail("HeuristicDisabled", "distribution heuristic disabled");
786 
787  LLVM_DEBUG(dbgs() << "\nDistributing loop: " << *L << "\n");
788  // We're done forming the partitions set up the reverse mapping from
789  // instructions to partitions.
790  Partitions.setupPartitionIdOnInstructions();
791 
792  // If we need run-time checks, version the loop now.
793  auto PtrToPartition = Partitions.computePartitionSetForPointers(*LAI);
794  const auto *RtPtrChecking = LAI->getRuntimePointerChecking();
795  const auto &AllChecks = RtPtrChecking->getChecks();
796  auto Checks = includeOnlyCrossPartitionChecks(AllChecks, PtrToPartition,
797  RtPtrChecking);
798 
799  if (LAI->hasConvergentOp() && !Checks.empty()) {
800  return fail("RuntimeCheckWithConvergent",
801  "may not insert runtime check with convergent operation");
802  }
803 
804  // To keep things simple have an empty preheader before we version or clone
805  // the loop. (Also split if this has no predecessor, i.e. entry, because we
806  // rely on PH having a predecessor.)
807  if (!PH->getSinglePredecessor() || &*PH->begin() != PH->getTerminator())
808  SplitBlock(PH, PH->getTerminator(), DT, LI);
809 
810  if (!Pred.isAlwaysTrue() || !Checks.empty()) {
811  assert(!LAI->hasConvergentOp() && "inserting illegal loop versioning");
812 
813  MDNode *OrigLoopID = L->getLoopID();
814 
815  LLVM_DEBUG(dbgs() << "\nPointers:\n");
817  LoopVersioning LVer(*LAI, L, LI, DT, SE, false);
818  LVer.setAliasChecks(std::move(Checks));
819  LVer.setSCEVChecks(LAI->getPSE().getUnionPredicate());
820  LVer.versionLoop(DefsUsedOutside);
821  LVer.annotateLoopWithNoAlias();
822 
823  // The unversioned loop will not be changed, so we inherit all attributes
824  // from the original loop, but remove the loop distribution metadata to
825  // avoid to distribute it again.
826  MDNode *UnversionedLoopID =
827  makeFollowupLoopID(OrigLoopID,
829  LLVMLoopDistributeFollowupFallback},
830  "llvm.loop.distribute.", true)
831  .getValue();
832  LVer.getNonVersionedLoop()->setLoopID(UnversionedLoopID);
833  }
834 
835  // Create identical copies of the original loop for each partition and hook
836  // them up sequentially.
837  Partitions.cloneLoops();
838 
839  // Now, we remove the instruction from each loop that don't belong to that
840  // partition.
841  Partitions.removeUnusedInsts();
842  LLVM_DEBUG(dbgs() << "\nAfter removing unused Instrs:\n");
843  LLVM_DEBUG(Partitions.printBlocks());
844 
845  if (LDistVerify) {
846  LI->verify(*DT);
848  }
849 
850  ++NumLoopsDistributed;
851  // Report the success.
852  ORE->emit([&]() {
853  return OptimizationRemark(LDIST_NAME, "Distribute", L->getStartLoc(),
854  L->getHeader())
855  << "distributed loop";
856  });
857  return true;
858  }
859 
860  /// Provide diagnostics then \return with false.
861  bool fail(StringRef RemarkName, StringRef Message) {
862  LLVMContext &Ctx = F->getContext();
863  bool Forced = isForced().getValueOr(false);
864 
865  LLVM_DEBUG(dbgs() << "Skipping; " << Message << "\n");
866 
867  // With Rpass-missed report that distribution failed.
868  ORE->emit([&]() {
869  return OptimizationRemarkMissed(LDIST_NAME, "NotDistributed",
870  L->getStartLoc(), L->getHeader())
871  << "loop not distributed: use -Rpass-analysis=loop-distribute for "
872  "more "
873  "info";
874  });
875 
876  // With Rpass-analysis report why. This is on by default if distribution
877  // was requested explicitly.
878  ORE->emit(OptimizationRemarkAnalysis(
880  RemarkName, L->getStartLoc(), L->getHeader())
881  << "loop not distributed: " << Message);
882 
883  // Also issue a warning if distribution was requested explicitly but it
884  // failed.
885  if (Forced)
887  *F, L->getStartLoc(), "loop not distributed: failed "
888  "explicitly specified loop distribution"));
889 
890  return false;
891  }
892 
893  /// Return if distribution forced to be enabled/disabled for the loop.
894  ///
895  /// If the optional has a value, it indicates whether distribution was forced
896  /// to be enabled (true) or disabled (false). If the optional has no value
897  /// distribution was not forced either way.
898  const Optional<bool> &isForced() const { return IsForced; }
899 
900 private:
901  /// Filter out checks between pointers from the same partition.
902  ///
903  /// \p PtrToPartition contains the partition number for pointers. Partition
904  /// number -1 means that the pointer is used in multiple partitions. In this
905  /// case we can't safely omit the check.
906  SmallVector<RuntimePointerCheck, 4> includeOnlyCrossPartitionChecks(
907  const SmallVectorImpl<RuntimePointerCheck> &AllChecks,
908  const SmallVectorImpl<int> &PtrToPartition,
909  const RuntimePointerChecking *RtPtrChecking) {
911 
912  copy_if(AllChecks, std::back_inserter(Checks),
913  [&](const RuntimePointerCheck &Check) {
914  for (unsigned PtrIdx1 : Check.first->Members)
915  for (unsigned PtrIdx2 : Check.second->Members)
916  // Only include this check if there is a pair of pointers
917  // that require checking and the pointers fall into
918  // separate partitions.
919  //
920  // (Note that we already know at this point that the two
921  // pointer groups need checking but it doesn't follow
922  // that each pair of pointers within the two groups need
923  // checking as well.
924  //
925  // In other words we don't want to include a check just
926  // because there is a pair of pointers between the two
927  // pointer groups that require checks and a different
928  // pair whose pointers fall into different partitions.)
929  if (RtPtrChecking->needsChecking(PtrIdx1, PtrIdx2) &&
931  PtrToPartition, PtrIdx1, PtrIdx2))
932  return true;
933  return false;
934  });
935 
936  return Checks;
937  }
938 
939  /// Check whether the loop metadata is forcing distribution to be
940  /// enabled/disabled.
941  void setForced() {
943  findStringMetadataForLoop(L, "llvm.loop.distribute.enable");
944  if (!Value)
945  return;
946 
947  const MDOperand *Op = *Value;
948  assert(Op && mdconst::hasa<ConstantInt>(*Op) && "invalid metadata");
949  IsForced = mdconst::extract<ConstantInt>(*Op)->getZExtValue();
950  }
951 
952  Loop *L;
953  Function *F;
954 
955  // Analyses used.
956  LoopInfo *LI;
957  const LoopAccessInfo *LAI = nullptr;
958  DominatorTree *DT;
959  ScalarEvolution *SE;
961 
962  /// Indicates whether distribution is forced to be enabled/disabled for
963  /// the loop.
964  ///
965  /// If the optional has a value, it indicates whether distribution was forced
966  /// to be enabled (true) or disabled (false). If the optional has no value
967  /// distribution was not forced either way.
968  Optional<bool> IsForced;
969 };
970 
971 } // end anonymous namespace
972 
973 /// Shared implementation between new and old PMs.
974 static bool runImpl(Function &F, LoopInfo *LI, DominatorTree *DT,
976  std::function<const LoopAccessInfo &(Loop &)> &GetLAA) {
977  // Build up a worklist of inner-loops to vectorize. This is necessary as the
978  // act of distributing a loop creates new loops and can invalidate iterators
979  // across the loops.
980  SmallVector<Loop *, 8> Worklist;
981 
982  for (Loop *TopLevelLoop : *LI)
983  for (Loop *L : depth_first(TopLevelLoop))
984  // We only handle inner-most loops.
985  if (L->empty())
986  Worklist.push_back(L);
987 
988  // Now walk the identified inner loops.
989  bool Changed = false;
990  for (Loop *L : Worklist) {
991  LoopDistributeForLoop LDL(L, &F, LI, DT, SE, ORE);
992 
993  // If distribution was forced for the specific loop to be
994  // enabled/disabled, follow that. Otherwise use the global flag.
995  if (LDL.isForced().getValueOr(EnableLoopDistribute))
996  Changed |= LDL.processLoop(GetLAA);
997  }
998 
999  // Process each loop nest in the function.
1000  return Changed;
1001 }
1002 
1003 namespace {
1004 
1005 /// The pass class.
1006 class LoopDistributeLegacy : public FunctionPass {
1007 public:
1008  static char ID;
1009 
1010  LoopDistributeLegacy() : FunctionPass(ID) {
1011  // The default is set by the caller.
1013  }
1014 
1015  bool runOnFunction(Function &F) override {
1016  if (skipFunction(F))
1017  return false;
1018 
1019  auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
1020  auto *LAA = &getAnalysis<LoopAccessLegacyAnalysis>();
1021  auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1022  auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
1023  auto *ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
1024  std::function<const LoopAccessInfo &(Loop &)> GetLAA =
1025  [&](Loop &L) -> const LoopAccessInfo & { return LAA->getInfo(&L); };
1026 
1027  return runImpl(F, LI, DT, SE, ORE, GetLAA);
1028  }
1029 
1030  void getAnalysisUsage(AnalysisUsage &AU) const override {
1039  }
1040 };
1041 
1042 } // end anonymous namespace
1043 
1046  auto &LI = AM.getResult<LoopAnalysis>(F);
1047  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
1048  auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
1050 
1051  // We don't directly need these analyses but they're required for loop
1052  // analyses so provide them below.
1053  auto &AA = AM.getResult<AAManager>(F);
1054  auto &AC = AM.getResult<AssumptionAnalysis>(F);
1055  auto &TTI = AM.getResult<TargetIRAnalysis>(F);
1056  auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
1057 
1058  auto &LAM = AM.getResult<LoopAnalysisManagerFunctionProxy>(F).getManager();
1059  std::function<const LoopAccessInfo &(Loop &)> GetLAA =
1060  [&](Loop &L) -> const LoopAccessInfo & {
1061  LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE, TLI, TTI, nullptr};
1062  return LAM.getResult<LoopAccessAnalysis>(L, AR);
1063  };
1064 
1065  bool Changed = runImpl(F, &LI, &DT, &SE, &ORE, GetLAA);
1066  if (!Changed)
1067  return PreservedAnalyses::all();
1068  PreservedAnalyses PA;
1069  PA.preserve<LoopAnalysis>();
1071  PA.preserve<GlobalsAA>();
1072  return PA;
1073 }
1074 
1076 
1077 static const char ldist_name[] = "Loop Distribution";
1078 
1079 INITIALIZE_PASS_BEGIN(LoopDistributeLegacy, LDIST_NAME, ldist_name, false,
1080  false)
1086 INITIALIZE_PASS_END(LoopDistributeLegacy, LDIST_NAME, ldist_name, false, false)
1087 
1088 FunctionPass *llvm::createLoopDistributePass() { return new LoopDistributeLegacy(); }
Legacy wrapper pass to provide the GlobalsAAResult object.
static bool Check(DecodeStatus &Out, DecodeStatus In)
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:233
Tracking metadata reference owned by Metadata.
Definition: Metadata.h:717
const MemoryDepChecker & getDepChecker() const
the Memory Dependence Checker which can determine the loop-independent and loop-carried dependences b...
LLVM_NODISCARD std::enable_if_t< !is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type > dyn_cast(const Y &Val)
Definition: Casting.h:334
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Diagnostic information for missed-optimization remarks.
TargetTransformInfo TTI
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
#define LDIST_NAME
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
Definition: Path.cpp:224
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:769
This class represents lattice values for constants.
Definition: AllocatorList.h:23
This is the interface for a simple mod/ref and alias analysis over globals.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
OutputIt copy_if(R &&Range, OutputIt Out, UnaryPredicate P)
Provide wrappers to std::copy_if which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1537
static cl::opt< unsigned > PragmaDistributeSCEVCheckThreshold("loop-distribute-scev-check-threshold-with-pragma", cl::init(128), cl::Hidden, cl::desc("The maximum number of SCEV checks allowed for Loop " "Distribution for loop marked with #pragma loop distribute(enable)"))
void printChecks(raw_ostream &OS, const SmallVectorImpl< RuntimePointerCheck > &Checks, unsigned Depth=0) const
Print Checks.
void push_back(const T &Elt)
Definition: SmallVector.h:246
const RuntimePointerChecking * getRuntimePointerChecking() const
This provides a very simple, boring adaptor for a begin and end iterator into a range type...
The main scalar evolution driver.
This file contains the declarations for metadata subclasses.
static cl::opt< bool > LDistVerify("loop-distribute-verify", cl::Hidden, cl::desc("Turn on DominatorTree and LoopInfo verification " "after Loop Distribution"), cl::init(false))
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:159
Analysis pass providing the TargetTransformInfo.
STATISTIC(NumFunctions, "Total number of functions")
Metadata node.
Definition: Metadata.h:870
SmallVector< Instruction *, 4 > getInstructionsForAccess(Value *Ptr, bool isWrite) const
Return the list of instructions that use Ptr to read or write memory.
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:233
F(f)
Checks memory dependences among accesses to the same underlying object to determine whether there vec...
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:150
BasicBlock * SplitBlock(BasicBlock *Old, Instruction *SplitPt, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the specified block at the specified instruction - everything before SplitPt stays in Old and e...
std::pair< const RuntimeCheckingPtrGroup *, const RuntimeCheckingPtrGroup * > RuntimePointerCheck
A memcheck which made up of a pair of grouped pointers.
void verify(const DominatorTreeBase< BlockT, false > &DomTree) const
Definition: LoopInfoImpl.h:680
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:289
AnalysisUsage & addRequired()
bool verify(VerificationLevel VL=VerificationLevel::Full) const
verify - checks if the tree is correct.
static const char *const LLVMLoopDistributeFollowupSequential
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:80
Diagnostic information for optimization failures.
static bool arePointersInSamePartition(const SmallVectorImpl< int > &PtrToPartition, unsigned PtrIdx1, unsigned PtrIdx2)
Check if pointers are in the same partition.
Diagnostic information for optimization analysis remarks.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:43
friend const_iterator begin(StringRef path, Style style)
Get begin iterator over path.
Definition: Path.cpp:224
Analysis pass that exposes the LoopInfo for a function.
Definition: LoopInfo.h:1208
SmallVector< Instruction *, 8 > findDefsUsedOutsideOfLoop(Loop *L)
Returns the instructions that use values defined in the loop.
Definition: LoopUtils.cpp:136
BlockT * getHeader() const
Definition: LoopInfo.h:104
const SmallVectorImpl< Instruction * > & getMemoryInstructions() const
The vector of memory access instructions.
static const char *const LLVMLoopDistributeFollowupCoincident
static const char *const LLVMLoopDistributeFollowupFallback
void setLoopID(MDNode *LoopID) const
Set the llvm.loop loop id metadata for this loop.
Definition: LoopInfo.cpp:515
const T & getValue() const LLVM_LVALUE_FUNCTION
Definition: Optional.h:255
static cl::opt< bool > DistributeNonIfConvertible("loop-distribute-non-if-convertible", cl::Hidden, cl::desc("Whether to distribute into a loop that may not be " "if-convertible by the loop vectorizer"), cl::init(false))
This header provides classes for managing per-loop analyses.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:144
void replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Definition: User.cpp:21
static bool runImpl(Function &F, LoopInfo *LI, DominatorTree *DT, ScalarEvolution *SE, OptimizationRemarkEmitter *ORE, std::function< const LoopAccessInfo &(Loop &)> &GetLAA)
Shared implementation between new and old PMs.
for(auto &MBB :MF)
static bool runOnFunction(Function &F, bool PostInlining)
#define P(N)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:434
friend const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:233
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:154
* if(!EatIfPresent(lltok::kw_thread_local)) return false
ParseOptionalThreadLocal := /*empty.
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:258
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
void changeImmediateDominator(DomTreeNodeBase< NodeT > *N, DomTreeNodeBase< NodeT > *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node&#39;s...
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This analysis provides dependence information for the memory accesses of a loop.
INITIALIZE_PASS_BEGIN(LoopDistributeLegacy, LDIST_NAME, ldist_name, false, false) FunctionPass *llvm
This file contains the declarations for the subclasses of Constant, which represent the different fla...
A manager for alias analyses.
Diagnostic information for applied optimization remarks.
EquivalenceClasses - This represents a collection of equivalence classes and supports three efficient...
Represent the analysis usage information of a pass.
Analysis pass providing a never-invalidated alias analysis result.
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:284
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
const PredicatedScalarEvolution & getPSE() const
Used to add runtime SCEV checks.
bool needsChecking(const RuntimeCheckingPtrGroup &M, const RuntimeCheckingPtrGroup &N) const
Decide if we need to add a check between two groups of pointers, according to needsChecking.
BlockT * getExitBlock() const
If getExitBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:74
static UndefValue * get(Type *T)
Static factory methods - Return an &#39;undef&#39; object of the specified type.
Definition: Constants.cpp:1665
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:160
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
Definition: LoopInfo.cpp:613
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
static cl::opt< unsigned > DistributeSCEVCheckThreshold("loop-distribute-scev-check-threshold", cl::init(8), cl::Hidden, cl::desc("The maximum number of SCEV checks allowed for Loop " "Distribution"))
constexpr bool empty(const T &RangeOrContainer)
Test whether RangeOrContainer is empty. Similar to C++17 std::empty.
Definition: STLExtras.h:266
A function analysis which provides an AssumptionCache.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
const SmallVectorImpl< Dependence > * getDependences() const
Returns the memory dependences.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:439
bool isAlwaysTrue() const override
Implementation of the SCEVPredicate interface.
const SCEVUnionPredicate & getUnionPredicate() const
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
Definition: PPCPredicates.h:26
void initializeLoopDistributeLegacyPass(PassRegistry &)
Drive the analysis of memory accesses in the loop.
This class emits a version of the loop where run-time checks ensure that may-alias pointers can&#39;t ove...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
Optional< MDNode * > makeFollowupLoopID(MDNode *OrigLoopID, ArrayRef< StringRef > FollowupAttrs, const char *InheritOptionsAttrsPrefix="", bool AlwaysNew=false)
Create a new loop identifier for a loop created from a loop transformation.
Definition: LoopUtils.cpp:318
static const char *const LLVMLoopDistributeFollowupAll
static const char ldist_name[]
Holds information about the memory runtime legality checks to verify that a group of pointers do not ...
Analysis pass that exposes the ScalarEvolution for a function.
unsigned getComplexity() const override
We estimate the complexity of a union predicate as the size number of predicates in the union...
bool hasValue() const
Definition: Optional.h:259
bool isLoopSimplifyForm() const
Return true if the Loop is in the form that the LoopSimplify form transforms loops to...
Definition: LoopInfo.cpp:466
MDNode * getLoopID() const
Return the llvm.loop loop id metadata node for this loop if it is present.
Definition: LoopInfo.cpp:491
This analysis provides dependence information for the memory accesses of a loop.
Dependece between memory access instructions.
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:516
StringRef getName() const
Return a constant reference to the value&#39;s name.
Definition: Value.cpp:270
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:107
#define I(x, y, z)
Definition: MD5.cpp:59
#define N
Optional< const MDOperand * > findStringMetadataForLoop(const Loop *TheLoop, StringRef Name)
Find string metadata for loop.
Definition: LoopUtils.cpp:267
bool hasConvergentOp() const
Return true if there is a convergent operation in the loop.
SmallVector< PointerInfo, 2 > Pointers
Information about the pointers that may require checking.
iterator_range< value_op_iterator > operand_values()
Definition: User.h:266
void preserve()
Mark an analysis as preserved.
Definition: PassManager.h:175
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
raw_ostream & operator<<(raw_ostream &OS, const APInt &I)
Definition: APInt.h:2099
bool empty() const
Definition: LoopInfo.h:158
Analysis pass providing the TargetLibraryInfo.
iterator_range< df_iterator< T > > depth_first(const T &G)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This class represents a composition of other SCEV predicates, and is the class that most clients will...
static cl::opt< bool > EnableLoopDistribute("enable-loop-distribute", cl::Hidden, cl::desc("Enable the new, experimental LoopDistribution Pass"), cl::init(false))
LLVM Value Representation.
Definition: Value.h:74
OptimizationRemarkEmitter legacy analysis pass.
static bool blockNeedsPredication(BasicBlock *BB, Loop *TheLoop, DominatorTree *DT)
Return true if the block BB needs to be predicated in order for the loop to be vectorized.
Loop * cloneLoopWithPreheader(BasicBlock *Before, BasicBlock *LoopDomBB, Loop *OrigLoop, ValueToValueMapTy &VMap, const Twine &NameSuffix, LoopInfo *LI, DominatorTree *DT, SmallVectorImpl< BasicBlock *> &Blocks)
Clones a loop OrigLoop.
This class implements an extremely fast bulk output stream that can only output to a stream...
Definition: raw_ostream.h:46
The legacy pass manager&#39;s analysis pass to compute loop information.
Definition: LoopInfo.h:1233
print Print MemDeps of function
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:57
A container for analyses that lazily runs them and caches their results.
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:262
This pass exposes codegen information to IR-level passes.
auto reverse(ContainerTy &&C, std::enable_if_t< has_rbegin< ContainerTy >::value > *=nullptr)
Definition: STLExtras.h:341
This header defines various interfaces for pass management in LLVM.
bool hasDisableAllTransformsHint(const Loop *L)
Look for the loop attribute that disables all transformation heuristic.
Definition: LoopUtils.cpp:399
Dependence - This class represents a dependence between two memory memory references in a function...
#define LLVM_DEBUG(X)
Definition: Debug.h:122
The optimization diagnostic interface.
bool canVectorizeMemory() const
Return true we can analyze the memory accesses in the loop and there are no memory dependence cycles...
Fast - This calling convention attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:42
static void fail(const SDLoc &DL, SelectionDAG &DAG, const Twine &Msg)
const SmallVector< RuntimePointerCheck, 4 > & getChecks() const
Returns the checks that generateChecks created.
FunctionPass * createLoopDistributePass()
const BasicBlock * getParent() const
Definition: Instruction.h:94
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
void remapInstructionsInBlocks(const SmallVectorImpl< BasicBlock *> &Blocks, ValueToValueMapTy &VMap)
Remaps instructions in Blocks using the mapping in VMap.
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Definition: PassManager.h:953