LLVM 20.0.0git
LoopDistribute.cpp
Go to the documentation of this file.
1//===- LoopDistribute.cpp - Loop Distribution Pass ------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the Loop Distribution Pass. Its main focus is to
10// distribute loops that cannot be vectorized due to dependence cycles. It
11// tries to isolate the offending dependences into a new loop allowing
12// vectorization of the remaining parts.
13//
14// For dependence analysis, the pass uses the LoopVectorizer's
15// LoopAccessAnalysis. Because this analysis presumes no change in the order of
16// memory operations, special care is taken to preserve the lexical order of
17// these operations.
18//
19// Similarly to the Vectorizer, the pass also supports loop versioning to
20// run-time disambiguate potentially overlapping arrays.
21//
22//===----------------------------------------------------------------------===//
23
25#include "llvm/ADT/DenseMap.h"
28#include "llvm/ADT/STLExtras.h"
29#include "llvm/ADT/SetVector.h"
31#include "llvm/ADT/Statistic.h"
32#include "llvm/ADT/StringRef.h"
33#include "llvm/ADT/Twine.h"
44#include "llvm/IR/BasicBlock.h"
45#include "llvm/IR/Constants.h"
47#include "llvm/IR/Dominators.h"
48#include "llvm/IR/Function.h"
49#include "llvm/IR/Instruction.h"
51#include "llvm/IR/LLVMContext.h"
52#include "llvm/IR/Metadata.h"
53#include "llvm/IR/PassManager.h"
54#include "llvm/IR/Value.h"
57#include "llvm/Support/Debug.h"
64#include <cassert>
65#include <list>
66#include <tuple>
67#include <utility>
68
69using namespace llvm;
70
71#define LDIST_NAME "loop-distribute"
72#define DEBUG_TYPE LDIST_NAME
73
74/// @{
75/// Metadata attribute names
76static const char *const LLVMLoopDistributeFollowupAll =
77 "llvm.loop.distribute.followup_all";
78static const char *const LLVMLoopDistributeFollowupCoincident =
79 "llvm.loop.distribute.followup_coincident";
80static const char *const LLVMLoopDistributeFollowupSequential =
81 "llvm.loop.distribute.followup_sequential";
82static const char *const LLVMLoopDistributeFollowupFallback =
83 "llvm.loop.distribute.followup_fallback";
84/// @}
85
86static cl::opt<bool>
87 LDistVerify("loop-distribute-verify", cl::Hidden,
88 cl::desc("Turn on DominatorTree and LoopInfo verification "
89 "after Loop Distribution"),
90 cl::init(false));
91
93 "loop-distribute-non-if-convertible", cl::Hidden,
94 cl::desc("Whether to distribute into a loop that may not be "
95 "if-convertible by the loop vectorizer"),
96 cl::init(false));
97
99 "loop-distribute-scev-check-threshold", cl::init(8), cl::Hidden,
100 cl::desc("The maximum number of SCEV checks allowed for Loop "
101 "Distribution"));
102
104 "loop-distribute-scev-check-threshold-with-pragma", cl::init(128),
106 cl::desc("The maximum number of SCEV checks allowed for Loop "
107 "Distribution for loop marked with #pragma clang loop "
108 "distribute(enable)"));
109
111 "enable-loop-distribute", cl::Hidden,
112 cl::desc("Enable the new, experimental LoopDistribution Pass"),
113 cl::init(false));
114
115STATISTIC(NumLoopsDistributed, "Number of loops distributed");
116
117namespace {
118
119/// Maintains the set of instructions of the loop for a partition before
120/// cloning. After cloning, it hosts the new loop.
121class InstPartition {
122 using InstructionSet = SmallSetVector<Instruction *, 8>;
123
124public:
125 InstPartition(Instruction *I, Loop *L, bool DepCycle = false)
126 : DepCycle(DepCycle), OrigLoop(L) {
127 Set.insert(I);
128 }
129
130 /// Returns whether this partition contains a dependence cycle.
131 bool hasDepCycle() const { return DepCycle; }
132
133 /// Adds an instruction to this partition.
134 void add(Instruction *I) { Set.insert(I); }
135
136 /// Collection accessors.
137 InstructionSet::iterator begin() { return Set.begin(); }
138 InstructionSet::iterator end() { return Set.end(); }
139 InstructionSet::const_iterator begin() const { return Set.begin(); }
140 InstructionSet::const_iterator end() const { return Set.end(); }
141 bool empty() const { return Set.empty(); }
142
143 /// Moves this partition into \p Other. This partition becomes empty
144 /// after this.
145 void moveTo(InstPartition &Other) {
146 Other.Set.insert(Set.begin(), Set.end());
147 Set.clear();
148 Other.DepCycle |= DepCycle;
149 }
150
151 /// Populates the partition with a transitive closure of all the
152 /// instructions that the seeded instructions dependent on.
153 void populateUsedSet() {
154 // FIXME: We currently don't use control-dependence but simply include all
155 // blocks (possibly empty at the end) and let simplifycfg mostly clean this
156 // up.
157 for (auto *B : OrigLoop->getBlocks())
158 Set.insert(B->getTerminator());
159
160 // Follow the use-def chains to form a transitive closure of all the
161 // instructions that the originally seeded instructions depend on.
162 SmallVector<Instruction *, 8> Worklist(Set.begin(), Set.end());
163 while (!Worklist.empty()) {
164 Instruction *I = Worklist.pop_back_val();
165 // Insert instructions from the loop that we depend on.
166 for (Value *V : I->operand_values()) {
167 auto *I = dyn_cast<Instruction>(V);
168 if (I && OrigLoop->contains(I->getParent()) && Set.insert(I))
169 Worklist.push_back(I);
170 }
171 }
172 }
173
174 /// Clones the original loop.
175 ///
176 /// Updates LoopInfo and DominatorTree using the information that block \p
177 /// LoopDomBB dominates the loop.
178 Loop *cloneLoopWithPreheader(BasicBlock *InsertBefore, BasicBlock *LoopDomBB,
179 unsigned Index, LoopInfo *LI,
180 DominatorTree *DT) {
181 ClonedLoop = ::cloneLoopWithPreheader(InsertBefore, LoopDomBB, OrigLoop,
182 VMap, Twine(".ldist") + Twine(Index),
183 LI, DT, ClonedLoopBlocks);
184 return ClonedLoop;
185 }
186
187 /// The cloned loop. If this partition is mapped to the original loop,
188 /// this is null.
189 const Loop *getClonedLoop() const { return ClonedLoop; }
190
191 /// Returns the loop where this partition ends up after distribution.
192 /// If this partition is mapped to the original loop then use the block from
193 /// the loop.
194 Loop *getDistributedLoop() const {
195 return ClonedLoop ? ClonedLoop : OrigLoop;
196 }
197
198 /// The VMap that is populated by cloning and then used in
199 /// remapinstruction to remap the cloned instructions.
200 ValueToValueMapTy &getVMap() { return VMap; }
201
202 /// Remaps the cloned instructions using VMap.
203 void remapInstructions() {
204 remapInstructionsInBlocks(ClonedLoopBlocks, VMap);
205 }
206
207 /// Based on the set of instructions selected for this partition,
208 /// removes the unnecessary ones.
209 void removeUnusedInsts() {
211
212 for (auto *Block : OrigLoop->getBlocks())
213 for (auto &Inst : *Block)
214 if (!Set.count(&Inst)) {
215 Instruction *NewInst = &Inst;
216 if (!VMap.empty())
217 NewInst = cast<Instruction>(VMap[NewInst]);
218
219 assert(!isa<BranchInst>(NewInst) &&
220 "Branches are marked used early on");
221 Unused.push_back(NewInst);
222 }
223
224 // Delete the instructions backwards, as it has a reduced likelihood of
225 // having to update as many def-use and use-def chains.
226 for (auto *Inst : reverse(Unused)) {
227 if (!Inst->use_empty())
228 Inst->replaceAllUsesWith(PoisonValue::get(Inst->getType()));
229 Inst->eraseFromParent();
230 }
231 }
232
233 void print(raw_ostream &OS) const {
234 OS << (DepCycle ? " (cycle)\n" : "\n");
235 for (auto *I : Set)
236 // Prefix with the block name.
237 OS << " " << I->getParent()->getName() << ":" << *I << "\n";
238 }
239
240 void printBlocks(raw_ostream &OS) const {
241 for (auto *BB : getDistributedLoop()->getBlocks())
242 OS << *BB;
243 }
244
245private:
246 /// Instructions from OrigLoop selected for this partition.
247 InstructionSet Set;
248
249 /// Whether this partition contains a dependence cycle.
250 bool DepCycle;
251
252 /// The original loop.
253 Loop *OrigLoop;
254
255 /// The cloned loop. If this partition is mapped to the original loop,
256 /// this is null.
257 Loop *ClonedLoop = nullptr;
258
259 /// The blocks of ClonedLoop including the preheader. If this
260 /// partition is mapped to the original loop, this is empty.
261 SmallVector<BasicBlock *, 8> ClonedLoopBlocks;
262
263 /// These gets populated once the set of instructions have been
264 /// finalized. If this partition is mapped to the original loop, these are not
265 /// set.
267};
268
269/// Holds the set of Partitions. It populates them, merges them and then
270/// clones the loops.
271class InstPartitionContainer {
272 using InstToPartitionIdT = DenseMap<Instruction *, int>;
273
274public:
275 InstPartitionContainer(Loop *L, LoopInfo *LI, DominatorTree *DT)
276 : L(L), LI(LI), DT(DT) {}
277
278 /// Returns the number of partitions.
279 unsigned getSize() const { return PartitionContainer.size(); }
280
281 /// Adds \p Inst into the current partition if that is marked to
282 /// contain cycles. Otherwise start a new partition for it.
283 void addToCyclicPartition(Instruction *Inst) {
284 // If the current partition is non-cyclic. Start a new one.
285 if (PartitionContainer.empty() || !PartitionContainer.back().hasDepCycle())
286 PartitionContainer.emplace_back(Inst, L, /*DepCycle=*/true);
287 else
288 PartitionContainer.back().add(Inst);
289 }
290
291 /// Adds \p Inst into a partition that is not marked to contain
292 /// dependence cycles.
293 ///
294 // Initially we isolate memory instructions into as many partitions as
295 // possible, then later we may merge them back together.
296 void addToNewNonCyclicPartition(Instruction *Inst) {
297 PartitionContainer.emplace_back(Inst, L);
298 }
299
300 /// Merges adjacent non-cyclic partitions.
301 ///
302 /// The idea is that we currently only want to isolate the non-vectorizable
303 /// partition. We could later allow more distribution among these partition
304 /// too.
305 void mergeAdjacentNonCyclic() {
306 mergeAdjacentPartitionsIf(
307 [](const InstPartition *P) { return !P->hasDepCycle(); });
308 }
309
310 /// If a partition contains only conditional stores, we won't vectorize
311 /// it. Try to merge it with a previous cyclic partition.
312 void mergeNonIfConvertible() {
313 mergeAdjacentPartitionsIf([&](const InstPartition *Partition) {
314 if (Partition->hasDepCycle())
315 return true;
316
317 // Now, check if all stores are conditional in this partition.
318 bool seenStore = false;
319
320 for (auto *Inst : *Partition)
321 if (isa<StoreInst>(Inst)) {
322 seenStore = true;
324 return false;
325 }
326 return seenStore;
327 });
328 }
329
330 /// Merges the partitions according to various heuristics.
331 void mergeBeforePopulating() {
332 mergeAdjacentNonCyclic();
334 mergeNonIfConvertible();
335 }
336
337 /// Merges partitions in order to ensure that no loads are duplicated.
338 ///
339 /// We can't duplicate loads because that could potentially reorder them.
340 /// LoopAccessAnalysis provides dependency information with the context that
341 /// the order of memory operation is preserved.
342 ///
343 /// Return if any partitions were merged.
344 bool mergeToAvoidDuplicatedLoads() {
345 using LoadToPartitionT = DenseMap<Instruction *, InstPartition *>;
346 using ToBeMergedT = EquivalenceClasses<InstPartition *>;
347
348 LoadToPartitionT LoadToPartition;
349 ToBeMergedT ToBeMerged;
350
351 // Step through the partitions and create equivalence between partitions
352 // that contain the same load. Also put partitions in between them in the
353 // same equivalence class to avoid reordering of memory operations.
354 for (PartitionContainerT::iterator I = PartitionContainer.begin(),
355 E = PartitionContainer.end();
356 I != E; ++I) {
357 auto *PartI = &*I;
358
359 // If a load occurs in two partitions PartI and PartJ, merge all
360 // partitions (PartI, PartJ] into PartI.
361 for (Instruction *Inst : *PartI)
362 if (isa<LoadInst>(Inst)) {
363 bool NewElt;
364 LoadToPartitionT::iterator LoadToPart;
365
366 std::tie(LoadToPart, NewElt) =
367 LoadToPartition.insert(std::make_pair(Inst, PartI));
368 if (!NewElt) {
370 dbgs()
371 << "LDist: Merging partitions due to this load in multiple "
372 << "partitions: " << PartI << ", " << LoadToPart->second << "\n"
373 << *Inst << "\n");
374
375 auto PartJ = I;
376 do {
377 --PartJ;
378 ToBeMerged.unionSets(PartI, &*PartJ);
379 } while (&*PartJ != LoadToPart->second);
380 }
381 }
382 }
383 if (ToBeMerged.empty())
384 return false;
385
386 // Merge the member of an equivalence class into its class leader. This
387 // makes the members empty.
388 for (ToBeMergedT::iterator I = ToBeMerged.begin(), E = ToBeMerged.end();
389 I != E; ++I) {
390 if (!I->isLeader())
391 continue;
392
393 auto PartI = I->getData();
394 for (auto *PartJ : make_range(std::next(ToBeMerged.member_begin(I)),
395 ToBeMerged.member_end())) {
396 PartJ->moveTo(*PartI);
397 }
398 }
399
400 // Remove the empty partitions.
401 PartitionContainer.remove_if(
402 [](const InstPartition &P) { return P.empty(); });
403
404 return true;
405 }
406
407 /// Sets up the mapping between instructions to partitions. If the
408 /// instruction is duplicated across multiple partitions, set the entry to -1.
409 void setupPartitionIdOnInstructions() {
410 int PartitionID = 0;
411 for (const auto &Partition : PartitionContainer) {
412 for (Instruction *Inst : Partition) {
413 bool NewElt;
415
416 std::tie(Iter, NewElt) =
417 InstToPartitionId.insert(std::make_pair(Inst, PartitionID));
418 if (!NewElt)
419 Iter->second = -1;
420 }
421 ++PartitionID;
422 }
423 }
424
425 /// Populates the partition with everything that the seeding
426 /// instructions require.
427 void populateUsedSet() {
428 for (auto &P : PartitionContainer)
429 P.populateUsedSet();
430 }
431
432 /// This performs the main chunk of the work of cloning the loops for
433 /// the partitions.
434 void cloneLoops() {
435 BasicBlock *OrigPH = L->getLoopPreheader();
436 // At this point the predecessor of the preheader is either the memcheck
437 // block or the top part of the original preheader.
438 BasicBlock *Pred = OrigPH->getSinglePredecessor();
439 assert(Pred && "Preheader does not have a single predecessor");
440 BasicBlock *ExitBlock = L->getExitBlock();
441 assert(ExitBlock && "No single exit block");
442 Loop *NewLoop;
443
444 assert(!PartitionContainer.empty() && "at least two partitions expected");
445 // We're cloning the preheader along with the loop so we already made sure
446 // it was empty.
447 assert(&*OrigPH->begin() == OrigPH->getTerminator() &&
448 "preheader not empty");
449
450 // Preserve the original loop ID for use after the transformation.
451 MDNode *OrigLoopID = L->getLoopID();
452
453 // Create a loop for each partition except the last. Clone the original
454 // loop before PH along with adding a preheader for the cloned loop. Then
455 // update PH to point to the newly added preheader.
456 BasicBlock *TopPH = OrigPH;
457 unsigned Index = getSize() - 1;
458 for (auto &Part : llvm::drop_begin(llvm::reverse(PartitionContainer))) {
459 NewLoop = Part.cloneLoopWithPreheader(TopPH, Pred, Index, LI, DT);
460
461 Part.getVMap()[ExitBlock] = TopPH;
462 Part.remapInstructions();
463 setNewLoopID(OrigLoopID, &Part);
464 --Index;
465 TopPH = NewLoop->getLoopPreheader();
466 }
467 Pred->getTerminator()->replaceUsesOfWith(OrigPH, TopPH);
468
469 // Also set a new loop ID for the last loop.
470 setNewLoopID(OrigLoopID, &PartitionContainer.back());
471
472 // Now go in forward order and update the immediate dominator for the
473 // preheaders with the exiting block of the previous loop. Dominance
474 // within the loop is updated in cloneLoopWithPreheader.
475 for (auto Curr = PartitionContainer.cbegin(),
476 Next = std::next(PartitionContainer.cbegin()),
477 E = PartitionContainer.cend();
478 Next != E; ++Curr, ++Next)
480 Next->getDistributedLoop()->getLoopPreheader(),
481 Curr->getDistributedLoop()->getExitingBlock());
482 }
483
484 /// Removes the dead instructions from the cloned loops.
485 void removeUnusedInsts() {
486 for (auto &Partition : PartitionContainer)
487 Partition.removeUnusedInsts();
488 }
489
490 /// For each memory pointer, it computes the partitionId the pointer is
491 /// used in.
492 ///
493 /// This returns an array of int where the I-th entry corresponds to I-th
494 /// entry in LAI.getRuntimePointerCheck(). If the pointer is used in multiple
495 /// partitions its entry is set to -1.
497 computePartitionSetForPointers(const LoopAccessInfo &LAI) {
498 const RuntimePointerChecking *RtPtrCheck = LAI.getRuntimePointerChecking();
499
500 unsigned N = RtPtrCheck->Pointers.size();
501 SmallVector<int, 8> PtrToPartitions(N);
502 for (unsigned I = 0; I < N; ++I) {
503 Value *Ptr = RtPtrCheck->Pointers[I].PointerValue;
504 auto Instructions =
505 LAI.getInstructionsForAccess(Ptr, RtPtrCheck->Pointers[I].IsWritePtr);
506
507 int &Partition = PtrToPartitions[I];
508 // First set it to uninitialized.
509 Partition = -2;
510 for (Instruction *Inst : Instructions) {
511 // Note that this could be -1 if Inst is duplicated across multiple
512 // partitions.
513 int ThisPartition = this->InstToPartitionId[Inst];
514 if (Partition == -2)
515 Partition = ThisPartition;
516 // -1 means belonging to multiple partitions.
517 else if (Partition == -1)
518 break;
519 else if (Partition != (int)ThisPartition)
520 Partition = -1;
521 }
522 assert(Partition != -2 && "Pointer not belonging to any partition");
523 }
524
525 return PtrToPartitions;
526 }
527
528 void print(raw_ostream &OS) const {
529 unsigned Index = 0;
530 for (const auto &P : PartitionContainer) {
531 OS << "LDist: Partition " << Index++ << ":";
532 P.print(OS);
533 }
534 }
535
536 void dump() const { print(dbgs()); }
537
538#ifndef NDEBUG
540 const InstPartitionContainer &Partitions) {
541 Partitions.print(OS);
542 return OS;
543 }
544#endif
545
546 void printBlocks(raw_ostream &OS) const {
547 unsigned Index = 0;
548 for (const auto &P : PartitionContainer) {
549 OS << "LDist: Partition " << Index++ << ":";
550 P.printBlocks(OS);
551 }
552 }
553
554private:
555 using PartitionContainerT = std::list<InstPartition>;
556
557 /// List of partitions.
558 PartitionContainerT PartitionContainer;
559
560 /// Mapping from Instruction to partition Id. If the instruction
561 /// belongs to multiple partitions the entry contains -1.
562 InstToPartitionIdT InstToPartitionId;
563
564 Loop *L;
565 LoopInfo *LI;
566 DominatorTree *DT;
567
568 /// The control structure to merge adjacent partitions if both satisfy
569 /// the \p Predicate.
570 template <class UnaryPredicate>
571 void mergeAdjacentPartitionsIf(UnaryPredicate Predicate) {
572 InstPartition *PrevMatch = nullptr;
573 for (auto I = PartitionContainer.begin(); I != PartitionContainer.end();) {
574 auto DoesMatch = Predicate(&*I);
575 if (PrevMatch == nullptr && DoesMatch) {
576 PrevMatch = &*I;
577 ++I;
578 } else if (PrevMatch != nullptr && DoesMatch) {
579 I->moveTo(*PrevMatch);
580 I = PartitionContainer.erase(I);
581 } else {
582 PrevMatch = nullptr;
583 ++I;
584 }
585 }
586 }
587
588 /// Assign new LoopIDs for the partition's cloned loop.
589 void setNewLoopID(MDNode *OrigLoopID, InstPartition *Part) {
590 std::optional<MDNode *> PartitionID = makeFollowupLoopID(
591 OrigLoopID,
593 Part->hasDepCycle() ? LLVMLoopDistributeFollowupSequential
595 if (PartitionID) {
596 Loop *NewLoop = Part->getDistributedLoop();
597 NewLoop->setLoopID(*PartitionID);
598 }
599 }
600};
601
602/// For each memory instruction, this class maintains difference of the
603/// number of unsafe dependences that start out from this instruction minus
604/// those that end here.
605///
606/// By traversing the memory instructions in program order and accumulating this
607/// number, we know whether any unsafe dependence crosses over a program point.
608class MemoryInstructionDependences {
610
611public:
612 struct Entry {
613 Instruction *Inst;
614 unsigned NumUnsafeDependencesStartOrEnd = 0;
615
616 Entry(Instruction *Inst) : Inst(Inst) {}
617 };
618
619 using AccessesType = SmallVector<Entry, 8>;
620
621 AccessesType::const_iterator begin() const { return Accesses.begin(); }
622 AccessesType::const_iterator end() const { return Accesses.end(); }
623
624 MemoryInstructionDependences(
625 const SmallVectorImpl<Instruction *> &Instructions,
626 const SmallVectorImpl<Dependence> &Dependences) {
627 Accesses.append(Instructions.begin(), Instructions.end());
628
629 LLVM_DEBUG(dbgs() << "LDist: Backward dependences:\n");
630 for (const auto &Dep : Dependences)
631 if (Dep.isPossiblyBackward()) {
632 // Note that the designations source and destination follow the program
633 // order, i.e. source is always first. (The direction is given by the
634 // DepType.)
635 ++Accesses[Dep.Source].NumUnsafeDependencesStartOrEnd;
636 --Accesses[Dep.Destination].NumUnsafeDependencesStartOrEnd;
637
638 LLVM_DEBUG(Dep.print(dbgs(), 2, Instructions));
639 }
640 }
641
642private:
643 AccessesType Accesses;
644};
645
646/// The actual class performing the per-loop work.
647class LoopDistributeForLoop {
648public:
649 LoopDistributeForLoop(Loop *L, Function *F, LoopInfo *LI, DominatorTree *DT,
652 : L(L), F(F), LI(LI), DT(DT), SE(SE), LAIs(LAIs), ORE(ORE) {
653 setForced();
654 }
655
656 /// Try to distribute an inner-most loop.
657 bool processLoop() {
658 assert(L->isInnermost() && "Only process inner loops.");
659
660 LLVM_DEBUG(dbgs() << "\nLDist: Checking a loop in '"
661 << L->getHeader()->getParent()->getName() << "' from "
662 << L->getLocStr() << "\n");
663
664 // Having a single exit block implies there's also one exiting block.
665 if (!L->getExitBlock())
666 return fail("MultipleExitBlocks", "multiple exit blocks");
667 if (!L->isLoopSimplifyForm())
668 return fail("NotLoopSimplifyForm",
669 "loop is not in loop-simplify form");
670 if (!L->isRotatedForm())
671 return fail("NotBottomTested", "loop is not bottom tested");
672
673 BasicBlock *PH = L->getLoopPreheader();
674
675 LAI = &LAIs.getInfo(*L);
676
677 // Currently, we only distribute to isolate the part of the loop with
678 // dependence cycles to enable partial vectorization.
679 if (LAI->canVectorizeMemory())
680 return fail("MemOpsCanBeVectorized",
681 "memory operations are safe for vectorization");
682
683 auto *Dependences = LAI->getDepChecker().getDependences();
684 if (!Dependences || Dependences->empty())
685 return fail("NoUnsafeDeps", "no unsafe dependences to isolate");
686
687 LLVM_DEBUG(dbgs() << "LDist: Found a candidate loop: "
688 << L->getHeader()->getName() << "\n");
689
690 InstPartitionContainer Partitions(L, LI, DT);
691
692 // First, go through each memory operation and assign them to consecutive
693 // partitions (the order of partitions follows program order). Put those
694 // with unsafe dependences into "cyclic" partition otherwise put each store
695 // in its own "non-cyclic" partition (we'll merge these later).
696 //
697 // Note that a memory operation (e.g. Load2 below) at a program point that
698 // has an unsafe dependence (Store3->Load1) spanning over it must be
699 // included in the same cyclic partition as the dependent operations. This
700 // is to preserve the original program order after distribution. E.g.:
701 //
702 // NumUnsafeDependencesStartOrEnd NumUnsafeDependencesActive
703 // Load1 -. 1 0->1
704 // Load2 | /Unsafe/ 0 1
705 // Store3 -' -1 1->0
706 // Load4 0 0
707 //
708 // NumUnsafeDependencesActive > 0 indicates this situation and in this case
709 // we just keep assigning to the same cyclic partition until
710 // NumUnsafeDependencesActive reaches 0.
711 const MemoryDepChecker &DepChecker = LAI->getDepChecker();
712 MemoryInstructionDependences MID(DepChecker.getMemoryInstructions(),
713 *Dependences);
714
715 int NumUnsafeDependencesActive = 0;
716 for (const auto &InstDep : MID) {
717 Instruction *I = InstDep.Inst;
718 // We update NumUnsafeDependencesActive post-instruction, catch the
719 // start of a dependence directly via NumUnsafeDependencesStartOrEnd.
720 if (NumUnsafeDependencesActive ||
721 InstDep.NumUnsafeDependencesStartOrEnd > 0)
722 Partitions.addToCyclicPartition(I);
723 else
724 Partitions.addToNewNonCyclicPartition(I);
725 NumUnsafeDependencesActive += InstDep.NumUnsafeDependencesStartOrEnd;
726 assert(NumUnsafeDependencesActive >= 0 &&
727 "Negative number of dependences active");
728 }
729
730 // Add partitions for values used outside. These partitions can be out of
731 // order from the original program order. This is OK because if the
732 // partition uses a load we will merge this partition with the original
733 // partition of the load that we set up in the previous loop (see
734 // mergeToAvoidDuplicatedLoads).
735 auto DefsUsedOutside = findDefsUsedOutsideOfLoop(L);
736 for (auto *Inst : DefsUsedOutside)
737 Partitions.addToNewNonCyclicPartition(Inst);
738
739 LLVM_DEBUG(dbgs() << "LDist: Seeded partitions:\n" << Partitions);
740 if (Partitions.getSize() < 2)
741 return fail("CantIsolateUnsafeDeps",
742 "cannot isolate unsafe dependencies");
743
744 // Run the merge heuristics: Merge non-cyclic adjacent partitions since we
745 // should be able to vectorize these together.
746 Partitions.mergeBeforePopulating();
747 LLVM_DEBUG(dbgs() << "LDist: Merged partitions:\n" << Partitions);
748 if (Partitions.getSize() < 2)
749 return fail("CantIsolateUnsafeDeps",
750 "cannot isolate unsafe dependencies");
751
752 // Now, populate the partitions with non-memory operations.
753 Partitions.populateUsedSet();
754 LLVM_DEBUG(dbgs() << "LDist: Populated partitions:\n" << Partitions);
755
756 // In order to preserve original lexical order for loads, keep them in the
757 // partition that we set up in the MemoryInstructionDependences loop.
758 if (Partitions.mergeToAvoidDuplicatedLoads()) {
759 LLVM_DEBUG(dbgs() << "LDist: Partitions merged to ensure unique loads:\n"
760 << Partitions);
761 if (Partitions.getSize() < 2)
762 return fail("CantIsolateUnsafeDeps",
763 "cannot isolate unsafe dependencies");
764 }
765
766 // Don't distribute the loop if we need too many SCEV run-time checks, or
767 // any if it's illegal.
768 const SCEVPredicate &Pred = LAI->getPSE().getPredicate();
769 if (LAI->hasConvergentOp() && !Pred.isAlwaysTrue()) {
770 return fail("RuntimeCheckWithConvergent",
771 "may not insert runtime check with convergent operation");
772 }
773
774 if (Pred.getComplexity() > (IsForced.value_or(false)
777 return fail("TooManySCEVRuntimeChecks",
778 "too many SCEV run-time checks needed.\n");
779
780 if (!IsForced.value_or(false) && hasDisableAllTransformsHint(L))
781 return fail("HeuristicDisabled", "distribution heuristic disabled");
782
783 LLVM_DEBUG(dbgs() << "LDist: Distributing loop: "
784 << L->getHeader()->getName() << "\n");
785 // We're done forming the partitions set up the reverse mapping from
786 // instructions to partitions.
787 Partitions.setupPartitionIdOnInstructions();
788
789 // If we need run-time checks, version the loop now.
790 auto PtrToPartition = Partitions.computePartitionSetForPointers(*LAI);
791 const auto *RtPtrChecking = LAI->getRuntimePointerChecking();
792 const auto &AllChecks = RtPtrChecking->getChecks();
793 auto Checks = includeOnlyCrossPartitionChecks(AllChecks, PtrToPartition,
794 RtPtrChecking);
795
796 if (LAI->hasConvergentOp() && !Checks.empty()) {
797 return fail("RuntimeCheckWithConvergent",
798 "may not insert runtime check with convergent operation");
799 }
800
801 // To keep things simple have an empty preheader before we version or clone
802 // the loop. (Also split if this has no predecessor, i.e. entry, because we
803 // rely on PH having a predecessor.)
804 if (!PH->getSinglePredecessor() || &*PH->begin() != PH->getTerminator())
805 SplitBlock(PH, PH->getTerminator(), DT, LI);
806
807 if (!Pred.isAlwaysTrue() || !Checks.empty()) {
808 assert(!LAI->hasConvergentOp() && "inserting illegal loop versioning");
809
810 MDNode *OrigLoopID = L->getLoopID();
811
812 LLVM_DEBUG(dbgs() << "LDist: Pointers:\n");
814 LoopVersioning LVer(*LAI, Checks, L, LI, DT, SE);
815 LVer.versionLoop(DefsUsedOutside);
816 LVer.annotateLoopWithNoAlias();
817
818 // The unversioned loop will not be changed, so we inherit all attributes
819 // from the original loop, but remove the loop distribution metadata to
820 // avoid to distribute it again.
821 MDNode *UnversionedLoopID = *makeFollowupLoopID(
822 OrigLoopID,
824 "llvm.loop.distribute.", true);
825 LVer.getNonVersionedLoop()->setLoopID(UnversionedLoopID);
826 }
827
828 // Create identical copies of the original loop for each partition and hook
829 // them up sequentially.
830 Partitions.cloneLoops();
831
832 // Now, we remove the instruction from each loop that don't belong to that
833 // partition.
834 Partitions.removeUnusedInsts();
835 LLVM_DEBUG(dbgs() << "LDist: After removing unused Instrs:\n");
836 LLVM_DEBUG(Partitions.printBlocks(dbgs()));
837
838 if (LDistVerify) {
839 LI->verify(*DT);
840 assert(DT->verify(DominatorTree::VerificationLevel::Fast));
841 }
842
843 ++NumLoopsDistributed;
844 // Report the success.
845 ORE->emit([&]() {
846 return OptimizationRemark(LDIST_NAME, "Distribute", L->getStartLoc(),
847 L->getHeader())
848 << "distributed loop";
849 });
850 return true;
851 }
852
853 /// Provide diagnostics then \return with false.
854 bool fail(StringRef RemarkName, StringRef Message) {
855 LLVMContext &Ctx = F->getContext();
856 bool Forced = isForced().value_or(false);
857
858 LLVM_DEBUG(dbgs() << "LDist: Skipping; " << Message << "\n");
859
860 // With Rpass-missed report that distribution failed.
861 ORE->emit([&]() {
862 return OptimizationRemarkMissed(LDIST_NAME, "NotDistributed",
863 L->getStartLoc(), L->getHeader())
864 << "loop not distributed: use -Rpass-analysis=loop-distribute for "
865 "more "
866 "info";
867 });
868
869 // With Rpass-analysis report why. This is on by default if distribution
870 // was requested explicitly.
873 RemarkName, L->getStartLoc(), L->getHeader())
874 << "loop not distributed: " << Message);
875
876 // Also issue a warning if distribution was requested explicitly but it
877 // failed.
878 if (Forced)
880 *F, L->getStartLoc(), "loop not distributed: failed "
881 "explicitly specified loop distribution"));
882
883 return false;
884 }
885
886 /// Return if distribution forced to be enabled/disabled for the loop.
887 ///
888 /// If the optional has a value, it indicates whether distribution was forced
889 /// to be enabled (true) or disabled (false). If the optional has no value
890 /// distribution was not forced either way.
891 const std::optional<bool> &isForced() const { return IsForced; }
892
893private:
894 /// Filter out checks between pointers from the same partition.
895 ///
896 /// \p PtrToPartition contains the partition number for pointers. Partition
897 /// number -1 means that the pointer is used in multiple partitions. In this
898 /// case we can't safely omit the check.
899 SmallVector<RuntimePointerCheck, 4> includeOnlyCrossPartitionChecks(
901 const SmallVectorImpl<int> &PtrToPartition,
902 const RuntimePointerChecking *RtPtrChecking) {
904
905 copy_if(AllChecks, std::back_inserter(Checks),
906 [&](const RuntimePointerCheck &Check) {
907 for (unsigned PtrIdx1 : Check.first->Members)
908 for (unsigned PtrIdx2 : Check.second->Members)
909 // Only include this check if there is a pair of pointers
910 // that require checking and the pointers fall into
911 // separate partitions.
912 //
913 // (Note that we already know at this point that the two
914 // pointer groups need checking but it doesn't follow
915 // that each pair of pointers within the two groups need
916 // checking as well.
917 //
918 // In other words we don't want to include a check just
919 // because there is a pair of pointers between the two
920 // pointer groups that require checks and a different
921 // pair whose pointers fall into different partitions.)
922 if (RtPtrChecking->needsChecking(PtrIdx1, PtrIdx2) &&
924 PtrToPartition, PtrIdx1, PtrIdx2))
925 return true;
926 return false;
927 });
928
929 return Checks;
930 }
931
932 /// Check whether the loop metadata is forcing distribution to be
933 /// enabled/disabled.
934 void setForced() {
935 std::optional<const MDOperand *> Value =
936 findStringMetadataForLoop(L, "llvm.loop.distribute.enable");
937 if (!Value)
938 return;
939
940 const MDOperand *Op = *Value;
941 assert(Op && mdconst::hasa<ConstantInt>(*Op) && "invalid metadata");
942 IsForced = mdconst::extract<ConstantInt>(*Op)->getZExtValue();
943 }
944
945 Loop *L;
946 Function *F;
947
948 // Analyses used.
949 LoopInfo *LI;
950 const LoopAccessInfo *LAI = nullptr;
951 DominatorTree *DT;
952 ScalarEvolution *SE;
955
956 /// Indicates whether distribution is forced to be enabled/disabled for
957 /// the loop.
958 ///
959 /// If the optional has a value, it indicates whether distribution was forced
960 /// to be enabled (true) or disabled (false). If the optional has no value
961 /// distribution was not forced either way.
962 std::optional<bool> IsForced;
963};
964
965} // end anonymous namespace
966
967static bool runImpl(Function &F, LoopInfo *LI, DominatorTree *DT,
969 LoopAccessInfoManager &LAIs) {
970 // Build up a worklist of inner-loops to distribute. This is necessary as the
971 // act of distributing a loop creates new loops and can invalidate iterators
972 // across the loops.
973 SmallVector<Loop *, 8> Worklist;
974
975 for (Loop *TopLevelLoop : *LI)
976 for (Loop *L : depth_first(TopLevelLoop))
977 // We only handle inner-most loops.
978 if (L->isInnermost())
979 Worklist.push_back(L);
980
981 // Now walk the identified inner loops.
982 bool Changed = false;
983 for (Loop *L : Worklist) {
984 LoopDistributeForLoop LDL(L, &F, LI, DT, SE, LAIs, ORE);
985
986 // If distribution was forced for the specific loop to be
987 // enabled/disabled, follow that. Otherwise use the global flag.
988 if (LDL.isForced().value_or(EnableLoopDistribute))
989 Changed |= LDL.processLoop();
990 }
991
992 // Process each loop nest in the function.
993 return Changed;
994}
995
998 auto &LI = AM.getResult<LoopAnalysis>(F);
999 auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
1000 auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
1002
1004 bool Changed = runImpl(F, &LI, &DT, &SE, &ORE, LAIs);
1005 if (!Changed)
1006 return PreservedAnalyses::all();
1008 PA.preserve<LoopAnalysis>();
1010 return PA;
1011}
for(const MachineOperand &MO :llvm::drop_begin(OldMI.operands(), Desc.getNumOperands()))
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static void fail(const SDLoc &DL, SelectionDAG &DAG, const Twine &Msg, SDValue Val={})
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
#define LLVM_DEBUG(...)
Definition: Debug.h:106
This file defines the DenseMap class.
This file builds on the ADT/GraphTraits.h file to build generic depth first graph iterator.
std::optional< std::vector< StOtherPiece > > Other
Definition: ELFYAML.cpp:1313
Generic implementation of equivalence classes through the use Tarjan's efficient union-find algorithm...
static bool runImpl(Function &F, const TargetLowering &TLI)
#define Check(C,...)
This is the interface for a simple mod/ref and alias analysis over globals.
This header defines various interfaces for pass management in LLVM.
This header provides classes for managing per-loop analyses.
static const char *const LLVMLoopDistributeFollowupCoincident
static cl::opt< bool > DistributeNonIfConvertible("loop-distribute-non-if-convertible", cl::Hidden, cl::desc("Whether to distribute into a loop that may not be " "if-convertible by the loop vectorizer"), cl::init(false))
static cl::opt< bool > EnableLoopDistribute("enable-loop-distribute", cl::Hidden, cl::desc("Enable the new, experimental LoopDistribution Pass"), cl::init(false))
static cl::opt< unsigned > DistributeSCEVCheckThreshold("loop-distribute-scev-check-threshold", cl::init(8), cl::Hidden, cl::desc("The maximum number of SCEV checks allowed for Loop " "Distribution"))
#define LDIST_NAME
static const char *const LLVMLoopDistributeFollowupSequential
static const char *const LLVMLoopDistributeFollowupAll
static cl::opt< unsigned > PragmaDistributeSCEVCheckThreshold("loop-distribute-scev-check-threshold-with-pragma", cl::init(128), cl::Hidden, cl::desc("The maximum number of SCEV checks allowed for Loop " "Distribution for loop marked with #pragma clang loop " "distribute(enable)"))
static const char *const LLVMLoopDistributeFollowupFallback
static bool runImpl(Function &F, LoopInfo *LI, DominatorTree *DT, ScalarEvolution *SE, OptimizationRemarkEmitter *ORE, LoopAccessInfoManager &LAIs)
static cl::opt< bool > LDistVerify("loop-distribute-verify", cl::Hidden, cl::desc("Turn on DominatorTree and LoopInfo verification " "after Loop Distribution"), cl::init(false))
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
This file contains the declarations for metadata subclasses.
#define P(N)
if(PassOpts->AAPipeline)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
raw_pwrite_stream & OS
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:166
This pass exposes codegen information to IR-level passes.
static unsigned getSize(unsigned Kind)
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:410
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:448
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:459
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:239
This class represents an Operation in the Expression.
DenseMapIterator< KeyT, ValueT, KeyInfoT, BucketT > iterator
Definition: DenseMap.h:71
Dependence - This class represents a dependence between two memory memory references in a function.
Diagnostic information for optimization failures.
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:279
bool verify(VerificationLevel VL=VerificationLevel::Full) const
verify - checks if the tree is correct.
void changeImmediateDominator(DomTreeNodeBase< NodeT > *N, DomTreeNodeBase< NodeT > *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node's...
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
EquivalenceClasses - This represents a collection of equivalence classes and supports three efficient...
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
This analysis provides dependence information for the memory accesses of a loop.
Drive the analysis of memory accesses in the loop.
const MemoryDepChecker & getDepChecker() const
the Memory Dependence Checker which can determine the loop-independent and loop-carried dependences b...
const RuntimePointerChecking * getRuntimePointerChecking() const
bool canVectorizeMemory() const
Return true we can analyze the memory accesses in the loop and there are no memory dependence cycles.
const PredicatedScalarEvolution & getPSE() const
Used to add runtime SCEV checks.
static bool blockNeedsPredication(BasicBlock *BB, Loop *TheLoop, DominatorTree *DT)
Return true if the block BB needs to be predicated in order for the loop to be vectorized.
SmallVector< Instruction *, 4 > getInstructionsForAccess(Value *Ptr, bool isWrite) const
Return the list of instructions that use Ptr to read or write memory.
bool hasConvergentOp() const
Return true if there is a convergent operation in the loop.
Analysis pass that exposes the LoopInfo for a function.
Definition: LoopInfo.h:566
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
void verify(const DominatorTreeBase< BlockT, false > &DomTree) const
This class emits a version of the loop where run-time checks ensure that may-alias pointers can't ove...
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:39
void setLoopID(MDNode *LoopID) const
Set the llvm.loop loop id metadata for this loop.
Definition: LoopInfo.cpp:526
Metadata node.
Definition: Metadata.h:1069
Tracking metadata reference owned by Metadata.
Definition: Metadata.h:891
Checks memory dependences among accesses to the same underlying object to determine whether there vec...
const SmallVectorImpl< Instruction * > & getMemoryInstructions() const
The vector of memory access instructions.
const SmallVectorImpl< Dependence > * getDependences() const
Returns the memory dependences.
Diagnostic information for optimization analysis remarks.
The optimization diagnostic interface.
Diagnostic information for missed-optimization remarks.
Diagnostic information for applied optimization remarks.
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition: Constants.cpp:1878
const SCEVPredicate & getPredicate() const
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:111
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:117
void preserve()
Mark an analysis as preserved.
Definition: Analysis.h:131
Holds information about the memory runtime legality checks to verify that a group of pointers do not ...
void printChecks(raw_ostream &OS, const SmallVectorImpl< RuntimePointerCheck > &Checks, unsigned Depth=0) const
Print Checks.
bool needsChecking(const RuntimeCheckingPtrGroup &M, const RuntimeCheckingPtrGroup &N) const
Decide if we need to add a check between two groups of pointers, according to needsChecking.
static bool arePointersInSamePartition(const SmallVectorImpl< int > &PtrToPartition, unsigned PtrIdx1, unsigned PtrIdx2)
Check if pointers are in the same partition.
SmallVector< PointerInfo, 2 > Pointers
Information about the pointers that may require checking.
const SmallVectorImpl< RuntimePointerCheck > & getChecks() const
Returns the checks that generateChecks created.
This class represents an assumption made using SCEV expressions which can be checked at run-time.
virtual unsigned getComplexity() const
Returns the estimated complexity of this predicate.
virtual bool isAlwaysTrue() const =0
Returns true if the predicate is always true.
Analysis pass that exposes the ScalarEvolution for a function.
The main scalar evolution driver.
typename vector_type::const_iterator iterator
Definition: SetVector.h:69
typename vector_type::const_iterator const_iterator
Definition: SetVector.h:70
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:370
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:573
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
bool replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Definition: User.cpp:21
LLVM Value Representation.
Definition: Value.h:74
const ParentTy * getParent() const
Definition: ilist_node.h:32
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
friend const_iterator begin(StringRef path, Style style)
Get begin iterator over path.
Definition: Path.cpp:226
friend const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:235
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
@ Entry
Definition: COFF.h:844
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
Definition: PPCPredicates.h:26
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
Definition: Path.cpp:226
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:235
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
std::pair< const RuntimeCheckingPtrGroup *, const RuntimeCheckingPtrGroup * > RuntimePointerCheck
A memcheck which made up of a pair of grouped pointers.
std::optional< const MDOperand * > findStringMetadataForLoop(const Loop *TheLoop, StringRef Name)
Find string metadata for loop.
Definition: LoopInfo.cpp:1065
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
std::optional< MDNode * > makeFollowupLoopID(MDNode *OrigLoopID, ArrayRef< StringRef > FollowupAttrs, const char *InheritOptionsAttrsPrefix="", bool AlwaysNew=false)
Create a new loop identifier for a loop created from a loop transformation.
Definition: LoopUtils.cpp:263
OutputIt copy_if(R &&Range, OutputIt Out, UnaryPredicate P)
Provide wrappers to std::copy_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1785
SmallVector< Instruction *, 8 > findDefsUsedOutsideOfLoop(Loop *L)
Returns the instructions that use values defined in the loop.
Definition: LoopUtils.cpp:123
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:420
Loop * cloneLoopWithPreheader(BasicBlock *Before, BasicBlock *LoopDomBB, Loop *OrigLoop, ValueToValueMapTy &VMap, const Twine &NameSuffix, LoopInfo *LI, DominatorTree *DT, SmallVectorImpl< BasicBlock * > &Blocks)
Clones a loop OrigLoop.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
bool hasDisableAllTransformsHint(const Loop *L)
Look for the loop attribute that disables all transformation heuristic.
Definition: LoopUtils.cpp:344
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Definition: APFixedPoint.h:303
void remapInstructionsInBlocks(ArrayRef< BasicBlock * > Blocks, ValueToValueMapTy &VMap)
Remaps instructions in Blocks using the mapping in VMap.
BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
iterator_range< df_iterator< T > > depth_first(const T &G)
#define N
Dependece between memory access instructions.