LLVM 19.0.0git
LoopDistribute.cpp
Go to the documentation of this file.
1//===- LoopDistribute.cpp - Loop Distribution Pass ------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the Loop Distribution Pass. Its main focus is to
10// distribute loops that cannot be vectorized due to dependence cycles. It
11// tries to isolate the offending dependences into a new loop allowing
12// vectorization of the remaining parts.
13//
14// For dependence analysis, the pass uses the LoopVectorizer's
15// LoopAccessAnalysis. Because this analysis presumes no change in the order of
16// memory operations, special care is taken to preserve the lexical order of
17// these operations.
18//
19// Similarly to the Vectorizer, the pass also supports loop versioning to
20// run-time disambiguate potentially overlapping arrays.
21//
22//===----------------------------------------------------------------------===//
23
25#include "llvm/ADT/DenseMap.h"
28#include "llvm/ADT/STLExtras.h"
29#include "llvm/ADT/SetVector.h"
31#include "llvm/ADT/Statistic.h"
32#include "llvm/ADT/StringRef.h"
33#include "llvm/ADT/Twine.h"
44#include "llvm/IR/BasicBlock.h"
45#include "llvm/IR/Constants.h"
47#include "llvm/IR/Dominators.h"
48#include "llvm/IR/Function.h"
49#include "llvm/IR/Instruction.h"
51#include "llvm/IR/LLVMContext.h"
52#include "llvm/IR/Metadata.h"
53#include "llvm/IR/PassManager.h"
54#include "llvm/IR/Value.h"
57#include "llvm/Support/Debug.h"
64#include <cassert>
65#include <functional>
66#include <list>
67#include <tuple>
68#include <utility>
69
70using namespace llvm;
71
72#define LDIST_NAME "loop-distribute"
73#define DEBUG_TYPE LDIST_NAME
74
75/// @{
76/// Metadata attribute names
77static const char *const LLVMLoopDistributeFollowupAll =
78 "llvm.loop.distribute.followup_all";
79static const char *const LLVMLoopDistributeFollowupCoincident =
80 "llvm.loop.distribute.followup_coincident";
81static const char *const LLVMLoopDistributeFollowupSequential =
82 "llvm.loop.distribute.followup_sequential";
83static const char *const LLVMLoopDistributeFollowupFallback =
84 "llvm.loop.distribute.followup_fallback";
85/// @}
86
87static cl::opt<bool>
88 LDistVerify("loop-distribute-verify", cl::Hidden,
89 cl::desc("Turn on DominatorTree and LoopInfo verification "
90 "after Loop Distribution"),
91 cl::init(false));
92
94 "loop-distribute-non-if-convertible", cl::Hidden,
95 cl::desc("Whether to distribute into a loop that may not be "
96 "if-convertible by the loop vectorizer"),
97 cl::init(false));
98
100 "loop-distribute-scev-check-threshold", cl::init(8), cl::Hidden,
101 cl::desc("The maximum number of SCEV checks allowed for Loop "
102 "Distribution"));
103
105 "loop-distribute-scev-check-threshold-with-pragma", cl::init(128),
107 cl::desc("The maximum number of SCEV checks allowed for Loop "
108 "Distribution for loop marked with #pragma clang loop "
109 "distribute(enable)"));
110
112 "enable-loop-distribute", cl::Hidden,
113 cl::desc("Enable the new, experimental LoopDistribution Pass"),
114 cl::init(false));
115
116STATISTIC(NumLoopsDistributed, "Number of loops distributed");
117
118namespace {
119
120/// Maintains the set of instructions of the loop for a partition before
121/// cloning. After cloning, it hosts the new loop.
122class InstPartition {
123 using InstructionSet = SmallSetVector<Instruction *, 8>;
124
125public:
126 InstPartition(Instruction *I, Loop *L, bool DepCycle = false)
127 : DepCycle(DepCycle), OrigLoop(L) {
128 Set.insert(I);
129 }
130
131 /// Returns whether this partition contains a dependence cycle.
132 bool hasDepCycle() const { return DepCycle; }
133
134 /// Adds an instruction to this partition.
135 void add(Instruction *I) { Set.insert(I); }
136
137 /// Collection accessors.
138 InstructionSet::iterator begin() { return Set.begin(); }
139 InstructionSet::iterator end() { return Set.end(); }
140 InstructionSet::const_iterator begin() const { return Set.begin(); }
141 InstructionSet::const_iterator end() const { return Set.end(); }
142 bool empty() const { return Set.empty(); }
143
144 /// Moves this partition into \p Other. This partition becomes empty
145 /// after this.
146 void moveTo(InstPartition &Other) {
147 Other.Set.insert(Set.begin(), Set.end());
148 Set.clear();
149 Other.DepCycle |= DepCycle;
150 }
151
152 /// Populates the partition with a transitive closure of all the
153 /// instructions that the seeded instructions dependent on.
154 void populateUsedSet() {
155 // FIXME: We currently don't use control-dependence but simply include all
156 // blocks (possibly empty at the end) and let simplifycfg mostly clean this
157 // up.
158 for (auto *B : OrigLoop->getBlocks())
159 Set.insert(B->getTerminator());
160
161 // Follow the use-def chains to form a transitive closure of all the
162 // instructions that the originally seeded instructions depend on.
163 SmallVector<Instruction *, 8> Worklist(Set.begin(), Set.end());
164 while (!Worklist.empty()) {
165 Instruction *I = Worklist.pop_back_val();
166 // Insert instructions from the loop that we depend on.
167 for (Value *V : I->operand_values()) {
168 auto *I = dyn_cast<Instruction>(V);
169 if (I && OrigLoop->contains(I->getParent()) && Set.insert(I))
170 Worklist.push_back(I);
171 }
172 }
173 }
174
175 /// Clones the original loop.
176 ///
177 /// Updates LoopInfo and DominatorTree using the information that block \p
178 /// LoopDomBB dominates the loop.
179 Loop *cloneLoopWithPreheader(BasicBlock *InsertBefore, BasicBlock *LoopDomBB,
180 unsigned Index, LoopInfo *LI,
181 DominatorTree *DT) {
182 ClonedLoop = ::cloneLoopWithPreheader(InsertBefore, LoopDomBB, OrigLoop,
183 VMap, Twine(".ldist") + Twine(Index),
184 LI, DT, ClonedLoopBlocks);
185 return ClonedLoop;
186 }
187
188 /// The cloned loop. If this partition is mapped to the original loop,
189 /// this is null.
190 const Loop *getClonedLoop() const { return ClonedLoop; }
191
192 /// Returns the loop where this partition ends up after distribution.
193 /// If this partition is mapped to the original loop then use the block from
194 /// the loop.
195 Loop *getDistributedLoop() const {
196 return ClonedLoop ? ClonedLoop : OrigLoop;
197 }
198
199 /// The VMap that is populated by cloning and then used in
200 /// remapinstruction to remap the cloned instructions.
201 ValueToValueMapTy &getVMap() { return VMap; }
202
203 /// Remaps the cloned instructions using VMap.
204 void remapInstructions() {
205 remapInstructionsInBlocks(ClonedLoopBlocks, VMap);
206 }
207
208 /// Based on the set of instructions selected for this partition,
209 /// removes the unnecessary ones.
210 void removeUnusedInsts() {
212
213 for (auto *Block : OrigLoop->getBlocks())
214 for (auto &Inst : *Block)
215 if (!Set.count(&Inst)) {
216 Instruction *NewInst = &Inst;
217 if (!VMap.empty())
218 NewInst = cast<Instruction>(VMap[NewInst]);
219
220 assert(!isa<BranchInst>(NewInst) &&
221 "Branches are marked used early on");
222 Unused.push_back(NewInst);
223 }
224
225 // Delete the instructions backwards, as it has a reduced likelihood of
226 // having to update as many def-use and use-def chains.
227 for (auto *Inst : reverse(Unused)) {
228 if (!Inst->use_empty())
229 Inst->replaceAllUsesWith(PoisonValue::get(Inst->getType()));
230 Inst->eraseFromParent();
231 }
232 }
233
234 void print(raw_ostream &OS) const {
235 OS << (DepCycle ? " (cycle)\n" : "\n");
236 for (auto *I : Set)
237 // Prefix with the block name.
238 OS << " " << I->getParent()->getName() << ":" << *I << "\n";
239 }
240
241 void printBlocks(raw_ostream &OS) const {
242 for (auto *BB : getDistributedLoop()->getBlocks())
243 OS << *BB;
244 }
245
246private:
247 /// Instructions from OrigLoop selected for this partition.
248 InstructionSet Set;
249
250 /// Whether this partition contains a dependence cycle.
251 bool DepCycle;
252
253 /// The original loop.
254 Loop *OrigLoop;
255
256 /// The cloned loop. If this partition is mapped to the original loop,
257 /// this is null.
258 Loop *ClonedLoop = nullptr;
259
260 /// The blocks of ClonedLoop including the preheader. If this
261 /// partition is mapped to the original loop, this is empty.
262 SmallVector<BasicBlock *, 8> ClonedLoopBlocks;
263
264 /// These gets populated once the set of instructions have been
265 /// finalized. If this partition is mapped to the original loop, these are not
266 /// set.
268};
269
270/// Holds the set of Partitions. It populates them, merges them and then
271/// clones the loops.
272class InstPartitionContainer {
273 using InstToPartitionIdT = DenseMap<Instruction *, int>;
274
275public:
276 InstPartitionContainer(Loop *L, LoopInfo *LI, DominatorTree *DT)
277 : L(L), LI(LI), DT(DT) {}
278
279 /// Returns the number of partitions.
280 unsigned getSize() const { return PartitionContainer.size(); }
281
282 /// Adds \p Inst into the current partition if that is marked to
283 /// contain cycles. Otherwise start a new partition for it.
284 void addToCyclicPartition(Instruction *Inst) {
285 // If the current partition is non-cyclic. Start a new one.
286 if (PartitionContainer.empty() || !PartitionContainer.back().hasDepCycle())
287 PartitionContainer.emplace_back(Inst, L, /*DepCycle=*/true);
288 else
289 PartitionContainer.back().add(Inst);
290 }
291
292 /// Adds \p Inst into a partition that is not marked to contain
293 /// dependence cycles.
294 ///
295 // Initially we isolate memory instructions into as many partitions as
296 // possible, then later we may merge them back together.
297 void addToNewNonCyclicPartition(Instruction *Inst) {
298 PartitionContainer.emplace_back(Inst, L);
299 }
300
301 /// Merges adjacent non-cyclic partitions.
302 ///
303 /// The idea is that we currently only want to isolate the non-vectorizable
304 /// partition. We could later allow more distribution among these partition
305 /// too.
306 void mergeAdjacentNonCyclic() {
307 mergeAdjacentPartitionsIf(
308 [](const InstPartition *P) { return !P->hasDepCycle(); });
309 }
310
311 /// If a partition contains only conditional stores, we won't vectorize
312 /// it. Try to merge it with a previous cyclic partition.
313 void mergeNonIfConvertible() {
314 mergeAdjacentPartitionsIf([&](const InstPartition *Partition) {
315 if (Partition->hasDepCycle())
316 return true;
317
318 // Now, check if all stores are conditional in this partition.
319 bool seenStore = false;
320
321 for (auto *Inst : *Partition)
322 if (isa<StoreInst>(Inst)) {
323 seenStore = true;
325 return false;
326 }
327 return seenStore;
328 });
329 }
330
331 /// Merges the partitions according to various heuristics.
332 void mergeBeforePopulating() {
333 mergeAdjacentNonCyclic();
335 mergeNonIfConvertible();
336 }
337
338 /// Merges partitions in order to ensure that no loads are duplicated.
339 ///
340 /// We can't duplicate loads because that could potentially reorder them.
341 /// LoopAccessAnalysis provides dependency information with the context that
342 /// the order of memory operation is preserved.
343 ///
344 /// Return if any partitions were merged.
345 bool mergeToAvoidDuplicatedLoads() {
346 using LoadToPartitionT = DenseMap<Instruction *, InstPartition *>;
347 using ToBeMergedT = EquivalenceClasses<InstPartition *>;
348
349 LoadToPartitionT LoadToPartition;
350 ToBeMergedT ToBeMerged;
351
352 // Step through the partitions and create equivalence between partitions
353 // that contain the same load. Also put partitions in between them in the
354 // same equivalence class to avoid reordering of memory operations.
355 for (PartitionContainerT::iterator I = PartitionContainer.begin(),
356 E = PartitionContainer.end();
357 I != E; ++I) {
358 auto *PartI = &*I;
359
360 // If a load occurs in two partitions PartI and PartJ, merge all
361 // partitions (PartI, PartJ] into PartI.
362 for (Instruction *Inst : *PartI)
363 if (isa<LoadInst>(Inst)) {
364 bool NewElt;
365 LoadToPartitionT::iterator LoadToPart;
366
367 std::tie(LoadToPart, NewElt) =
368 LoadToPartition.insert(std::make_pair(Inst, PartI));
369 if (!NewElt) {
371 dbgs()
372 << "LDist: Merging partitions due to this load in multiple "
373 << "partitions: " << PartI << ", " << LoadToPart->second << "\n"
374 << *Inst << "\n");
375
376 auto PartJ = I;
377 do {
378 --PartJ;
379 ToBeMerged.unionSets(PartI, &*PartJ);
380 } while (&*PartJ != LoadToPart->second);
381 }
382 }
383 }
384 if (ToBeMerged.empty())
385 return false;
386
387 // Merge the member of an equivalence class into its class leader. This
388 // makes the members empty.
389 for (ToBeMergedT::iterator I = ToBeMerged.begin(), E = ToBeMerged.end();
390 I != E; ++I) {
391 if (!I->isLeader())
392 continue;
393
394 auto PartI = I->getData();
395 for (auto *PartJ : make_range(std::next(ToBeMerged.member_begin(I)),
396 ToBeMerged.member_end())) {
397 PartJ->moveTo(*PartI);
398 }
399 }
400
401 // Remove the empty partitions.
402 PartitionContainer.remove_if(
403 [](const InstPartition &P) { return P.empty(); });
404
405 return true;
406 }
407
408 /// Sets up the mapping between instructions to partitions. If the
409 /// instruction is duplicated across multiple partitions, set the entry to -1.
410 void setupPartitionIdOnInstructions() {
411 int PartitionID = 0;
412 for (const auto &Partition : PartitionContainer) {
413 for (Instruction *Inst : Partition) {
414 bool NewElt;
416
417 std::tie(Iter, NewElt) =
418 InstToPartitionId.insert(std::make_pair(Inst, PartitionID));
419 if (!NewElt)
420 Iter->second = -1;
421 }
422 ++PartitionID;
423 }
424 }
425
426 /// Populates the partition with everything that the seeding
427 /// instructions require.
428 void populateUsedSet() {
429 for (auto &P : PartitionContainer)
430 P.populateUsedSet();
431 }
432
433 /// This performs the main chunk of the work of cloning the loops for
434 /// the partitions.
435 void cloneLoops() {
436 BasicBlock *OrigPH = L->getLoopPreheader();
437 // At this point the predecessor of the preheader is either the memcheck
438 // block or the top part of the original preheader.
439 BasicBlock *Pred = OrigPH->getSinglePredecessor();
440 assert(Pred && "Preheader does not have a single predecessor");
441 BasicBlock *ExitBlock = L->getExitBlock();
442 assert(ExitBlock && "No single exit block");
443 Loop *NewLoop;
444
445 assert(!PartitionContainer.empty() && "at least two partitions expected");
446 // We're cloning the preheader along with the loop so we already made sure
447 // it was empty.
448 assert(&*OrigPH->begin() == OrigPH->getTerminator() &&
449 "preheader not empty");
450
451 // Preserve the original loop ID for use after the transformation.
452 MDNode *OrigLoopID = L->getLoopID();
453
454 // Create a loop for each partition except the last. Clone the original
455 // loop before PH along with adding a preheader for the cloned loop. Then
456 // update PH to point to the newly added preheader.
457 BasicBlock *TopPH = OrigPH;
458 unsigned Index = getSize() - 1;
459 for (auto &Part : llvm::drop_begin(llvm::reverse(PartitionContainer))) {
460 NewLoop = Part.cloneLoopWithPreheader(TopPH, Pred, Index, LI, DT);
461
462 Part.getVMap()[ExitBlock] = TopPH;
463 Part.remapInstructions();
464 setNewLoopID(OrigLoopID, &Part);
465 --Index;
466 TopPH = NewLoop->getLoopPreheader();
467 }
468 Pred->getTerminator()->replaceUsesOfWith(OrigPH, TopPH);
469
470 // Also set a new loop ID for the last loop.
471 setNewLoopID(OrigLoopID, &PartitionContainer.back());
472
473 // Now go in forward order and update the immediate dominator for the
474 // preheaders with the exiting block of the previous loop. Dominance
475 // within the loop is updated in cloneLoopWithPreheader.
476 for (auto Curr = PartitionContainer.cbegin(),
477 Next = std::next(PartitionContainer.cbegin()),
478 E = PartitionContainer.cend();
479 Next != E; ++Curr, ++Next)
481 Next->getDistributedLoop()->getLoopPreheader(),
482 Curr->getDistributedLoop()->getExitingBlock());
483 }
484
485 /// Removes the dead instructions from the cloned loops.
486 void removeUnusedInsts() {
487 for (auto &Partition : PartitionContainer)
488 Partition.removeUnusedInsts();
489 }
490
491 /// For each memory pointer, it computes the partitionId the pointer is
492 /// used in.
493 ///
494 /// This returns an array of int where the I-th entry corresponds to I-th
495 /// entry in LAI.getRuntimePointerCheck(). If the pointer is used in multiple
496 /// partitions its entry is set to -1.
498 computePartitionSetForPointers(const LoopAccessInfo &LAI) {
499 const RuntimePointerChecking *RtPtrCheck = LAI.getRuntimePointerChecking();
500
501 unsigned N = RtPtrCheck->Pointers.size();
502 SmallVector<int, 8> PtrToPartitions(N);
503 for (unsigned I = 0; I < N; ++I) {
504 Value *Ptr = RtPtrCheck->Pointers[I].PointerValue;
505 auto Instructions =
506 LAI.getInstructionsForAccess(Ptr, RtPtrCheck->Pointers[I].IsWritePtr);
507
508 int &Partition = PtrToPartitions[I];
509 // First set it to uninitialized.
510 Partition = -2;
511 for (Instruction *Inst : Instructions) {
512 // Note that this could be -1 if Inst is duplicated across multiple
513 // partitions.
514 int ThisPartition = this->InstToPartitionId[Inst];
515 if (Partition == -2)
516 Partition = ThisPartition;
517 // -1 means belonging to multiple partitions.
518 else if (Partition == -1)
519 break;
520 else if (Partition != (int)ThisPartition)
521 Partition = -1;
522 }
523 assert(Partition != -2 && "Pointer not belonging to any partition");
524 }
525
526 return PtrToPartitions;
527 }
528
529 void print(raw_ostream &OS) const {
530 unsigned Index = 0;
531 for (const auto &P : PartitionContainer) {
532 OS << "LDist: Partition " << Index++ << ":";
533 P.print(OS);
534 }
535 }
536
537 void dump() const { print(dbgs()); }
538
539#ifndef NDEBUG
541 const InstPartitionContainer &Partitions) {
542 Partitions.print(OS);
543 return OS;
544 }
545#endif
546
547 void printBlocks(raw_ostream &OS) const {
548 unsigned Index = 0;
549 for (const auto &P : PartitionContainer) {
550 OS << "LDist: Partition " << Index++ << ":";
551 P.printBlocks(OS);
552 }
553 }
554
555private:
556 using PartitionContainerT = std::list<InstPartition>;
557
558 /// List of partitions.
559 PartitionContainerT PartitionContainer;
560
561 /// Mapping from Instruction to partition Id. If the instruction
562 /// belongs to multiple partitions the entry contains -1.
563 InstToPartitionIdT InstToPartitionId;
564
565 Loop *L;
566 LoopInfo *LI;
567 DominatorTree *DT;
568
569 /// The control structure to merge adjacent partitions if both satisfy
570 /// the \p Predicate.
571 template <class UnaryPredicate>
572 void mergeAdjacentPartitionsIf(UnaryPredicate Predicate) {
573 InstPartition *PrevMatch = nullptr;
574 for (auto I = PartitionContainer.begin(); I != PartitionContainer.end();) {
575 auto DoesMatch = Predicate(&*I);
576 if (PrevMatch == nullptr && DoesMatch) {
577 PrevMatch = &*I;
578 ++I;
579 } else if (PrevMatch != nullptr && DoesMatch) {
580 I->moveTo(*PrevMatch);
581 I = PartitionContainer.erase(I);
582 } else {
583 PrevMatch = nullptr;
584 ++I;
585 }
586 }
587 }
588
589 /// Assign new LoopIDs for the partition's cloned loop.
590 void setNewLoopID(MDNode *OrigLoopID, InstPartition *Part) {
591 std::optional<MDNode *> PartitionID = makeFollowupLoopID(
592 OrigLoopID,
594 Part->hasDepCycle() ? LLVMLoopDistributeFollowupSequential
596 if (PartitionID) {
597 Loop *NewLoop = Part->getDistributedLoop();
598 NewLoop->setLoopID(*PartitionID);
599 }
600 }
601};
602
603/// For each memory instruction, this class maintains difference of the
604/// number of unsafe dependences that start out from this instruction minus
605/// those that end here.
606///
607/// By traversing the memory instructions in program order and accumulating this
608/// number, we know whether any unsafe dependence crosses over a program point.
609class MemoryInstructionDependences {
611
612public:
613 struct Entry {
614 Instruction *Inst;
615 unsigned NumUnsafeDependencesStartOrEnd = 0;
616
617 Entry(Instruction *Inst) : Inst(Inst) {}
618 };
619
620 using AccessesType = SmallVector<Entry, 8>;
621
622 AccessesType::const_iterator begin() const { return Accesses.begin(); }
623 AccessesType::const_iterator end() const { return Accesses.end(); }
624
625 MemoryInstructionDependences(
626 const SmallVectorImpl<Instruction *> &Instructions,
627 const SmallVectorImpl<Dependence> &Dependences) {
628 Accesses.append(Instructions.begin(), Instructions.end());
629
630 LLVM_DEBUG(dbgs() << "LDist: Backward dependences:\n");
631 for (const auto &Dep : Dependences)
632 if (Dep.isPossiblyBackward()) {
633 // Note that the designations source and destination follow the program
634 // order, i.e. source is always first. (The direction is given by the
635 // DepType.)
636 ++Accesses[Dep.Source].NumUnsafeDependencesStartOrEnd;
637 --Accesses[Dep.Destination].NumUnsafeDependencesStartOrEnd;
638
639 LLVM_DEBUG(Dep.print(dbgs(), 2, Instructions));
640 }
641 }
642
643private:
644 AccessesType Accesses;
645};
646
647/// The actual class performing the per-loop work.
648class LoopDistributeForLoop {
649public:
650 LoopDistributeForLoop(Loop *L, Function *F, LoopInfo *LI, DominatorTree *DT,
653 : L(L), F(F), LI(LI), DT(DT), SE(SE), LAIs(LAIs), ORE(ORE) {
654 setForced();
655 }
656
657 /// Try to distribute an inner-most loop.
658 bool processLoop() {
659 assert(L->isInnermost() && "Only process inner loops.");
660
661 LLVM_DEBUG(dbgs() << "\nLDist: Checking a loop in '"
662 << L->getHeader()->getParent()->getName() << "' from "
663 << L->getLocStr() << "\n");
664
665 // Having a single exit block implies there's also one exiting block.
666 if (!L->getExitBlock())
667 return fail("MultipleExitBlocks", "multiple exit blocks");
668 if (!L->isLoopSimplifyForm())
669 return fail("NotLoopSimplifyForm",
670 "loop is not in loop-simplify form");
671 if (!L->isRotatedForm())
672 return fail("NotBottomTested", "loop is not bottom tested");
673
674 BasicBlock *PH = L->getLoopPreheader();
675
676 LAI = &LAIs.getInfo(*L);
677
678 // Currently, we only distribute to isolate the part of the loop with
679 // dependence cycles to enable partial vectorization.
680 if (LAI->canVectorizeMemory())
681 return fail("MemOpsCanBeVectorized",
682 "memory operations are safe for vectorization");
683
684 auto *Dependences = LAI->getDepChecker().getDependences();
685 if (!Dependences || Dependences->empty())
686 return fail("NoUnsafeDeps", "no unsafe dependences to isolate");
687
688 LLVM_DEBUG(dbgs() << "LDist: Found a candidate loop: "
689 << L->getHeader()->getName() << "\n");
690
691 InstPartitionContainer Partitions(L, LI, DT);
692
693 // First, go through each memory operation and assign them to consecutive
694 // partitions (the order of partitions follows program order). Put those
695 // with unsafe dependences into "cyclic" partition otherwise put each store
696 // in its own "non-cyclic" partition (we'll merge these later).
697 //
698 // Note that a memory operation (e.g. Load2 below) at a program point that
699 // has an unsafe dependence (Store3->Load1) spanning over it must be
700 // included in the same cyclic partition as the dependent operations. This
701 // is to preserve the original program order after distribution. E.g.:
702 //
703 // NumUnsafeDependencesStartOrEnd NumUnsafeDependencesActive
704 // Load1 -. 1 0->1
705 // Load2 | /Unsafe/ 0 1
706 // Store3 -' -1 1->0
707 // Load4 0 0
708 //
709 // NumUnsafeDependencesActive > 0 indicates this situation and in this case
710 // we just keep assigning to the same cyclic partition until
711 // NumUnsafeDependencesActive reaches 0.
712 const MemoryDepChecker &DepChecker = LAI->getDepChecker();
713 MemoryInstructionDependences MID(DepChecker.getMemoryInstructions(),
714 *Dependences);
715
716 int NumUnsafeDependencesActive = 0;
717 for (const auto &InstDep : MID) {
718 Instruction *I = InstDep.Inst;
719 // We update NumUnsafeDependencesActive post-instruction, catch the
720 // start of a dependence directly via NumUnsafeDependencesStartOrEnd.
721 if (NumUnsafeDependencesActive ||
722 InstDep.NumUnsafeDependencesStartOrEnd > 0)
723 Partitions.addToCyclicPartition(I);
724 else
725 Partitions.addToNewNonCyclicPartition(I);
726 NumUnsafeDependencesActive += InstDep.NumUnsafeDependencesStartOrEnd;
727 assert(NumUnsafeDependencesActive >= 0 &&
728 "Negative number of dependences active");
729 }
730
731 // Add partitions for values used outside. These partitions can be out of
732 // order from the original program order. This is OK because if the
733 // partition uses a load we will merge this partition with the original
734 // partition of the load that we set up in the previous loop (see
735 // mergeToAvoidDuplicatedLoads).
736 auto DefsUsedOutside = findDefsUsedOutsideOfLoop(L);
737 for (auto *Inst : DefsUsedOutside)
738 Partitions.addToNewNonCyclicPartition(Inst);
739
740 LLVM_DEBUG(dbgs() << "LDist: Seeded partitions:\n" << Partitions);
741 if (Partitions.getSize() < 2)
742 return fail("CantIsolateUnsafeDeps",
743 "cannot isolate unsafe dependencies");
744
745 // Run the merge heuristics: Merge non-cyclic adjacent partitions since we
746 // should be able to vectorize these together.
747 Partitions.mergeBeforePopulating();
748 LLVM_DEBUG(dbgs() << "LDist: Merged partitions:\n" << Partitions);
749 if (Partitions.getSize() < 2)
750 return fail("CantIsolateUnsafeDeps",
751 "cannot isolate unsafe dependencies");
752
753 // Now, populate the partitions with non-memory operations.
754 Partitions.populateUsedSet();
755 LLVM_DEBUG(dbgs() << "LDist: Populated partitions:\n" << Partitions);
756
757 // In order to preserve original lexical order for loads, keep them in the
758 // partition that we set up in the MemoryInstructionDependences loop.
759 if (Partitions.mergeToAvoidDuplicatedLoads()) {
760 LLVM_DEBUG(dbgs() << "LDist: Partitions merged to ensure unique loads:\n"
761 << Partitions);
762 if (Partitions.getSize() < 2)
763 return fail("CantIsolateUnsafeDeps",
764 "cannot isolate unsafe dependencies");
765 }
766
767 // Don't distribute the loop if we need too many SCEV run-time checks, or
768 // any if it's illegal.
769 const SCEVPredicate &Pred = LAI->getPSE().getPredicate();
770 if (LAI->hasConvergentOp() && !Pred.isAlwaysTrue()) {
771 return fail("RuntimeCheckWithConvergent",
772 "may not insert runtime check with convergent operation");
773 }
774
775 if (Pred.getComplexity() > (IsForced.value_or(false)
778 return fail("TooManySCEVRuntimeChecks",
779 "too many SCEV run-time checks needed.\n");
780
781 if (!IsForced.value_or(false) && hasDisableAllTransformsHint(L))
782 return fail("HeuristicDisabled", "distribution heuristic disabled");
783
784 LLVM_DEBUG(dbgs() << "LDist: Distributing loop: "
785 << L->getHeader()->getName() << "\n");
786 // We're done forming the partitions set up the reverse mapping from
787 // instructions to partitions.
788 Partitions.setupPartitionIdOnInstructions();
789
790 // If we need run-time checks, version the loop now.
791 auto PtrToPartition = Partitions.computePartitionSetForPointers(*LAI);
792 const auto *RtPtrChecking = LAI->getRuntimePointerChecking();
793 const auto &AllChecks = RtPtrChecking->getChecks();
794 auto Checks = includeOnlyCrossPartitionChecks(AllChecks, PtrToPartition,
795 RtPtrChecking);
796
797 if (LAI->hasConvergentOp() && !Checks.empty()) {
798 return fail("RuntimeCheckWithConvergent",
799 "may not insert runtime check with convergent operation");
800 }
801
802 // To keep things simple have an empty preheader before we version or clone
803 // the loop. (Also split if this has no predecessor, i.e. entry, because we
804 // rely on PH having a predecessor.)
805 if (!PH->getSinglePredecessor() || &*PH->begin() != PH->getTerminator())
806 SplitBlock(PH, PH->getTerminator(), DT, LI);
807
808 if (!Pred.isAlwaysTrue() || !Checks.empty()) {
809 assert(!LAI->hasConvergentOp() && "inserting illegal loop versioning");
810
811 MDNode *OrigLoopID = L->getLoopID();
812
813 LLVM_DEBUG(dbgs() << "LDist: Pointers:\n");
815 LoopVersioning LVer(*LAI, Checks, L, LI, DT, SE);
816 LVer.versionLoop(DefsUsedOutside);
817 LVer.annotateLoopWithNoAlias();
818
819 // The unversioned loop will not be changed, so we inherit all attributes
820 // from the original loop, but remove the loop distribution metadata to
821 // avoid to distribute it again.
822 MDNode *UnversionedLoopID = *makeFollowupLoopID(
823 OrigLoopID,
825 "llvm.loop.distribute.", true);
826 LVer.getNonVersionedLoop()->setLoopID(UnversionedLoopID);
827 }
828
829 // Create identical copies of the original loop for each partition and hook
830 // them up sequentially.
831 Partitions.cloneLoops();
832
833 // Now, we remove the instruction from each loop that don't belong to that
834 // partition.
835 Partitions.removeUnusedInsts();
836 LLVM_DEBUG(dbgs() << "LDist: After removing unused Instrs:\n");
837 LLVM_DEBUG(Partitions.printBlocks(dbgs()));
838
839 if (LDistVerify) {
840 LI->verify(*DT);
841 assert(DT->verify(DominatorTree::VerificationLevel::Fast));
842 }
843
844 ++NumLoopsDistributed;
845 // Report the success.
846 ORE->emit([&]() {
847 return OptimizationRemark(LDIST_NAME, "Distribute", L->getStartLoc(),
848 L->getHeader())
849 << "distributed loop";
850 });
851 return true;
852 }
853
854 /// Provide diagnostics then \return with false.
855 bool fail(StringRef RemarkName, StringRef Message) {
856 LLVMContext &Ctx = F->getContext();
857 bool Forced = isForced().value_or(false);
858
859 LLVM_DEBUG(dbgs() << "LDist: Skipping; " << Message << "\n");
860
861 // With Rpass-missed report that distribution failed.
862 ORE->emit([&]() {
863 return OptimizationRemarkMissed(LDIST_NAME, "NotDistributed",
864 L->getStartLoc(), L->getHeader())
865 << "loop not distributed: use -Rpass-analysis=loop-distribute for "
866 "more "
867 "info";
868 });
869
870 // With Rpass-analysis report why. This is on by default if distribution
871 // was requested explicitly.
874 RemarkName, L->getStartLoc(), L->getHeader())
875 << "loop not distributed: " << Message);
876
877 // Also issue a warning if distribution was requested explicitly but it
878 // failed.
879 if (Forced)
881 *F, L->getStartLoc(), "loop not distributed: failed "
882 "explicitly specified loop distribution"));
883
884 return false;
885 }
886
887 /// Return if distribution forced to be enabled/disabled for the loop.
888 ///
889 /// If the optional has a value, it indicates whether distribution was forced
890 /// to be enabled (true) or disabled (false). If the optional has no value
891 /// distribution was not forced either way.
892 const std::optional<bool> &isForced() const { return IsForced; }
893
894private:
895 /// Filter out checks between pointers from the same partition.
896 ///
897 /// \p PtrToPartition contains the partition number for pointers. Partition
898 /// number -1 means that the pointer is used in multiple partitions. In this
899 /// case we can't safely omit the check.
900 SmallVector<RuntimePointerCheck, 4> includeOnlyCrossPartitionChecks(
902 const SmallVectorImpl<int> &PtrToPartition,
903 const RuntimePointerChecking *RtPtrChecking) {
905
906 copy_if(AllChecks, std::back_inserter(Checks),
907 [&](const RuntimePointerCheck &Check) {
908 for (unsigned PtrIdx1 : Check.first->Members)
909 for (unsigned PtrIdx2 : Check.second->Members)
910 // Only include this check if there is a pair of pointers
911 // that require checking and the pointers fall into
912 // separate partitions.
913 //
914 // (Note that we already know at this point that the two
915 // pointer groups need checking but it doesn't follow
916 // that each pair of pointers within the two groups need
917 // checking as well.
918 //
919 // In other words we don't want to include a check just
920 // because there is a pair of pointers between the two
921 // pointer groups that require checks and a different
922 // pair whose pointers fall into different partitions.)
923 if (RtPtrChecking->needsChecking(PtrIdx1, PtrIdx2) &&
925 PtrToPartition, PtrIdx1, PtrIdx2))
926 return true;
927 return false;
928 });
929
930 return Checks;
931 }
932
933 /// Check whether the loop metadata is forcing distribution to be
934 /// enabled/disabled.
935 void setForced() {
936 std::optional<const MDOperand *> Value =
937 findStringMetadataForLoop(L, "llvm.loop.distribute.enable");
938 if (!Value)
939 return;
940
941 const MDOperand *Op = *Value;
942 assert(Op && mdconst::hasa<ConstantInt>(*Op) && "invalid metadata");
943 IsForced = mdconst::extract<ConstantInt>(*Op)->getZExtValue();
944 }
945
946 Loop *L;
947 Function *F;
948
949 // Analyses used.
950 LoopInfo *LI;
951 const LoopAccessInfo *LAI = nullptr;
952 DominatorTree *DT;
953 ScalarEvolution *SE;
956
957 /// Indicates whether distribution is forced to be enabled/disabled for
958 /// the loop.
959 ///
960 /// If the optional has a value, it indicates whether distribution was forced
961 /// to be enabled (true) or disabled (false). If the optional has no value
962 /// distribution was not forced either way.
963 std::optional<bool> IsForced;
964};
965
966} // end anonymous namespace
967
968static bool runImpl(Function &F, LoopInfo *LI, DominatorTree *DT,
970 LoopAccessInfoManager &LAIs) {
971 // Build up a worklist of inner-loops to distribute. This is necessary as the
972 // act of distributing a loop creates new loops and can invalidate iterators
973 // across the loops.
974 SmallVector<Loop *, 8> Worklist;
975
976 for (Loop *TopLevelLoop : *LI)
977 for (Loop *L : depth_first(TopLevelLoop))
978 // We only handle inner-most loops.
979 if (L->isInnermost())
980 Worklist.push_back(L);
981
982 // Now walk the identified inner loops.
983 bool Changed = false;
984 for (Loop *L : Worklist) {
985 LoopDistributeForLoop LDL(L, &F, LI, DT, SE, LAIs, ORE);
986
987 // If distribution was forced for the specific loop to be
988 // enabled/disabled, follow that. Otherwise use the global flag.
989 if (LDL.isForced().value_or(EnableLoopDistribute))
990 Changed |= LDL.processLoop();
991 }
992
993 // Process each loop nest in the function.
994 return Changed;
995}
996
999 auto &LI = AM.getResult<LoopAnalysis>(F);
1000 auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
1001 auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
1003
1005 bool Changed = runImpl(F, &LI, &DT, &SE, &ORE, LAIs);
1006 if (!Changed)
1007 return PreservedAnalyses::all();
1009 PA.preserve<LoopAnalysis>();
1011 return PA;
1012}
for(const MachineOperand &MO :llvm::drop_begin(OldMI.operands(), Desc.getNumOperands()))
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static void fail(const SDLoc &DL, SelectionDAG &DAG, const Twine &Msg, SDValue Val={})
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file defines the DenseMap class.
This file builds on the ADT/GraphTraits.h file to build generic depth first graph iterator.
std::optional< std::vector< StOtherPiece > > Other
Definition: ELFYAML.cpp:1294
Generic implementation of equivalence classes through the use Tarjan's efficient union-find algorithm...
static bool runImpl(Function &F, const TargetLowering &TLI)
#define Check(C,...)
This is the interface for a simple mod/ref and alias analysis over globals.
This header provides classes for managing per-loop analyses.
static const char *const LLVMLoopDistributeFollowupCoincident
static cl::opt< bool > DistributeNonIfConvertible("loop-distribute-non-if-convertible", cl::Hidden, cl::desc("Whether to distribute into a loop that may not be " "if-convertible by the loop vectorizer"), cl::init(false))
static cl::opt< bool > EnableLoopDistribute("enable-loop-distribute", cl::Hidden, cl::desc("Enable the new, experimental LoopDistribution Pass"), cl::init(false))
static cl::opt< unsigned > DistributeSCEVCheckThreshold("loop-distribute-scev-check-threshold", cl::init(8), cl::Hidden, cl::desc("The maximum number of SCEV checks allowed for Loop " "Distribution"))
#define LDIST_NAME
static const char *const LLVMLoopDistributeFollowupSequential
static const char *const LLVMLoopDistributeFollowupAll
static cl::opt< unsigned > PragmaDistributeSCEVCheckThreshold("loop-distribute-scev-check-threshold-with-pragma", cl::init(128), cl::Hidden, cl::desc("The maximum number of SCEV checks allowed for Loop " "Distribution for loop marked with #pragma clang loop " "distribute(enable)"))
static const char *const LLVMLoopDistributeFollowupFallback
static bool runImpl(Function &F, LoopInfo *LI, DominatorTree *DT, ScalarEvolution *SE, OptimizationRemarkEmitter *ORE, LoopAccessInfoManager &LAIs)
static cl::opt< bool > LDistVerify("loop-distribute-verify", cl::Hidden, cl::desc("Turn on DominatorTree and LoopInfo verification " "after Loop Distribution"), cl::init(false))
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
This file contains the declarations for metadata subclasses.
#define P(N)
if(VerifyEach)
This header defines various interfaces for pass management in LLVM.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
raw_pwrite_stream & OS
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
This pass exposes codegen information to IR-level passes.
static unsigned getSize(unsigned Kind)
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:405
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:438
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:457
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:229
This class represents an Operation in the Expression.
DenseMapIterator< KeyT, ValueT, KeyInfoT, BucketT > iterator
Definition: DenseMap.h:71
Dependence - This class represents a dependence between two memory memory references in a function.
Diagnostic information for optimization failures.
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:279
bool verify(VerificationLevel VL=VerificationLevel::Full) const
verify - checks if the tree is correct.
void changeImmediateDominator(DomTreeNodeBase< NodeT > *N, DomTreeNodeBase< NodeT > *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node's...
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
EquivalenceClasses - This represents a collection of equivalence classes and supports three efficient...
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
This analysis provides dependence information for the memory accesses of a loop.
Drive the analysis of memory accesses in the loop.
const MemoryDepChecker & getDepChecker() const
the Memory Dependence Checker which can determine the loop-independent and loop-carried dependences b...
const RuntimePointerChecking * getRuntimePointerChecking() const
bool canVectorizeMemory() const
Return true we can analyze the memory accesses in the loop and there are no memory dependence cycles.
const PredicatedScalarEvolution & getPSE() const
Used to add runtime SCEV checks.
static bool blockNeedsPredication(BasicBlock *BB, Loop *TheLoop, DominatorTree *DT)
Return true if the block BB needs to be predicated in order for the loop to be vectorized.
SmallVector< Instruction *, 4 > getInstructionsForAccess(Value *Ptr, bool isWrite) const
Return the list of instructions that use Ptr to read or write memory.
bool hasConvergentOp() const
Return true if there is a convergent operation in the loop.
Analysis pass that exposes the LoopInfo for a function.
Definition: LoopInfo.h:571
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
void verify(const DominatorTreeBase< BlockT, false > &DomTree) const
This class emits a version of the loop where run-time checks ensure that may-alias pointers can't ove...
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:44
void setLoopID(MDNode *LoopID) const
Set the llvm.loop loop id metadata for this loop.
Definition: LoopInfo.cpp:526
Metadata node.
Definition: Metadata.h:1067
Tracking metadata reference owned by Metadata.
Definition: Metadata.h:889
Checks memory dependences among accesses to the same underlying object to determine whether there vec...
const SmallVectorImpl< Instruction * > & getMemoryInstructions() const
The vector of memory access instructions.
const SmallVectorImpl< Dependence > * getDependences() const
Returns the memory dependences.
Diagnostic information for optimization analysis remarks.
The optimization diagnostic interface.
Diagnostic information for missed-optimization remarks.
Diagnostic information for applied optimization remarks.
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition: Constants.cpp:1852
const SCEVPredicate & getPredicate() const
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:111
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:117
void preserve()
Mark an analysis as preserved.
Definition: Analysis.h:131
Holds information about the memory runtime legality checks to verify that a group of pointers do not ...
void printChecks(raw_ostream &OS, const SmallVectorImpl< RuntimePointerCheck > &Checks, unsigned Depth=0) const
Print Checks.
bool needsChecking(const RuntimeCheckingPtrGroup &M, const RuntimeCheckingPtrGroup &N) const
Decide if we need to add a check between two groups of pointers, according to needsChecking.
static bool arePointersInSamePartition(const SmallVectorImpl< int > &PtrToPartition, unsigned PtrIdx1, unsigned PtrIdx2)
Check if pointers are in the same partition.
SmallVector< PointerInfo, 2 > Pointers
Information about the pointers that may require checking.
const SmallVectorImpl< RuntimePointerCheck > & getChecks() const
Returns the checks that generateChecks created.
This class represents an assumption made using SCEV expressions which can be checked at run-time.
virtual unsigned getComplexity() const
Returns the estimated complexity of this predicate.
virtual bool isAlwaysTrue() const =0
Returns true if the predicate is always true.
Analysis pass that exposes the ScalarEvolution for a function.
The main scalar evolution driver.
typename vector_type::const_iterator iterator
Definition: SetVector.h:69
typename vector_type::const_iterator const_iterator
Definition: SetVector.h:70
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:370
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
bool replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Definition: User.cpp:21
LLVM Value Representation.
Definition: Value.h:74
const ParentTy * getParent() const
Definition: ilist_node.h:32
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
friend const_iterator begin(StringRef path, Style style)
Get begin iterator over path.
Definition: Path.cpp:227
friend const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:236
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
@ Entry
Definition: COFF.h:811
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
Definition: PPCPredicates.h:26
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
Definition: Path.cpp:227
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:236
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
std::pair< const RuntimeCheckingPtrGroup *, const RuntimeCheckingPtrGroup * > RuntimePointerCheck
A memcheck which made up of a pair of grouped pointers.
std::optional< const MDOperand * > findStringMetadataForLoop(const Loop *TheLoop, StringRef Name)
Find string metadata for loop.
Definition: LoopInfo.cpp:1065
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
std::optional< MDNode * > makeFollowupLoopID(MDNode *OrigLoopID, ArrayRef< StringRef > FollowupAttrs, const char *InheritOptionsAttrsPrefix="", bool AlwaysNew=false)
Create a new loop identifier for a loop created from a loop transformation.
Definition: LoopUtils.cpp:263
OutputIt copy_if(R &&Range, OutputIt Out, UnaryPredicate P)
Provide wrappers to std::copy_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1768
SmallVector< Instruction *, 8 > findDefsUsedOutsideOfLoop(Loop *L)
Returns the instructions that use values defined in the loop.
Definition: LoopUtils.cpp:123
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:419
Loop * cloneLoopWithPreheader(BasicBlock *Before, BasicBlock *LoopDomBB, Loop *OrigLoop, ValueToValueMapTy &VMap, const Twine &NameSuffix, LoopInfo *LI, DominatorTree *DT, SmallVectorImpl< BasicBlock * > &Blocks)
Clones a loop OrigLoop.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
bool hasDisableAllTransformsHint(const Loop *L)
Look for the loop attribute that disables all transformation heuristic.
Definition: LoopUtils.cpp:344
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Definition: APFixedPoint.h:293
void remapInstructionsInBlocks(ArrayRef< BasicBlock * > Blocks, ValueToValueMapTy &VMap)
Remaps instructions in Blocks using the mapping in VMap.
BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
iterator_range< df_iterator< T > > depth_first(const T &G)
#define N
Dependece between memory access instructions.