LoopDistribute.cpp
1//===- LoopDistribute.cpp - Loop Distribution Pass ------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the Loop Distribution Pass. Its main focus is to
10// distribute loops that cannot be vectorized due to dependence cycles. It
11// tries to isolate the offending dependences into a new loop allowing
12// vectorization of the remaining parts.
13//
14// For dependence analysis, the pass uses the LoopVectorizer's
15// LoopAccessAnalysis. Because this analysis presumes no change in the order of
16// memory operations, special care is taken to preserve the lexical order of
17// these operations.
18//
19// Similarly to the Vectorizer, the pass also supports loop versioning to
20// disambiguate potentially overlapping arrays at run time.
21//
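// As an illustrative sketch (not part of the upstream file), a loop such as
//
//   for (i = 0; i < n; i++) {
//     a[i + 1] = a[i] * b[i];   // dependence cycle on 'a'; not vectorizable
//     d[i] = c[i] * e[i];       // independent; vectorizable
//   }
//
// can, assuming the arrays do not alias (or after inserting run-time checks),
// be distributed into a sequential loop that keeps the cycle and a second
// loop the vectorizer can handle:
//
//   for (i = 0; i < n; i++)
//     a[i + 1] = a[i] * b[i];
//   for (i = 0; i < n; i++)
//     d[i] = c[i] * e[i];
//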
22//===----------------------------------------------------------------------===//
23
25#include "llvm/ADT/DenseMap.h"
28#include "llvm/ADT/STLExtras.h"
29#include "llvm/ADT/SetVector.h"
31#include "llvm/ADT/Statistic.h"
32#include "llvm/ADT/StringRef.h"
33#include "llvm/ADT/Twine.h"
44#include "llvm/IR/BasicBlock.h"
45#include "llvm/IR/Constants.h"
47#include "llvm/IR/Dominators.h"
48#include "llvm/IR/Function.h"
49#include "llvm/IR/Instruction.h"
51#include "llvm/IR/LLVMContext.h"
52#include "llvm/IR/Metadata.h"
53#include "llvm/IR/PassManager.h"
54#include "llvm/IR/Value.h"
57#include "llvm/Support/Debug.h"
65#include <cassert>
66#include <list>
67#include <tuple>
68
69using namespace llvm;
70
71#define LDIST_NAME "loop-distribute"
72#define DEBUG_TYPE LDIST_NAME
73
74/// @{
75/// Metadata attribute names
76static const char *const LLVMLoopDistributeFollowupAll =
77 "llvm.loop.distribute.followup_all";
78static const char *const LLVMLoopDistributeFollowupCoincident =
79 "llvm.loop.distribute.followup_coincident";
80static const char *const LLVMLoopDistributeFollowupSequential =
81 "llvm.loop.distribute.followup_sequential";
82static const char *const LLVMLoopDistributeFollowupFallback =
83 "llvm.loop.distribute.followup_fallback";
84/// @}
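//
// A sketch of how a followup attribute can appear in IR (illustrative shape,
// not taken from this file): the latch branch of the original loop carries
//
//   br i1 %cond, label %header, label %exit, !llvm.loop !0
//   ...
//   !0 = distinct !{!0, !1}
//   !1 = !{!"llvm.loop.distribute.followup_coincident", !2}
//   !2 = !{!"llvm.loop.vectorize.enable", i1 true}
//
// i.e. the attributes listed after the followup name are attached, via
// makeFollowupLoopID, to the corresponding loop produced by distribution.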
85
86static cl::opt<bool>
87 LDistVerify("loop-distribute-verify", cl::Hidden,
88 cl::desc("Turn on DominatorTree and LoopInfo verification "
89 "after Loop Distribution"),
90 cl::init(false));
91
93 "loop-distribute-non-if-convertible", cl::Hidden,
94 cl::desc("Whether to distribute into a loop that may not be "
95 "if-convertible by the loop vectorizer"),
96 cl::init(false));
97
99 "loop-distribute-scev-check-threshold", cl::init(8), cl::Hidden,
100 cl::desc("The maximum number of SCEV checks allowed for Loop "
101 "Distribution"));
102
104 "loop-distribute-scev-check-threshold-with-pragma", cl::init(128),
106 cl::desc("The maximum number of SCEV checks allowed for Loop "
107 "Distribution for loop marked with #pragma clang loop "
108 "distribute(enable)"));
109
111 "enable-loop-distribute", cl::Hidden,
112 cl::desc("Enable the new, experimental LoopDistribution Pass"),
113 cl::init(false));
114
115static const char *DistributedMetaData = "llvm.loop.isdistributed";
116
117STATISTIC(NumLoopsDistributed, "Number of loops distributed");
118
119namespace {
120
121/// Maintains the set of instructions of the loop for a partition before
122/// cloning. After cloning, it hosts the new loop.
123class InstPartition {
124 using InstructionSet = SmallSetVector<Instruction *, 8>;
125
126public:
127 InstPartition(Instruction *I, Loop *L, bool DepCycle = false)
128 : DepCycle(DepCycle), OrigLoop(L) {
129 Set.insert(I);
130 }
131
132 /// Returns whether this partition contains a dependence cycle.
133 bool hasDepCycle() const { return DepCycle; }
134
135 /// Adds an instruction to this partition.
136 void add(Instruction *I) { Set.insert(I); }
137
138 /// Collection accessors.
139 InstructionSet::iterator begin() { return Set.begin(); }
140 InstructionSet::iterator end() { return Set.end(); }
141 InstructionSet::const_iterator begin() const { return Set.begin(); }
142 InstructionSet::const_iterator end() const { return Set.end(); }
143 bool empty() const { return Set.empty(); }
144
145 /// Moves this partition into \p Other. This partition becomes empty
146 /// after this.
147 void moveTo(InstPartition &Other) {
148 Other.Set.insert_range(Set);
149 Set.clear();
150 Other.DepCycle |= DepCycle;
151 }
152
153 /// Populates the partition with a transitive closure of all the
154 /// instructions that the seeded instructions depend on.
155 void populateUsedSet() {
156 // FIXME: We currently don't use control-dependence but simply include all
157 // blocks (possibly empty at the end) and let simplifycfg mostly clean this
158 // up.
159 for (auto *B : OrigLoop->getBlocks())
160 Set.insert(B->getTerminator());
161
162 // Follow the use-def chains to form a transitive closure of all the
163 // instructions that the originally seeded instructions depend on.
164 SmallVector<Instruction *, 8> Worklist(Set.begin(), Set.end());
165 while (!Worklist.empty()) {
166 Instruction *I = Worklist.pop_back_val();
167 // Insert instructions from the loop that we depend on.
168 for (Value *V : I->operand_values()) {
169 auto *I = dyn_cast<Instruction>(V);
170 if (I && OrigLoop->contains(I->getParent()) && Set.insert(I))
171 Worklist.push_back(I);
172 }
173 }
174 }
175
176 /// Clones the original loop.
177 ///
178 /// Updates LoopInfo and DominatorTree using the information that block \p
179 /// LoopDomBB dominates the loop.
180 Loop *cloneLoopWithPreheader(BasicBlock *InsertBefore, BasicBlock *LoopDomBB,
181 unsigned Index, LoopInfo *LI,
182 DominatorTree *DT) {
183 ClonedLoop = ::cloneLoopWithPreheader(InsertBefore, LoopDomBB, OrigLoop,
184 VMap, Twine(".ldist") + Twine(Index),
185 LI, DT, ClonedLoopBlocks);
186 return ClonedLoop;
187 }
188
189 /// The cloned loop. If this partition is mapped to the original loop,
190 /// this is null.
191 const Loop *getClonedLoop() const { return ClonedLoop; }
192
193 /// Returns the loop where this partition ends up after distribution.
194 /// If this partition is mapped to the original loop then use the block from
195 /// the loop.
196 Loop *getDistributedLoop() const {
197 return ClonedLoop ? ClonedLoop : OrigLoop;
198 }
199
200 /// The VMap that is populated by cloning and then used in
201 /// remapInstructions() to remap the cloned instructions.
202 ValueToValueMapTy &getVMap() { return VMap; }
203
204 /// Remaps the cloned instructions using VMap.
205 void remapInstructions() {
206 remapInstructionsInBlocks(ClonedLoopBlocks, VMap);
207 }
208
209 /// Based on the set of instructions selected for this partition,
210 /// removes the unnecessary ones.
211 void removeUnusedInsts() {
212 SmallVector<Instruction *, 8> Unused;
213
214 for (auto *Block : OrigLoop->getBlocks())
215 for (auto &Inst : *Block)
216 if (!Set.count(&Inst)) {
217 Instruction *NewInst = &Inst;
218 if (!VMap.empty())
219 NewInst = cast<Instruction>(VMap[NewInst]);
220
221 assert(!isa<BranchInst>(NewInst) &&
222 "Branches are marked used early on");
223 Unused.push_back(NewInst);
224 }
225
226 // Delete the instructions backwards, as it has a reduced likelihood of
227 // having to update as many def-use and use-def chains.
228 for (auto *Inst : reverse(Unused)) {
229 salvageDebugInfo(*Inst);
230 if (!Inst->use_empty())
231 Inst->replaceAllUsesWith(PoisonValue::get(Inst->getType()));
232 Inst->eraseFromParent();
233 }
234 }
235
236 void print(raw_ostream &OS) const {
237 OS << (DepCycle ? " (cycle)\n" : "\n");
238 for (auto *I : Set)
239 // Prefix with the block name.
240 OS << " " << I->getParent()->getName() << ":" << *I << "\n";
241 }
242
243 void printBlocks(raw_ostream &OS) const {
244 for (auto *BB : getDistributedLoop()->getBlocks())
245 OS << *BB;
246 }
247
248private:
249 /// Instructions from OrigLoop selected for this partition.
250 InstructionSet Set;
251
252 /// Whether this partition contains a dependence cycle.
253 bool DepCycle;
254
255 /// The original loop.
256 Loop *OrigLoop;
257
258 /// The cloned loop. If this partition is mapped to the original loop,
259 /// this is null.
260 Loop *ClonedLoop = nullptr;
261
262 /// The blocks of ClonedLoop including the preheader. If this
263 /// partition is mapped to the original loop, this is empty.
264 SmallVector<BasicBlock *, 8> ClonedLoopBlocks;
265
266 /// This gets populated once the set of instructions has been
267 /// finalized. If this partition is mapped to the original loop, this is not
268 /// set.
269 ValueToValueMapTy VMap;
270};
271
272/// Holds the set of Partitions. It populates them, merges them and then
273/// clones the loops.
274class InstPartitionContainer {
275 using InstToPartitionIdT = DenseMap<Instruction *, int>;
276
277public:
278 InstPartitionContainer(Loop *L, LoopInfo *LI, DominatorTree *DT)
279 : L(L), LI(LI), DT(DT) {}
280
281 /// Returns the number of partitions.
282 unsigned getSize() const { return PartitionContainer.size(); }
283
284 /// Adds \p Inst into the current partition if that is marked to
285 /// contain cycles. Otherwise start a new partition for it.
286 void addToCyclicPartition(Instruction *Inst) {
287 // If the current partition is non-cyclic, start a new one.
288 if (PartitionContainer.empty() || !PartitionContainer.back().hasDepCycle())
289 PartitionContainer.emplace_back(Inst, L, /*DepCycle=*/true);
290 else
291 PartitionContainer.back().add(Inst);
292 }
293
294 /// Adds \p Inst into a partition that is not marked to contain
295 /// dependence cycles.
296 ///
297 // Initially we isolate memory instructions into as many partitions as
298 // possible, then later we may merge them back together.
299 void addToNewNonCyclicPartition(Instruction *Inst) {
300 PartitionContainer.emplace_back(Inst, L);
301 }
302
303 /// Merges adjacent non-cyclic partitions.
304 ///
305 /// The idea is that we currently only want to isolate the non-vectorizable
306 /// partition. We could later allow more distribution among these partitions
307 /// too.
308 void mergeAdjacentNonCyclic() {
309 mergeAdjacentPartitionsIf(
310 [](const InstPartition *P) { return !P->hasDepCycle(); });
311 }
312
313 /// If a partition contains only conditional stores, we won't vectorize
314 /// it. Try to merge it with a previous cyclic partition.
315 void mergeNonIfConvertible() {
316 mergeAdjacentPartitionsIf([&](const InstPartition *Partition) {
317 if (Partition->hasDepCycle())
318 return true;
319
320 // Now, check if all stores are conditional in this partition.
321 bool seenStore = false;
322
323 for (auto *Inst : *Partition)
324 if (isa<StoreInst>(Inst)) {
325 seenStore = true;
326 if (!LoopAccessInfo::blockNeedsPredication(Inst->getParent(), L, DT))
327 return false;
328 }
329 return seenStore;
330 });
331 }
332
333 /// Merges the partitions according to various heuristics.
334 void mergeBeforePopulating() {
335 mergeAdjacentNonCyclic();
336 if (!DistributeNonIfConvertible)
337 mergeNonIfConvertible();
338 }
339
340 /// Merges partitions in order to ensure that no loads are duplicated.
341 ///
342 /// We can't duplicate loads because that could potentially reorder them.
343 /// LoopAccessAnalysis provides dependency information with the context that
344 /// the order of memory operations is preserved.
345 ///
346 /// Returns true if any partitions were merged.
347 bool mergeToAvoidDuplicatedLoads() {
348 using LoadToPartitionT = DenseMap<Instruction *, InstPartition *>;
349 using ToBeMergedT = EquivalenceClasses<InstPartition *>;
350
351 LoadToPartitionT LoadToPartition;
352 ToBeMergedT ToBeMerged;
353
354 // Step through the partitions and create equivalence between partitions
355 // that contain the same load. Also put partitions in between them in the
356 // same equivalence class to avoid reordering of memory operations.
357 for (PartitionContainerT::iterator I = PartitionContainer.begin(),
358 E = PartitionContainer.end();
359 I != E; ++I) {
360 auto *PartI = &*I;
361
362 // If a load occurs in two partitions PartI and PartJ, merge all
363 // partitions (PartI, PartJ] into PartI.
364 for (Instruction *Inst : *PartI)
365 if (isa<LoadInst>(Inst)) {
366 bool NewElt;
367 LoadToPartitionT::iterator LoadToPart;
368
369 std::tie(LoadToPart, NewElt) =
370 LoadToPartition.insert(std::make_pair(Inst, PartI));
371 if (!NewElt) {
372 LLVM_DEBUG(
373 dbgs()
374 << "LDist: Merging partitions due to this load in multiple "
375 << "partitions: " << PartI << ", " << LoadToPart->second << "\n"
376 << *Inst << "\n");
377
378 auto PartJ = I;
379 do {
380 --PartJ;
381 ToBeMerged.unionSets(PartI, &*PartJ);
382 } while (&*PartJ != LoadToPart->second);
383 }
384 }
385 }
386 if (ToBeMerged.empty())
387 return false;
388
389 // Merge the members of an equivalence class into its class leader. This
390 // makes the members empty.
391 for (const auto &C : ToBeMerged) {
392 if (!C->isLeader())
393 continue;
394
395 auto PartI = C->getData();
396 for (auto *PartJ : make_range(std::next(ToBeMerged.member_begin(*C)),
397 ToBeMerged.member_end())) {
398 PartJ->moveTo(*PartI);
399 }
400 }
401
402 // Remove the empty partitions.
403 PartitionContainer.remove_if(
404 [](const InstPartition &P) { return P.empty(); });
405
406 return true;
407 }
408
409 /// Sets up the mapping between instructions to partitions. If the
410 /// instruction is duplicated across multiple partitions, set the entry to -1.
411 void setupPartitionIdOnInstructions() {
412 int PartitionID = 0;
413 for (const auto &Partition : PartitionContainer) {
414 for (Instruction *Inst : Partition) {
415 bool NewElt;
416 InstToPartitionIdT::iterator Iter;
417
418 std::tie(Iter, NewElt) =
419 InstToPartitionId.insert(std::make_pair(Inst, PartitionID));
420 if (!NewElt)
421 Iter->second = -1;
422 }
423 ++PartitionID;
424 }
425 }
426
427 /// Populates the partition with everything that the seeding
428 /// instructions require.
429 void populateUsedSet() {
430 for (auto &P : PartitionContainer)
431 P.populateUsedSet();
432 }
433
434 /// This performs the main chunk of the work of cloning the loops for
435 /// the partitions.
436 void cloneLoops() {
437 BasicBlock *OrigPH = L->getLoopPreheader();
438 // At this point the predecessor of the preheader is either the memcheck
439 // block or the top part of the original preheader.
440 BasicBlock *Pred = OrigPH->getSinglePredecessor();
441 assert(Pred && "Preheader does not have a single predecessor");
442 BasicBlock *ExitBlock = L->getExitBlock();
443 assert(ExitBlock && "No single exit block");
444 Loop *NewLoop;
445
446 assert(!PartitionContainer.empty() && "at least two partitions expected");
447 // We're cloning the preheader along with the loop so we already made sure
448 // it was empty.
449 assert(&*OrigPH->begin() == OrigPH->getTerminator() &&
450 "preheader not empty");
451
452 // Preserve the original loop ID for use after the transformation.
453 MDNode *OrigLoopID = L->getLoopID();
454
455 // Create a loop for each partition except the last. Clone the original
456 // loop before PH along with adding a preheader for the cloned loop. Then
457 // update PH to point to the newly added preheader.
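    // With two partitions, for example, the resulting layout is roughly
    // (illustrative sketch; block names are made up):
    //
    //   Pred -> <cloned preheader> -> <cloned loop, partition 0>
    //        -> PH                 -> <original loop, partition 1> -> ExitBlock
    //
    // i.e. each cloned loop's exit is remapped to the next loop's preheader.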
458 BasicBlock *TopPH = OrigPH;
459 unsigned Index = getSize() - 1;
460 for (auto &Part : llvm::drop_begin(llvm::reverse(PartitionContainer))) {
461 NewLoop = Part.cloneLoopWithPreheader(TopPH, Pred, Index, LI, DT);
462
463 Part.getVMap()[ExitBlock] = TopPH;
464 Part.remapInstructions();
465 setNewLoopID(OrigLoopID, &Part);
466 --Index;
467 TopPH = NewLoop->getLoopPreheader();
468 }
469 Pred->getTerminator()->replaceUsesOfWith(OrigPH, TopPH);
470
471 // Also set a new loop ID for the last loop.
472 setNewLoopID(OrigLoopID, &PartitionContainer.back());
473
474 // Now go in forward order and update the immediate dominator for the
475 // preheaders with the exiting block of the previous loop. Dominance
476 // within the loop is updated in cloneLoopWithPreheader.
477 for (auto Curr = PartitionContainer.cbegin(),
478 Next = std::next(PartitionContainer.cbegin()),
479 E = PartitionContainer.cend();
480 Next != E; ++Curr, ++Next)
481 DT->changeImmediateDominator(
482 Next->getDistributedLoop()->getLoopPreheader(),
483 Curr->getDistributedLoop()->getExitingBlock());
484 }
485
486 /// Removes the dead instructions from the cloned loops.
487 void removeUnusedInsts() {
488 for (auto &Partition : PartitionContainer)
489 Partition.removeUnusedInsts();
490 }
491
492 /// For each memory pointer, it computes the partitionId the pointer is
493 /// used in.
494 ///
495 /// This returns an array of int where the I-th entry corresponds to the I-th
496 /// entry in LAI.getRuntimePointerChecking(). If the pointer is used in multiple
497 /// partitions its entry is set to -1.
498 SmallVector<int, 8>
499 computePartitionSetForPointers(const LoopAccessInfo &LAI) {
500 const RuntimePointerChecking *RtPtrCheck = LAI.getRuntimePointerChecking();
501
502 unsigned N = RtPtrCheck->Pointers.size();
503 SmallVector<int, 8> PtrToPartitions(N);
504 for (unsigned I = 0; I < N; ++I) {
505 Value *Ptr = RtPtrCheck->Pointers[I].PointerValue;
506 auto Instructions = LAI.getInstructionsForAccess(Ptr, /* IsWrite */ true);
507 auto ReadInstructions =
508 LAI.getInstructionsForAccess(Ptr, /* IsWrite */ false);
509 Instructions.append(ReadInstructions.begin(), ReadInstructions.end());
510
511 int &Partition = PtrToPartitions[I];
512 // First set it to uninitialized.
513 Partition = -2;
514 for (Instruction *Inst : Instructions) {
515 // Note that this could be -1 if Inst is duplicated across multiple
516 // partitions.
517 int ThisPartition = this->InstToPartitionId[Inst];
518 if (Partition == -2)
519 Partition = ThisPartition;
520 // -1 means belonging to multiple partitions.
521 else if (Partition == -1)
522 break;
523 else if (Partition != ThisPartition)
524 Partition = -1;
525 }
526 assert(Partition != -2 && "Pointer not belonging to any partition");
527 }
528
529 return PtrToPartitions;
530 }
531
532 void print(raw_ostream &OS) const {
533 unsigned Index = 0;
534 for (const auto &P : PartitionContainer) {
535 OS << "LDist: Partition " << Index++ << ":";
536 P.print(OS);
537 }
538 }
539
540 void dump() const { print(dbgs()); }
541
542#ifndef NDEBUG
543 friend raw_ostream &operator<<(raw_ostream &OS,
544 const InstPartitionContainer &Partitions) {
545 Partitions.print(OS);
546 return OS;
547 }
548#endif
549
550 void printBlocks(raw_ostream &OS) const {
551 unsigned Index = 0;
552 for (const auto &P : PartitionContainer) {
553 OS << "LDist: Partition " << Index++ << ":";
554 P.printBlocks(OS);
555 }
556 }
557
558private:
559 using PartitionContainerT = std::list<InstPartition>;
560
561 /// List of partitions.
562 PartitionContainerT PartitionContainer;
563
564 /// Mapping from Instruction to partition Id. If the instruction
565 /// belongs to multiple partitions the entry contains -1.
566 InstToPartitionIdT InstToPartitionId;
567
568 Loop *L;
569 LoopInfo *LI;
570 DominatorTree *DT;
571
572 /// The control structure to merge adjacent partitions if both satisfy
573 /// the \p Predicate.
574 template <class UnaryPredicate>
575 void mergeAdjacentPartitionsIf(UnaryPredicate Predicate) {
576 InstPartition *PrevMatch = nullptr;
577 for (auto I = PartitionContainer.begin(); I != PartitionContainer.end();) {
578 auto DoesMatch = Predicate(&*I);
579 if (PrevMatch == nullptr && DoesMatch) {
580 PrevMatch = &*I;
581 ++I;
582 } else if (PrevMatch != nullptr && DoesMatch) {
583 I->moveTo(*PrevMatch);
584 I = PartitionContainer.erase(I);
585 } else {
586 PrevMatch = nullptr;
587 ++I;
588 }
589 }
590 }
591
592 /// Assign new LoopIDs for the partition's cloned loop.
593 void setNewLoopID(MDNode *OrigLoopID, InstPartition *Part) {
594 std::optional<MDNode *> PartitionID = makeFollowupLoopID(
595 OrigLoopID,
596 {LLVMLoopDistributeFollowupAll,
597 Part->hasDepCycle() ? LLVMLoopDistributeFollowupSequential
598 : LLVMLoopDistributeFollowupCoincident});
599 if (PartitionID) {
600 Loop *NewLoop = Part->getDistributedLoop();
601 NewLoop->setLoopID(*PartitionID);
602 }
603 }
604};
605
606/// For each memory instruction, this class maintains the difference between
607/// the number of unsafe dependences that start at this instruction and the
608/// number that end here.
609///
610/// By traversing the memory instructions in program order and accumulating this
611/// number, we know whether any unsafe dependence crosses over a program point.
612class MemoryInstructionDependences {
613 using Dependence = MemoryDepChecker::Dependence;
614
615public:
616 struct Entry {
617 Instruction *Inst;
618 unsigned NumUnsafeDependencesStartOrEnd = 0;
619
620 Entry(Instruction *Inst) : Inst(Inst) {}
621 };
622
623 using AccessesType = SmallVector<Entry, 8>;
624
625 AccessesType::const_iterator begin() const { return Accesses.begin(); }
626 AccessesType::const_iterator end() const { return Accesses.end(); }
627
628 MemoryInstructionDependences(
629 const SmallVectorImpl<Instruction *> &Instructions,
630 const SmallVectorImpl<Dependence> &Dependences) {
631 Accesses.append(Instructions.begin(), Instructions.end());
632
633 LLVM_DEBUG(dbgs() << "LDist: Backward dependences:\n");
634 for (const auto &Dep : Dependences)
635 if (Dep.isPossiblyBackward()) {
636 // Note that the designations source and destination follow the program
637 // order, i.e. source is always first. (The direction is given by the
638 // DepType.)
639 ++Accesses[Dep.Source].NumUnsafeDependencesStartOrEnd;
640 --Accesses[Dep.Destination].NumUnsafeDependencesStartOrEnd;
641
642 LLVM_DEBUG(Dep.print(dbgs(), 2, Instructions));
643 }
644 }
645
646private:
647 AccessesType Accesses;
648};
649
650/// The actual class performing the per-loop work.
651class LoopDistributeForLoop {
652public:
653 LoopDistributeForLoop(Loop *L, Function *F, LoopInfo *LI, DominatorTree *DT,
654 ScalarEvolution *SE, LoopAccessInfoManager &LAIs,
655 OptimizationRemarkEmitter *ORE)
656 : L(L), F(F), LI(LI), DT(DT), SE(SE), LAIs(LAIs), ORE(ORE) {
657 setForced();
658 }
659
660 /// Try to distribute an inner-most loop.
661 bool processLoop() {
662 assert(L->isInnermost() && "Only process inner loops.");
663
664 LLVM_DEBUG(dbgs() << "\nLDist: Checking a loop in '"
665 << L->getHeader()->getParent()->getName() << "' from "
666 << L->getLocStr() << "\n");
667
668 // Having a single exit block implies there's also one exiting block.
669 if (!L->getExitBlock())
670 return fail("MultipleExitBlocks", "multiple exit blocks");
671 if (!L->isLoopSimplifyForm())
672 return fail("NotLoopSimplifyForm",
673 "loop is not in loop-simplify form");
674 if (!L->isRotatedForm())
675 return fail("NotBottomTested", "loop is not bottom tested");
676
677 BasicBlock *PH = L->getLoopPreheader();
678
679 LAI = &LAIs.getInfo(*L);
680
681 // Currently, we only distribute to isolate the part of the loop with
682 // dependence cycles to enable partial vectorization.
683 if (LAI->canVectorizeMemory())
684 return fail("MemOpsCanBeVectorized",
685 "memory operations are safe for vectorization");
686
687 auto *Dependences = LAI->getDepChecker().getDependences();
688 if (!Dependences || Dependences->empty())
689 return fail("NoUnsafeDeps", "no unsafe dependences to isolate");
690
691 LLVM_DEBUG(dbgs() << "LDist: Found a candidate loop: "
692 << L->getHeader()->getName() << "\n");
693
694 InstPartitionContainer Partitions(L, LI, DT);
695
696 // First, go through each memory operation and assign them to consecutive
697 // partitions (the order of partitions follows program order). Put those
698 // with unsafe dependences into a "cyclic" partition, otherwise put each store
699 // in its own "non-cyclic" partition (we'll merge these later).
700 //
701 // Note that a memory operation (e.g. Load2 below) at a program point that
702 // has an unsafe dependence (Store3->Load1) spanning over it must be
703 // included in the same cyclic partition as the dependent operations. This
704 // is to preserve the original program order after distribution. E.g.:
705 //
706 // NumUnsafeDependencesStartOrEnd NumUnsafeDependencesActive
707 // Load1 -. 1 0->1
708 // Load2 | /Unsafe/ 0 1
709 // Store3 -' -1 1->0
710 // Load4 0 0
711 //
712 // NumUnsafeDependencesActive > 0 indicates this situation and in this case
713 // we just keep assigning to the same cyclic partition until
714 // NumUnsafeDependencesActive reaches 0.
715 const MemoryDepChecker &DepChecker = LAI->getDepChecker();
716 MemoryInstructionDependences MID(DepChecker.getMemoryInstructions(),
717 *Dependences);
718
719 int NumUnsafeDependencesActive = 0;
720 for (const auto &InstDep : MID) {
721 Instruction *I = InstDep.Inst;
722 // We update NumUnsafeDependencesActive post-instruction, so catch the
723 // start of a dependence directly via NumUnsafeDependencesStartOrEnd.
724 if (NumUnsafeDependencesActive ||
725 InstDep.NumUnsafeDependencesStartOrEnd > 0)
726 Partitions.addToCyclicPartition(I);
727 else
728 Partitions.addToNewNonCyclicPartition(I);
729 NumUnsafeDependencesActive += InstDep.NumUnsafeDependencesStartOrEnd;
730 assert(NumUnsafeDependencesActive >= 0 &&
731 "Negative number of dependences active");
732 }
733
734 // Add partitions for values used outside. These partitions can be out of
735 // order from the original program order. This is OK because if the
736 // partition uses a load we will merge this partition with the original
737 // partition of the load that we set up in the previous loop (see
738 // mergeToAvoidDuplicatedLoads).
739 auto DefsUsedOutside = findDefsUsedOutsideOfLoop(L);
740 for (auto *Inst : DefsUsedOutside)
741 Partitions.addToNewNonCyclicPartition(Inst);
742
743 LLVM_DEBUG(dbgs() << "LDist: Seeded partitions:\n" << Partitions);
744 if (Partitions.getSize() < 2)
745 return fail("CantIsolateUnsafeDeps",
746 "cannot isolate unsafe dependencies");
747
748 // Run the merge heuristics: Merge non-cyclic adjacent partitions since we
749 // should be able to vectorize these together.
750 Partitions.mergeBeforePopulating();
751 LLVM_DEBUG(dbgs() << "LDist: Merged partitions:\n" << Partitions);
752 if (Partitions.getSize() < 2)
753 return fail("CantIsolateUnsafeDeps",
754 "cannot isolate unsafe dependencies");
755
756 // Now, populate the partitions with non-memory operations.
757 Partitions.populateUsedSet();
758 LLVM_DEBUG(dbgs() << "LDist: Populated partitions:\n" << Partitions);
759
760 // In order to preserve original lexical order for loads, keep them in the
761 // partition that we set up in the MemoryInstructionDependences loop.
762 if (Partitions.mergeToAvoidDuplicatedLoads()) {
763 LLVM_DEBUG(dbgs() << "LDist: Partitions merged to ensure unique loads:\n"
764 << Partitions);
765 if (Partitions.getSize() < 2)
766 return fail("CantIsolateUnsafeDeps",
767 "cannot isolate unsafe dependencies");
768 }
769
770 // Don't distribute the loop if we need too many SCEV run-time checks, or
771 // any run-time checks at all if inserting them is illegal.
772 const SCEVPredicate &Pred = LAI->getPSE().getPredicate();
773 if (LAI->hasConvergentOp() && !Pred.isAlwaysTrue()) {
774 return fail("RuntimeCheckWithConvergent",
775 "may not insert runtime check with convergent operation");
776 }
777
778 if (Pred.getComplexity() > (IsForced.value_or(false)
779 ? PragmaDistributeSCEVCheckThreshold
780 : DistributeSCEVCheckThreshold))
781 return fail("TooManySCEVRuntimeChecks",
782 "too many SCEV run-time checks needed.\n");
783
784 if (!IsForced.value_or(false) && hasDisableAllTransformsHint(L))
785 return fail("HeuristicDisabled", "distribution heuristic disabled");
786
787 LLVM_DEBUG(dbgs() << "LDist: Distributing loop: "
788 << L->getHeader()->getName() << "\n");
789 // We're done forming the partitions; set up the reverse mapping from
790 // instructions to partitions.
791 Partitions.setupPartitionIdOnInstructions();
792
793 // If we need run-time checks, version the loop now.
794 auto PtrToPartition = Partitions.computePartitionSetForPointers(*LAI);
795 const auto *RtPtrChecking = LAI->getRuntimePointerChecking();
796 const auto &AllChecks = RtPtrChecking->getChecks();
797 auto Checks = includeOnlyCrossPartitionChecks(AllChecks, PtrToPartition,
798 RtPtrChecking);
799
800 if (LAI->hasConvergentOp() && !Checks.empty()) {
801 return fail("RuntimeCheckWithConvergent",
802 "may not insert runtime check with convergent operation");
803 }
804
805 // To keep things simple, have an empty preheader before we version or clone
806 // the loop. (Also split if this has no predecessor, i.e. entry, because we
807 // rely on PH having a predecessor.)
808 if (!PH->getSinglePredecessor() || &*PH->begin() != PH->getTerminator())
809 SplitBlock(PH, PH->getTerminator(), DT, LI);
810
811 if (!Pred.isAlwaysTrue() || !Checks.empty()) {
812 assert(!LAI->hasConvergentOp() && "inserting illegal loop versioning");
813
814 MDNode *OrigLoopID = L->getLoopID();
815
816 LLVM_DEBUG(dbgs() << "LDist: Pointers:\n");
817 LLVM_DEBUG(LAI->getRuntimePointerChecking()->printChecks(dbgs(), Checks));
818 LoopVersioning LVer(*LAI, Checks, L, LI, DT, SE);
819 LVer.versionLoop(DefsUsedOutside);
820 LVer.annotateLoopWithNoAlias();
821
822 // The unversioned loop will not be changed, so we inherit all attributes
823 // from the original loop, but remove the loop distribution metadata to
824 // avoid distributing it again.
825 MDNode *UnversionedLoopID = *makeFollowupLoopID(
826 OrigLoopID,
827 {LLVMLoopDistributeFollowupAll, LLVMLoopDistributeFollowupFallback},
828 "llvm.loop.distribute.", true);
829 LVer.getNonVersionedLoop()->setLoopID(UnversionedLoopID);
830 addStringMetadataToLoop(LVer.getNonVersionedLoop(), DistributedMetaData,
831 true);
832 }
833
834 // Create identical copies of the original loop for each partition and hook
835 // them up sequentially.
836 Partitions.cloneLoops();
837
838 // Now, we remove the instructions from each loop that don't belong to that
839 // partition.
840 Partitions.removeUnusedInsts();
841 LLVM_DEBUG(dbgs() << "LDist: After removing unused Instrs:\n");
842 LLVM_DEBUG(Partitions.printBlocks(dbgs()));
843
844 if (LDistVerify) {
845 LI->verify(*DT);
846 assert(DT->verify(DominatorTree::VerificationLevel::Fast));
847 }
848
849 ++NumLoopsDistributed;
850 // Report the success.
851 ORE->emit([&]() {
852 return OptimizationRemark(LDIST_NAME, "Distribute", L->getStartLoc(),
853 L->getHeader())
854 << "distributed loop";
855 });
856 return true;
857 }
858
859 /// Provide diagnostics then \return with false.
860 bool fail(StringRef RemarkName, StringRef Message) {
861 LLVMContext &Ctx = F->getContext();
862 bool Forced = isForced().value_or(false);
863
864 LLVM_DEBUG(dbgs() << "LDist: Skipping; " << Message << "\n");
865
866 // With -Rpass-missed, report that distribution failed.
867 ORE->emit([&]() {
868 return OptimizationRemarkMissed(LDIST_NAME, "NotDistributed",
869 L->getStartLoc(), L->getHeader())
870 << "loop not distributed: use -Rpass-analysis=loop-distribute for "
871 "more "
872 "info";
873 });
874
875 // With -Rpass-analysis, report why. This is on by default if distribution
876 // was requested explicitly.
877 ORE->emit(OptimizationRemarkAnalysis(
878 Forced ? OptimizationRemarkAnalysis::AlwaysPrint : LDIST_NAME,
879 RemarkName, L->getStartLoc(), L->getHeader())
880 << "loop not distributed: " << Message);
881
882 // Also issue a warning if distribution was requested explicitly but it
883 // failed.
884 if (Forced)
885 Ctx.diagnose(DiagnosticInfoOptimizationFailure(
886 *F, L->getStartLoc(), "loop not distributed: failed "
887 "explicitly specified loop distribution"));
888
889 return false;
890 }
891
892 /// Return whether distribution is forced to be enabled/disabled for the loop.
893 ///
894 /// If the optional has a value, it indicates whether distribution was forced
895 /// to be enabled (true) or disabled (false). If the optional has no value
896 /// distribution was not forced either way.
897 const std::optional<bool> &isForced() const { return IsForced; }
898
899private:
900 /// Filter out checks between pointers from the same partition.
901 ///
902 /// \p PtrToPartition contains the partition number for pointers. Partition
903 /// number -1 means that the pointer is used in multiple partitions. In this
904 /// case we can't safely omit the check.
905 SmallVector<RuntimePointerCheck, 4> includeOnlyCrossPartitionChecks(
906 const SmallVectorImpl<RuntimePointerCheck> &AllChecks,
907 const SmallVectorImpl<int> &PtrToPartition,
908 const RuntimePointerChecking *RtPtrChecking) {
909 SmallVector<RuntimePointerCheck, 4> Checks;
910
911 copy_if(AllChecks, std::back_inserter(Checks),
912 [&](const RuntimePointerCheck &Check) {
913 for (unsigned PtrIdx1 : Check.first->Members)
914 for (unsigned PtrIdx2 : Check.second->Members)
915 // Only include this check if there is a pair of pointers
916 // that require checking and the pointers fall into
917 // separate partitions.
918 //
919 // (Note that we already know at this point that the two
920 // pointer groups need checking but it doesn't follow
921 // that each pair of pointers within the two groups need
922 // checking as well.
923 //
924 // In other words we don't want to include a check just
925 // because there is a pair of pointers between the two
926 // pointer groups that require checks and a different
927 // pair whose pointers fall into different partitions.)
928 if (RtPtrChecking->needsChecking(PtrIdx1, PtrIdx2) &&
929 !RuntimePointerChecking::arePointersInSamePartition(
930 PtrToPartition, PtrIdx1, PtrIdx2))
931 return true;
932 return false;
933 });
934
935 return Checks;
936 }
937
938 /// Check whether the loop metadata is forcing distribution to be
939 /// enabled/disabled.
940 void setForced() {
941 std::optional<const MDOperand *> Value =
942 findStringMetadataForLoop(L, "llvm.loop.distribute.enable");
943 if (!Value)
944 return;
945
946 const MDOperand *Op = *Value;
947 assert(Op && mdconst::hasa<ConstantInt>(*Op) && "invalid metadata");
948 IsForced = mdconst::extract<ConstantInt>(*Op)->getZExtValue();
949 }
950
951 Loop *L;
952 Function *F;
953
954 // Analyses used.
955 LoopInfo *LI;
956 const LoopAccessInfo *LAI = nullptr;
957 DominatorTree *DT;
958 ScalarEvolution *SE;
959 LoopAccessInfoManager &LAIs;
960 OptimizationRemarkEmitter *ORE;
961
962 /// Indicates whether distribution is forced to be enabled/disabled for
963 /// the loop.
964 ///
965 /// If the optional has a value, it indicates whether distribution was forced
966 /// to be enabled (true) or disabled (false). If the optional has no value
967 /// distribution was not forced either way.
968 std::optional<bool> IsForced;
969};
970
971} // end anonymous namespace
972
973static bool runImpl(Function &F, LoopInfo *LI, DominatorTree *DT,
974 ScalarEvolution *SE, OptimizationRemarkEmitter *ORE,
975 LoopAccessInfoManager &LAIs) {
976 // Build up a worklist of inner-loops to distribute. This is necessary as the
977 // act of distributing a loop creates new loops and can invalidate iterators
978 // across the loops.
979 SmallVector<Loop *, 8> Worklist;
980
981 for (Loop *TopLevelLoop : *LI)
982 for (Loop *L : depth_first(TopLevelLoop))
983 // We only handle inner-most loops.
984 if (L->isInnermost())
985 Worklist.push_back(L);
986
987 // Now walk the identified inner loops.
988 bool Changed = false;
989 for (Loop *L : Worklist) {
990 LoopDistributeForLoop LDL(L, &F, LI, DT, SE, LAIs, ORE);
991
992 // Do not reprocess loops we already distributed
993 if (getOptionalBoolLoopAttribute(L, DistributedMetaData).value_or(false)) {
995 dbgs() << "LDist: Distributed loop guarded for reprocessing\n");
996 continue;
997 }
998
999 // If distribution was forced for the specific loop to be
1000 // enabled/disabled, follow that. Otherwise use the global flag.
1001 if (LDL.isForced().value_or(EnableLoopDistribute))
1002 Changed |= LDL.processLoop();
1003 }
1004
1005 // Process each loop nest in the function.
1006 return Changed;
1007}
1008
1009PreservedAnalyses LoopDistributePass::run(Function &F,
1010 FunctionAnalysisManager &AM) {
1011 auto &LI = AM.getResult<LoopAnalysis>(F);
1012 auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
1013 auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
1014 auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
1015
1016 LoopAccessInfoManager &LAIs = AM.getResult<LoopAccessAnalysis>(F);
1017 bool Changed = runImpl(F, &LI, &DT, &SE, &ORE, LAIs);
1018 if (!Changed)
1019 return PreservedAnalyses::all();
1020 PreservedAnalyses PA;
1021 PA.preserve<LoopAnalysis>();
1022 PA.preserve<DominatorTreeAnalysis>();
1023 return PA;
1024}
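
// A quick way to exercise this pass in isolation (illustrative invocation;
// adjust paths to your build):
//
//   opt -passes=loop-distribute -enable-loop-distribute -S in.ll -o out.ll
//
// Loops carrying "llvm.loop.distribute.enable" metadata are processed even
// without -enable-loop-distribute, per the isForced() check in runImpl above.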