LLVM 18.0.0git
LoopUtils.cpp
Go to the documentation of this file.
1//===-- LoopUtils.cpp - Loop Utility functions -------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines common loop utility functions.
10//
11//===----------------------------------------------------------------------===//
12
14#include "llvm/ADT/DenseSet.h"
16#include "llvm/ADT/ScopeExit.h"
17#include "llvm/ADT/SetVector.h"
33#include "llvm/IR/DIBuilder.h"
34#include "llvm/IR/Dominators.h"
37#include "llvm/IR/MDBuilder.h"
38#include "llvm/IR/Module.h"
41#include "llvm/IR/ValueHandle.h"
43#include "llvm/Pass.h"
44#include "llvm/Support/Debug.h"
48
49using namespace llvm;
50using namespace llvm::PatternMatch;
51
52#define DEBUG_TYPE "loop-utils"
53
54static const char *LLVMLoopDisableNonforced = "llvm.loop.disable_nonforced";
55static const char *LLVMLoopDisableLICM = "llvm.licm.disable";
56
58 MemorySSAUpdater *MSSAU,
59 bool PreserveLCSSA) {
60 bool Changed = false;
61
62 // We re-use a vector for the in-loop predecesosrs.
63 SmallVector<BasicBlock *, 4> InLoopPredecessors;
64
65 auto RewriteExit = [&](BasicBlock *BB) {
66 assert(InLoopPredecessors.empty() &&
67 "Must start with an empty predecessors list!");
68 auto Cleanup = make_scope_exit([&] { InLoopPredecessors.clear(); });
69
70 // See if there are any non-loop predecessors of this exit block and
71 // keep track of the in-loop predecessors.
72 bool IsDedicatedExit = true;
73 for (auto *PredBB : predecessors(BB))
74 if (L->contains(PredBB)) {
75 if (isa<IndirectBrInst>(PredBB->getTerminator()))
76 // We cannot rewrite exiting edges from an indirectbr.
77 return false;
78
79 InLoopPredecessors.push_back(PredBB);
80 } else {
81 IsDedicatedExit = false;
82 }
83
84 assert(!InLoopPredecessors.empty() && "Must have *some* loop predecessor!");
85
86 // Nothing to do if this is already a dedicated exit.
87 if (IsDedicatedExit)
88 return false;
89
90 auto *NewExitBB = SplitBlockPredecessors(
91 BB, InLoopPredecessors, ".loopexit", DT, LI, MSSAU, PreserveLCSSA);
92
93 if (!NewExitBB)
95 dbgs() << "WARNING: Can't create a dedicated exit block for loop: "
96 << *L << "\n");
97 else
98 LLVM_DEBUG(dbgs() << "LoopSimplify: Creating dedicated exit block "
99 << NewExitBB->getName() << "\n");
100 return true;
101 };
102
103 // Walk the exit blocks directly rather than building up a data structure for
104 // them, but only visit each one once.
106 for (auto *BB : L->blocks())
107 for (auto *SuccBB : successors(BB)) {
108 // We're looking for exit blocks so skip in-loop successors.
109 if (L->contains(SuccBB))
110 continue;
111
112 // Visit each exit block exactly once.
113 if (!Visited.insert(SuccBB).second)
114 continue;
115
116 Changed |= RewriteExit(SuccBB);
117 }
118
119 return Changed;
120}
121
122/// Returns the instructions that use values defined in the loop.
125
126 for (auto *Block : L->getBlocks())
127 // FIXME: I believe that this could use copy_if if the Inst reference could
128 // be adapted into a pointer.
129 for (auto &Inst : *Block) {
130 auto Users = Inst.users();
131 if (any_of(Users, [&](User *U) {
132 auto *Use = cast<Instruction>(U);
133 return !L->contains(Use->getParent());
134 }))
135 UsedOutside.push_back(&Inst);
136 }
137
138 return UsedOutside;
139}
140
142 // By definition, all loop passes need the LoopInfo analysis and the
143 // Dominator tree it depends on. Because they all participate in the loop
144 // pass manager, they must also preserve these.
149
150 // We must also preserve LoopSimplify and LCSSA. We locally access their IDs
151 // here because users shouldn't directly get them from this header.
152 extern char &LoopSimplifyID;
153 extern char &LCSSAID;
158 // This is used in the LPPassManager to perform LCSSA verification on passes
159 // which preserve lcssa form
162
163 // Loop passes are designed to run inside of a loop pass manager which means
164 // that any function analyses they require must be required by the first loop
165 // pass in the manager (so that it is computed before the loop pass manager
166 // runs) and preserved by all loop pasess in the manager. To make this
167 // reasonably robust, the set needed for most loop passes is maintained here.
168 // If your loop pass requires an analysis not listed here, you will need to
169 // carefully audit the loop pass manager nesting structure that results.
177 // FIXME: When all loop passes preserve MemorySSA, it can be required and
178 // preserved here instead of the individual handling in each pass.
179}
180
181/// Manually defined generic "LoopPass" dependency initialization. This is used
182/// to initialize the exact set of passes from above in \c
183/// getLoopAnalysisUsage. It can be used within a loop pass's initialization
184/// with:
185///
186/// INITIALIZE_PASS_DEPENDENCY(LoopPass)
187///
188/// As-if "LoopPass" were a pass.
192 INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
193 INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass)
200}
201
202/// Create MDNode for input string.
203static MDNode *createStringMetadata(Loop *TheLoop, StringRef Name, unsigned V) {
204 LLVMContext &Context = TheLoop->getHeader()->getContext();
205 Metadata *MDs[] = {
208 return MDNode::get(Context, MDs);
209}
210
211/// Set input string into loop metadata by keeping other values intact.
212/// If the string is already in loop metadata update value if it is
213/// different.
214void llvm::addStringMetadataToLoop(Loop *TheLoop, const char *StringMD,
215 unsigned V) {
217 // If the loop already has metadata, retain it.
218 MDNode *LoopID = TheLoop->getLoopID();
219 if (LoopID) {
220 for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
221 MDNode *Node = cast<MDNode>(LoopID->getOperand(i));
222 // If it is of form key = value, try to parse it.
223 if (Node->getNumOperands() == 2) {
224 MDString *S = dyn_cast<MDString>(Node->getOperand(0));
225 if (S && S->getString().equals(StringMD)) {
226 ConstantInt *IntMD =
227 mdconst::extract_or_null<ConstantInt>(Node->getOperand(1));
228 if (IntMD && IntMD->getSExtValue() == V)
229 // It is already in place. Do nothing.
230 return;
231 // We need to update the value, so just skip it here and it will
232 // be added after copying other existed nodes.
233 continue;
234 }
235 }
236 MDs.push_back(Node);
237 }
238 }
239 // Add new metadata.
240 MDs.push_back(createStringMetadata(TheLoop, StringMD, V));
241 // Replace current metadata node with new one.
242 LLVMContext &Context = TheLoop->getHeader()->getContext();
243 MDNode *NewLoopID = MDNode::get(Context, MDs);
244 // Set operand 0 to refer to the loop id itself.
245 NewLoopID->replaceOperandWith(0, NewLoopID);
246 TheLoop->setLoopID(NewLoopID);
247}
248
249std::optional<ElementCount>
251 std::optional<int> Width =
252 getOptionalIntLoopAttribute(TheLoop, "llvm.loop.vectorize.width");
253
254 if (Width) {
255 std::optional<int> IsScalable = getOptionalIntLoopAttribute(
256 TheLoop, "llvm.loop.vectorize.scalable.enable");
257 return ElementCount::get(*Width, IsScalable.value_or(false));
258 }
259
260 return std::nullopt;
261}
262
263std::optional<MDNode *> llvm::makeFollowupLoopID(
264 MDNode *OrigLoopID, ArrayRef<StringRef> FollowupOptions,
265 const char *InheritOptionsExceptPrefix, bool AlwaysNew) {
266 if (!OrigLoopID) {
267 if (AlwaysNew)
268 return nullptr;
269 return std::nullopt;
270 }
271
272 assert(OrigLoopID->getOperand(0) == OrigLoopID);
273
274 bool InheritAllAttrs = !InheritOptionsExceptPrefix;
275 bool InheritSomeAttrs =
276 InheritOptionsExceptPrefix && InheritOptionsExceptPrefix[0] != '\0';
278 MDs.push_back(nullptr);
279
280 bool Changed = false;
281 if (InheritAllAttrs || InheritSomeAttrs) {
282 for (const MDOperand &Existing : drop_begin(OrigLoopID->operands())) {
283 MDNode *Op = cast<MDNode>(Existing.get());
284
285 auto InheritThisAttribute = [InheritSomeAttrs,
286 InheritOptionsExceptPrefix](MDNode *Op) {
287 if (!InheritSomeAttrs)
288 return false;
289
290 // Skip malformatted attribute metadata nodes.
291 if (Op->getNumOperands() == 0)
292 return true;
293 Metadata *NameMD = Op->getOperand(0).get();
294 if (!isa<MDString>(NameMD))
295 return true;
296 StringRef AttrName = cast<MDString>(NameMD)->getString();
297
298 // Do not inherit excluded attributes.
299 return !AttrName.starts_with(InheritOptionsExceptPrefix);
300 };
301
302 if (InheritThisAttribute(Op))
303 MDs.push_back(Op);
304 else
305 Changed = true;
306 }
307 } else {
308 // Modified if we dropped at least one attribute.
309 Changed = OrigLoopID->getNumOperands() > 1;
310 }
311
312 bool HasAnyFollowup = false;
313 for (StringRef OptionName : FollowupOptions) {
314 MDNode *FollowupNode = findOptionMDForLoopID(OrigLoopID, OptionName);
315 if (!FollowupNode)
316 continue;
317
318 HasAnyFollowup = true;
319 for (const MDOperand &Option : drop_begin(FollowupNode->operands())) {
320 MDs.push_back(Option.get());
321 Changed = true;
322 }
323 }
324
325 // Attributes of the followup loop not specified explicity, so signal to the
326 // transformation pass to add suitable attributes.
327 if (!AlwaysNew && !HasAnyFollowup)
328 return std::nullopt;
329
330 // If no attributes were added or remove, the previous loop Id can be reused.
331 if (!AlwaysNew && !Changed)
332 return OrigLoopID;
333
334 // No attributes is equivalent to having no !llvm.loop metadata at all.
335 if (MDs.size() == 1)
336 return nullptr;
337
338 // Build the new loop ID.
339 MDTuple *FollowupLoopID = MDNode::get(OrigLoopID->getContext(), MDs);
340 FollowupLoopID->replaceOperandWith(0, FollowupLoopID);
341 return FollowupLoopID;
342}
343
346}
347
350}
351
353 if (getBooleanLoopAttribute(L, "llvm.loop.unroll.disable"))
354 return TM_SuppressedByUser;
355
356 std::optional<int> Count =
357 getOptionalIntLoopAttribute(L, "llvm.loop.unroll.count");
358 if (Count)
359 return *Count == 1 ? TM_SuppressedByUser : TM_ForcedByUser;
360
361 if (getBooleanLoopAttribute(L, "llvm.loop.unroll.enable"))
362 return TM_ForcedByUser;
363
364 if (getBooleanLoopAttribute(L, "llvm.loop.unroll.full"))
365 return TM_ForcedByUser;
366
368 return TM_Disable;
369
370 return TM_Unspecified;
371}
372
374 if (getBooleanLoopAttribute(L, "llvm.loop.unroll_and_jam.disable"))
375 return TM_SuppressedByUser;
376
377 std::optional<int> Count =
378 getOptionalIntLoopAttribute(L, "llvm.loop.unroll_and_jam.count");
379 if (Count)
380 return *Count == 1 ? TM_SuppressedByUser : TM_ForcedByUser;
381
382 if (getBooleanLoopAttribute(L, "llvm.loop.unroll_and_jam.enable"))
383 return TM_ForcedByUser;
384
386 return TM_Disable;
387
388 return TM_Unspecified;
389}
390
392 std::optional<bool> Enable =
393 getOptionalBoolLoopAttribute(L, "llvm.loop.vectorize.enable");
394
395 if (Enable == false)
396 return TM_SuppressedByUser;
397
398 std::optional<ElementCount> VectorizeWidth =
400 std::optional<int> InterleaveCount =
401 getOptionalIntLoopAttribute(L, "llvm.loop.interleave.count");
402
403 // 'Forcing' vector width and interleave count to one effectively disables
404 // this tranformation.
405 if (Enable == true && VectorizeWidth && VectorizeWidth->isScalar() &&
406 InterleaveCount == 1)
407 return TM_SuppressedByUser;
408
409 if (getBooleanLoopAttribute(L, "llvm.loop.isvectorized"))
410 return TM_Disable;
411
412 if (Enable == true)
413 return TM_ForcedByUser;
414
415 if ((VectorizeWidth && VectorizeWidth->isScalar()) && InterleaveCount == 1)
416 return TM_Disable;
417
418 if ((VectorizeWidth && VectorizeWidth->isVector()) || InterleaveCount > 1)
419 return TM_Enable;
420
422 return TM_Disable;
423
424 return TM_Unspecified;
425}
426
428 if (getBooleanLoopAttribute(L, "llvm.loop.distribute.enable"))
429 return TM_ForcedByUser;
430
432 return TM_Disable;
433
434 return TM_Unspecified;
435}
436
438 if (getBooleanLoopAttribute(L, "llvm.loop.licm_versioning.disable"))
439 return TM_SuppressedByUser;
440
442 return TM_Disable;
443
444 return TM_Unspecified;
445}
446
447/// Does a BFS from a given node to all of its children inside a given loop.
448/// The returned vector of nodes includes the starting point.
452 auto AddRegionToWorklist = [&](DomTreeNode *DTN) {
453 // Only include subregions in the top level loop.
454 BasicBlock *BB = DTN->getBlock();
455 if (CurLoop->contains(BB))
456 Worklist.push_back(DTN);
457 };
458
459 AddRegionToWorklist(N);
460
461 for (size_t I = 0; I < Worklist.size(); I++) {
462 for (DomTreeNode *Child : Worklist[I]->children())
463 AddRegionToWorklist(Child);
464 }
465
466 return Worklist;
467}
468
470 int LatchIdx = PN->getBasicBlockIndex(LatchBlock);
471 Value *IncV = PN->getIncomingValue(LatchIdx);
472
473 for (User *U : PN->users())
474 if (U != Cond && U != IncV) return false;
475
476 for (User *U : IncV->users())
477 if (U != Cond && U != PN) return false;
478 return true;
479}
480
481
483 LoopInfo *LI, MemorySSA *MSSA) {
484 assert((!DT || L->isLCSSAForm(*DT)) && "Expected LCSSA!");
485 auto *Preheader = L->getLoopPreheader();
486 assert(Preheader && "Preheader should exist!");
487
488 std::unique_ptr<MemorySSAUpdater> MSSAU;
489 if (MSSA)
490 MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
491
492 // Now that we know the removal is safe, remove the loop by changing the
493 // branch from the preheader to go to the single exit block.
494 //
495 // Because we're deleting a large chunk of code at once, the sequence in which
496 // we remove things is very important to avoid invalidation issues.
497
498 // Tell ScalarEvolution that the loop is deleted. Do this before
499 // deleting the loop so that ScalarEvolution can look at the loop
500 // to determine what it needs to clean up.
501 if (SE) {
502 SE->forgetLoop(L);
504 }
505
506 Instruction *OldTerm = Preheader->getTerminator();
507 assert(!OldTerm->mayHaveSideEffects() &&
508 "Preheader must end with a side-effect-free terminator");
509 assert(OldTerm->getNumSuccessors() == 1 &&
510 "Preheader must have a single successor");
511 // Connect the preheader to the exit block. Keep the old edge to the header
512 // around to perform the dominator tree update in two separate steps
513 // -- #1 insertion of the edge preheader -> exit and #2 deletion of the edge
514 // preheader -> header.
515 //
516 //
517 // 0. Preheader 1. Preheader 2. Preheader
518 // | | | |
519 // V | V |
520 // Header <--\ | Header <--\ | Header <--\
521 // | | | | | | | | | | |
522 // | V | | | V | | | V |
523 // | Body --/ | | Body --/ | | Body --/
524 // V V V V V
525 // Exit Exit Exit
526 //
527 // By doing this is two separate steps we can perform the dominator tree
528 // update without using the batch update API.
529 //
530 // Even when the loop is never executed, we cannot remove the edge from the
531 // source block to the exit block. Consider the case where the unexecuted loop
532 // branches back to an outer loop. If we deleted the loop and removed the edge
533 // coming to this inner loop, this will break the outer loop structure (by
534 // deleting the backedge of the outer loop). If the outer loop is indeed a
535 // non-loop, it will be deleted in a future iteration of loop deletion pass.
536 IRBuilder<> Builder(OldTerm);
537
538 auto *ExitBlock = L->getUniqueExitBlock();
539 DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
540 if (ExitBlock) {
541 assert(ExitBlock && "Should have a unique exit block!");
542 assert(L->hasDedicatedExits() && "Loop should have dedicated exits!");
543
544 Builder.CreateCondBr(Builder.getFalse(), L->getHeader(), ExitBlock);
545 // Remove the old branch. The conditional branch becomes a new terminator.
546 OldTerm->eraseFromParent();
547
548 // Rewrite phis in the exit block to get their inputs from the Preheader
549 // instead of the exiting block.
550 for (PHINode &P : ExitBlock->phis()) {
551 // Set the zero'th element of Phi to be from the preheader and remove all
552 // other incoming values. Given the loop has dedicated exits, all other
553 // incoming values must be from the exiting blocks.
554 int PredIndex = 0;
555 P.setIncomingBlock(PredIndex, Preheader);
556 // Removes all incoming values from all other exiting blocks (including
557 // duplicate values from an exiting block).
558 // Nuke all entries except the zero'th entry which is the preheader entry.
559 P.removeIncomingValueIf([](unsigned Idx) { return Idx != 0; },
560 /* DeletePHIIfEmpty */ false);
561
562 assert((P.getNumIncomingValues() == 1 &&
563 P.getIncomingBlock(PredIndex) == Preheader) &&
564 "Should have exactly one value and that's from the preheader!");
565 }
566
567 if (DT) {
568 DTU.applyUpdates({{DominatorTree::Insert, Preheader, ExitBlock}});
569 if (MSSA) {
570 MSSAU->applyUpdates({{DominatorTree::Insert, Preheader, ExitBlock}},
571 *DT);
572 if (VerifyMemorySSA)
573 MSSA->verifyMemorySSA();
574 }
575 }
576
577 // Disconnect the loop body by branching directly to its exit.
578 Builder.SetInsertPoint(Preheader->getTerminator());
579 Builder.CreateBr(ExitBlock);
580 // Remove the old branch.
581 Preheader->getTerminator()->eraseFromParent();
582 } else {
583 assert(L->hasNoExitBlocks() &&
584 "Loop should have either zero or one exit blocks.");
585
586 Builder.SetInsertPoint(OldTerm);
587 Builder.CreateUnreachable();
588 Preheader->getTerminator()->eraseFromParent();
589 }
590
591 if (DT) {
592 DTU.applyUpdates({{DominatorTree::Delete, Preheader, L->getHeader()}});
593 if (MSSA) {
594 MSSAU->applyUpdates({{DominatorTree::Delete, Preheader, L->getHeader()}},
595 *DT);
596 SmallSetVector<BasicBlock *, 8> DeadBlockSet(L->block_begin(),
597 L->block_end());
598 MSSAU->removeBlocks(DeadBlockSet);
599 if (VerifyMemorySSA)
600 MSSA->verifyMemorySSA();
601 }
602 }
603
604 // Use a map to unique and a vector to guarantee deterministic ordering.
608
609 if (ExitBlock) {
610 // Given LCSSA form is satisfied, we should not have users of instructions
611 // within the dead loop outside of the loop. However, LCSSA doesn't take
612 // unreachable uses into account. We handle them here.
613 // We could do it after drop all references (in this case all users in the
614 // loop will be already eliminated and we have less work to do but according
615 // to API doc of User::dropAllReferences only valid operation after dropping
616 // references, is deletion. So let's substitute all usages of
617 // instruction from the loop with poison value of corresponding type first.
618 for (auto *Block : L->blocks())
619 for (Instruction &I : *Block) {
620 auto *Poison = PoisonValue::get(I.getType());
621 for (Use &U : llvm::make_early_inc_range(I.uses())) {
622 if (auto *Usr = dyn_cast<Instruction>(U.getUser()))
623 if (L->contains(Usr->getParent()))
624 continue;
625 // If we have a DT then we can check that uses outside a loop only in
626 // unreachable block.
627 if (DT)
629 "Unexpected user in reachable block");
630 U.set(Poison);
631 }
632
633 // RemoveDIs: do the same as below for DPValues.
634 if (Block->IsNewDbgInfoFormat) {
635 for (DPValue &DPV :
636 llvm::make_early_inc_range(I.getDbgValueRange())) {
637 DebugVariable Key(DPV.getVariable(), DPV.getExpression(),
638 DPV.getDebugLoc().get());
639 if (!DeadDebugSet.insert(Key).second)
640 continue;
641 // Unlinks the DPV from it's container, for later insertion.
642 DPV.removeFromParent();
643 DeadDPValues.push_back(&DPV);
644 }
645 }
646
647 // For one of each variable encountered, preserve a debug intrinsic (set
648 // to Poison) and transfer it to the loop exit. This terminates any
649 // variable locations that were set during the loop.
650 auto *DVI = dyn_cast<DbgVariableIntrinsic>(&I);
651 if (!DVI)
652 continue;
653 if (!DeadDebugSet.insert(DebugVariable(DVI)).second)
654 continue;
655 DeadDebugInst.push_back(DVI);
656 }
657
658 // After the loop has been deleted all the values defined and modified
659 // inside the loop are going to be unavailable. Values computed in the
660 // loop will have been deleted, automatically causing their debug uses
661 // be be replaced with undef. Loop invariant values will still be available.
662 // Move dbg.values out the loop so that earlier location ranges are still
663 // terminated and loop invariant assignments are preserved.
664 DIBuilder DIB(*ExitBlock->getModule());
665 BasicBlock::iterator InsertDbgValueBefore =
666 ExitBlock->getFirstInsertionPt();
667 assert(InsertDbgValueBefore != ExitBlock->end() &&
668 "There should be a non-PHI instruction in exit block, else these "
669 "instructions will have no parent.");
670
671 for (auto *DVI : DeadDebugInst)
672 DVI->moveBefore(*ExitBlock, InsertDbgValueBefore);
673
674 // Due to the "head" bit in BasicBlock::iterator, we're going to insert
675 // each DPValue right at the start of the block, wheras dbg.values would be
676 // repeatedly inserted before the first instruction. To replicate this
677 // behaviour, do it backwards.
678 for (DPValue *DPV : llvm::reverse(DeadDPValues))
679 ExitBlock->insertDPValueBefore(DPV, InsertDbgValueBefore);
680 }
681
682 // Remove the block from the reference counting scheme, so that we can
683 // delete it freely later.
684 for (auto *Block : L->blocks())
685 Block->dropAllReferences();
686
687 if (MSSA && VerifyMemorySSA)
688 MSSA->verifyMemorySSA();
689
690 if (LI) {
691 // Erase the instructions and the blocks without having to worry
692 // about ordering because we already dropped the references.
693 // NOTE: This iteration is safe because erasing the block does not remove
694 // its entry from the loop's block list. We do that in the next section.
695 for (BasicBlock *BB : L->blocks())
696 BB->eraseFromParent();
697
698 // Finally, the blocks from loopinfo. This has to happen late because
699 // otherwise our loop iterators won't work.
700
702 blocks.insert(L->block_begin(), L->block_end());
703 for (BasicBlock *BB : blocks)
704 LI->removeBlock(BB);
705
706 // The last step is to update LoopInfo now that we've eliminated this loop.
707 // Note: LoopInfo::erase remove the given loop and relink its subloops with
708 // its parent. While removeLoop/removeChildLoop remove the given loop but
709 // not relink its subloops, which is what we want.
710 if (Loop *ParentLoop = L->getParentLoop()) {
711 Loop::iterator I = find(*ParentLoop, L);
712 assert(I != ParentLoop->end() && "Couldn't find loop");
713 ParentLoop->removeChildLoop(I);
714 } else {
715 Loop::iterator I = find(*LI, L);
716 assert(I != LI->end() && "Couldn't find loop");
717 LI->removeLoop(I);
718 }
719 LI->destroy(L);
720 }
721}
722
724 LoopInfo &LI, MemorySSA *MSSA) {
725 auto *Latch = L->getLoopLatch();
726 assert(Latch && "multiple latches not yet supported");
727 auto *Header = L->getHeader();
728 Loop *OutermostLoop = L->getOutermostLoop();
729
730 SE.forgetLoop(L);
732
733 std::unique_ptr<MemorySSAUpdater> MSSAU;
734 if (MSSA)
735 MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
736
737 // Update the CFG and domtree. We chose to special case a couple of
738 // of common cases for code quality and test readability reasons.
739 [&]() -> void {
740 if (auto *BI = dyn_cast<BranchInst>(Latch->getTerminator())) {
741 if (!BI->isConditional()) {
742 DomTreeUpdater DTU(&DT, DomTreeUpdater::UpdateStrategy::Eager);
743 (void)changeToUnreachable(BI, /*PreserveLCSSA*/ true, &DTU,
744 MSSAU.get());
745 return;
746 }
747
748 // Conditional latch/exit - note that latch can be shared by inner
749 // and outer loop so the other target doesn't need to an exit
750 if (L->isLoopExiting(Latch)) {
751 // TODO: Generalize ConstantFoldTerminator so that it can be used
752 // here without invalidating LCSSA or MemorySSA. (Tricky case for
753 // LCSSA: header is an exit block of a preceeding sibling loop w/o
754 // dedicated exits.)
755 const unsigned ExitIdx = L->contains(BI->getSuccessor(0)) ? 1 : 0;
756 BasicBlock *ExitBB = BI->getSuccessor(ExitIdx);
757
758 DomTreeUpdater DTU(&DT, DomTreeUpdater::UpdateStrategy::Eager);
759 Header->removePredecessor(Latch, true);
760
761 IRBuilder<> Builder(BI);
762 auto *NewBI = Builder.CreateBr(ExitBB);
763 // Transfer the metadata to the new branch instruction (minus the
764 // loop info since this is no longer a loop)
765 NewBI->copyMetadata(*BI, {LLVMContext::MD_dbg,
766 LLVMContext::MD_annotation});
767
768 BI->eraseFromParent();
769 DTU.applyUpdates({{DominatorTree::Delete, Latch, Header}});
770 if (MSSA)
771 MSSAU->applyUpdates({{DominatorTree::Delete, Latch, Header}}, DT);
772 return;
773 }
774 }
775
776 // General case. By splitting the backedge, and then explicitly making it
777 // unreachable we gracefully handle corner cases such as switch and invoke
778 // termiantors.
779 auto *BackedgeBB = SplitEdge(Latch, Header, &DT, &LI, MSSAU.get());
780
781 DomTreeUpdater DTU(&DT, DomTreeUpdater::UpdateStrategy::Eager);
782 (void)changeToUnreachable(BackedgeBB->getTerminator(),
783 /*PreserveLCSSA*/ true, &DTU, MSSAU.get());
784 }();
785
786 // Erase (and destroy) this loop instance. Handles relinking sub-loops
787 // and blocks within the loop as needed.
788 LI.erase(L);
789
790 // If the loop we broke had a parent, then changeToUnreachable might have
791 // caused a block to be removed from the parent loop (see loop_nest_lcssa
792 // test case in zero-btc.ll for an example), thus changing the parent's
793 // exit blocks. If that happened, we need to rebuild LCSSA on the outermost
794 // loop which might have a had a block removed.
795 if (OutermostLoop != L)
796 formLCSSARecursively(*OutermostLoop, DT, &LI, &SE);
797}
798
799
800/// Checks if \p L has an exiting latch branch. There may also be other
801/// exiting blocks. Returns branch instruction terminating the loop
802/// latch if above check is successful, nullptr otherwise.
804 BasicBlock *Latch = L->getLoopLatch();
805 if (!Latch)
806 return nullptr;
807
808 BranchInst *LatchBR = dyn_cast<BranchInst>(Latch->getTerminator());
809 if (!LatchBR || LatchBR->getNumSuccessors() != 2 || !L->isLoopExiting(Latch))
810 return nullptr;
811
812 assert((LatchBR->getSuccessor(0) == L->getHeader() ||
813 LatchBR->getSuccessor(1) == L->getHeader()) &&
814 "At least one edge out of the latch must go to the header");
815
816 return LatchBR;
817}
818
819/// Return the estimated trip count for any exiting branch which dominates
820/// the loop latch.
821static std::optional<uint64_t> getEstimatedTripCount(BranchInst *ExitingBranch,
822 Loop *L,
823 uint64_t &OrigExitWeight) {
824 // To estimate the number of times the loop body was executed, we want to
825 // know the number of times the backedge was taken, vs. the number of times
826 // we exited the loop.
827 uint64_t LoopWeight, ExitWeight;
828 if (!extractBranchWeights(*ExitingBranch, LoopWeight, ExitWeight))
829 return std::nullopt;
830
831 if (L->contains(ExitingBranch->getSuccessor(1)))
832 std::swap(LoopWeight, ExitWeight);
833
834 if (!ExitWeight)
835 // Don't have a way to return predicated infinite
836 return std::nullopt;
837
838 OrigExitWeight = ExitWeight;
839
840 // Estimated exit count is a ratio of the loop weight by the weight of the
841 // edge exiting the loop, rounded to nearest.
842 uint64_t ExitCount = llvm::divideNearest(LoopWeight, ExitWeight);
843 // Estimated trip count is one plus estimated exit count.
844 return ExitCount + 1;
845}
846
847std::optional<unsigned>
849 unsigned *EstimatedLoopInvocationWeight) {
850 // Currently we take the estimate exit count only from the loop latch,
851 // ignoring other exiting blocks. This can overestimate the trip count
852 // if we exit through another exit, but can never underestimate it.
853 // TODO: incorporate information from other exits
854 if (BranchInst *LatchBranch = getExpectedExitLoopLatchBranch(L)) {
855 uint64_t ExitWeight;
856 if (std::optional<uint64_t> EstTripCount =
857 getEstimatedTripCount(LatchBranch, L, ExitWeight)) {
858 if (EstimatedLoopInvocationWeight)
859 *EstimatedLoopInvocationWeight = ExitWeight;
860 return *EstTripCount;
861 }
862 }
863 return std::nullopt;
864}
865
866bool llvm::setLoopEstimatedTripCount(Loop *L, unsigned EstimatedTripCount,
867 unsigned EstimatedloopInvocationWeight) {
868 // At the moment, we currently support changing the estimate trip count of
869 // the latch branch only. We could extend this API to manipulate estimated
870 // trip counts for any exit.
872 if (!LatchBranch)
873 return false;
874
875 // Calculate taken and exit weights.
876 unsigned LatchExitWeight = 0;
877 unsigned BackedgeTakenWeight = 0;
878
879 if (EstimatedTripCount > 0) {
880 LatchExitWeight = EstimatedloopInvocationWeight;
881 BackedgeTakenWeight = (EstimatedTripCount - 1) * LatchExitWeight;
882 }
883
884 // Make a swap if back edge is taken when condition is "false".
885 if (LatchBranch->getSuccessor(0) != L->getHeader())
886 std::swap(BackedgeTakenWeight, LatchExitWeight);
887
888 MDBuilder MDB(LatchBranch->getContext());
889
890 // Set/Update profile metadata.
891 LatchBranch->setMetadata(
892 LLVMContext::MD_prof,
893 MDB.createBranchWeights(BackedgeTakenWeight, LatchExitWeight));
894
895 return true;
896}
897
899 ScalarEvolution &SE) {
900 Loop *OuterL = InnerLoop->getParentLoop();
901 if (!OuterL)
902 return true;
903
904 // Get the backedge taken count for the inner loop
905 BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
906 const SCEV *InnerLoopBECountSC = SE.getExitCount(InnerLoop, InnerLoopLatch);
907 if (isa<SCEVCouldNotCompute>(InnerLoopBECountSC) ||
908 !InnerLoopBECountSC->getType()->isIntegerTy())
909 return false;
910
911 // Get whether count is invariant to the outer loop
913 SE.getLoopDisposition(InnerLoopBECountSC, OuterL);
915 return false;
916
917 return true;
918}
919
921 switch (RK) {
922 default:
923 llvm_unreachable("Unknown min/max recurrence kind");
924 case RecurKind::UMin:
925 return Intrinsic::umin;
926 case RecurKind::UMax:
927 return Intrinsic::umax;
928 case RecurKind::SMin:
929 return Intrinsic::smin;
930 case RecurKind::SMax:
931 return Intrinsic::smax;
932 case RecurKind::FMin:
933 return Intrinsic::minnum;
934 case RecurKind::FMax:
935 return Intrinsic::maxnum;
936 case RecurKind::FMinimum:
937 return Intrinsic::minimum;
938 case RecurKind::FMaximum:
939 return Intrinsic::maximum;
940 }
941}
942
944 switch (RK) {
945 default:
946 llvm_unreachable("Unknown min/max recurrence kind");
947 case RecurKind::UMin:
948 return CmpInst::ICMP_ULT;
949 case RecurKind::UMax:
950 return CmpInst::ICMP_UGT;
951 case RecurKind::SMin:
952 return CmpInst::ICMP_SLT;
953 case RecurKind::SMax:
954 return CmpInst::ICMP_SGT;
955 case RecurKind::FMin:
956 return CmpInst::FCMP_OLT;
957 case RecurKind::FMax:
958 return CmpInst::FCMP_OGT;
959 // We do not add FMinimum/FMaximum recurrence kind here since there is no
960 // equivalent predicate which compares signed zeroes according to the
961 // semantics of the intrinsics (llvm.minimum/maximum).
962 }
963}
964
966 RecurKind RK, Value *Left, Value *Right) {
967 if (auto VTy = dyn_cast<VectorType>(Left->getType()))
968 StartVal = Builder.CreateVectorSplat(VTy->getElementCount(), StartVal);
969 Value *Cmp =
970 Builder.CreateCmp(CmpInst::ICMP_NE, Left, StartVal, "rdx.select.cmp");
971 return Builder.CreateSelect(Cmp, Left, Right, "rdx.select");
972}
973
975 Value *Right) {
976 Type *Ty = Left->getType();
977 if (Ty->isIntOrIntVectorTy() ||
978 (RK == RecurKind::FMinimum || RK == RecurKind::FMaximum)) {
979 // TODO: Add float minnum/maxnum support when FMF nnan is set.
981 return Builder.CreateIntrinsic(Ty, Id, {Left, Right}, nullptr,
982 "rdx.minmax");
983 }
985 Value *Cmp = Builder.CreateCmp(Pred, Left, Right, "rdx.minmax.cmp");
986 Value *Select = Builder.CreateSelect(Cmp, Left, Right, "rdx.minmax.select");
987 return Select;
988}
989
990// Helper to generate an ordered reduction.
992 unsigned Op, RecurKind RdxKind) {
993 unsigned VF = cast<FixedVectorType>(Src->getType())->getNumElements();
994
995 // Extract and apply reduction ops in ascending order:
996 // e.g. ((((Acc + Scl[0]) + Scl[1]) + Scl[2]) + ) ... + Scl[VF-1]
997 Value *Result = Acc;
998 for (unsigned ExtractIdx = 0; ExtractIdx != VF; ++ExtractIdx) {
999 Value *Ext =
1000 Builder.CreateExtractElement(Src, Builder.getInt32(ExtractIdx));
1001
1002 if (Op != Instruction::ICmp && Op != Instruction::FCmp) {
1003 Result = Builder.CreateBinOp((Instruction::BinaryOps)Op, Result, Ext,
1004 "bin.rdx");
1005 } else {
1007 "Invalid min/max");
1008 Result = createMinMaxOp(Builder, RdxKind, Result, Ext);
1009 }
1010 }
1011
1012 return Result;
1013}
1014
1015// Helper to generate a log2 shuffle reduction.
1017 unsigned Op, RecurKind RdxKind) {
1018 unsigned VF = cast<FixedVectorType>(Src->getType())->getNumElements();
1019 // VF is a power of 2 so we can emit the reduction using log2(VF) shuffles
1020 // and vector ops, reducing the set of values being computed by half each
1021 // round.
1022 assert(isPowerOf2_32(VF) &&
1023 "Reduction emission only supported for pow2 vectors!");
1024 // Note: fast-math-flags flags are controlled by the builder configuration
1025 // and are assumed to apply to all generated arithmetic instructions. Other
1026 // poison generating flags (nsw/nuw/inbounds/inrange/exact) are not part
1027 // of the builder configuration, and since they're not passed explicitly,
1028 // will never be relevant here. Note that it would be generally unsound to
1029 // propagate these from an intrinsic call to the expansion anyways as we/
1030 // change the order of operations.
1031 Value *TmpVec = Src;
1032 SmallVector<int, 32> ShuffleMask(VF);
1033 for (unsigned i = VF; i != 1; i >>= 1) {
1034 // Move the upper half of the vector to the lower half.
1035 for (unsigned j = 0; j != i / 2; ++j)
1036 ShuffleMask[j] = i / 2 + j;
1037
1038 // Fill the rest of the mask with undef.
1039 std::fill(&ShuffleMask[i / 2], ShuffleMask.end(), -1);
1040
1041 Value *Shuf = Builder.CreateShuffleVector(TmpVec, ShuffleMask, "rdx.shuf");
1042
1043 if (Op != Instruction::ICmp && Op != Instruction::FCmp) {
1044 TmpVec = Builder.CreateBinOp((Instruction::BinaryOps)Op, TmpVec, Shuf,
1045 "bin.rdx");
1046 } else {
1048 "Invalid min/max");
1049 TmpVec = createMinMaxOp(Builder, RdxKind, TmpVec, Shuf);
1050 }
1051 }
1052 // The result is in the first element of the vector.
1053 return Builder.CreateExtractElement(TmpVec, Builder.getInt32(0));
1054}
1055
1058 PHINode *OrigPhi) {
1059 assert(
1061 "Unexpected reduction kind");
1062 Value *InitVal = Desc.getRecurrenceStartValue();
1063 Value *NewVal = nullptr;
1064
1065 // First use the original phi to determine the new value we're trying to
1066 // select from in the loop.
1067 SelectInst *SI = nullptr;
1068 for (auto *U : OrigPhi->users()) {
1069 if ((SI = dyn_cast<SelectInst>(U)))
1070 break;
1071 }
1072 assert(SI && "One user of the original phi should be a select");
1073
1074 if (SI->getTrueValue() == OrigPhi)
1075 NewVal = SI->getFalseValue();
1076 else {
1077 assert(SI->getFalseValue() == OrigPhi &&
1078 "At least one input to the select should be the original Phi");
1079 NewVal = SI->getTrueValue();
1080 }
1081
1082 // Create a splat vector with the new value and compare this to the vector
1083 // we want to reduce.
1084 ElementCount EC = cast<VectorType>(Src->getType())->getElementCount();
1085 Value *Right = Builder.CreateVectorSplat(EC, InitVal);
1086 Value *Cmp =
1087 Builder.CreateCmp(CmpInst::ICMP_NE, Src, Right, "rdx.select.cmp");
1088
1089 // If any predicate is true it means that we want to select the new value.
1090 Cmp = Builder.CreateOrReduce(Cmp);
1091 return Builder.CreateSelect(Cmp, NewVal, InitVal, "rdx.select");
1092}
1093
1095 RecurKind RdxKind) {
1096 auto *SrcVecEltTy = cast<VectorType>(Src->getType())->getElementType();
1097 switch (RdxKind) {
1098 case RecurKind::Add:
1099 return Builder.CreateAddReduce(Src);
1100 case RecurKind::Mul:
1101 return Builder.CreateMulReduce(Src);
1102 case RecurKind::And:
1103 return Builder.CreateAndReduce(Src);
1104 case RecurKind::Or:
1105 return Builder.CreateOrReduce(Src);
1106 case RecurKind::Xor:
1107 return Builder.CreateXorReduce(Src);
1108 case RecurKind::FMulAdd:
1109 case RecurKind::FAdd:
1110 return Builder.CreateFAddReduce(ConstantFP::getNegativeZero(SrcVecEltTy),
1111 Src);
1112 case RecurKind::FMul:
1113 return Builder.CreateFMulReduce(ConstantFP::get(SrcVecEltTy, 1.0), Src);
1114 case RecurKind::SMax:
1115 return Builder.CreateIntMaxReduce(Src, true);
1116 case RecurKind::SMin:
1117 return Builder.CreateIntMinReduce(Src, true);
1118 case RecurKind::UMax:
1119 return Builder.CreateIntMaxReduce(Src, false);
1120 case RecurKind::UMin:
1121 return Builder.CreateIntMinReduce(Src, false);
1122 case RecurKind::FMax:
1123 return Builder.CreateFPMaxReduce(Src);
1124 case RecurKind::FMin:
1125 return Builder.CreateFPMinReduce(Src);
1126 case RecurKind::FMinimum:
1127 return Builder.CreateFPMinimumReduce(Src);
1128 case RecurKind::FMaximum:
1129 return Builder.CreateFPMaximumReduce(Src);
1130 default:
1131 llvm_unreachable("Unhandled opcode");
1132 }
1133}
1134
1136 const RecurrenceDescriptor &Desc, Value *Src,
1137 PHINode *OrigPhi) {
1138 // TODO: Support in-order reductions based on the recurrence descriptor.
1139 // All ops in the reduction inherit fast-math-flags from the recurrence
1140 // descriptor.
1142 B.setFastMathFlags(Desc.getFastMathFlags());
1143
1144 RecurKind RK = Desc.getRecurrenceKind();
1146 return createAnyOfTargetReduction(B, Src, Desc, OrigPhi);
1147
1148 return createSimpleTargetReduction(B, Src, RK);
1149}
1150
1153 Value *Src, Value *Start) {
1154 assert((Desc.getRecurrenceKind() == RecurKind::FAdd ||
1155 Desc.getRecurrenceKind() == RecurKind::FMulAdd) &&
1156 "Unexpected reduction kind");
1157 assert(Src->getType()->isVectorTy() && "Expected a vector type");
1158 assert(!Start->getType()->isVectorTy() && "Expected a scalar type");
1159
1160 return B.CreateFAddReduce(Start, Src);
1161}
1162
1164 bool IncludeWrapFlags) {
1165 auto *VecOp = dyn_cast<Instruction>(I);
1166 if (!VecOp)
1167 return;
1168 auto *Intersection = (OpValue == nullptr) ? dyn_cast<Instruction>(VL[0])
1169 : dyn_cast<Instruction>(OpValue);
1170 if (!Intersection)
1171 return;
1172 const unsigned Opcode = Intersection->getOpcode();
1173 VecOp->copyIRFlags(Intersection, IncludeWrapFlags);
1174 for (auto *V : VL) {
1175 auto *Instr = dyn_cast<Instruction>(V);
1176 if (!Instr)
1177 continue;
1178 if (OpValue == nullptr || Opcode == Instr->getOpcode())
1179 VecOp->andIRFlags(V);
1180 }
1181}
1182
1183bool llvm::isKnownNegativeInLoop(const SCEV *S, const Loop *L,
1184 ScalarEvolution &SE) {
1185 const SCEV *Zero = SE.getZero(S->getType());
1186 return SE.isAvailableAtLoopEntry(S, L) &&
1187 SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SLT, S, Zero);
1188}
1189
1191 ScalarEvolution &SE) {
1192 const SCEV *Zero = SE.getZero(S->getType());
1193 return SE.isAvailableAtLoopEntry(S, L) &&
1194 SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SGE, S, Zero);
1195}
1196
1197bool llvm::isKnownPositiveInLoop(const SCEV *S, const Loop *L,
1198 ScalarEvolution &SE) {
1199 const SCEV *Zero = SE.getZero(S->getType());
1200 return SE.isAvailableAtLoopEntry(S, L) &&
1201 SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SGT, S, Zero);
1202}
1203
1205 ScalarEvolution &SE) {
1206 const SCEV *Zero = SE.getZero(S->getType());
1207 return SE.isAvailableAtLoopEntry(S, L) &&
1208 SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SLE, S, Zero);
1209}
1210
1212 bool Signed) {
1213 unsigned BitWidth = cast<IntegerType>(S->getType())->getBitWidth();
1216 auto Predicate = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
1217 return SE.isAvailableAtLoopEntry(S, L) &&
1218 SE.isLoopEntryGuardedByCond(L, Predicate, S,
1219 SE.getConstant(Min));
1220}
1221
1223 bool Signed) {
1224 unsigned BitWidth = cast<IntegerType>(S->getType())->getBitWidth();
1227 auto Predicate = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
1228 return SE.isAvailableAtLoopEntry(S, L) &&
1229 SE.isLoopEntryGuardedByCond(L, Predicate, S,
1230 SE.getConstant(Max));
1231}
1232
1233//===----------------------------------------------------------------------===//
1234// rewriteLoopExitValues - Optimize IV users outside the loop.
1235// As a side effect, reduces the amount of IV processing within the loop.
1236//===----------------------------------------------------------------------===//
1237
1238static bool hasHardUserWithinLoop(const Loop *L, const Instruction *I) {
1241 Visited.insert(I);
1242 WorkList.push_back(I);
1243 while (!WorkList.empty()) {
1244 const Instruction *Curr = WorkList.pop_back_val();
1245 // This use is outside the loop, nothing to do.
1246 if (!L->contains(Curr))
1247 continue;
1248 // Do we assume it is a "hard" use which will not be eliminated easily?
1249 if (Curr->mayHaveSideEffects())
1250 return true;
1251 // Otherwise, add all its users to worklist.
1252 for (const auto *U : Curr->users()) {
1253 auto *UI = cast<Instruction>(U);
1254 if (Visited.insert(UI).second)
1255 WorkList.push_back(UI);
1256 }
1257 }
1258 return false;
1259}
1260
1261// Collect information about PHI nodes which can be transformed in
1262// rewriteLoopExitValues.
1264 PHINode *PN; // For which PHI node is this replacement?
1265 unsigned Ith; // For which incoming value?
1266 const SCEV *ExpansionSCEV; // The SCEV of the incoming value we are rewriting.
1267 Instruction *ExpansionPoint; // Where we'd like to expand that SCEV?
1268 bool HighCost; // Is this expansion a high-cost?
1269
1270 RewritePhi(PHINode *P, unsigned I, const SCEV *Val, Instruction *ExpansionPt,
1271 bool H)
1272 : PN(P), Ith(I), ExpansionSCEV(Val), ExpansionPoint(ExpansionPt),
1273 HighCost(H) {}
1274};
1275
1276// Check whether it is possible to delete the loop after rewriting exit
1277// value. If it is possible, ignore ReplaceExitValue and do rewriting
1278// aggressively.
1279static bool canLoopBeDeleted(Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet) {
1280 BasicBlock *Preheader = L->getLoopPreheader();
1281 // If there is no preheader, the loop will not be deleted.
1282 if (!Preheader)
1283 return false;
1284
1285 // In LoopDeletion pass Loop can be deleted when ExitingBlocks.size() > 1.
1286 // We obviate multiple ExitingBlocks case for simplicity.
1287 // TODO: If we see testcase with multiple ExitingBlocks can be deleted
1288 // after exit value rewriting, we can enhance the logic here.
1289 SmallVector<BasicBlock *, 4> ExitingBlocks;
1290 L->getExitingBlocks(ExitingBlocks);
1292 L->getUniqueExitBlocks(ExitBlocks);
1293 if (ExitBlocks.size() != 1 || ExitingBlocks.size() != 1)
1294 return false;
1295
1296 BasicBlock *ExitBlock = ExitBlocks[0];
1297 BasicBlock::iterator BI = ExitBlock->begin();
1298 while (PHINode *P = dyn_cast<PHINode>(BI)) {
1299 Value *Incoming = P->getIncomingValueForBlock(ExitingBlocks[0]);
1300
1301 // If the Incoming value of P is found in RewritePhiSet, we know it
1302 // could be rewritten to use a loop invariant value in transformation
1303 // phase later. Skip it in the loop invariant check below.
1304 bool found = false;
1305 for (const RewritePhi &Phi : RewritePhiSet) {
1306 unsigned i = Phi.Ith;
1307 if (Phi.PN == P && (Phi.PN)->getIncomingValue(i) == Incoming) {
1308 found = true;
1309 break;
1310 }
1311 }
1312
1313 Instruction *I;
1314 if (!found && (I = dyn_cast<Instruction>(Incoming)))
1315 if (!L->hasLoopInvariantOperands(I))
1316 return false;
1317
1318 ++BI;
1319 }
1320
1321 for (auto *BB : L->blocks())
1322 if (llvm::any_of(*BB, [](Instruction &I) {
1323 return I.mayHaveSideEffects();
1324 }))
1325 return false;
1326
1327 return true;
1328}
1329
1330/// Checks if it is safe to call InductionDescriptor::isInductionPHI for \p Phi,
1331/// and returns true if this Phi is an induction phi in the loop. When
1332/// isInductionPHI returns true, \p ID will be also be set by isInductionPHI.
1333static bool checkIsIndPhi(PHINode *Phi, Loop *L, ScalarEvolution *SE,
1335 if (!Phi)
1336 return false;
1337 if (!L->getLoopPreheader())
1338 return false;
1339 if (Phi->getParent() != L->getHeader())
1340 return false;
1341 return InductionDescriptor::isInductionPHI(Phi, L, SE, ID);
1342}
1343
1345 ScalarEvolution *SE,
1346 const TargetTransformInfo *TTI,
1347 SCEVExpander &Rewriter, DominatorTree *DT,
1350 // Check a pre-condition.
1351 assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
1352 "Indvars did not preserve LCSSA!");
1353
1354 SmallVector<BasicBlock*, 8> ExitBlocks;
1355 L->getUniqueExitBlocks(ExitBlocks);
1356
1357 SmallVector<RewritePhi, 8> RewritePhiSet;
1358 // Find all values that are computed inside the loop, but used outside of it.
1359 // Because of LCSSA, these values will only occur in LCSSA PHI Nodes. Scan
1360 // the exit blocks of the loop to find them.
1361 for (BasicBlock *ExitBB : ExitBlocks) {
1362 // If there are no PHI nodes in this exit block, then no values defined
1363 // inside the loop are used on this path, skip it.
1364 PHINode *PN = dyn_cast<PHINode>(ExitBB->begin());
1365 if (!PN) continue;
1366
1367 unsigned NumPreds = PN->getNumIncomingValues();
1368
1369 // Iterate over all of the PHI nodes.
1370 BasicBlock::iterator BBI = ExitBB->begin();
1371 while ((PN = dyn_cast<PHINode>(BBI++))) {
1372 if (PN->use_empty())
1373 continue; // dead use, don't replace it
1374
1375 if (!SE->isSCEVable(PN->getType()))
1376 continue;
1377
1378 // Iterate over all of the values in all the PHI nodes.
1379 for (unsigned i = 0; i != NumPreds; ++i) {
1380 // If the value being merged in is not integer or is not defined
1381 // in the loop, skip it.
1382 Value *InVal = PN->getIncomingValue(i);
1383 if (!isa<Instruction>(InVal))
1384 continue;
1385
1386 // If this pred is for a subloop, not L itself, skip it.
1387 if (LI->getLoopFor(PN->getIncomingBlock(i)) != L)
1388 continue; // The Block is in a subloop, skip it.
1389
1390 // Check that InVal is defined in the loop.
1391 Instruction *Inst = cast<Instruction>(InVal);
1392 if (!L->contains(Inst))
1393 continue;
1394
1395 // Find exit values which are induction variables in the loop, and are
1396 // unused in the loop, with the only use being the exit block PhiNode,
1397 // and the induction variable update binary operator.
1398 // The exit value can be replaced with the final value when it is cheap
1399 // to do so.
1402 PHINode *IndPhi = dyn_cast<PHINode>(Inst);
1403 if (IndPhi) {
1404 if (!checkIsIndPhi(IndPhi, L, SE, ID))
1405 continue;
1406 // This is an induction PHI. Check that the only users are PHI
1407 // nodes, and induction variable update binary operators.
1408 if (llvm::any_of(Inst->users(), [&](User *U) {
1409 if (!isa<PHINode>(U) && !isa<BinaryOperator>(U))
1410 return true;
1411 BinaryOperator *B = dyn_cast<BinaryOperator>(U);
1412 if (B && B != ID.getInductionBinOp())
1413 return true;
1414 return false;
1415 }))
1416 continue;
1417 } else {
1418 // If it is not an induction phi, it must be an induction update
1419 // binary operator with an induction phi user.
1420 BinaryOperator *B = dyn_cast<BinaryOperator>(Inst);
1421 if (!B)
1422 continue;
1423 if (llvm::any_of(Inst->users(), [&](User *U) {
1424 PHINode *Phi = dyn_cast<PHINode>(U);
1425 if (Phi != PN && !checkIsIndPhi(Phi, L, SE, ID))
1426 return true;
1427 return false;
1428 }))
1429 continue;
1430 if (B != ID.getInductionBinOp())
1431 continue;
1432 }
1433 }
1434
1435 // Okay, this instruction has a user outside of the current loop
1436 // and varies predictably *inside* the loop. Evaluate the value it
1437 // contains when the loop exits, if possible. We prefer to start with
1438 // expressions which are true for all exits (so as to maximize
1439 // expression reuse by the SCEVExpander), but resort to per-exit
1440 // evaluation if that fails.
1441 const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop());
1442 if (isa<SCEVCouldNotCompute>(ExitValue) ||
1443 !SE->isLoopInvariant(ExitValue, L) ||
1444 !Rewriter.isSafeToExpand(ExitValue)) {
1445 // TODO: This should probably be sunk into SCEV in some way; maybe a
1446 // getSCEVForExit(SCEV*, L, ExitingBB)? It can be generalized for
1447 // most SCEV expressions and other recurrence types (e.g. shift
1448 // recurrences). Is there existing code we can reuse?
1449 const SCEV *ExitCount = SE->getExitCount(L, PN->getIncomingBlock(i));
1450 if (isa<SCEVCouldNotCompute>(ExitCount))
1451 continue;
1452 if (auto *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Inst)))
1453 if (AddRec->getLoop() == L)
1454 ExitValue = AddRec->evaluateAtIteration(ExitCount, *SE);
1455 if (isa<SCEVCouldNotCompute>(ExitValue) ||
1456 !SE->isLoopInvariant(ExitValue, L) ||
1457 !Rewriter.isSafeToExpand(ExitValue))
1458 continue;
1459 }
1460
1461 // Computing the value outside of the loop brings no benefit if it is
1462 // definitely used inside the loop in a way which can not be optimized
1463 // away. Avoid doing so unless we know we have a value which computes
1464 // the ExitValue already. TODO: This should be merged into SCEV
1465 // expander to leverage its knowledge of existing expressions.
1466 if (ReplaceExitValue != AlwaysRepl && !isa<SCEVConstant>(ExitValue) &&
1467 !isa<SCEVUnknown>(ExitValue) && hasHardUserWithinLoop(L, Inst))
1468 continue;
1469
1470 // Check if expansions of this SCEV would count as being high cost.
1471 bool HighCost = Rewriter.isHighCostExpansion(
1472 ExitValue, L, SCEVCheapExpansionBudget, TTI, Inst);
1473
1474 // Note that we must not perform expansions until after
1475 // we query *all* the costs, because if we perform temporary expansion
1476 // inbetween, one that we might not intend to keep, said expansion
1477 // *may* affect cost calculation of the next SCEV's we'll query,
1478 // and next SCEV may errneously get smaller cost.
1479
1480 // Collect all the candidate PHINodes to be rewritten.
1481 Instruction *InsertPt =
1482 (isa<PHINode>(Inst) || isa<LandingPadInst>(Inst)) ?
1483 &*Inst->getParent()->getFirstInsertionPt() : Inst;
1484 RewritePhiSet.emplace_back(PN, i, ExitValue, InsertPt, HighCost);
1485 }
1486 }
1487 }
1488
1489 // TODO: evaluate whether it is beneficial to change how we calculate
1490 // high-cost: if we have SCEV 'A' which we know we will expand, should we
1491 // calculate the cost of other SCEV's after expanding SCEV 'A', thus
1492 // potentially giving cost bonus to those other SCEV's?
1493
1494 bool LoopCanBeDel = canLoopBeDeleted(L, RewritePhiSet);
1495 int NumReplaced = 0;
1496
1497 // Transformation.
1498 for (const RewritePhi &Phi : RewritePhiSet) {
1499 PHINode *PN = Phi.PN;
1500
1501 // Only do the rewrite when the ExitValue can be expanded cheaply.
1502 // If LoopCanBeDel is true, rewrite exit value aggressively.
1505 !LoopCanBeDel && Phi.HighCost)
1506 continue;
1507
1508 Value *ExitVal = Rewriter.expandCodeFor(
1509 Phi.ExpansionSCEV, Phi.PN->getType(), Phi.ExpansionPoint);
1510
1511 LLVM_DEBUG(dbgs() << "rewriteLoopExitValues: AfterLoopVal = " << *ExitVal
1512 << '\n'
1513 << " LoopVal = " << *(Phi.ExpansionPoint) << "\n");
1514
1515#ifndef NDEBUG
1516 // If we reuse an instruction from a loop which is neither L nor one of
1517 // its containing loops, we end up breaking LCSSA form for this loop by
1518 // creating a new use of its instruction.
1519 if (auto *ExitInsn = dyn_cast<Instruction>(ExitVal))
1520 if (auto *EVL = LI->getLoopFor(ExitInsn->getParent()))
1521 if (EVL != L)
1522 assert(EVL->contains(L) && "LCSSA breach detected!");
1523#endif
1524
1525 NumReplaced++;
1526 Instruction *Inst = cast<Instruction>(PN->getIncomingValue(Phi.Ith));
1527 PN->setIncomingValue(Phi.Ith, ExitVal);
1528 // It's necessary to tell ScalarEvolution about this explicitly so that
1529 // it can walk the def-use list and forget all SCEVs, as it may not be
1530 // watching the PHI itself. Once the new exit value is in place, there
1531 // may not be a def-use connection between the loop and every instruction
1532 // which got a SCEVAddRecExpr for that loop.
1533 SE->forgetValue(PN);
1534
1535 // If this instruction is dead now, delete it. Don't do it now to avoid
1536 // invalidating iterators.
1537 if (isInstructionTriviallyDead(Inst, TLI))
1538 DeadInsts.push_back(Inst);
1539
1540 // Replace PN with ExitVal if that is legal and does not break LCSSA.
1541 if (PN->getNumIncomingValues() == 1 &&
1542 LI->replacementPreservesLCSSAForm(PN, ExitVal)) {
1543 PN->replaceAllUsesWith(ExitVal);
1544 PN->eraseFromParent();
1545 }
1546 }
1547
1548 // The insertion point instruction may have been deleted; clear it out
1549 // so that the rewriter doesn't trip over it later.
1550 Rewriter.clearInsertPoint();
1551 return NumReplaced;
1552}
1553
1554/// Set weights for \p UnrolledLoop and \p RemainderLoop based on weights for
1555/// \p OrigLoop.
1556void llvm::setProfileInfoAfterUnrolling(Loop *OrigLoop, Loop *UnrolledLoop,
1557 Loop *RemainderLoop, uint64_t UF) {
1558 assert(UF > 0 && "Zero unrolled factor is not supported");
1559 assert(UnrolledLoop != RemainderLoop &&
1560 "Unrolled and Remainder loops are expected to distinct");
1561
1562 // Get number of iterations in the original scalar loop.
1563 unsigned OrigLoopInvocationWeight = 0;
1564 std::optional<unsigned> OrigAverageTripCount =
1565 getLoopEstimatedTripCount(OrigLoop, &OrigLoopInvocationWeight);
1566 if (!OrigAverageTripCount)
1567 return;
1568
1569 // Calculate number of iterations in unrolled loop.
1570 unsigned UnrolledAverageTripCount = *OrigAverageTripCount / UF;
1571 // Calculate number of iterations for remainder loop.
1572 unsigned RemainderAverageTripCount = *OrigAverageTripCount % UF;
1573
1574 setLoopEstimatedTripCount(UnrolledLoop, UnrolledAverageTripCount,
1575 OrigLoopInvocationWeight);
1576 setLoopEstimatedTripCount(RemainderLoop, RemainderAverageTripCount,
1577 OrigLoopInvocationWeight);
1578}
1579
1580/// Utility that implements appending of loops onto a worklist.
1581/// Loops are added in preorder (analogous for reverse postorder for trees),
1582/// and the worklist is processed LIFO.
1583template <typename RangeT>
1585 RangeT &&Loops, SmallPriorityWorklist<Loop *, 4> &Worklist) {
1586 // We use an internal worklist to build up the preorder traversal without
1587 // recursion.
1588 SmallVector<Loop *, 4> PreOrderLoops, PreOrderWorklist;
1589
1590 // We walk the initial sequence of loops in reverse because we generally want
1591 // to visit defs before uses and the worklist is LIFO.
1592 for (Loop *RootL : Loops) {
1593 assert(PreOrderLoops.empty() && "Must start with an empty preorder walk.");
1594 assert(PreOrderWorklist.empty() &&
1595 "Must start with an empty preorder walk worklist.");
1596 PreOrderWorklist.push_back(RootL);
1597 do {
1598 Loop *L = PreOrderWorklist.pop_back_val();
1599 PreOrderWorklist.append(L->begin(), L->end());
1600 PreOrderLoops.push_back(L);
1601 } while (!PreOrderWorklist.empty());
1602
1603 Worklist.insert(std::move(PreOrderLoops));
1604 PreOrderLoops.clear();
1605 }
1606}
1607
1608template <typename RangeT>
1612}
1613
1614template void llvm::appendLoopsToWorklist<ArrayRef<Loop *> &>(
1616
1617template void
1618llvm::appendLoopsToWorklist<Loop &>(Loop &L,
1620
1623 appendReversedLoopsToWorklist(LI, Worklist);
1624}
1625
1627 LoopInfo *LI, LPPassManager *LPM) {
1628 Loop &New = *LI->AllocateLoop();
1629 if (PL)
1630 PL->addChildLoop(&New);
1631 else
1632 LI->addTopLevelLoop(&New);
1633
1634 if (LPM)
1635 LPM->addLoop(New);
1636
1637 // Add all of the blocks in L to the new loop.
1638 for (BasicBlock *BB : L->blocks())
1639 if (LI->getLoopFor(BB) == L)
1640 New.addBasicBlockToLoop(cast<BasicBlock>(VM[BB]), *LI);
1641
1642 // Add all of the subloops to the new loop.
1643 for (Loop *I : *L)
1644 cloneLoop(I, &New, VM, LI, LPM);
1645
1646 return &New;
1647}
1648
1649/// IR Values for the lower and upper bounds of a pointer evolution. We
1650/// need to use value-handles because SCEV expansion can invalidate previously
1651/// expanded values. Thus expansion of a pointer can invalidate the bounds for
1652/// a previous one.
1657};
1658
1659/// Expand code for the lower and upper bound of the pointer group \p CG
1660/// in \p TheLoop. \return the values for the bounds.
1662 Loop *TheLoop, Instruction *Loc,
1663 SCEVExpander &Exp, bool HoistRuntimeChecks) {
1664 LLVMContext &Ctx = Loc->getContext();
1665 Type *PtrArithTy = PointerType::get(Ctx, CG->AddressSpace);
1666
1667 Value *Start = nullptr, *End = nullptr;
1668 LLVM_DEBUG(dbgs() << "LAA: Adding RT check for range:\n");
1669 const SCEV *Low = CG->Low, *High = CG->High, *Stride = nullptr;
1670
1671 // If the Low and High values are themselves loop-variant, then we may want
1672 // to expand the range to include those covered by the outer loop as well.
1673 // There is a trade-off here with the advantage being that creating checks
1674 // using the expanded range permits the runtime memory checks to be hoisted
1675 // out of the outer loop. This reduces the cost of entering the inner loop,
1676 // which can be significant for low trip counts. The disadvantage is that
1677 // there is a chance we may now never enter the vectorized inner loop,
1678 // whereas using a restricted range check could have allowed us to enter at
1679 // least once. This is why the behaviour is not currently the default and is
1680 // controlled by the parameter 'HoistRuntimeChecks'.
1681 if (HoistRuntimeChecks && TheLoop->getParentLoop() &&
1682 isa<SCEVAddRecExpr>(High) && isa<SCEVAddRecExpr>(Low)) {
1683 auto *HighAR = cast<SCEVAddRecExpr>(High);
1684 auto *LowAR = cast<SCEVAddRecExpr>(Low);
1685 const Loop *OuterLoop = TheLoop->getParentLoop();
1686 const SCEV *Recur = LowAR->getStepRecurrence(*Exp.getSE());
1687 if (Recur == HighAR->getStepRecurrence(*Exp.getSE()) &&
1688 HighAR->getLoop() == OuterLoop && LowAR->getLoop() == OuterLoop) {
1689 BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
1690 const SCEV *OuterExitCount =
1691 Exp.getSE()->getExitCount(OuterLoop, OuterLoopLatch);
1692 if (!isa<SCEVCouldNotCompute>(OuterExitCount) &&
1693 OuterExitCount->getType()->isIntegerTy()) {
1694 const SCEV *NewHigh = cast<SCEVAddRecExpr>(High)->evaluateAtIteration(
1695 OuterExitCount, *Exp.getSE());
1696 if (!isa<SCEVCouldNotCompute>(NewHigh)) {
1697 LLVM_DEBUG(dbgs() << "LAA: Expanded RT check for range to include "
1698 "outer loop in order to permit hoisting\n");
1699 High = NewHigh;
1700 Low = cast<SCEVAddRecExpr>(Low)->getStart();
1701 // If there is a possibility that the stride is negative then we have
1702 // to generate extra checks to ensure the stride is positive.
1703 if (!Exp.getSE()->isKnownNonNegative(Recur)) {
1704 Stride = Recur;
1705 LLVM_DEBUG(dbgs() << "LAA: ... but need to check stride is "
1706 "positive: "
1707 << *Stride << '\n');
1708 }
1709 }
1710 }
1711 }
1712 }
1713
1714 Start = Exp.expandCodeFor(Low, PtrArithTy, Loc);
1715 End = Exp.expandCodeFor(High, PtrArithTy, Loc);
1716 if (CG->NeedsFreeze) {
1717 IRBuilder<> Builder(Loc);
1718 Start = Builder.CreateFreeze(Start, Start->getName() + ".fr");
1719 End = Builder.CreateFreeze(End, End->getName() + ".fr");
1720 }
1721 Value *StrideVal =
1722 Stride ? Exp.expandCodeFor(Stride, Stride->getType(), Loc) : nullptr;
1723 LLVM_DEBUG(dbgs() << "Start: " << *Low << " End: " << *High << "\n");
1724 return {Start, End, StrideVal};
1725}
1726
1727/// Turns a collection of checks into a collection of expanded upper and
1728/// lower bounds for both pointers in the check.
1731 Instruction *Loc, SCEVExpander &Exp, bool HoistRuntimeChecks) {
1733
1734 // Here we're relying on the SCEV Expander's cache to only emit code for the
1735 // same bounds once.
1736 transform(PointerChecks, std::back_inserter(ChecksWithBounds),
1737 [&](const RuntimePointerCheck &Check) {
1738 PointerBounds First = expandBounds(Check.first, L, Loc, Exp,
1740 Second = expandBounds(Check.second, L, Loc, Exp,
1742 return std::make_pair(First, Second);
1743 });
1744
1745 return ChecksWithBounds;
1746}
1747
1749 Instruction *Loc, Loop *TheLoop,
1750 const SmallVectorImpl<RuntimePointerCheck> &PointerChecks,
1751 SCEVExpander &Exp, bool HoistRuntimeChecks) {
1752 // TODO: Move noalias annotation code from LoopVersioning here and share with LV if possible.
1753 // TODO: Pass RtPtrChecking instead of PointerChecks and SE separately, if possible
1754 auto ExpandedChecks =
1755 expandBounds(PointerChecks, TheLoop, Loc, Exp, HoistRuntimeChecks);
1756
1757 LLVMContext &Ctx = Loc->getContext();
1758 IRBuilder<InstSimplifyFolder> ChkBuilder(Ctx,
1759 Loc->getModule()->getDataLayout());
1760 ChkBuilder.SetInsertPoint(Loc);
1761 // Our instructions might fold to a constant.
1762 Value *MemoryRuntimeCheck = nullptr;
1763
1764 for (const auto &Check : ExpandedChecks) {
1765 const PointerBounds &A = Check.first, &B = Check.second;
1766 // Check if two pointers (A and B) conflict where conflict is computed as:
1767 // start(A) <= end(B) && start(B) <= end(A)
1768
1769 assert((A.Start->getType()->getPointerAddressSpace() ==
1770 B.End->getType()->getPointerAddressSpace()) &&
1771 (B.Start->getType()->getPointerAddressSpace() ==
1772 A.End->getType()->getPointerAddressSpace()) &&
1773 "Trying to bounds check pointers with different address spaces");
1774
1775 // [A|B].Start points to the first accessed byte under base [A|B].
1776 // [A|B].End points to the last accessed byte, plus one.
1777 // There is no conflict when the intervals are disjoint:
1778 // NoConflict = (B.Start >= A.End) || (A.Start >= B.End)
1779 //
1780 // bound0 = (B.Start < A.End)
1781 // bound1 = (A.Start < B.End)
1782 // IsConflict = bound0 & bound1
1783 Value *Cmp0 = ChkBuilder.CreateICmpULT(A.Start, B.End, "bound0");
1784 Value *Cmp1 = ChkBuilder.CreateICmpULT(B.Start, A.End, "bound1");
1785 Value *IsConflict = ChkBuilder.CreateAnd(Cmp0, Cmp1, "found.conflict");
1786 if (A.StrideToCheck) {
1787 Value *IsNegativeStride = ChkBuilder.CreateICmpSLT(
1788 A.StrideToCheck, ConstantInt::get(A.StrideToCheck->getType(), 0),
1789 "stride.check");
1790 IsConflict = ChkBuilder.CreateOr(IsConflict, IsNegativeStride);
1791 }
1792 if (B.StrideToCheck) {
1793 Value *IsNegativeStride = ChkBuilder.CreateICmpSLT(
1794 B.StrideToCheck, ConstantInt::get(B.StrideToCheck->getType(), 0),
1795 "stride.check");
1796 IsConflict = ChkBuilder.CreateOr(IsConflict, IsNegativeStride);
1797 }
1798 if (MemoryRuntimeCheck) {
1799 IsConflict =
1800 ChkBuilder.CreateOr(MemoryRuntimeCheck, IsConflict, "conflict.rdx");
1801 }
1802 MemoryRuntimeCheck = IsConflict;
1803 }
1804
1805 return MemoryRuntimeCheck;
1806}
1807
1809 Instruction *Loc, ArrayRef<PointerDiffInfo> Checks, SCEVExpander &Expander,
1810 function_ref<Value *(IRBuilderBase &, unsigned)> GetVF, unsigned IC) {
1811
1812 LLVMContext &Ctx = Loc->getContext();
1813 IRBuilder<InstSimplifyFolder> ChkBuilder(Ctx,
1814 Loc->getModule()->getDataLayout());
1815 ChkBuilder.SetInsertPoint(Loc);
1816 // Our instructions might fold to a constant.
1817 Value *MemoryRuntimeCheck = nullptr;
1818
1819 auto &SE = *Expander.getSE();
1820 // Map to keep track of created compares, The key is the pair of operands for
1821 // the compare, to allow detecting and re-using redundant compares.
1823 for (const auto &C : Checks) {
1824 Type *Ty = C.SinkStart->getType();
1825 // Compute VF * IC * AccessSize.
1826 auto *VFTimesUFTimesSize =
1827 ChkBuilder.CreateMul(GetVF(ChkBuilder, Ty->getScalarSizeInBits()),
1828 ConstantInt::get(Ty, IC * C.AccessSize));
1829 Value *Diff = Expander.expandCodeFor(
1830 SE.getMinusSCEV(C.SinkStart, C.SrcStart), Ty, Loc);
1831
1832 // Check if the same compare has already been created earlier. In that case,
1833 // there is no need to check it again.
1834 Value *IsConflict = SeenCompares.lookup({Diff, VFTimesUFTimesSize});
1835 if (IsConflict)
1836 continue;
1837
1838 IsConflict =
1839 ChkBuilder.CreateICmpULT(Diff, VFTimesUFTimesSize, "diff.check");
1840 SeenCompares.insert({{Diff, VFTimesUFTimesSize}, IsConflict});
1841 if (C.NeedsFreeze)
1842 IsConflict =
1843 ChkBuilder.CreateFreeze(IsConflict, IsConflict->getName() + ".fr");
1844 if (MemoryRuntimeCheck) {
1845 IsConflict =
1846 ChkBuilder.CreateOr(MemoryRuntimeCheck, IsConflict, "conflict.rdx");
1847 }
1848 MemoryRuntimeCheck = IsConflict;
1849 }
1850
1851 return MemoryRuntimeCheck;
1852}
1853
1854std::optional<IVConditionInfo>
1856 const MemorySSA &MSSA, AAResults &AA) {
1857 auto *TI = dyn_cast<BranchInst>(L.getHeader()->getTerminator());
1858 if (!TI || !TI->isConditional())
1859 return {};
1860
1861 auto *CondI = dyn_cast<CmpInst>(TI->getCondition());
1862 // The case with the condition outside the loop should already be handled
1863 // earlier.
1864 if (!CondI || !L.contains(CondI))
1865 return {};
1866
1867 SmallVector<Instruction *> InstToDuplicate;
1868 InstToDuplicate.push_back(CondI);
1869
1870 SmallVector<Value *, 4> WorkList;
1871 WorkList.append(CondI->op_begin(), CondI->op_end());
1872
1873 SmallVector<MemoryAccess *, 4> AccessesToCheck;
1874 SmallVector<MemoryLocation, 4> AccessedLocs;
1875 while (!WorkList.empty()) {
1876 Instruction *I = dyn_cast<Instruction>(WorkList.pop_back_val());
1877 if (!I || !L.contains(I))
1878 continue;
1879
1880 // TODO: support additional instructions.
1881 if (!isa<LoadInst>(I) && !isa<GetElementPtrInst>(I))
1882 return {};
1883
1884 // Do not duplicate volatile and atomic loads.
1885 if (auto *LI = dyn_cast<LoadInst>(I))
1886 if (LI->isVolatile() || LI->isAtomic())
1887 return {};
1888
1889 InstToDuplicate.push_back(I);
1890 if (MemoryAccess *MA = MSSA.getMemoryAccess(I)) {
1891 if (auto *MemUse = dyn_cast_or_null<MemoryUse>(MA)) {
1892 // Queue the defining access to check for alias checks.
1893 AccessesToCheck.push_back(MemUse->getDefiningAccess());
1894 AccessedLocs.push_back(MemoryLocation::get(I));
1895 } else {
1896 // MemoryDefs may clobber the location or may be atomic memory
1897 // operations. Bail out.
1898 return {};
1899 }
1900 }
1901 WorkList.append(I->op_begin(), I->op_end());
1902 }
1903
1904 if (InstToDuplicate.empty())
1905 return {};
1906
1907 SmallVector<BasicBlock *, 4> ExitingBlocks;
1908 L.getExitingBlocks(ExitingBlocks);
1909 auto HasNoClobbersOnPath =
1910 [&L, &AA, &AccessedLocs, &ExitingBlocks, &InstToDuplicate,
1911 MSSAThreshold](BasicBlock *Succ, BasicBlock *Header,
1912 SmallVector<MemoryAccess *, 4> AccessesToCheck)
1913 -> std::optional<IVConditionInfo> {
1915 // First, collect all blocks in the loop that are on a patch from Succ
1916 // to the header.
1918 WorkList.push_back(Succ);
1919 WorkList.push_back(Header);
1921 Seen.insert(Header);
1922 Info.PathIsNoop &=
1923 all_of(*Header, [](Instruction &I) { return !I.mayHaveSideEffects(); });
1924
1925 while (!WorkList.empty()) {
1926 BasicBlock *Current = WorkList.pop_back_val();
1927 if (!L.contains(Current))
1928 continue;
1929 const auto &SeenIns = Seen.insert(Current);
1930 if (!SeenIns.second)
1931 continue;
1932
1933 Info.PathIsNoop &= all_of(
1934 *Current, [](Instruction &I) { return !I.mayHaveSideEffects(); });
1935 WorkList.append(succ_begin(Current), succ_end(Current));
1936 }
1937
1938 // Require at least 2 blocks on a path through the loop. This skips
1939 // paths that directly exit the loop.
1940 if (Seen.size() < 2)
1941 return {};
1942
1943 // Next, check if there are any MemoryDefs that are on the path through
1944 // the loop (in the Seen set) and they may-alias any of the locations in
1945 // AccessedLocs. If that is the case, they may modify the condition and
1946 // partial unswitching is not possible.
1947 SmallPtrSet<MemoryAccess *, 4> SeenAccesses;
1948 while (!AccessesToCheck.empty()) {
1949 MemoryAccess *Current = AccessesToCheck.pop_back_val();
1950 auto SeenI = SeenAccesses.insert(Current);
1951 if (!SeenI.second || !Seen.contains(Current->getBlock()))
1952 continue;
1953
1954 // Bail out if exceeded the threshold.
1955 if (SeenAccesses.size() >= MSSAThreshold)
1956 return {};
1957
1958 // MemoryUse are read-only accesses.
1959 if (isa<MemoryUse>(Current))
1960 continue;
1961
1962 // For a MemoryDef, check if is aliases any of the location feeding
1963 // the original condition.
1964 if (auto *CurrentDef = dyn_cast<MemoryDef>(Current)) {
1965 if (any_of(AccessedLocs, [&AA, CurrentDef](MemoryLocation &Loc) {
1966 return isModSet(
1967 AA.getModRefInfo(CurrentDef->getMemoryInst(), Loc));
1968 }))
1969 return {};
1970 }
1971
1972 for (Use &U : Current->uses())
1973 AccessesToCheck.push_back(cast<MemoryAccess>(U.getUser()));
1974 }
1975
1976 // We could also allow loops with known trip counts without mustprogress,
1977 // but ScalarEvolution may not be available.
1978 Info.PathIsNoop &= isMustProgress(&L);
1979
1980 // If the path is considered a no-op so far, check if it reaches a
1981 // single exit block without any phis. This ensures no values from the
1982 // loop are used outside of the loop.
1983 if (Info.PathIsNoop) {
1984 for (auto *Exiting : ExitingBlocks) {
1985 if (!Seen.contains(Exiting))
1986 continue;
1987 for (auto *Succ : successors(Exiting)) {
1988 if (L.contains(Succ))
1989 continue;
1990
1991 Info.PathIsNoop &= Succ->phis().empty() &&
1992 (!Info.ExitForPath || Info.ExitForPath == Succ);
1993 if (!Info.PathIsNoop)
1994 break;
1995 assert((!Info.ExitForPath || Info.ExitForPath == Succ) &&
1996 "cannot have multiple exit blocks");
1997 Info.ExitForPath = Succ;
1998 }
1999 }
2000 }
2001 if (!Info.ExitForPath)
2002 Info.PathIsNoop = false;
2003
2004 Info.InstToDuplicate = InstToDuplicate;
2005 return Info;
2006 };
2007
2008 // If we branch to the same successor, partial unswitching will not be
2009 // beneficial.
2010 if (TI->getSuccessor(0) == TI->getSuccessor(1))
2011 return {};
2012
2013 if (auto Info = HasNoClobbersOnPath(TI->getSuccessor(0), L.getHeader(),
2014 AccessesToCheck)) {
2015 Info->KnownValue = ConstantInt::getTrue(TI->getContext());
2016 return Info;
2017 }
2018 if (auto Info = HasNoClobbersOnPath(TI->getSuccessor(1), L.getHeader(),
2019 AccessesToCheck)) {
2020 Info->KnownValue = ConstantInt::getFalse(TI->getContext());
2021 return Info;
2022 }
2023
2024 return {};
2025}
amdgpu AMDGPU Register Bank Select
This is the interface for LLVM's primary stateless and local alias analysis.
bbsections Prepares for basic block by splitting functions into clusters of basic blocks
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file defines the DenseSet and SmallDenseSet classes.
@ Enable
Definition: DwarfDebug.cpp:87
std::string Name
bool End
Definition: ELF_riscv.cpp:478
#define Check(C,...)
This is the interface for a simple mod/ref and alias analysis over globals.
static const HTTPClientCleanup Cleanup
Definition: HTTPClient.cpp:42
Hexagon Hardware Loops
iv Induction Variable Users
Definition: IVUsers.cpp:48
static cl::opt< ReplaceExitVal > ReplaceExitValue("replexitval", cl::Hidden, cl::init(OnlyCheapRepl), cl::desc("Choose the strategy to replace exit value in IndVarSimplify"), cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"), clEnumValN(OnlyCheapRepl, "cheap", "only replace exit value when the cost is cheap"), clEnumValN(UnusedIndVarInLoop, "unusedindvarinloop", "only replace exit value when it is an unused " "induction variable in the loop and has cheap replacement cost"), clEnumValN(NoHardUse, "noharduse", "only replace exit values when loop def likely dead"), clEnumValN(AlwaysRepl, "always", "always replace exit value whenever possible")))
static cl::opt< bool, true > HoistRuntimeChecks("hoist-runtime-checks", cl::Hidden, cl::desc("Hoist inner loop runtime memory checks to outer loop if possible"), cl::location(VectorizerParams::HoistRuntimeChecks), cl::init(false))
static std::optional< uint64_t > getEstimatedTripCount(BranchInst *ExitingBranch, Loop *L, uint64_t &OrigExitWeight)
Return the estimated trip count for any exiting branch which dominates the loop latch.
Definition: LoopUtils.cpp:821
static bool hasHardUserWithinLoop(const Loop *L, const Instruction *I)
Definition: LoopUtils.cpp:1238
static const char * LLVMLoopDisableLICM
Definition: LoopUtils.cpp:55
static PointerBounds expandBounds(const RuntimeCheckingPtrGroup *CG, Loop *TheLoop, Instruction *Loc, SCEVExpander &Exp, bool HoistRuntimeChecks)
Expand code for the lower and upper bound of the pointer group CG in TheLoop.
Definition: LoopUtils.cpp:1661
static bool canLoopBeDeleted(Loop *L, SmallVector< RewritePhi, 8 > &RewritePhiSet)
Definition: LoopUtils.cpp:1279
static const char * LLVMLoopDisableNonforced
Definition: LoopUtils.cpp:54
static MDNode * createStringMetadata(Loop *TheLoop, StringRef Name, unsigned V)
Create MDNode for input string.
Definition: LoopUtils.cpp:203
static BranchInst * getExpectedExitLoopLatchBranch(Loop *L)
Checks if L has an exiting latch branch.
Definition: LoopUtils.cpp:803
static bool checkIsIndPhi(PHINode *Phi, Loop *L, ScalarEvolution *SE, InductionDescriptor &ID)
Checks if it is safe to call InductionDescriptor::isInductionPHI for Phi, and returns true if this Ph...
Definition: LoopUtils.cpp:1333
#define I(x, y, z)
Definition: MD5.cpp:58
#define H(x, y, z)
Definition: MD5.cpp:57
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
Module.h This file contains the declarations for the Module class.
uint64_t High
LLVMContext & Context
#define P(N)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
This file provides a priority worklist.
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This is the interface for a SCEV-based alias analysis.
This file defines the make_scope_exit function, which executes user-defined cleanup logic at scope ex...
This file implements a set that has insertion order iteration characteristics.
static cl::opt< unsigned > MSSAThreshold("simple-loop-unswitch-memoryssa-threshold", cl::desc("Max number of memory uses to explore during " "partial unswitching analysis"), cl::init(100), cl::Hidden)
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
Virtual Register Rewriter
Definition: VirtRegMap.cpp:237
static constexpr uint32_t Opcode
Definition: aarch32.h:200
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
ModRefInfo getModRefInfo(const Instruction *I, const std::optional< MemoryLocation > &OptLoc)
Check whether or not an instruction may read or write the optionally specified memory location.
Class for arbitrary precision integers.
Definition: APInt.h:76
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition: APInt.h:184
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition: APInt.h:187
static APInt getMinValue(unsigned numBits)
Gets minimum unsigned value of APInt for a specific bit width.
Definition: APInt.h:194
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition: APInt.h:197
Represent the analysis usage information of a pass.
AnalysisUsage & addRequiredID(const void *ID)
Definition: Pass.cpp:283
AnalysisUsage & addPreservedID(const void *ID)
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
Legacy wrapper pass to provide the BasicAAResult object.
LLVM Basic Block Representation.
Definition: BasicBlock.h:60
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:437
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:446
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:173
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:207
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:228
Conditional or Unconditional Branch instruction.
unsigned getNumSuccessors() const
BasicBlock * getSuccessor(unsigned i) const
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:748
@ ICMP_SLT
signed less than
Definition: InstrTypes.h:777
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition: InstrTypes.h:754
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition: InstrTypes.h:752
@ ICMP_UGT
unsigned greater than
Definition: InstrTypes.h:771
@ ICMP_SGT
signed greater than
Definition: InstrTypes.h:775
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:773
@ ICMP_NE
not equal
Definition: InstrTypes.h:770
static ConstantAsMetadata * get(Constant *C)
Definition: Metadata.h:506
static Constant * get(Type *Ty, double V)
This returns a ConstantFP, or a vector containing a splat of a ConstantFP, for the specified value in...
Definition: Constants.cpp:927
static Constant * getNegativeZero(Type *Ty)
Definition: Constants.h:291
This is the shared class of boolean and integer constants.
Definition: Constants.h:78
static ConstantInt * getTrue(LLVMContext &Context)
Definition: Constants.cpp:833
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:888
static ConstantInt * getFalse(LLVMContext &Context)
Definition: Constants.cpp:840
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
Definition: Constants.h:151
Record of a variable value-assignment, aka a non instruction representation of the dbg....
This class represents an Operation in the Expression.
Identifies a unique instance of a variable.
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:202
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:220
void applyUpdates(ArrayRef< DominatorTree::UpdateType > Updates)
Submit updates to all available trees.
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:313
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:165
bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
Definition: Dominators.cpp:322
static constexpr ElementCount get(ScalarTy MinVal, bool Scalable)
Definition: TypeSize.h:304
Legacy wrapper pass to provide the GlobalsAAResult object.
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:94
CallInst * CreateMulReduce(Value *Src)
Create a vector int mul reduction intrinsic of the source vector.
Definition: IRBuilder.cpp:437
CallInst * CreateFAddReduce(Value *Acc, Value *Src)
Create a sequential vector fadd reduction intrinsic of the source vector.
Definition: IRBuilder.cpp:417
Value * CreateICmpULT(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2235
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2438
UnreachableInst * CreateUnreachable()
Definition: IRBuilder.h:1262
CallInst * CreateAndReduce(Value *Src)
Create a vector int AND reduction intrinsic of the source vector.
Definition: IRBuilder.cpp:441
Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains.
Definition: IRBuilder.cpp:1212
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, Instruction *FMFSource=nullptr, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Definition: IRBuilder.cpp:930
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Definition: IRBuilder.cpp:1108
CallInst * CreateAddReduce(Value *Src)
Create a vector int add reduction intrinsic of the source vector.
Definition: IRBuilder.cpp:433
Value * CreateFreeze(Value *V, const Twine &Name="")
Definition: IRBuilder.h:2513
CallInst * CreateXorReduce(Value *Src)
Create a vector int XOR reduction intrinsic of the source vector.
Definition: IRBuilder.cpp:449
CallInst * CreateOrReduce(Value *Src)
Create a vector int OR reduction intrinsic of the source vector.
Definition: IRBuilder.cpp:445
CallInst * CreateFPMinReduce(Value *Src)
Create a vector float min reduction intrinsic of the source vector.
Definition: IRBuilder.cpp:469
CallInst * CreateFPMaximumReduce(Value *Src)
Create a vector float maximum reduction intrinsic of the source vector.
Definition: IRBuilder.cpp:473
CallInst * CreateFPMaxReduce(Value *Src)
Create a vector float max reduction intrinsic of the source vector.
Definition: IRBuilder.cpp:465
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition: IRBuilder.h:480
Value * CreateCmp(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2344
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
Definition: IRBuilder.h:1119
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Definition: IRBuilder.h:2472
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1474
CallInst * CreateIntMaxReduce(Value *Src, bool IsSigned=false)
Create a vector integer max reduction intrinsic of the source vector.
Definition: IRBuilder.cpp:453
ConstantInt * getFalse()
Get the constant value for i1 false.
Definition: IRBuilder.h:465
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1496
Value * CreateBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:1665
BranchInst * CreateBr(BasicBlock *Dest)
Create an unconditional 'br label X' instruction.
Definition: IRBuilder.h:1113
Value * CreateICmpSLT(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2251
CallInst * CreateIntMinReduce(Value *Src, bool IsSigned=false)
Create a vector integer min reduction intrinsic of the source vector.
Definition: IRBuilder.cpp:459
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition: IRBuilder.h:180
CallInst * CreateFMulReduce(Value *Acc, Value *Src)
Create a sequential vector fmul reduction intrinsic of the source vector.
Definition: IRBuilder.cpp:425
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1360
CallInst * CreateFPMinimumReduce(Value *Src)
Create a vector float minimum reduction intrinsic of the source vector.
Definition: IRBuilder.cpp:477
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2644
A struct for saving information about induction variables.
static bool isInductionPHI(PHINode *Phi, const Loop *L, ScalarEvolution *SE, InductionDescriptor &D, const SCEV *Expr=nullptr, SmallVectorImpl< Instruction * > *CastsToIgnore=nullptr)
Returns true if Phi is an induction in the loop L.
unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:71
const BasicBlock * getParent() const
Definition: Instruction.h:139
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:93
bool mayHaveSideEffects() const LLVM_READONLY
Return true if the instruction may have side effects.
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1581
void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
void addLoop(Loop &L)
Definition: LoopPass.cpp:76
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
BlockT * getHeader() const
std::vector< Loop * >::const_iterator iterator
LoopT * getParentLoop() const
Return the parent loop if it exists or nullptr for top level loops.
void addTopLevelLoop(LoopT *New)
This adds the specified loop to the collection of top-level loops.
iterator end() const
void removeBlock(BlockT *BB)
This method completely removes BB from all data structures, including all of the Loop objects it is n...
LoopT * AllocateLoop(ArgsTy &&...Args)
LoopT * removeLoop(iterator I)
This removes the specified top-level loop from this loop info object.
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
void destroy(LoopT *L)
Destroy a loop that has been removed from the LoopInfo nest.
The legacy pass manager's analysis pass to compute loop information.
Definition: LoopInfo.h:591
bool replacementPreservesLCSSAForm(Instruction *From, Value *To)
Returns true if replacing From with To everywhere is guaranteed to preserve LCSSA form.
Definition: LoopInfo.h:439
void erase(Loop *L)
Update LoopInfo after removing the last backedge from a loop.
Definition: LoopInfo.cpp:876
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:44
void setLoopID(MDNode *LoopID) const
Set the llvm.loop loop id metadata for this loop.
Definition: LoopInfo.cpp:525
MDNode * getLoopID() const
Return the llvm.loop loop id metadata node for this loop if it is present.
Definition: LoopInfo.cpp:501
MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight)
Return metadata containing two branch weights.
Definition: MDBuilder.cpp:37
Metadata node.
Definition: Metadata.h:1037
void replaceOperandWith(unsigned I, Metadata *New)
Replace a specific operand.
Definition: Metadata.cpp:1030
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1391
ArrayRef< MDOperand > operands() const
Definition: Metadata.h:1389
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1504
unsigned getNumOperands() const
Return number of MDNode operands.
Definition: Metadata.h:1397
LLVMContext & getContext() const
Definition: Metadata.h:1200
Tracking metadata reference owned by Metadata.
Definition: Metadata.h:859
A single uniqued string.
Definition: Metadata.h:698
StringRef getString() const
Definition: Metadata.cpp:569
static MDString * get(LLVMContext &Context, StringRef Str)
Definition: Metadata.cpp:559
Tuple of metadata.
Definition: Metadata.h:1433
BasicBlock * getBlock() const
Definition: MemorySSA.h:164
Representation for a specific memory location.
static MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
Legacy analysis pass which computes MemorySSA.
Definition: MemorySSA.h:975
Encapsulates MemorySSA, including all data associated with memory accesses.
Definition: MemorySSA.h:700
void verifyMemorySSA(VerificationLevel=VerificationLevel::Fast) const
Verify that MemorySSA is self consistent (IE definitions dominate all uses, uses appear in the right ...
Definition: MemorySSA.cpp:1857
MemoryUseOrDef * getMemoryAccess(const Instruction *I) const
Given a memory Mod/Ref'ing instruction, get the MemorySSA access associated with it.
Definition: MemorySSA.h:717
Root of the metadata hierarchy.
Definition: Metadata.h:62
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.h:275
void setIncomingValue(unsigned i, Value *V)
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
PassRegistry - This class manages the registration and intitialization of the pass subsystem as appli...
Definition: PassRegistry.h:37
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition: Constants.cpp:1743
bool insert(const T &X)
Insert a new element into the PriorityWorklist.
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Definition: IVDescriptors.h:71
static bool isAnyOfRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
static bool isMinMaxRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is any min/max kind.
A global registry used in conjunction with static constructors to make pluggable components (like tar...
Definition: Registry.h:44
Legacy wrapper pass to provide the SCEVAAResult object.
This class uses information about analyze scalars to rewrite expressions in canonical form.
ScalarEvolution * getSE()
Value * expandCodeFor(const SCEV *SH, Type *Ty, BasicBlock::iterator I)
Insert code to directly compute the specified SCEV expression into the program.
This class represents an analyzed expression in the program.
Type * getType() const
Return the LLVM type of this SCEV expression.
The main scalar evolution driver.
const SCEV * getSCEVAtScope(const SCEV *S, const Loop *L)
Return a SCEV expression for the specified value at the specified scope in the program.
const SCEV * getZero(Type *Ty)
Return a SCEV for the constant 0 of a specific type.
const SCEV * getConstant(ConstantInt *V)
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
void forgetLoop(const Loop *L)
This method should be called by the client when it has changed a loop in a way that may effect Scalar...
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
LoopDisposition getLoopDisposition(const SCEV *S, const Loop *L)
Return the "disposition" of the given SCEV with respect to the given loop.
bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
void forgetValue(Value *V)
This method should be called by the client when it has changed a value in a way that may effect its v...
void forgetBlockAndLoopDispositions(Value *V=nullptr)
Called when the client has changed the disposition of values in a loop or block.
LoopDisposition
An enum describing the relationship between a SCEV and a loop.
@ LoopInvariant
The SCEV is loop-invariant.
bool isAvailableAtLoopEntry(const SCEV *S, const Loop *L)
Determine if the SCEV can be evaluated at loop's entry.
const SCEV * getExitCount(const Loop *L, const BasicBlock *ExitingBlock, ExitCountKind Kind=Exact)
Return the number of times the backedge executes before the given exit would be taken; if not exactly...
bool isLoopEntryGuardedByCond(const Loop *L, ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS)
Test whether entry to the loop is protected by a conditional between LHS and RHS.
This class represents the LLVM 'select' instruction.
Implements a dense probed hash-table based set with some number of buckets stored inline.
Definition: DenseSet.h:290
A version of PriorityWorklist that selects small size optimized data structures for the vector and ma...
size_type size() const
Definition: SmallPtrSet.h:93
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:366
bool contains(ConstPtrType Ptr) const
Definition: SmallPtrSet.h:390
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:451
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:370
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:577
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:941
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:687
void push_back(const T &Elt)
Definition: SmallVector.h:416
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:257
bool equals(StringRef RHS) const
equals - Check for string equality, this is more efficient than compare() when the relative ordering ...
Definition: StringRef.h:164
Provides information about what library functions are available for the current target.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Value handle that tracks a Value across RAUW.
Definition: ValueHandle.h:331
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
Definition: Type.h:234
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static IntegerType * getInt32Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:228
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:534
iterator_range< user_iterator > users()
Definition: Value.h:421
bool use_empty() const
Definition: Value.h:344
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1074
iterator_range< use_iterator > uses()
Definition: Value.h:376
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
std::pair< iterator, bool > insert(const ValueT &V)
Definition: DenseSet.h:206
An efficient, type-erasing, non-owning reference to a callable.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
std::optional< ElementCount > getOptionalElementCountLoopAttribute(const Loop *TheLoop)
Find a combination of metadata ("llvm.loop.vectorize.width" and "llvm.loop.vectorize....
Definition: LoopUtils.cpp:250
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
SmallVector< DomTreeNode *, 16 > collectChildrenInLoop(DomTreeNode *N, const Loop *CurLoop)
Does a BFS from a given node to all of its children inside a given loop.
Definition: LoopUtils.cpp:450
Interval::succ_iterator succ_end(Interval *I)
Definition: Interval.h:102
Value * addRuntimeChecks(Instruction *Loc, Loop *TheLoop, const SmallVectorImpl< RuntimePointerCheck > &PointerChecks, SCEVExpander &Expander, bool HoistRuntimeChecks=false)
Add code that checks at runtime if the accessed arrays in PointerChecks overlap.
Definition: LoopUtils.cpp:1748
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1746
std::optional< unsigned > getLoopEstimatedTripCount(Loop *L, unsigned *EstimatedLoopInvocationWeight=nullptr)
Returns a loop's estimated trip count based on branch weight metadata.
Definition: LoopUtils.cpp:848
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1726
bool getBooleanLoopAttribute(const Loop *TheLoop, StringRef Name)
Returns true if Name is applied to TheLoop and enabled.
Definition: LoopInfo.cpp:1084
std::pair< const RuntimeCheckingPtrGroup *, const RuntimeCheckingPtrGroup * > RuntimePointerCheck
A memcheck which made up of a pair of grouped pointers.
detail::scope_exit< std::decay_t< Callable > > make_scope_exit(Callable &&F)
Definition: ScopeExit.h:59
bool isKnownNonPositiveInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE)
Returns true if we can prove that S is defined and always non-positive in loop L.
Definition: LoopUtils.cpp:1204
Value * createSimpleTargetReduction(IRBuilderBase &B, Value *Src, RecurKind RdxKind)
Create a target reduction of the given vector.
Definition: LoopUtils.cpp:1094
std::optional< bool > getOptionalBoolLoopAttribute(const Loop *TheLoop, StringRef Name)
Definition: LoopInfo.cpp:1066
void appendReversedLoopsToWorklist(RangeT &&, SmallPriorityWorklist< Loop *, 4 > &)
Utility that implements appending of loops onto a worklist given a range.
Definition: LoopUtils.cpp:1584
auto successors(const MachineBasicBlock *BB)
void initializeLoopPassPass(PassRegistry &)
Manually defined generic "LoopPass" dependency initialization.
Definition: LoopUtils.cpp:189
bool formLCSSARecursively(Loop &L, const DominatorTree &DT, const LoopInfo *LI, ScalarEvolution *SE)
Put a loop nest into LCSSA form.
Definition: LCSSA.cpp:425
std::optional< MDNode * > makeFollowupLoopID(MDNode *OrigLoopID, ArrayRef< StringRef > FollowupAttrs, const char *InheritOptionsAttrsPrefix="", bool AlwaysNew=false)
Create a new loop identifier for a loop created from a loop transformation.
Definition: LoopUtils.cpp:263
Interval::succ_iterator succ_begin(Interval *I)
succ_begin/succ_end - define methods so that Intervals may be used just like BasicBlocks can with the...
Definition: Interval.h:99
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:665
char & LCSSAID
Definition: LCSSA.cpp:507
char & LoopSimplifyID
Intrinsic::ID getMinMaxReductionIntrinsicOp(RecurKind RK)
Returns the min/max intrinsic used when expanding a min/max reduction.
Definition: LoopUtils.cpp:920
Value * createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left, Value *Right)
Returns a Min/Max operation corresponding to MinMaxRecurrenceKind.
Definition: LoopUtils.cpp:974
void addStringMetadataToLoop(Loop *TheLoop, const char *MDString, unsigned V=0)
Set input string into loop metadata by keeping other values intact.
Definition: LoopUtils.cpp:214
bool cannotBeMaxInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE, bool Signed)
Returns true if S is defined and never is equal to signed/unsigned max.
Definition: LoopUtils.cpp:1222
TransformationMode hasVectorizeTransformation(const Loop *L)
Definition: LoopUtils.cpp:391
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
Definition: STLExtras.h:1932
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1733
bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
Definition: Local.cpp:399
uint64_t divideNearest(uint64_t Numerator, uint64_t Denominator)
Returns the integer nearest(Numerator / Denominator).
Definition: MathExtras.h:422
SmallVector< Instruction *, 8 > findDefsUsedOutsideOfLoop(Loop *L)
Returns the instructions that use values defined in the loop.
Definition: LoopUtils.cpp:123
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:428
bool isMustProgress(const Loop *L)
Return true if this loop can be assumed to make progress.
Definition: LoopInfo.cpp:1117
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:264
bool isModSet(const ModRefInfo MRI)
Definition: ModRef.h:48
TransformationMode hasUnrollAndJamTransformation(const Loop *L)
Definition: LoopUtils.cpp:373
void deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE, LoopInfo *LI, MemorySSA *MSSA=nullptr)
This function deletes dead loops.
Definition: LoopUtils.cpp:482
Value * getShuffleReduction(IRBuilderBase &Builder, Value *Src, unsigned Op, RecurKind MinMaxKind=RecurKind::None)
Generates a vector reduction using shufflevectors to reduce the value.
Definition: LoopUtils.cpp:1016
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
bool hasDisableAllTransformsHint(const Loop *L)
Look for the loop attribute that disables all transformation heuristic.
Definition: LoopUtils.cpp:344
Value * createOrderedReduction(IRBuilderBase &B, const RecurrenceDescriptor &Desc, Value *Src, Value *Start)
Create an ordered reduction intrinsic using the given recurrence descriptor Desc.
Definition: LoopUtils.cpp:1151
cl::opt< unsigned > SCEVCheapExpansionBudget
TransformationMode hasUnrollTransformation(const Loop *L)
Definition: LoopUtils.cpp:352
TransformationMode hasDistributeTransformation(const Loop *L)
Definition: LoopUtils.cpp:427
void breakLoopBackedge(Loop *L, DominatorTree &DT, ScalarEvolution &SE, LoopInfo &LI, MemorySSA *MSSA)
Remove the backedge of the specified loop.
Definition: LoopUtils.cpp:723
void getLoopAnalysisUsage(AnalysisUsage &AU)
Helper to consistently add the set of standard passes to a loop pass's AnalysisUsage.
Definition: LoopUtils.cpp:141
void propagateIRFlags(Value *I, ArrayRef< Value * > VL, Value *OpValue=nullptr, bool IncludeWrapFlags=true)
Get the intersection (logical and) of all of the potential IR flags of each scalar operation (VL) tha...
Definition: LoopUtils.cpp:1163
bool isKnownPositiveInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE)
Returns true if we can prove that S is defined and always positive in loop L.
Definition: LoopUtils.cpp:1197
unsigned changeToUnreachable(Instruction *I, bool PreserveLCSSA=false, DomTreeUpdater *DTU=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Insert an unreachable instruction before the specified instruction, making it and the rest of the cod...
Definition: Local.cpp:2782
std::optional< int > getOptionalIntLoopAttribute(const Loop *TheLoop, StringRef Name)
Find named metadata for a loop with an integer value.
Definition: LoopInfo.cpp:1088
BasicBlock * SplitBlockPredecessors(BasicBlock *BB, ArrayRef< BasicBlock * > Preds, const char *Suffix, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method introduces at least one new basic block into the function and moves some of the predecess...
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
CmpInst::Predicate getMinMaxReductionPredicate(RecurKind RK)
Returns the comparison predicate used when expanding a min/max reduction.
Definition: LoopUtils.cpp:943
TransformationMode hasLICMVersioningTransformation(const Loop *L)
Definition: LoopUtils.cpp:437
bool VerifyMemorySSA
Enables verification of MemorySSA.
Definition: MemorySSA.cpp:83
TransformationMode
The mode sets how eager a transformation should be applied.
Definition: LoopUtils.h:275
@ TM_Unspecified
The pass can use heuristics to determine whether a transformation should be applied.
Definition: LoopUtils.h:278
@ TM_SuppressedByUser
The transformation must not be applied.
Definition: LoopUtils.h:298
@ TM_ForcedByUser
The transformation was directed by the user, e.g.
Definition: LoopUtils.h:292
@ TM_Disable
The transformation should not be applied.
Definition: LoopUtils.h:284
@ TM_Enable
The transformation should be applied without considering a cost model.
Definition: LoopUtils.h:281
bool hasDisableLICMTransformsHint(const Loop *L)
Look for the loop attribute that disables the LICM transformation heuristics.
Definition: LoopUtils.cpp:348
RecurKind
These are the kinds of recurrences that we support.
Definition: IVDescriptors.h:34
bool setLoopEstimatedTripCount(Loop *L, unsigned EstimatedTripCount, unsigned EstimatedLoopInvocationWeight)
Set a loop's branch weight metadata to reflect that loop has EstimatedTripCount iterations and Estima...
Definition: LoopUtils.cpp:866
Value * createAnyOfOp(IRBuilderBase &Builder, Value *StartVal, RecurKind RK, Value *Left, Value *Right)
See RecurrenceDescriptor::isAnyOfPattern for a description of the pattern we are trying to match.
Definition: LoopUtils.cpp:965
void setProfileInfoAfterUnrolling(Loop *OrigLoop, Loop *UnrolledLoop, Loop *RemainderLoop, uint64_t UF)
Set weights for UnrolledLoop and RemainderLoop based on weights for OrigLoop and the following distri...
Definition: LoopUtils.cpp:1556
bool formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
Ensure that all exit blocks of the loop are dedicated exits.
Definition: LoopUtils.cpp:57
DWARFExpression::Operation Op
void appendLoopsToWorklist(RangeT &&, SmallPriorityWorklist< Loop *, 4 > &)
Utility that implements appending of loops onto a worklist given a range.
Definition: LoopUtils.cpp:1609
bool isKnownNegativeInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE)
Returns true if we can prove that S is defined and always negative in loop L.
Definition: LoopUtils.cpp:1183
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
bool hasIterationCountInvariantInParent(Loop *L, ScalarEvolution &SE)
Check inner loop (L) backedge count is known to be invariant on all iterations of its outer loop.
Definition: LoopUtils.cpp:898
bool isAlmostDeadIV(PHINode *IV, BasicBlock *LatchBlock, Value *Cond)
Return true if the induction variable IV in a Loop whose latch is LatchBlock would become dead if the...
Definition: LoopUtils.cpp:469
auto predecessors(const MachineBasicBlock *BB)
int rewriteLoopExitValues(Loop *L, LoopInfo *LI, TargetLibraryInfo *TLI, ScalarEvolution *SE, const TargetTransformInfo *TTI, SCEVExpander &Rewriter, DominatorTree *DT, ReplaceExitVal ReplaceExitValue, SmallVector< WeakTrackingVH, 16 > &DeadInsts)
If the final value of any expressions that are recurrent in the loop can be computed,...
Definition: LoopUtils.cpp:1344
iterator_range< typename GraphTraits< GraphType >::ChildIteratorType > children(const typename GraphTraits< GraphType >::NodeRef &G)
Definition: GraphTraits.h:123
Value * addDiffRuntimeChecks(Instruction *Loc, ArrayRef< PointerDiffInfo > Checks, SCEVExpander &Expander, function_ref< Value *(IRBuilderBase &, unsigned)> GetVF, unsigned IC)
Definition: LoopUtils.cpp:1808
ReplaceExitVal
Definition: LoopUtils.h:452
@ UnusedIndVarInLoop
Definition: LoopUtils.h:456
@ OnlyCheapRepl
Definition: LoopUtils.h:454
@ AlwaysRepl
Definition: LoopUtils.h:457
BasicBlock * SplitEdge(BasicBlock *From, BasicBlock *To, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the edge connecting the specified blocks, and return the newly created basic block between From...
std::optional< IVConditionInfo > hasPartialIVCondition(const Loop &L, unsigned MSSAThreshold, const MemorySSA &MSSA, AAResults &AA)
Check if the loop header has a conditional branch that is not loop-invariant, because it involves loa...
Definition: LoopUtils.cpp:1855
bool cannotBeMinInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE, bool Signed)
Returns true if S is defined and never is equal to signed/unsigned min.
Definition: LoopUtils.cpp:1211
bool isKnownNonNegativeInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE)
Returns true if we can prove that S is defined and always non-negative in loop L.
Definition: LoopUtils.cpp:1190
Value * getOrderedReduction(IRBuilderBase &Builder, Value *Acc, Value *Src, unsigned Op, RecurKind MinMaxKind=RecurKind::None)
Generates an ordered vector reduction using extracts to reduce the value.
Definition: LoopUtils.cpp:991
MDNode * findOptionMDForLoopID(MDNode *LoopID, StringRef Name)
Find and return the loop attribute node for the attribute Name in LoopID.
Definition: LoopInfo.cpp:1016
Value * createTargetReduction(IRBuilderBase &B, const RecurrenceDescriptor &Desc, Value *Src, PHINode *OrigPhi=nullptr)
Create a generic target reduction using a recurrence descriptor Desc The target is queried to determi...
Definition: LoopUtils.cpp:1135
Loop * cloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM, LoopInfo *LI, LPPassManager *LPM)
Recursively clone the specified loop and all of its children, mapping the blocks with the specified m...
Definition: LoopUtils.cpp:1626
Value * createAnyOfTargetReduction(IRBuilderBase &B, Value *Src, const RecurrenceDescriptor &Desc, PHINode *OrigPhi)
Create a target reduction of the given vector Src for a reduction of the kind RecurKind::IAnyOf or Re...
Definition: LoopUtils.cpp:1056
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
IR Values for the lower and upper bounds of a pointer evolution.
Definition: LoopUtils.cpp:1653
TrackingVH< Value > Start
Definition: LoopUtils.cpp:1654
TrackingVH< Value > End
Definition: LoopUtils.cpp:1655
Value * StrideToCheck
Definition: LoopUtils.cpp:1656
unsigned Ith
Definition: LoopUtils.cpp:1265
RewritePhi(PHINode *P, unsigned I, const SCEV *Val, Instruction *ExpansionPt, bool H)
Definition: LoopUtils.cpp:1270
const SCEV * ExpansionSCEV
Definition: LoopUtils.cpp:1266
PHINode * PN
Definition: LoopUtils.cpp:1264
Instruction * ExpansionPoint
Definition: LoopUtils.cpp:1267
Description of the encoding of one expression Op.
Struct to hold information about a partially invariant condition.
Definition: LoopUtils.h:530
unsigned AddressSpace
Address space of the involved pointers.
bool NeedsFreeze
Whether the pointer needs to be frozen after expansion, e.g.
const SCEV * High
The SCEV expression which represents the upper bound of all the pointers in this group.
const SCEV * Low
The SCEV expression which represents the lower bound of all the pointers in this group.