LLVM 17.0.0git
LoopUtils.cpp
Go to the documentation of this file.
1//===-- LoopUtils.cpp - Loop Utility functions -------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines common loop utility functions.
10//
11//===----------------------------------------------------------------------===//
12
14#include "llvm/ADT/DenseSet.h"
16#include "llvm/ADT/ScopeExit.h"
17#include "llvm/ADT/SetVector.h"
33#include "llvm/IR/DIBuilder.h"
34#include "llvm/IR/Dominators.h"
37#include "llvm/IR/MDBuilder.h"
38#include "llvm/IR/Module.h"
41#include "llvm/IR/ValueHandle.h"
43#include "llvm/Pass.h"
44#include "llvm/Support/Debug.h"
48
49using namespace llvm;
50using namespace llvm::PatternMatch;
51
52#define DEBUG_TYPE "loop-utils"
53
54static const char *LLVMLoopDisableNonforced = "llvm.loop.disable_nonforced";
55static const char *LLVMLoopDisableLICM = "llvm.licm.disable";
56
58 MemorySSAUpdater *MSSAU,
59 bool PreserveLCSSA) {
60 bool Changed = false;
61
62 // We re-use a vector for the in-loop predecesosrs.
63 SmallVector<BasicBlock *, 4> InLoopPredecessors;
64
65 auto RewriteExit = [&](BasicBlock *BB) {
66 assert(InLoopPredecessors.empty() &&
67 "Must start with an empty predecessors list!");
68 auto Cleanup = make_scope_exit([&] { InLoopPredecessors.clear(); });
69
70 // See if there are any non-loop predecessors of this exit block and
71 // keep track of the in-loop predecessors.
72 bool IsDedicatedExit = true;
73 for (auto *PredBB : predecessors(BB))
74 if (L->contains(PredBB)) {
75 if (isa<IndirectBrInst>(PredBB->getTerminator()))
76 // We cannot rewrite exiting edges from an indirectbr.
77 return false;
78
79 InLoopPredecessors.push_back(PredBB);
80 } else {
81 IsDedicatedExit = false;
82 }
83
84 assert(!InLoopPredecessors.empty() && "Must have *some* loop predecessor!");
85
86 // Nothing to do if this is already a dedicated exit.
87 if (IsDedicatedExit)
88 return false;
89
90 auto *NewExitBB = SplitBlockPredecessors(
91 BB, InLoopPredecessors, ".loopexit", DT, LI, MSSAU, PreserveLCSSA);
92
93 if (!NewExitBB)
95 dbgs() << "WARNING: Can't create a dedicated exit block for loop: "
96 << *L << "\n");
97 else
98 LLVM_DEBUG(dbgs() << "LoopSimplify: Creating dedicated exit block "
99 << NewExitBB->getName() << "\n");
100 return true;
101 };
102
103 // Walk the exit blocks directly rather than building up a data structure for
104 // them, but only visit each one once.
106 for (auto *BB : L->blocks())
107 for (auto *SuccBB : successors(BB)) {
108 // We're looking for exit blocks so skip in-loop successors.
109 if (L->contains(SuccBB))
110 continue;
111
112 // Visit each exit block exactly once.
113 if (!Visited.insert(SuccBB).second)
114 continue;
115
116 Changed |= RewriteExit(SuccBB);
117 }
118
119 return Changed;
120}
121
122/// Returns the instructions that use values defined in the loop.
125
126 for (auto *Block : L->getBlocks())
127 // FIXME: I believe that this could use copy_if if the Inst reference could
128 // be adapted into a pointer.
129 for (auto &Inst : *Block) {
130 auto Users = Inst.users();
131 if (any_of(Users, [&](User *U) {
132 auto *Use = cast<Instruction>(U);
133 return !L->contains(Use->getParent());
134 }))
135 UsedOutside.push_back(&Inst);
136 }
137
138 return UsedOutside;
139}
140
142 // By definition, all loop passes need the LoopInfo analysis and the
143 // Dominator tree it depends on. Because they all participate in the loop
144 // pass manager, they must also preserve these.
149
150 // We must also preserve LoopSimplify and LCSSA. We locally access their IDs
151 // here because users shouldn't directly get them from this header.
152 extern char &LoopSimplifyID;
153 extern char &LCSSAID;
158 // This is used in the LPPassManager to perform LCSSA verification on passes
159 // which preserve lcssa form
162
163 // Loop passes are designed to run inside of a loop pass manager which means
164 // that any function analyses they require must be required by the first loop
165 // pass in the manager (so that it is computed before the loop pass manager
166 // runs) and preserved by all loop pasess in the manager. To make this
167 // reasonably robust, the set needed for most loop passes is maintained here.
168 // If your loop pass requires an analysis not listed here, you will need to
169 // carefully audit the loop pass manager nesting structure that results.
177 // FIXME: When all loop passes preserve MemorySSA, it can be required and
178 // preserved here instead of the individual handling in each pass.
179}
180
181/// Manually defined generic "LoopPass" dependency initialization. This is used
182/// to initialize the exact set of passes from above in \c
183/// getLoopAnalysisUsage. It can be used within a loop pass's initialization
184/// with:
185///
186/// INITIALIZE_PASS_DEPENDENCY(LoopPass)
187///
188/// As-if "LoopPass" were a pass.
192 INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
193 INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass)
200}
201
202/// Create MDNode for input string.
203static MDNode *createStringMetadata(Loop *TheLoop, StringRef Name, unsigned V) {
204 LLVMContext &Context = TheLoop->getHeader()->getContext();
205 Metadata *MDs[] = {
208 return MDNode::get(Context, MDs);
209}
210
211/// Set input string into loop metadata by keeping other values intact.
212/// If the string is already in loop metadata update value if it is
213/// different.
214void llvm::addStringMetadataToLoop(Loop *TheLoop, const char *StringMD,
215 unsigned V) {
217 // If the loop already has metadata, retain it.
218 MDNode *LoopID = TheLoop->getLoopID();
219 if (LoopID) {
220 for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
221 MDNode *Node = cast<MDNode>(LoopID->getOperand(i));
222 // If it is of form key = value, try to parse it.
223 if (Node->getNumOperands() == 2) {
224 MDString *S = dyn_cast<MDString>(Node->getOperand(0));
225 if (S && S->getString().equals(StringMD)) {
226 ConstantInt *IntMD =
227 mdconst::extract_or_null<ConstantInt>(Node->getOperand(1));
228 if (IntMD && IntMD->getSExtValue() == V)
229 // It is already in place. Do nothing.
230 return;
231 // We need to update the value, so just skip it here and it will
232 // be added after copying other existed nodes.
233 continue;
234 }
235 }
236 MDs.push_back(Node);
237 }
238 }
239 // Add new metadata.
240 MDs.push_back(createStringMetadata(TheLoop, StringMD, V));
241 // Replace current metadata node with new one.
242 LLVMContext &Context = TheLoop->getHeader()->getContext();
243 MDNode *NewLoopID = MDNode::get(Context, MDs);
244 // Set operand 0 to refer to the loop id itself.
245 NewLoopID->replaceOperandWith(0, NewLoopID);
246 TheLoop->setLoopID(NewLoopID);
247}
248
249std::optional<ElementCount>
251 std::optional<int> Width =
252 getOptionalIntLoopAttribute(TheLoop, "llvm.loop.vectorize.width");
253
254 if (Width) {
255 std::optional<int> IsScalable = getOptionalIntLoopAttribute(
256 TheLoop, "llvm.loop.vectorize.scalable.enable");
257 return ElementCount::get(*Width, IsScalable.value_or(false));
258 }
259
260 return std::nullopt;
261}
262
263std::optional<MDNode *> llvm::makeFollowupLoopID(
264 MDNode *OrigLoopID, ArrayRef<StringRef> FollowupOptions,
265 const char *InheritOptionsExceptPrefix, bool AlwaysNew) {
266 if (!OrigLoopID) {
267 if (AlwaysNew)
268 return nullptr;
269 return std::nullopt;
270 }
271
272 assert(OrigLoopID->getOperand(0) == OrigLoopID);
273
274 bool InheritAllAttrs = !InheritOptionsExceptPrefix;
275 bool InheritSomeAttrs =
276 InheritOptionsExceptPrefix && InheritOptionsExceptPrefix[0] != '\0';
278 MDs.push_back(nullptr);
279
280 bool Changed = false;
281 if (InheritAllAttrs || InheritSomeAttrs) {
282 for (const MDOperand &Existing : drop_begin(OrigLoopID->operands())) {
283 MDNode *Op = cast<MDNode>(Existing.get());
284
285 auto InheritThisAttribute = [InheritSomeAttrs,
286 InheritOptionsExceptPrefix](MDNode *Op) {
287 if (!InheritSomeAttrs)
288 return false;
289
290 // Skip malformatted attribute metadata nodes.
291 if (Op->getNumOperands() == 0)
292 return true;
293 Metadata *NameMD = Op->getOperand(0).get();
294 if (!isa<MDString>(NameMD))
295 return true;
296 StringRef AttrName = cast<MDString>(NameMD)->getString();
297
298 // Do not inherit excluded attributes.
299 return !AttrName.startswith(InheritOptionsExceptPrefix);
300 };
301
302 if (InheritThisAttribute(Op))
303 MDs.push_back(Op);
304 else
305 Changed = true;
306 }
307 } else {
308 // Modified if we dropped at least one attribute.
309 Changed = OrigLoopID->getNumOperands() > 1;
310 }
311
312 bool HasAnyFollowup = false;
313 for (StringRef OptionName : FollowupOptions) {
314 MDNode *FollowupNode = findOptionMDForLoopID(OrigLoopID, OptionName);
315 if (!FollowupNode)
316 continue;
317
318 HasAnyFollowup = true;
319 for (const MDOperand &Option : drop_begin(FollowupNode->operands())) {
320 MDs.push_back(Option.get());
321 Changed = true;
322 }
323 }
324
325 // Attributes of the followup loop not specified explicity, so signal to the
326 // transformation pass to add suitable attributes.
327 if (!AlwaysNew && !HasAnyFollowup)
328 return std::nullopt;
329
330 // If no attributes were added or remove, the previous loop Id can be reused.
331 if (!AlwaysNew && !Changed)
332 return OrigLoopID;
333
334 // No attributes is equivalent to having no !llvm.loop metadata at all.
335 if (MDs.size() == 1)
336 return nullptr;
337
338 // Build the new loop ID.
339 MDTuple *FollowupLoopID = MDNode::get(OrigLoopID->getContext(), MDs);
340 FollowupLoopID->replaceOperandWith(0, FollowupLoopID);
341 return FollowupLoopID;
342}
343
346}
347
350}
351
353 if (getBooleanLoopAttribute(L, "llvm.loop.unroll.disable"))
354 return TM_SuppressedByUser;
355
356 std::optional<int> Count =
357 getOptionalIntLoopAttribute(L, "llvm.loop.unroll.count");
358 if (Count)
359 return *Count == 1 ? TM_SuppressedByUser : TM_ForcedByUser;
360
361 if (getBooleanLoopAttribute(L, "llvm.loop.unroll.enable"))
362 return TM_ForcedByUser;
363
364 if (getBooleanLoopAttribute(L, "llvm.loop.unroll.full"))
365 return TM_ForcedByUser;
366
368 return TM_Disable;
369
370 return TM_Unspecified;
371}
372
374 if (getBooleanLoopAttribute(L, "llvm.loop.unroll_and_jam.disable"))
375 return TM_SuppressedByUser;
376
377 std::optional<int> Count =
378 getOptionalIntLoopAttribute(L, "llvm.loop.unroll_and_jam.count");
379 if (Count)
380 return *Count == 1 ? TM_SuppressedByUser : TM_ForcedByUser;
381
382 if (getBooleanLoopAttribute(L, "llvm.loop.unroll_and_jam.enable"))
383 return TM_ForcedByUser;
384
386 return TM_Disable;
387
388 return TM_Unspecified;
389}
390
392 std::optional<bool> Enable =
393 getOptionalBoolLoopAttribute(L, "llvm.loop.vectorize.enable");
394
395 if (Enable == false)
396 return TM_SuppressedByUser;
397
398 std::optional<ElementCount> VectorizeWidth =
400 std::optional<int> InterleaveCount =
401 getOptionalIntLoopAttribute(L, "llvm.loop.interleave.count");
402
403 // 'Forcing' vector width and interleave count to one effectively disables
404 // this tranformation.
405 if (Enable == true && VectorizeWidth && VectorizeWidth->isScalar() &&
406 InterleaveCount == 1)
407 return TM_SuppressedByUser;
408
409 if (getBooleanLoopAttribute(L, "llvm.loop.isvectorized"))
410 return TM_Disable;
411
412 if (Enable == true)
413 return TM_ForcedByUser;
414
415 if ((VectorizeWidth && VectorizeWidth->isScalar()) && InterleaveCount == 1)
416 return TM_Disable;
417
418 if ((VectorizeWidth && VectorizeWidth->isVector()) || InterleaveCount > 1)
419 return TM_Enable;
420
422 return TM_Disable;
423
424 return TM_Unspecified;
425}
426
428 if (getBooleanLoopAttribute(L, "llvm.loop.distribute.enable"))
429 return TM_ForcedByUser;
430
432 return TM_Disable;
433
434 return TM_Unspecified;
435}
436
438 if (getBooleanLoopAttribute(L, "llvm.loop.licm_versioning.disable"))
439 return TM_SuppressedByUser;
440
442 return TM_Disable;
443
444 return TM_Unspecified;
445}
446
447/// Does a BFS from a given node to all of its children inside a given loop.
448/// The returned vector of nodes includes the starting point.
452 auto AddRegionToWorklist = [&](DomTreeNode *DTN) {
453 // Only include subregions in the top level loop.
454 BasicBlock *BB = DTN->getBlock();
455 if (CurLoop->contains(BB))
456 Worklist.push_back(DTN);
457 };
458
459 AddRegionToWorklist(N);
460
461 for (size_t I = 0; I < Worklist.size(); I++) {
462 for (DomTreeNode *Child : Worklist[I]->children())
463 AddRegionToWorklist(Child);
464 }
465
466 return Worklist;
467}
468
470 int LatchIdx = PN->getBasicBlockIndex(LatchBlock);
471 Value *IncV = PN->getIncomingValue(LatchIdx);
472
473 for (User *U : PN->users())
474 if (U != Cond && U != IncV) return false;
475
476 for (User *U : IncV->users())
477 if (U != Cond && U != PN) return false;
478 return true;
479}
480
481
483 LoopInfo *LI, MemorySSA *MSSA) {
484 assert((!DT || L->isLCSSAForm(*DT)) && "Expected LCSSA!");
485 auto *Preheader = L->getLoopPreheader();
486 assert(Preheader && "Preheader should exist!");
487
488 std::unique_ptr<MemorySSAUpdater> MSSAU;
489 if (MSSA)
490 MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
491
492 // Now that we know the removal is safe, remove the loop by changing the
493 // branch from the preheader to go to the single exit block.
494 //
495 // Because we're deleting a large chunk of code at once, the sequence in which
496 // we remove things is very important to avoid invalidation issues.
497
498 // Tell ScalarEvolution that the loop is deleted. Do this before
499 // deleting the loop so that ScalarEvolution can look at the loop
500 // to determine what it needs to clean up.
501 if (SE) {
502 SE->forgetLoop(L);
504 }
505
506 Instruction *OldTerm = Preheader->getTerminator();
507 assert(!OldTerm->mayHaveSideEffects() &&
508 "Preheader must end with a side-effect-free terminator");
509 assert(OldTerm->getNumSuccessors() == 1 &&
510 "Preheader must have a single successor");
511 // Connect the preheader to the exit block. Keep the old edge to the header
512 // around to perform the dominator tree update in two separate steps
513 // -- #1 insertion of the edge preheader -> exit and #2 deletion of the edge
514 // preheader -> header.
515 //
516 //
517 // 0. Preheader 1. Preheader 2. Preheader
518 // | | | |
519 // V | V |
520 // Header <--\ | Header <--\ | Header <--\
521 // | | | | | | | | | | |
522 // | V | | | V | | | V |
523 // | Body --/ | | Body --/ | | Body --/
524 // V V V V V
525 // Exit Exit Exit
526 //
527 // By doing this is two separate steps we can perform the dominator tree
528 // update without using the batch update API.
529 //
530 // Even when the loop is never executed, we cannot remove the edge from the
531 // source block to the exit block. Consider the case where the unexecuted loop
532 // branches back to an outer loop. If we deleted the loop and removed the edge
533 // coming to this inner loop, this will break the outer loop structure (by
534 // deleting the backedge of the outer loop). If the outer loop is indeed a
535 // non-loop, it will be deleted in a future iteration of loop deletion pass.
536 IRBuilder<> Builder(OldTerm);
537
538 auto *ExitBlock = L->getUniqueExitBlock();
539 DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
540 if (ExitBlock) {
541 assert(ExitBlock && "Should have a unique exit block!");
542 assert(L->hasDedicatedExits() && "Loop should have dedicated exits!");
543
544 Builder.CreateCondBr(Builder.getFalse(), L->getHeader(), ExitBlock);
545 // Remove the old branch. The conditional branch becomes a new terminator.
546 OldTerm->eraseFromParent();
547
548 // Rewrite phis in the exit block to get their inputs from the Preheader
549 // instead of the exiting block.
550 for (PHINode &P : ExitBlock->phis()) {
551 // Set the zero'th element of Phi to be from the preheader and remove all
552 // other incoming values. Given the loop has dedicated exits, all other
553 // incoming values must be from the exiting blocks.
554 int PredIndex = 0;
555 P.setIncomingBlock(PredIndex, Preheader);
556 // Removes all incoming values from all other exiting blocks (including
557 // duplicate values from an exiting block).
558 // Nuke all entries except the zero'th entry which is the preheader entry.
559 // NOTE! We need to remove Incoming Values in the reverse order as done
560 // below, to keep the indices valid for deletion (removeIncomingValues
561 // updates getNumIncomingValues and shifts all values down into the
562 // operand being deleted).
563 for (unsigned i = 0, e = P.getNumIncomingValues() - 1; i != e; ++i)
564 P.removeIncomingValue(e - i, false);
565
566 assert((P.getNumIncomingValues() == 1 &&
567 P.getIncomingBlock(PredIndex) == Preheader) &&
568 "Should have exactly one value and that's from the preheader!");
569 }
570
571 if (DT) {
572 DTU.applyUpdates({{DominatorTree::Insert, Preheader, ExitBlock}});
573 if (MSSA) {
574 MSSAU->applyUpdates({{DominatorTree::Insert, Preheader, ExitBlock}},
575 *DT);
576 if (VerifyMemorySSA)
577 MSSA->verifyMemorySSA();
578 }
579 }
580
581 // Disconnect the loop body by branching directly to its exit.
582 Builder.SetInsertPoint(Preheader->getTerminator());
583 Builder.CreateBr(ExitBlock);
584 // Remove the old branch.
585 Preheader->getTerminator()->eraseFromParent();
586 } else {
587 assert(L->hasNoExitBlocks() &&
588 "Loop should have either zero or one exit blocks.");
589
590 Builder.SetInsertPoint(OldTerm);
591 Builder.CreateUnreachable();
592 Preheader->getTerminator()->eraseFromParent();
593 }
594
595 if (DT) {
596 DTU.applyUpdates({{DominatorTree::Delete, Preheader, L->getHeader()}});
597 if (MSSA) {
598 MSSAU->applyUpdates({{DominatorTree::Delete, Preheader, L->getHeader()}},
599 *DT);
600 SmallSetVector<BasicBlock *, 8> DeadBlockSet(L->block_begin(),
601 L->block_end());
602 MSSAU->removeBlocks(DeadBlockSet);
603 if (VerifyMemorySSA)
604 MSSA->verifyMemorySSA();
605 }
606 }
607
608 // Use a map to unique and a vector to guarantee deterministic ordering.
611
612 if (ExitBlock) {
613 // Given LCSSA form is satisfied, we should not have users of instructions
614 // within the dead loop outside of the loop. However, LCSSA doesn't take
615 // unreachable uses into account. We handle them here.
616 // We could do it after drop all references (in this case all users in the
617 // loop will be already eliminated and we have less work to do but according
618 // to API doc of User::dropAllReferences only valid operation after dropping
619 // references, is deletion. So let's substitute all usages of
620 // instruction from the loop with poison value of corresponding type first.
621 for (auto *Block : L->blocks())
622 for (Instruction &I : *Block) {
623 auto *Poison = PoisonValue::get(I.getType());
624 for (Use &U : llvm::make_early_inc_range(I.uses())) {
625 if (auto *Usr = dyn_cast<Instruction>(U.getUser()))
626 if (L->contains(Usr->getParent()))
627 continue;
628 // If we have a DT then we can check that uses outside a loop only in
629 // unreachable block.
630 if (DT)
632 "Unexpected user in reachable block");
633 U.set(Poison);
634 }
635 auto *DVI = dyn_cast<DbgVariableIntrinsic>(&I);
636 if (!DVI)
637 continue;
638 if (!DeadDebugSet.insert(DebugVariable(DVI)).second)
639 continue;
640 DeadDebugInst.push_back(DVI);
641 }
642
643 // After the loop has been deleted all the values defined and modified
644 // inside the loop are going to be unavailable.
645 // Since debug values in the loop have been deleted, inserting an undef
646 // dbg.value truncates the range of any dbg.value before the loop where the
647 // loop used to be. This is particularly important for constant values.
648 Instruction *InsertDbgValueBefore = ExitBlock->getFirstNonPHI();
649 assert(InsertDbgValueBefore &&
650 "There should be a non-PHI instruction in exit block, else these "
651 "instructions will have no parent.");
652 for (auto *DVI : DeadDebugInst) {
653 DVI->setKillLocation();
654 DVI->moveBefore(InsertDbgValueBefore);
655 }
656 }
657
658 // Remove the block from the reference counting scheme, so that we can
659 // delete it freely later.
660 for (auto *Block : L->blocks())
661 Block->dropAllReferences();
662
663 if (MSSA && VerifyMemorySSA)
664 MSSA->verifyMemorySSA();
665
666 if (LI) {
667 // Erase the instructions and the blocks without having to worry
668 // about ordering because we already dropped the references.
669 // NOTE: This iteration is safe because erasing the block does not remove
670 // its entry from the loop's block list. We do that in the next section.
671 for (BasicBlock *BB : L->blocks())
672 BB->eraseFromParent();
673
674 // Finally, the blocks from loopinfo. This has to happen late because
675 // otherwise our loop iterators won't work.
676
678 blocks.insert(L->block_begin(), L->block_end());
679 for (BasicBlock *BB : blocks)
680 LI->removeBlock(BB);
681
682 // The last step is to update LoopInfo now that we've eliminated this loop.
683 // Note: LoopInfo::erase remove the given loop and relink its subloops with
684 // its parent. While removeLoop/removeChildLoop remove the given loop but
685 // not relink its subloops, which is what we want.
686 if (Loop *ParentLoop = L->getParentLoop()) {
687 Loop::iterator I = find(*ParentLoop, L);
688 assert(I != ParentLoop->end() && "Couldn't find loop");
689 ParentLoop->removeChildLoop(I);
690 } else {
691 Loop::iterator I = find(*LI, L);
692 assert(I != LI->end() && "Couldn't find loop");
693 LI->removeLoop(I);
694 }
695 LI->destroy(L);
696 }
697}
698
700 LoopInfo &LI, MemorySSA *MSSA) {
701 auto *Latch = L->getLoopLatch();
702 assert(Latch && "multiple latches not yet supported");
703 auto *Header = L->getHeader();
704 Loop *OutermostLoop = L->getOutermostLoop();
705
706 SE.forgetLoop(L);
708
709 std::unique_ptr<MemorySSAUpdater> MSSAU;
710 if (MSSA)
711 MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
712
713 // Update the CFG and domtree. We chose to special case a couple of
714 // of common cases for code quality and test readability reasons.
715 [&]() -> void {
716 if (auto *BI = dyn_cast<BranchInst>(Latch->getTerminator())) {
717 if (!BI->isConditional()) {
718 DomTreeUpdater DTU(&DT, DomTreeUpdater::UpdateStrategy::Eager);
719 (void)changeToUnreachable(BI, /*PreserveLCSSA*/ true, &DTU,
720 MSSAU.get());
721 return;
722 }
723
724 // Conditional latch/exit - note that latch can be shared by inner
725 // and outer loop so the other target doesn't need to an exit
726 if (L->isLoopExiting(Latch)) {
727 // TODO: Generalize ConstantFoldTerminator so that it can be used
728 // here without invalidating LCSSA or MemorySSA. (Tricky case for
729 // LCSSA: header is an exit block of a preceeding sibling loop w/o
730 // dedicated exits.)
731 const unsigned ExitIdx = L->contains(BI->getSuccessor(0)) ? 1 : 0;
732 BasicBlock *ExitBB = BI->getSuccessor(ExitIdx);
733
734 DomTreeUpdater DTU(&DT, DomTreeUpdater::UpdateStrategy::Eager);
735 Header->removePredecessor(Latch, true);
736
738 auto *NewBI = Builder.CreateBr(ExitBB);
739 // Transfer the metadata to the new branch instruction (minus the
740 // loop info since this is no longer a loop)
741 NewBI->copyMetadata(*BI, {LLVMContext::MD_dbg,
742 LLVMContext::MD_annotation});
743
744 BI->eraseFromParent();
745 DTU.applyUpdates({{DominatorTree::Delete, Latch, Header}});
746 if (MSSA)
747 MSSAU->applyUpdates({{DominatorTree::Delete, Latch, Header}}, DT);
748 return;
749 }
750 }
751
752 // General case. By splitting the backedge, and then explicitly making it
753 // unreachable we gracefully handle corner cases such as switch and invoke
754 // termiantors.
755 auto *BackedgeBB = SplitEdge(Latch, Header, &DT, &LI, MSSAU.get());
756
757 DomTreeUpdater DTU(&DT, DomTreeUpdater::UpdateStrategy::Eager);
758 (void)changeToUnreachable(BackedgeBB->getTerminator(),
759 /*PreserveLCSSA*/ true, &DTU, MSSAU.get());
760 }();
761
762 // Erase (and destroy) this loop instance. Handles relinking sub-loops
763 // and blocks within the loop as needed.
764 LI.erase(L);
765
766 // If the loop we broke had a parent, then changeToUnreachable might have
767 // caused a block to be removed from the parent loop (see loop_nest_lcssa
768 // test case in zero-btc.ll for an example), thus changing the parent's
769 // exit blocks. If that happened, we need to rebuild LCSSA on the outermost
770 // loop which might have a had a block removed.
771 if (OutermostLoop != L)
772 formLCSSARecursively(*OutermostLoop, DT, &LI);
773}
774
775
776/// Checks if \p L has an exiting latch branch. There may also be other
777/// exiting blocks. Returns branch instruction terminating the loop
778/// latch if above check is successful, nullptr otherwise.
780 BasicBlock *Latch = L->getLoopLatch();
781 if (!Latch)
782 return nullptr;
783
784 BranchInst *LatchBR = dyn_cast<BranchInst>(Latch->getTerminator());
785 if (!LatchBR || LatchBR->getNumSuccessors() != 2 || !L->isLoopExiting(Latch))
786 return nullptr;
787
788 assert((LatchBR->getSuccessor(0) == L->getHeader() ||
789 LatchBR->getSuccessor(1) == L->getHeader()) &&
790 "At least one edge out of the latch must go to the header");
791
792 return LatchBR;
793}
794
795/// Return the estimated trip count for any exiting branch which dominates
796/// the loop latch.
797static std::optional<uint64_t> getEstimatedTripCount(BranchInst *ExitingBranch,
798 Loop *L,
799 uint64_t &OrigExitWeight) {
800 // To estimate the number of times the loop body was executed, we want to
801 // know the number of times the backedge was taken, vs. the number of times
802 // we exited the loop.
803 uint64_t LoopWeight, ExitWeight;
804 if (!extractBranchWeights(*ExitingBranch, LoopWeight, ExitWeight))
805 return std::nullopt;
806
807 if (L->contains(ExitingBranch->getSuccessor(1)))
808 std::swap(LoopWeight, ExitWeight);
809
810 if (!ExitWeight)
811 // Don't have a way to return predicated infinite
812 return std::nullopt;
813
814 OrigExitWeight = ExitWeight;
815
816 // Estimated exit count is a ratio of the loop weight by the weight of the
817 // edge exiting the loop, rounded to nearest.
818 uint64_t ExitCount = llvm::divideNearest(LoopWeight, ExitWeight);
819 // Estimated trip count is one plus estimated exit count.
820 return ExitCount + 1;
821}
822
823std::optional<unsigned>
825 unsigned *EstimatedLoopInvocationWeight) {
826 // Currently we take the estimate exit count only from the loop latch,
827 // ignoring other exiting blocks. This can overestimate the trip count
828 // if we exit through another exit, but can never underestimate it.
829 // TODO: incorporate information from other exits
830 if (BranchInst *LatchBranch = getExpectedExitLoopLatchBranch(L)) {
831 uint64_t ExitWeight;
832 if (std::optional<uint64_t> EstTripCount =
833 getEstimatedTripCount(LatchBranch, L, ExitWeight)) {
834 if (EstimatedLoopInvocationWeight)
835 *EstimatedLoopInvocationWeight = ExitWeight;
836 return *EstTripCount;
837 }
838 }
839 return std::nullopt;
840}
841
842bool llvm::setLoopEstimatedTripCount(Loop *L, unsigned EstimatedTripCount,
843 unsigned EstimatedloopInvocationWeight) {
844 // At the moment, we currently support changing the estimate trip count of
845 // the latch branch only. We could extend this API to manipulate estimated
846 // trip counts for any exit.
848 if (!LatchBranch)
849 return false;
850
851 // Calculate taken and exit weights.
852 unsigned LatchExitWeight = 0;
853 unsigned BackedgeTakenWeight = 0;
854
855 if (EstimatedTripCount > 0) {
856 LatchExitWeight = EstimatedloopInvocationWeight;
857 BackedgeTakenWeight = (EstimatedTripCount - 1) * LatchExitWeight;
858 }
859
860 // Make a swap if back edge is taken when condition is "false".
861 if (LatchBranch->getSuccessor(0) != L->getHeader())
862 std::swap(BackedgeTakenWeight, LatchExitWeight);
863
864 MDBuilder MDB(LatchBranch->getContext());
865
866 // Set/Update profile metadata.
867 LatchBranch->setMetadata(
868 LLVMContext::MD_prof,
869 MDB.createBranchWeights(BackedgeTakenWeight, LatchExitWeight));
870
871 return true;
872}
873
875 ScalarEvolution &SE) {
876 Loop *OuterL = InnerLoop->getParentLoop();
877 if (!OuterL)
878 return true;
879
880 // Get the backedge taken count for the inner loop
881 BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
882 const SCEV *InnerLoopBECountSC = SE.getExitCount(InnerLoop, InnerLoopLatch);
883 if (isa<SCEVCouldNotCompute>(InnerLoopBECountSC) ||
884 !InnerLoopBECountSC->getType()->isIntegerTy())
885 return false;
886
887 // Get whether count is invariant to the outer loop
889 SE.getLoopDisposition(InnerLoopBECountSC, OuterL);
891 return false;
892
893 return true;
894}
895
897 switch (RK) {
898 default:
899 llvm_unreachable("Unknown min/max recurrence kind");
900 case RecurKind::UMin:
901 return Intrinsic::umin;
902 case RecurKind::UMax:
903 return Intrinsic::umax;
904 case RecurKind::SMin:
905 return Intrinsic::smin;
906 case RecurKind::SMax:
907 return Intrinsic::smax;
908 case RecurKind::FMin:
909 return Intrinsic::minnum;
910 case RecurKind::FMax:
911 return Intrinsic::maxnum;
912 }
913}
914
916 switch (RK) {
917 default:
918 llvm_unreachable("Unknown min/max recurrence kind");
919 case RecurKind::UMin:
920 return CmpInst::ICMP_ULT;
921 case RecurKind::UMax:
922 return CmpInst::ICMP_UGT;
923 case RecurKind::SMin:
924 return CmpInst::ICMP_SLT;
925 case RecurKind::SMax:
926 return CmpInst::ICMP_SGT;
927 case RecurKind::FMin:
928 return CmpInst::FCMP_OLT;
929 case RecurKind::FMax:
930 return CmpInst::FCMP_OGT;
931 }
932}
933
935 RecurKind RK, Value *Left, Value *Right) {
936 if (auto VTy = dyn_cast<VectorType>(Left->getType()))
937 StartVal = Builder.CreateVectorSplat(VTy->getElementCount(), StartVal);
938 Value *Cmp =
939 Builder.CreateCmp(CmpInst::ICMP_NE, Left, StartVal, "rdx.select.cmp");
940 return Builder.CreateSelect(Cmp, Left, Right, "rdx.select");
941}
942
944 Value *Right) {
945 Type *Ty = Left->getType();
946 if (Ty->isIntOrIntVectorTy()) {
947 // TODO: Add float minnum/maxnum support when FMF nnan is set.
949 return Builder.CreateIntrinsic(Ty, Id, {Left, Right}, nullptr,
950 "rdx.minmax");
951 }
953 Value *Cmp = Builder.CreateCmp(Pred, Left, Right, "rdx.minmax.cmp");
954 Value *Select = Builder.CreateSelect(Cmp, Left, Right, "rdx.minmax.select");
955 return Select;
956}
957
958// Helper to generate an ordered reduction.
960 unsigned Op, RecurKind RdxKind) {
961 unsigned VF = cast<FixedVectorType>(Src->getType())->getNumElements();
962
963 // Extract and apply reduction ops in ascending order:
964 // e.g. ((((Acc + Scl[0]) + Scl[1]) + Scl[2]) + ) ... + Scl[VF-1]
965 Value *Result = Acc;
966 for (unsigned ExtractIdx = 0; ExtractIdx != VF; ++ExtractIdx) {
967 Value *Ext =
968 Builder.CreateExtractElement(Src, Builder.getInt32(ExtractIdx));
969
970 if (Op != Instruction::ICmp && Op != Instruction::FCmp) {
971 Result = Builder.CreateBinOp((Instruction::BinaryOps)Op, Result, Ext,
972 "bin.rdx");
973 } else {
975 "Invalid min/max");
976 Result = createMinMaxOp(Builder, RdxKind, Result, Ext);
977 }
978 }
979
980 return Result;
981}
982
983// Helper to generate a log2 shuffle reduction.
985 unsigned Op, RecurKind RdxKind) {
986 unsigned VF = cast<FixedVectorType>(Src->getType())->getNumElements();
987 // VF is a power of 2 so we can emit the reduction using log2(VF) shuffles
988 // and vector ops, reducing the set of values being computed by half each
989 // round.
990 assert(isPowerOf2_32(VF) &&
991 "Reduction emission only supported for pow2 vectors!");
992 // Note: fast-math-flags flags are controlled by the builder configuration
993 // and are assumed to apply to all generated arithmetic instructions. Other
994 // poison generating flags (nsw/nuw/inbounds/inrange/exact) are not part
995 // of the builder configuration, and since they're not passed explicitly,
996 // will never be relevant here. Note that it would be generally unsound to
997 // propagate these from an intrinsic call to the expansion anyways as we/
998 // change the order of operations.
999 Value *TmpVec = Src;
1000 SmallVector<int, 32> ShuffleMask(VF);
1001 for (unsigned i = VF; i != 1; i >>= 1) {
1002 // Move the upper half of the vector to the lower half.
1003 for (unsigned j = 0; j != i / 2; ++j)
1004 ShuffleMask[j] = i / 2 + j;
1005
1006 // Fill the rest of the mask with undef.
1007 std::fill(&ShuffleMask[i / 2], ShuffleMask.end(), -1);
1008
1009 Value *Shuf = Builder.CreateShuffleVector(TmpVec, ShuffleMask, "rdx.shuf");
1010
1011 if (Op != Instruction::ICmp && Op != Instruction::FCmp) {
1012 TmpVec = Builder.CreateBinOp((Instruction::BinaryOps)Op, TmpVec, Shuf,
1013 "bin.rdx");
1014 } else {
1016 "Invalid min/max");
1017 TmpVec = createMinMaxOp(Builder, RdxKind, TmpVec, Shuf);
1018 }
1019 }
1020 // The result is in the first element of the vector.
1021 return Builder.CreateExtractElement(TmpVec, Builder.getInt32(0));
1022}
1023
1025 const TargetTransformInfo *TTI,
1026 Value *Src,
1027 const RecurrenceDescriptor &Desc,
1028 PHINode *OrigPhi) {
1030 Desc.getRecurrenceKind()) &&
1031 "Unexpected reduction kind");
1032 Value *InitVal = Desc.getRecurrenceStartValue();
1033 Value *NewVal = nullptr;
1034
1035 // First use the original phi to determine the new value we're trying to
1036 // select from in the loop.
1037 SelectInst *SI = nullptr;
1038 for (auto *U : OrigPhi->users()) {
1039 if ((SI = dyn_cast<SelectInst>(U)))
1040 break;
1041 }
1042 assert(SI && "One user of the original phi should be a select");
1043
1044 if (SI->getTrueValue() == OrigPhi)
1045 NewVal = SI->getFalseValue();
1046 else {
1047 assert(SI->getFalseValue() == OrigPhi &&
1048 "At least one input to the select should be the original Phi");
1049 NewVal = SI->getTrueValue();
1050 }
1051
1052 // Create a splat vector with the new value and compare this to the vector
1053 // we want to reduce.
1054 ElementCount EC = cast<VectorType>(Src->getType())->getElementCount();
1055 Value *Right = Builder.CreateVectorSplat(EC, InitVal);
1056 Value *Cmp =
1057 Builder.CreateCmp(CmpInst::ICMP_NE, Src, Right, "rdx.select.cmp");
1058
1059 // If any predicate is true it means that we want to select the new value.
1060 Cmp = Builder.CreateOrReduce(Cmp);
1061 return Builder.CreateSelect(Cmp, NewVal, InitVal, "rdx.select");
1062}
1063
1065 const TargetTransformInfo *TTI,
1066 Value *Src, RecurKind RdxKind) {
1067 auto *SrcVecEltTy = cast<VectorType>(Src->getType())->getElementType();
1068 switch (RdxKind) {
1069 case RecurKind::Add:
1070 return Builder.CreateAddReduce(Src);
1071 case RecurKind::Mul:
1072 return Builder.CreateMulReduce(Src);
1073 case RecurKind::And:
1074 return Builder.CreateAndReduce(Src);
1075 case RecurKind::Or:
1076 return Builder.CreateOrReduce(Src);
1077 case RecurKind::Xor:
1078 return Builder.CreateXorReduce(Src);
1079 case RecurKind::FMulAdd:
1080 case RecurKind::FAdd:
1081 return Builder.CreateFAddReduce(ConstantFP::getNegativeZero(SrcVecEltTy),
1082 Src);
1083 case RecurKind::FMul:
1084 return Builder.CreateFMulReduce(ConstantFP::get(SrcVecEltTy, 1.0), Src);
1085 case RecurKind::SMax:
1086 return Builder.CreateIntMaxReduce(Src, true);
1087 case RecurKind::SMin:
1088 return Builder.CreateIntMinReduce(Src, true);
1089 case RecurKind::UMax:
1090 return Builder.CreateIntMaxReduce(Src, false);
1091 case RecurKind::UMin:
1092 return Builder.CreateIntMinReduce(Src, false);
1093 case RecurKind::FMax:
1094 return Builder.CreateFPMaxReduce(Src);
1095 case RecurKind::FMin:
1096 return Builder.CreateFPMinReduce(Src);
1097 default:
1098 llvm_unreachable("Unhandled opcode");
1099 }
1100}
1101
1103 const TargetTransformInfo *TTI,
1104 const RecurrenceDescriptor &Desc, Value *Src,
1105 PHINode *OrigPhi) {
1106 // TODO: Support in-order reductions based on the recurrence descriptor.
1107 // All ops in the reduction inherit fast-math-flags from the recurrence
1108 // descriptor.
1110 B.setFastMathFlags(Desc.getFastMathFlags());
1111
1112 RecurKind RK = Desc.getRecurrenceKind();
1114 return createSelectCmpTargetReduction(B, TTI, Src, Desc, OrigPhi);
1115
1116 return createSimpleTargetReduction(B, TTI, Src, RK);
1117}
1118
1120 const RecurrenceDescriptor &Desc,
1121 Value *Src, Value *Start) {
1122 assert((Desc.getRecurrenceKind() == RecurKind::FAdd ||
1123 Desc.getRecurrenceKind() == RecurKind::FMulAdd) &&
1124 "Unexpected reduction kind");
1125 assert(Src->getType()->isVectorTy() && "Expected a vector type");
1126 assert(!Start->getType()->isVectorTy() && "Expected a scalar type");
1127
1128 return B.CreateFAddReduce(Start, Src);
1129}
1130
1132 bool IncludeWrapFlags) {
1133 auto *VecOp = dyn_cast<Instruction>(I);
1134 if (!VecOp)
1135 return;
1136 auto *Intersection = (OpValue == nullptr) ? dyn_cast<Instruction>(VL[0])
1137 : dyn_cast<Instruction>(OpValue);
1138 if (!Intersection)
1139 return;
1140 const unsigned Opcode = Intersection->getOpcode();
1141 VecOp->copyIRFlags(Intersection, IncludeWrapFlags);
1142 for (auto *V : VL) {
1143 auto *Instr = dyn_cast<Instruction>(V);
1144 if (!Instr)
1145 continue;
1146 if (OpValue == nullptr || Opcode == Instr->getOpcode())
1147 VecOp->andIRFlags(V);
1148 }
1149}
1150
1151bool llvm::isKnownNegativeInLoop(const SCEV *S, const Loop *L,
1152 ScalarEvolution &SE) {
1153 const SCEV *Zero = SE.getZero(S->getType());
1154 return SE.isAvailableAtLoopEntry(S, L) &&
1155 SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SLT, S, Zero);
1156}
1157
1159 ScalarEvolution &SE) {
1160 const SCEV *Zero = SE.getZero(S->getType());
1161 return SE.isAvailableAtLoopEntry(S, L) &&
1162 SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SGE, S, Zero);
1163}
1164
1165bool llvm::isKnownPositiveInLoop(const SCEV *S, const Loop *L,
1166 ScalarEvolution &SE) {
1167 const SCEV *Zero = SE.getZero(S->getType());
1168 return SE.isAvailableAtLoopEntry(S, L) &&
1169 SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SGT, S, Zero);
1170}
1171
1173 ScalarEvolution &SE) {
1174 const SCEV *Zero = SE.getZero(S->getType());
1175 return SE.isAvailableAtLoopEntry(S, L) &&
1176 SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SLE, S, Zero);
1177}
1178
1180 bool Signed) {
1181 unsigned BitWidth = cast<IntegerType>(S->getType())->getBitWidth();
1184 auto Predicate = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
1185 return SE.isAvailableAtLoopEntry(S, L) &&
1186 SE.isLoopEntryGuardedByCond(L, Predicate, S,
1187 SE.getConstant(Min));
1188}
1189
1191 bool Signed) {
1192 unsigned BitWidth = cast<IntegerType>(S->getType())->getBitWidth();
1195 auto Predicate = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
1196 return SE.isAvailableAtLoopEntry(S, L) &&
1197 SE.isLoopEntryGuardedByCond(L, Predicate, S,
1198 SE.getConstant(Max));
1199}
1200
1201//===----------------------------------------------------------------------===//
1202// rewriteLoopExitValues - Optimize IV users outside the loop.
1203// As a side effect, reduces the amount of IV processing within the loop.
1204//===----------------------------------------------------------------------===//
1205
1206static bool hasHardUserWithinLoop(const Loop *L, const Instruction *I) {
1209 Visited.insert(I);
1210 WorkList.push_back(I);
1211 while (!WorkList.empty()) {
1212 const Instruction *Curr = WorkList.pop_back_val();
1213 // This use is outside the loop, nothing to do.
1214 if (!L->contains(Curr))
1215 continue;
1216 // Do we assume it is a "hard" use which will not be eliminated easily?
1217 if (Curr->mayHaveSideEffects())
1218 return true;
1219 // Otherwise, add all its users to worklist.
1220 for (const auto *U : Curr->users()) {
1221 auto *UI = cast<Instruction>(U);
1222 if (Visited.insert(UI).second)
1223 WorkList.push_back(UI);
1224 }
1225 }
1226 return false;
1227}
1228
1229// Collect information about PHI nodes which can be transformed in
1230// rewriteLoopExitValues.
1232 PHINode *PN; // For which PHI node is this replacement?
1233 unsigned Ith; // For which incoming value?
1234 const SCEV *ExpansionSCEV; // The SCEV of the incoming value we are rewriting.
1235 Instruction *ExpansionPoint; // Where we'd like to expand that SCEV?
1236 bool HighCost; // Is this expansion a high-cost?
1237
1238 RewritePhi(PHINode *P, unsigned I, const SCEV *Val, Instruction *ExpansionPt,
1239 bool H)
1240 : PN(P), Ith(I), ExpansionSCEV(Val), ExpansionPoint(ExpansionPt),
1241 HighCost(H) {}
1242};
1243
1244// Check whether it is possible to delete the loop after rewriting exit
1245// value. If it is possible, ignore ReplaceExitValue and do rewriting
1246// aggressively.
1247static bool canLoopBeDeleted(Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet) {
1248 BasicBlock *Preheader = L->getLoopPreheader();
1249 // If there is no preheader, the loop will not be deleted.
1250 if (!Preheader)
1251 return false;
1252
1253 // In LoopDeletion pass Loop can be deleted when ExitingBlocks.size() > 1.
1254 // We obviate multiple ExitingBlocks case for simplicity.
1255 // TODO: If we see testcase with multiple ExitingBlocks can be deleted
1256 // after exit value rewriting, we can enhance the logic here.
1257 SmallVector<BasicBlock *, 4> ExitingBlocks;
1258 L->getExitingBlocks(ExitingBlocks);
1260 L->getUniqueExitBlocks(ExitBlocks);
1261 if (ExitBlocks.size() != 1 || ExitingBlocks.size() != 1)
1262 return false;
1263
1264 BasicBlock *ExitBlock = ExitBlocks[0];
1265 BasicBlock::iterator BI = ExitBlock->begin();
1266 while (PHINode *P = dyn_cast<PHINode>(BI)) {
1267 Value *Incoming = P->getIncomingValueForBlock(ExitingBlocks[0]);
1268
1269 // If the Incoming value of P is found in RewritePhiSet, we know it
1270 // could be rewritten to use a loop invariant value in transformation
1271 // phase later. Skip it in the loop invariant check below.
1272 bool found = false;
1273 for (const RewritePhi &Phi : RewritePhiSet) {
1274 unsigned i = Phi.Ith;
1275 if (Phi.PN == P && (Phi.PN)->getIncomingValue(i) == Incoming) {
1276 found = true;
1277 break;
1278 }
1279 }
1280
1281 Instruction *I;
1282 if (!found && (I = dyn_cast<Instruction>(Incoming)))
1283 if (!L->hasLoopInvariantOperands(I))
1284 return false;
1285
1286 ++BI;
1287 }
1288
1289 for (auto *BB : L->blocks())
1290 if (llvm::any_of(*BB, [](Instruction &I) {
1291 return I.mayHaveSideEffects();
1292 }))
1293 return false;
1294
1295 return true;
1296}
1297
1298/// Checks if it is safe to call InductionDescriptor::isInductionPHI for \p Phi,
1299/// and returns true if this Phi is an induction phi in the loop. When
1300/// isInductionPHI returns true, \p ID will be also be set by isInductionPHI.
1301static bool checkIsIndPhi(PHINode *Phi, Loop *L, ScalarEvolution *SE,
1303 if (!Phi)
1304 return false;
1305 if (!L->getLoopPreheader())
1306 return false;
1307 if (Phi->getParent() != L->getHeader())
1308 return false;
1309 return InductionDescriptor::isInductionPHI(Phi, L, SE, ID);
1310}
1311
1313 ScalarEvolution *SE,
1314 const TargetTransformInfo *TTI,
1315 SCEVExpander &Rewriter, DominatorTree *DT,
1318 // Check a pre-condition.
1319 assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
1320 "Indvars did not preserve LCSSA!");
1321
1322 SmallVector<BasicBlock*, 8> ExitBlocks;
1323 L->getUniqueExitBlocks(ExitBlocks);
1324
1325 SmallVector<RewritePhi, 8> RewritePhiSet;
1326 // Find all values that are computed inside the loop, but used outside of it.
1327 // Because of LCSSA, these values will only occur in LCSSA PHI Nodes. Scan
1328 // the exit blocks of the loop to find them.
1329 for (BasicBlock *ExitBB : ExitBlocks) {
1330 // If there are no PHI nodes in this exit block, then no values defined
1331 // inside the loop are used on this path, skip it.
1332 PHINode *PN = dyn_cast<PHINode>(ExitBB->begin());
1333 if (!PN) continue;
1334
1335 unsigned NumPreds = PN->getNumIncomingValues();
1336
1337 // Iterate over all of the PHI nodes.
1338 BasicBlock::iterator BBI = ExitBB->begin();
1339 while ((PN = dyn_cast<PHINode>(BBI++))) {
1340 if (PN->use_empty())
1341 continue; // dead use, don't replace it
1342
1343 if (!SE->isSCEVable(PN->getType()))
1344 continue;
1345
1346 // Iterate over all of the values in all the PHI nodes.
1347 for (unsigned i = 0; i != NumPreds; ++i) {
1348 // If the value being merged in is not integer or is not defined
1349 // in the loop, skip it.
1350 Value *InVal = PN->getIncomingValue(i);
1351 if (!isa<Instruction>(InVal))
1352 continue;
1353
1354 // If this pred is for a subloop, not L itself, skip it.
1355 if (LI->getLoopFor(PN->getIncomingBlock(i)) != L)
1356 continue; // The Block is in a subloop, skip it.
1357
1358 // Check that InVal is defined in the loop.
1359 Instruction *Inst = cast<Instruction>(InVal);
1360 if (!L->contains(Inst))
1361 continue;
1362
1363 // Find exit values which are induction variables in the loop, and are
1364 // unused in the loop, with the only use being the exit block PhiNode,
1365 // and the induction variable update binary operator.
1366 // The exit value can be replaced with the final value when it is cheap
1367 // to do so.
1370 PHINode *IndPhi = dyn_cast<PHINode>(Inst);
1371 if (IndPhi) {
1372 if (!checkIsIndPhi(IndPhi, L, SE, ID))
1373 continue;
1374 // This is an induction PHI. Check that the only users are PHI
1375 // nodes, and induction variable update binary operators.
1376 if (llvm::any_of(Inst->users(), [&](User *U) {
1377 if (!isa<PHINode>(U) && !isa<BinaryOperator>(U))
1378 return true;
1379 BinaryOperator *B = dyn_cast<BinaryOperator>(U);
1380 if (B && B != ID.getInductionBinOp())
1381 return true;
1382 return false;
1383 }))
1384 continue;
1385 } else {
1386 // If it is not an induction phi, it must be an induction update
1387 // binary operator with an induction phi user.
1388 BinaryOperator *B = dyn_cast<BinaryOperator>(Inst);
1389 if (!B)
1390 continue;
1391 if (llvm::any_of(Inst->users(), [&](User *U) {
1392 PHINode *Phi = dyn_cast<PHINode>(U);
1393 if (Phi != PN && !checkIsIndPhi(Phi, L, SE, ID))
1394 return true;
1395 return false;
1396 }))
1397 continue;
1398 if (B != ID.getInductionBinOp())
1399 continue;
1400 }
1401 }
1402
1403 // Okay, this instruction has a user outside of the current loop
1404 // and varies predictably *inside* the loop. Evaluate the value it
1405 // contains when the loop exits, if possible. We prefer to start with
1406 // expressions which are true for all exits (so as to maximize
1407 // expression reuse by the SCEVExpander), but resort to per-exit
1408 // evaluation if that fails.
1409 const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop());
1410 if (isa<SCEVCouldNotCompute>(ExitValue) ||
1411 !SE->isLoopInvariant(ExitValue, L) ||
1412 !Rewriter.isSafeToExpand(ExitValue)) {
1413 // TODO: This should probably be sunk into SCEV in some way; maybe a
1414 // getSCEVForExit(SCEV*, L, ExitingBB)? It can be generalized for
1415 // most SCEV expressions and other recurrence types (e.g. shift
1416 // recurrences). Is there existing code we can reuse?
1417 const SCEV *ExitCount = SE->getExitCount(L, PN->getIncomingBlock(i));
1418 if (isa<SCEVCouldNotCompute>(ExitCount))
1419 continue;
1420 if (auto *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Inst)))
1421 if (AddRec->getLoop() == L)
1422 ExitValue = AddRec->evaluateAtIteration(ExitCount, *SE);
1423 if (isa<SCEVCouldNotCompute>(ExitValue) ||
1424 !SE->isLoopInvariant(ExitValue, L) ||
1425 !Rewriter.isSafeToExpand(ExitValue))
1426 continue;
1427 }
1428
1429 // Computing the value outside of the loop brings no benefit if it is
1430 // definitely used inside the loop in a way which can not be optimized
1431 // away. Avoid doing so unless we know we have a value which computes
1432 // the ExitValue already. TODO: This should be merged into SCEV
1433 // expander to leverage its knowledge of existing expressions.
1434 if (ReplaceExitValue != AlwaysRepl && !isa<SCEVConstant>(ExitValue) &&
1435 !isa<SCEVUnknown>(ExitValue) && hasHardUserWithinLoop(L, Inst))
1436 continue;
1437
1438 // Check if expansions of this SCEV would count as being high cost.
1439 bool HighCost = Rewriter.isHighCostExpansion(
1440 ExitValue, L, SCEVCheapExpansionBudget, TTI, Inst);
1441
1442 // Note that we must not perform expansions until after
1443 // we query *all* the costs, because if we perform temporary expansion
1444 // inbetween, one that we might not intend to keep, said expansion
1445 // *may* affect cost calculation of the the next SCEV's we'll query,
1446 // and next SCEV may errneously get smaller cost.
1447
1448 // Collect all the candidate PHINodes to be rewritten.
1449 Instruction *InsertPt =
1450 (isa<PHINode>(Inst) || isa<LandingPadInst>(Inst)) ?
1451 &*Inst->getParent()->getFirstInsertionPt() : Inst;
1452 RewritePhiSet.emplace_back(PN, i, ExitValue, InsertPt, HighCost);
1453 }
1454 }
1455 }
1456
1457 // TODO: evaluate whether it is beneficial to change how we calculate
1458 // high-cost: if we have SCEV 'A' which we know we will expand, should we
1459 // calculate the cost of other SCEV's after expanding SCEV 'A', thus
1460 // potentially giving cost bonus to those other SCEV's?
1461
1462 bool LoopCanBeDel = canLoopBeDeleted(L, RewritePhiSet);
1463 int NumReplaced = 0;
1464
1465 // Transformation.
1466 for (const RewritePhi &Phi : RewritePhiSet) {
1467 PHINode *PN = Phi.PN;
1468
1469 // Only do the rewrite when the ExitValue can be expanded cheaply.
1470 // If LoopCanBeDel is true, rewrite exit value aggressively.
1473 !LoopCanBeDel && Phi.HighCost)
1474 continue;
1475
1476 Value *ExitVal = Rewriter.expandCodeFor(
1477 Phi.ExpansionSCEV, Phi.PN->getType(), Phi.ExpansionPoint);
1478
1479 LLVM_DEBUG(dbgs() << "rewriteLoopExitValues: AfterLoopVal = " << *ExitVal
1480 << '\n'
1481 << " LoopVal = " << *(Phi.ExpansionPoint) << "\n");
1482
1483#ifndef NDEBUG
1484 // If we reuse an instruction from a loop which is neither L nor one of
1485 // its containing loops, we end up breaking LCSSA form for this loop by
1486 // creating a new use of its instruction.
1487 if (auto *ExitInsn = dyn_cast<Instruction>(ExitVal))
1488 if (auto *EVL = LI->getLoopFor(ExitInsn->getParent()))
1489 if (EVL != L)
1490 assert(EVL->contains(L) && "LCSSA breach detected!");
1491#endif
1492
1493 NumReplaced++;
1494 Instruction *Inst = cast<Instruction>(PN->getIncomingValue(Phi.Ith));
1495 PN->setIncomingValue(Phi.Ith, ExitVal);
1496 // It's necessary to tell ScalarEvolution about this explicitly so that
1497 // it can walk the def-use list and forget all SCEVs, as it may not be
1498 // watching the PHI itself. Once the new exit value is in place, there
1499 // may not be a def-use connection between the loop and every instruction
1500 // which got a SCEVAddRecExpr for that loop.
1501 SE->forgetValue(PN);
1502
1503 // If this instruction is dead now, delete it. Don't do it now to avoid
1504 // invalidating iterators.
1505 if (isInstructionTriviallyDead(Inst, TLI))
1506 DeadInsts.push_back(Inst);
1507
1508 // Replace PN with ExitVal if that is legal and does not break LCSSA.
1509 if (PN->getNumIncomingValues() == 1 &&
1510 LI->replacementPreservesLCSSAForm(PN, ExitVal)) {
1511 PN->replaceAllUsesWith(ExitVal);
1512 PN->eraseFromParent();
1513 }
1514 }
1515
1516 // The insertion point instruction may have been deleted; clear it out
1517 // so that the rewriter doesn't trip over it later.
1518 Rewriter.clearInsertPoint();
1519 return NumReplaced;
1520}
1521
1522/// Set weights for \p UnrolledLoop and \p RemainderLoop based on weights for
1523/// \p OrigLoop.
1524void llvm::setProfileInfoAfterUnrolling(Loop *OrigLoop, Loop *UnrolledLoop,
1525 Loop *RemainderLoop, uint64_t UF) {
1526 assert(UF > 0 && "Zero unrolled factor is not supported");
1527 assert(UnrolledLoop != RemainderLoop &&
1528 "Unrolled and Remainder loops are expected to distinct");
1529
1530 // Get number of iterations in the original scalar loop.
1531 unsigned OrigLoopInvocationWeight = 0;
1532 std::optional<unsigned> OrigAverageTripCount =
1533 getLoopEstimatedTripCount(OrigLoop, &OrigLoopInvocationWeight);
1534 if (!OrigAverageTripCount)
1535 return;
1536
1537 // Calculate number of iterations in unrolled loop.
1538 unsigned UnrolledAverageTripCount = *OrigAverageTripCount / UF;
1539 // Calculate number of iterations for remainder loop.
1540 unsigned RemainderAverageTripCount = *OrigAverageTripCount % UF;
1541
1542 setLoopEstimatedTripCount(UnrolledLoop, UnrolledAverageTripCount,
1543 OrigLoopInvocationWeight);
1544 setLoopEstimatedTripCount(RemainderLoop, RemainderAverageTripCount,
1545 OrigLoopInvocationWeight);
1546}
1547
1548/// Utility that implements appending of loops onto a worklist.
1549/// Loops are added in preorder (analogous for reverse postorder for trees),
1550/// and the worklist is processed LIFO.
1551template <typename RangeT>
1553 RangeT &&Loops, SmallPriorityWorklist<Loop *, 4> &Worklist) {
1554 // We use an internal worklist to build up the preorder traversal without
1555 // recursion.
1556 SmallVector<Loop *, 4> PreOrderLoops, PreOrderWorklist;
1557
1558 // We walk the initial sequence of loops in reverse because we generally want
1559 // to visit defs before uses and the worklist is LIFO.
1560 for (Loop *RootL : Loops) {
1561 assert(PreOrderLoops.empty() && "Must start with an empty preorder walk.");
1562 assert(PreOrderWorklist.empty() &&
1563 "Must start with an empty preorder walk worklist.");
1564 PreOrderWorklist.push_back(RootL);
1565 do {
1566 Loop *L = PreOrderWorklist.pop_back_val();
1567 PreOrderWorklist.append(L->begin(), L->end());
1568 PreOrderLoops.push_back(L);
1569 } while (!PreOrderWorklist.empty());
1570
1571 Worklist.insert(std::move(PreOrderLoops));
1572 PreOrderLoops.clear();
1573 }
1574}
1575
1576template <typename RangeT>
1580}
1581
1582template void llvm::appendLoopsToWorklist<ArrayRef<Loop *> &>(
1584
1585template void
1586llvm::appendLoopsToWorklist<Loop &>(Loop &L,
1588
1591 appendReversedLoopsToWorklist(LI, Worklist);
1592}
1593
1595 LoopInfo *LI, LPPassManager *LPM) {
1596 Loop &New = *LI->AllocateLoop();
1597 if (PL)
1598 PL->addChildLoop(&New);
1599 else
1600 LI->addTopLevelLoop(&New);
1601
1602 if (LPM)
1603 LPM->addLoop(New);
1604
1605 // Add all of the blocks in L to the new loop.
1606 for (BasicBlock *BB : L->blocks())
1607 if (LI->getLoopFor(BB) == L)
1608 New.addBasicBlockToLoop(cast<BasicBlock>(VM[BB]), *LI);
1609
1610 // Add all of the subloops to the new loop.
1611 for (Loop *I : *L)
1612 cloneLoop(I, &New, VM, LI, LPM);
1613
1614 return &New;
1615}
1616
1617/// IR Values for the lower and upper bounds of a pointer evolution. We
1618/// need to use value-handles because SCEV expansion can invalidate previously
1619/// expanded values. Thus expansion of a pointer can invalidate the bounds for
1620/// a previous one.
1624};
1625
1626/// Expand code for the lower and upper bound of the pointer group \p CG
1627/// in \p TheLoop. \return the values for the bounds.
1629 Loop *TheLoop, Instruction *Loc,
1630 SCEVExpander &Exp) {
1631 LLVMContext &Ctx = Loc->getContext();
1632 Type *PtrArithTy = Type::getInt8PtrTy(Ctx, CG->AddressSpace);
1633
1634 Value *Start = nullptr, *End = nullptr;
1635 LLVM_DEBUG(dbgs() << "LAA: Adding RT check for range:\n");
1636 Start = Exp.expandCodeFor(CG->Low, PtrArithTy, Loc);
1637 End = Exp.expandCodeFor(CG->High, PtrArithTy, Loc);
1638 if (CG->NeedsFreeze) {
1639 IRBuilder<> Builder(Loc);
1640 Start = Builder.CreateFreeze(Start, Start->getName() + ".fr");
1641 End = Builder.CreateFreeze(End, End->getName() + ".fr");
1642 }
1643 LLVM_DEBUG(dbgs() << "Start: " << *CG->Low << " End: " << *CG->High << "\n");
1644 return {Start, End};
1645}
1646
1647/// Turns a collection of checks into a collection of expanded upper and
1648/// lower bounds for both pointers in the check.
1651 Instruction *Loc, SCEVExpander &Exp) {
1653
1654 // Here we're relying on the SCEV Expander's cache to only emit code for the
1655 // same bounds once.
1656 transform(PointerChecks, std::back_inserter(ChecksWithBounds),
1657 [&](const RuntimePointerCheck &Check) {
1658 PointerBounds First = expandBounds(Check.first, L, Loc, Exp),
1659 Second = expandBounds(Check.second, L, Loc, Exp);
1660 return std::make_pair(First, Second);
1661 });
1662
1663 return ChecksWithBounds;
1664}
1665
1667 Instruction *Loc, Loop *TheLoop,
1668 const SmallVectorImpl<RuntimePointerCheck> &PointerChecks,
1669 SCEVExpander &Exp) {
1670 // TODO: Move noalias annotation code from LoopVersioning here and share with LV if possible.
1671 // TODO: Pass RtPtrChecking instead of PointerChecks and SE separately, if possible
1672 auto ExpandedChecks = expandBounds(PointerChecks, TheLoop, Loc, Exp);
1673
1674 LLVMContext &Ctx = Loc->getContext();
1675 IRBuilder<InstSimplifyFolder> ChkBuilder(Ctx,
1676 Loc->getModule()->getDataLayout());
1677 ChkBuilder.SetInsertPoint(Loc);
1678 // Our instructions might fold to a constant.
1679 Value *MemoryRuntimeCheck = nullptr;
1680
1681 for (const auto &Check : ExpandedChecks) {
1682 const PointerBounds &A = Check.first, &B = Check.second;
1683 // Check if two pointers (A and B) conflict where conflict is computed as:
1684 // start(A) <= end(B) && start(B) <= end(A)
1685 unsigned AS0 = A.Start->getType()->getPointerAddressSpace();
1686 unsigned AS1 = B.Start->getType()->getPointerAddressSpace();
1687
1688 assert((AS0 == B.End->getType()->getPointerAddressSpace()) &&
1689 (AS1 == A.End->getType()->getPointerAddressSpace()) &&
1690 "Trying to bounds check pointers with different address spaces");
1691
1692 Type *PtrArithTy0 = Type::getInt8PtrTy(Ctx, AS0);
1693 Type *PtrArithTy1 = Type::getInt8PtrTy(Ctx, AS1);
1694
1695 Value *Start0 = ChkBuilder.CreateBitCast(A.Start, PtrArithTy0, "bc");
1696 Value *Start1 = ChkBuilder.CreateBitCast(B.Start, PtrArithTy1, "bc");
1697 Value *End0 = ChkBuilder.CreateBitCast(A.End, PtrArithTy1, "bc");
1698 Value *End1 = ChkBuilder.CreateBitCast(B.End, PtrArithTy0, "bc");
1699
1700 // [A|B].Start points to the first accessed byte under base [A|B].
1701 // [A|B].End points to the last accessed byte, plus one.
1702 // There is no conflict when the intervals are disjoint:
1703 // NoConflict = (B.Start >= A.End) || (A.Start >= B.End)
1704 //
1705 // bound0 = (B.Start < A.End)
1706 // bound1 = (A.Start < B.End)
1707 // IsConflict = bound0 & bound1
1708 Value *Cmp0 = ChkBuilder.CreateICmpULT(Start0, End1, "bound0");
1709 Value *Cmp1 = ChkBuilder.CreateICmpULT(Start1, End0, "bound1");
1710 Value *IsConflict = ChkBuilder.CreateAnd(Cmp0, Cmp1, "found.conflict");
1711 if (MemoryRuntimeCheck) {
1712 IsConflict =
1713 ChkBuilder.CreateOr(MemoryRuntimeCheck, IsConflict, "conflict.rdx");
1714 }
1715 MemoryRuntimeCheck = IsConflict;
1716 }
1717
1718 return MemoryRuntimeCheck;
1719}
1720
1722 Instruction *Loc, ArrayRef<PointerDiffInfo> Checks, SCEVExpander &Expander,
1723 function_ref<Value *(IRBuilderBase &, unsigned)> GetVF, unsigned IC) {
1724
1725 LLVMContext &Ctx = Loc->getContext();
1726 IRBuilder<InstSimplifyFolder> ChkBuilder(Ctx,
1727 Loc->getModule()->getDataLayout());
1728 ChkBuilder.SetInsertPoint(Loc);
1729 // Our instructions might fold to a constant.
1730 Value *MemoryRuntimeCheck = nullptr;
1731
1732 for (const auto &C : Checks) {
1733 Type *Ty = C.SinkStart->getType();
1734 // Compute VF * IC * AccessSize.
1735 auto *VFTimesUFTimesSize =
1736 ChkBuilder.CreateMul(GetVF(ChkBuilder, Ty->getScalarSizeInBits()),
1737 ConstantInt::get(Ty, IC * C.AccessSize));
1738 Value *Sink = Expander.expandCodeFor(C.SinkStart, Ty, Loc);
1739 Value *Src = Expander.expandCodeFor(C.SrcStart, Ty, Loc);
1740 if (C.NeedsFreeze) {
1741 IRBuilder<> Builder(Loc);
1742 Sink = Builder.CreateFreeze(Sink, Sink->getName() + ".fr");
1743 Src = Builder.CreateFreeze(Src, Src->getName() + ".fr");
1744 }
1745 Value *Diff = ChkBuilder.CreateSub(Sink, Src);
1746 Value *IsConflict =
1747 ChkBuilder.CreateICmpULT(Diff, VFTimesUFTimesSize, "diff.check");
1748
1749 if (MemoryRuntimeCheck) {
1750 IsConflict =
1751 ChkBuilder.CreateOr(MemoryRuntimeCheck, IsConflict, "conflict.rdx");
1752 }
1753 MemoryRuntimeCheck = IsConflict;
1754 }
1755
1756 return MemoryRuntimeCheck;
1757}
1758
1759std::optional<IVConditionInfo>
1761 const MemorySSA &MSSA, AAResults &AA) {
1762 auto *TI = dyn_cast<BranchInst>(L.getHeader()->getTerminator());
1763 if (!TI || !TI->isConditional())
1764 return {};
1765
1766 auto *CondI = dyn_cast<CmpInst>(TI->getCondition());
1767 // The case with the condition outside the loop should already be handled
1768 // earlier.
1769 if (!CondI || !L.contains(CondI))
1770 return {};
1771
1772 SmallVector<Instruction *> InstToDuplicate;
1773 InstToDuplicate.push_back(CondI);
1774
1775 SmallVector<Value *, 4> WorkList;
1776 WorkList.append(CondI->op_begin(), CondI->op_end());
1777
1778 SmallVector<MemoryAccess *, 4> AccessesToCheck;
1779 SmallVector<MemoryLocation, 4> AccessedLocs;
1780 while (!WorkList.empty()) {
1781 Instruction *I = dyn_cast<Instruction>(WorkList.pop_back_val());
1782 if (!I || !L.contains(I))
1783 continue;
1784
1785 // TODO: support additional instructions.
1786 if (!isa<LoadInst>(I) && !isa<GetElementPtrInst>(I))
1787 return {};
1788
1789 // Do not duplicate volatile and atomic loads.
1790 if (auto *LI = dyn_cast<LoadInst>(I))
1791 if (LI->isVolatile() || LI->isAtomic())
1792 return {};
1793
1794 InstToDuplicate.push_back(I);
1795 if (MemoryAccess *MA = MSSA.getMemoryAccess(I)) {
1796 if (auto *MemUse = dyn_cast_or_null<MemoryUse>(MA)) {
1797 // Queue the defining access to check for alias checks.
1798 AccessesToCheck.push_back(MemUse->getDefiningAccess());
1799 AccessedLocs.push_back(MemoryLocation::get(I));
1800 } else {
1801 // MemoryDefs may clobber the location or may be atomic memory
1802 // operations. Bail out.
1803 return {};
1804 }
1805 }
1806 WorkList.append(I->op_begin(), I->op_end());
1807 }
1808
1809 if (InstToDuplicate.empty())
1810 return {};
1811
1812 SmallVector<BasicBlock *, 4> ExitingBlocks;
1813 L.getExitingBlocks(ExitingBlocks);
1814 auto HasNoClobbersOnPath =
1815 [&L, &AA, &AccessedLocs, &ExitingBlocks, &InstToDuplicate,
1816 MSSAThreshold](BasicBlock *Succ, BasicBlock *Header,
1817 SmallVector<MemoryAccess *, 4> AccessesToCheck)
1818 -> std::optional<IVConditionInfo> {
1820 // First, collect all blocks in the loop that are on a patch from Succ
1821 // to the header.
1823 WorkList.push_back(Succ);
1824 WorkList.push_back(Header);
1826 Seen.insert(Header);
1827 Info.PathIsNoop &=
1828 all_of(*Header, [](Instruction &I) { return !I.mayHaveSideEffects(); });
1829
1830 while (!WorkList.empty()) {
1831 BasicBlock *Current = WorkList.pop_back_val();
1832 if (!L.contains(Current))
1833 continue;
1834 const auto &SeenIns = Seen.insert(Current);
1835 if (!SeenIns.second)
1836 continue;
1837
1838 Info.PathIsNoop &= all_of(
1839 *Current, [](Instruction &I) { return !I.mayHaveSideEffects(); });
1840 WorkList.append(succ_begin(Current), succ_end(Current));
1841 }
1842
1843 // Require at least 2 blocks on a path through the loop. This skips
1844 // paths that directly exit the loop.
1845 if (Seen.size() < 2)
1846 return {};
1847
1848 // Next, check if there are any MemoryDefs that are on the path through
1849 // the loop (in the Seen set) and they may-alias any of the locations in
1850 // AccessedLocs. If that is the case, they may modify the condition and
1851 // partial unswitching is not possible.
1852 SmallPtrSet<MemoryAccess *, 4> SeenAccesses;
1853 while (!AccessesToCheck.empty()) {
1854 MemoryAccess *Current = AccessesToCheck.pop_back_val();
1855 auto SeenI = SeenAccesses.insert(Current);
1856 if (!SeenI.second || !Seen.contains(Current->getBlock()))
1857 continue;
1858
1859 // Bail out if exceeded the threshold.
1860 if (SeenAccesses.size() >= MSSAThreshold)
1861 return {};
1862
1863 // MemoryUse are read-only accesses.
1864 if (isa<MemoryUse>(Current))
1865 continue;
1866
1867 // For a MemoryDef, check if is aliases any of the location feeding
1868 // the original condition.
1869 if (auto *CurrentDef = dyn_cast<MemoryDef>(Current)) {
1870 if (any_of(AccessedLocs, [&AA, CurrentDef](MemoryLocation &Loc) {
1871 return isModSet(
1872 AA.getModRefInfo(CurrentDef->getMemoryInst(), Loc));
1873 }))
1874 return {};
1875 }
1876
1877 for (Use &U : Current->uses())
1878 AccessesToCheck.push_back(cast<MemoryAccess>(U.getUser()));
1879 }
1880
1881 // We could also allow loops with known trip counts without mustprogress,
1882 // but ScalarEvolution may not be available.
1883 Info.PathIsNoop &= isMustProgress(&L);
1884
1885 // If the path is considered a no-op so far, check if it reaches a
1886 // single exit block without any phis. This ensures no values from the
1887 // loop are used outside of the loop.
1888 if (Info.PathIsNoop) {
1889 for (auto *Exiting : ExitingBlocks) {
1890 if (!Seen.contains(Exiting))
1891 continue;
1892 for (auto *Succ : successors(Exiting)) {
1893 if (L.contains(Succ))
1894 continue;
1895
1896 Info.PathIsNoop &= Succ->phis().empty() &&
1897 (!Info.ExitForPath || Info.ExitForPath == Succ);
1898 if (!Info.PathIsNoop)
1899 break;
1900 assert((!Info.ExitForPath || Info.ExitForPath == Succ) &&
1901 "cannot have multiple exit blocks");
1902 Info.ExitForPath = Succ;
1903 }
1904 }
1905 }
1906 if (!Info.ExitForPath)
1907 Info.PathIsNoop = false;
1908
1909 Info.InstToDuplicate = InstToDuplicate;
1910 return Info;
1911 };
1912
1913 // If we branch to the same successor, partial unswitching will not be
1914 // beneficial.
1915 if (TI->getSuccessor(0) == TI->getSuccessor(1))
1916 return {};
1917
1918 if (auto Info = HasNoClobbersOnPath(TI->getSuccessor(0), L.getHeader(),
1919 AccessesToCheck)) {
1920 Info->KnownValue = ConstantInt::getTrue(TI->getContext());
1921 return Info;
1922 }
1923 if (auto Info = HasNoClobbersOnPath(TI->getSuccessor(1), L.getHeader(),
1924 AccessesToCheck)) {
1925 Info->KnownValue = ConstantInt::getFalse(TI->getContext());
1926 return Info;
1927 }
1928
1929 return {};
1930}
amdgpu AMDGPU Register Bank Select
assume Assume Builder
This is the interface for LLVM's primary stateless and local alias analysis.
SmallVector< MachineOperand, 4 > Cond
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file defines the DenseSet and SmallDenseSet classes.
@ Enable
Definition: DwarfDebug.cpp:86
std::string Name
bool End
Definition: ELF_riscv.cpp:464
This is the interface for a simple mod/ref and alias analysis over globals.
static const HTTPClientCleanup Cleanup
Definition: HTTPClient.cpp:42
Hexagon Hardware Loops
iv Induction Variable Users
Definition: IVUsers.cpp:48
static cl::opt< ReplaceExitVal > ReplaceExitValue("replexitval", cl::Hidden, cl::init(OnlyCheapRepl), cl::desc("Choose the strategy to replace exit value in IndVarSimplify"), cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"), clEnumValN(OnlyCheapRepl, "cheap", "only replace exit value when the cost is cheap"), clEnumValN(UnusedIndVarInLoop, "unusedindvarinloop", "only replace exit value when it is an unused " "induction variable in the loop and has cheap replacement cost"), clEnumValN(NoHardUse, "noharduse", "only replace exit values when loop def likely dead"), clEnumValN(AlwaysRepl, "always", "always replace exit value whenever possible")))
#define Check(C,...)
Definition: Lint.cpp:168
static std::optional< uint64_t > getEstimatedTripCount(BranchInst *ExitingBranch, Loop *L, uint64_t &OrigExitWeight)
Return the estimated trip count for any exiting branch which dominates the loop latch.
Definition: LoopUtils.cpp:797
static bool hasHardUserWithinLoop(const Loop *L, const Instruction *I)
Definition: LoopUtils.cpp:1206
static const char * LLVMLoopDisableLICM
Definition: LoopUtils.cpp:55
static bool canLoopBeDeleted(Loop *L, SmallVector< RewritePhi, 8 > &RewritePhiSet)
Definition: LoopUtils.cpp:1247
static const char * LLVMLoopDisableNonforced
Definition: LoopUtils.cpp:54
static MDNode * createStringMetadata(Loop *TheLoop, StringRef Name, unsigned V)
Create MDNode for input string.
Definition: LoopUtils.cpp:203
static BranchInst * getExpectedExitLoopLatchBranch(Loop *L)
Checks if L has an exiting latch branch.
Definition: LoopUtils.cpp:779
static bool checkIsIndPhi(PHINode *Phi, Loop *L, ScalarEvolution *SE, InductionDescriptor &ID)
Checks if it is safe to call InductionDescriptor::isInductionPHI for Phi, and returns true if this Ph...
Definition: LoopUtils.cpp:1301
static PointerBounds expandBounds(const RuntimeCheckingPtrGroup *CG, Loop *TheLoop, Instruction *Loc, SCEVExpander &Exp)
Expand code for the lower and upper bound of the pointer group CG in TheLoop.
Definition: LoopUtils.cpp:1628
#define I(x, y, z)
Definition: MD5.cpp:58
#define H(x, y, z)
Definition: MD5.cpp:57
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
Module.h This file contains the declarations for the Module class.
LLVMContext & Context
#define P(N)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
This file provides a priority worklist.
This file contains the declarations for profiling metadata utility functions.
@ SI
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This is the interface for a SCEV-based alias analysis.
This file defines the make_scope_exit function, which executes user-defined cleanup logic at scope ex...
This file implements a set that has insertion order iteration characteristics.
static cl::opt< unsigned > MSSAThreshold("simple-loop-unswitch-memoryssa-threshold", cl::desc("Max number of memory uses to explore during " "partial unswitching analysis"), cl::init(100), cl::Hidden)
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
Virtual Register Rewriter
Definition: VirtRegMap.cpp:237
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
ModRefInfo getModRefInfo(const Instruction *I, const std::optional< MemoryLocation > &OptLoc)
Check whether or not an instruction may read or write the optionally specified memory location.
Class for arbitrary precision integers.
Definition: APInt.h:75
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition: APInt.h:186
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition: APInt.h:189
static APInt getMinValue(unsigned numBits)
Gets minimum unsigned value of APInt for a specific bit width.
Definition: APInt.h:196
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition: APInt.h:199
Represent the analysis usage information of a pass.
AnalysisUsage & addRequiredID(const void *ID)
Definition: Pass.cpp:279
AnalysisUsage & addPreservedID(const void *ID)
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
Legacy wrapper pass to provide the BasicAAResult object.
LLVM Basic Block Representation.
Definition: BasicBlock.h:56
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:323
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:254
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:87
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:35
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:127
Conditional or Unconditional Branch instruction.
unsigned getNumSuccessors() const
BasicBlock * getSuccessor(unsigned i) const
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:711
@ ICMP_SLT
signed less than
Definition: InstrTypes.h:740
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition: InstrTypes.h:717
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition: InstrTypes.h:715
@ ICMP_UGT
unsigned greater than
Definition: InstrTypes.h:734
@ ICMP_SGT
signed greater than
Definition: InstrTypes.h:738
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:736
@ ICMP_NE
not equal
Definition: InstrTypes.h:733
static ConstantAsMetadata * get(Constant *C)
Definition: Metadata.h:419
static Constant * get(Type *Ty, double V)
This returns a ConstantFP, or a vector containing a splat of a ConstantFP, for the specified value in...
Definition: Constants.cpp:927
static Constant * getNegativeZero(Type *Ty)
Definition: Constants.h:291
This is the shared class of boolean and integer constants.
Definition: Constants.h:78
static ConstantInt * getTrue(LLVMContext &Context)
Definition: Constants.cpp:833
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:888
static ConstantInt * getFalse(LLVMContext &Context)
Definition: Constants.cpp:840
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
Definition: Constants.h:151
Identifies a unique instance of a variable.
void applyUpdates(ArrayRef< DominatorTree::UpdateType > Updates)
Submit updates to all available trees.
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:314
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:166
bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
Definition: Dominators.cpp:321
static constexpr ElementCount get(ScalarTy MinVal, bool Scalable)
Definition: TypeSize.h:297
Legacy wrapper pass to provide the GlobalsAAResult object.
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:94
Value * CreateICmpULT(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2152
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1273
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2022
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1404
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1426
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition: IRBuilder.h:180
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1290
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2564
A struct for saving information about induction variables.
static bool isInductionPHI(PHINode *Phi, const Loop *L, ScalarEvolution *SE, InductionDescriptor &D, const SCEV *Expr=nullptr, SmallVectorImpl< Instruction * > *CastsToIgnore=nullptr)
Returns true if Phi is an induction in the loop L.
unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:70
const BasicBlock * getParent() const
Definition: Instruction.h:90
bool mayHaveSideEffects() const LLVM_READONLY
Return true if the instruction may have side effects.
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1521
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:82
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
void addLoop(Loop &L)
Definition: LoopPass.cpp:76
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
BlockT * getHeader() const
std::vector< Loop * >::const_iterator iterator
LoopT * getParentLoop() const
Return the parent loop if it exists or nullptr for top level loops.
void addTopLevelLoop(LoopT *New)
This adds the specified loop to the collection of top-level loops.
iterator end() const
void removeBlock(BlockT *BB)
This method completely removes BB from all data structures, including all of the Loop objects it is n...
LoopT * AllocateLoop(ArgsTy &&...Args)
LoopT * removeLoop(iterator I)
This removes the specified top-level loop from this loop info object.
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
void destroy(LoopT *L)
Destroy a loop that has been removed from the LoopInfo nest.
The legacy pass manager's analysis pass to compute loop information.
Definition: LoopInfo.h:594
bool replacementPreservesLCSSAForm(Instruction *From, Value *To)
Returns true if replacing From with To everywhere is guaranteed to preserve LCSSA form.
Definition: LoopInfo.h:442
void erase(Loop *L)
Update LoopInfo after removing the last backedge from a loop.
Definition: LoopInfo.cpp:876
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:47
void setLoopID(MDNode *LoopID) const
Set the llvm.loop loop id metadata for this loop.
Definition: LoopInfo.cpp:525
MDNode * getLoopID() const
Return the llvm.loop loop id metadata node for this loop if it is present.
Definition: LoopInfo.cpp:501
MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight)
Return metadata containing two branch weights.
Definition: MDBuilder.cpp:37
Metadata node.
Definition: Metadata.h:950
void replaceOperandWith(unsigned I, Metadata *New)
Replace a specific operand.
Definition: Metadata.cpp:970
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1303
ArrayRef< MDOperand > operands() const
Definition: Metadata.h:1301
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1416
unsigned getNumOperands() const
Return number of MDNode operands.
Definition: Metadata.h:1309
LLVMContext & getContext() const
Definition: Metadata.h:1114
Tracking metadata reference owned by Metadata.
Definition: Metadata.h:772
A single uniqued string.
Definition: Metadata.h:611
StringRef getString() const
Definition: Metadata.cpp:509
static MDString * get(LLVMContext &Context, StringRef Str)
Definition: Metadata.cpp:499
Tuple of metadata.
Definition: Metadata.h:1345
BasicBlock * getBlock() const
Definition: MemorySSA.h:164
Representation for a specific memory location.
static MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
Legacy analysis pass which computes MemorySSA.
Definition: MemorySSA.h:986
Encapsulates MemorySSA, including all data associated with memory accesses.
Definition: MemorySSA.h:700
void verifyMemorySSA(VerificationLevel=VerificationLevel::Fast) const
Verify that MemorySSA is self consistent (IE definitions dominate all uses, uses appear in the right ...
Definition: MemorySSA.cpp:1862
MemoryUseOrDef * getMemoryAccess(const Instruction *I) const
Given a memory Mod/Ref'ing instruction, get the MemorySSA access associated with it.
Definition: MemorySSA.h:717
Root of the metadata hierarchy.
Definition: Metadata.h:61
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.cpp:398
void setIncomingValue(unsigned i, Value *V)
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
PassRegistry - This class manages the registration and intitialization of the pass subsystem as appli...
Definition: PassRegistry.h:37
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition: Constants.cpp:1743
bool insert(const T &X)
Insert a new element into the PriorityWorklist.
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Definition: IVDescriptors.h:69
FastMathFlags getFastMathFlags() const
TrackingVH< Value > getRecurrenceStartValue() const
RecurKind getRecurrenceKind() const
static bool isMinMaxRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is any min/max kind.
static bool isSelectCmpRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
A global registry used in conjunction with static constructors to make pluggable components (like tar...
Definition: Registry.h:44
Legacy wrapper pass to provide the SCEVAAResult object.
This class uses information about analyze scalars to rewrite expressions in canonical form.
Value * expandCodeFor(const SCEV *SH, Type *Ty, Instruction *I)
Insert code to directly compute the specified SCEV expression into the program.
This class represents an analyzed expression in the program.
Type * getType() const
Return the LLVM type of this SCEV expression.
The main scalar evolution driver.
const SCEV * getSCEVAtScope(const SCEV *S, const Loop *L)
Return a SCEV expression for the specified value at the specified scope in the program.
const SCEV * getZero(Type *Ty)
Return a SCEV for the constant 0 of a specific type.
const SCEV * getConstant(ConstantInt *V)
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
void forgetLoop(const Loop *L)
This method should be called by the client when it has changed a loop in a way that may effect Scalar...
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
LoopDisposition getLoopDisposition(const SCEV *S, const Loop *L)
Return the "disposition" of the given SCEV with respect to the given loop.
bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
void forgetValue(Value *V)
This method should be called by the client when it has changed a value in a way that may effect its v...
void forgetBlockAndLoopDispositions(Value *V=nullptr)
Called when the client has changed the disposition of values in a loop or block.
LoopDisposition
An enum describing the relationship between a SCEV and a loop.
@ LoopInvariant
The SCEV is loop-invariant.
bool isAvailableAtLoopEntry(const SCEV *S, const Loop *L)
Determine if the SCEV can be evaluated at loop's entry.
const SCEV * getExitCount(const Loop *L, const BasicBlock *ExitingBlock, ExitCountKind Kind=Exact)
Return the number of times the backedge executes before the given exit would be taken; if not exactly...
bool isLoopEntryGuardedByCond(const Loop *L, ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS)
Test whether entry to the loop is protected by a conditional between LHS and RHS.
This class represents the LLVM 'select' instruction.
Implements a dense probed hash-table based set with some number of buckets stored inline.
Definition: DenseSet.h:290
A version of PriorityWorklist that selects small size optimized data structures for the vector and ma...
size_type size() const
Definition: SmallPtrSet.h:93
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:365
bool contains(ConstPtrType Ptr) const
Definition: SmallPtrSet.h:389
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:450
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:312
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:577
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:941
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:687
void push_back(const T &Elt)
Definition: SmallVector.h:416
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
bool startswith(StringRef Prefix) const
Definition: StringRef.h:261
bool equals(StringRef RHS) const
equals - Check for string equality, this is more efficient than compare() when the relative ordering ...
Definition: StringRef.h:164
Provides information about what library functions are available for the current target.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Value handle that tracks a Value across RAUW.
Definition: ValueHandle.h:331
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
Definition: Type.h:235
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static PointerType * getInt8PtrTy(LLVMContext &C, unsigned AS=0)
static IntegerType * getInt32Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:229
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:535
iterator_range< user_iterator > users()
Definition: Value.h:421
bool use_empty() const
Definition: Value.h:344
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1069
iterator_range< use_iterator > uses()
Definition: Value.h:376
std::pair< iterator, bool > insert(const ValueT &V)
Definition: DenseSet.h:206
An efficient, type-erasing, non-owning reference to a callable.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:413
std::optional< ElementCount > getOptionalElementCountLoopAttribute(const Loop *TheLoop)
Find a combination of metadata ("llvm.loop.vectorize.width" and "llvm.loop.vectorize....
Definition: LoopUtils.cpp:250
SmallVector< DomTreeNode *, 16 > collectChildrenInLoop(DomTreeNode *N, const Loop *CurLoop)
Does a BFS from a given node to all of its children inside a given loop.
Definition: LoopUtils.cpp:450
Interval::succ_iterator succ_end(Interval *I)
Definition: Interval.h:102
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1839
std::optional< unsigned > getLoopEstimatedTripCount(Loop *L, unsigned *EstimatedLoopInvocationWeight=nullptr)
Returns a loop's estimated trip count based on branch weight metadata.
Definition: LoopUtils.cpp:824
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1819
bool getBooleanLoopAttribute(const Loop *TheLoop, StringRef Name)
Returns true if Name is applied to TheLoop and enabled.
Definition: LoopInfo.cpp:1084
std::pair< const RuntimeCheckingPtrGroup *, const RuntimeCheckingPtrGroup * > RuntimePointerCheck
A memcheck which made up of a pair of grouped pointers.
detail::scope_exit< std::decay_t< Callable > > make_scope_exit(Callable &&F)
Definition: ScopeExit.h:59
bool isKnownNonPositiveInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE)
Returns true if we can prove that S is defined and always non-positive in loop L.
Definition: LoopUtils.cpp:1172
Value * createSelectCmpOp(IRBuilderBase &Builder, Value *StartVal, RecurKind RK, Value *Left, Value *Right)
See RecurrenceDescriptor::isSelectCmpPattern for a description of the pattern we are trying to match.
Definition: LoopUtils.cpp:934
std::optional< bool > getOptionalBoolLoopAttribute(const Loop *TheLoop, StringRef Name)
Definition: LoopInfo.cpp:1066
Value * createSimpleTargetReduction(IRBuilderBase &B, const TargetTransformInfo *TTI, Value *Src, RecurKind RdxKind)
Create a target reduction of the given vector.
Definition: LoopUtils.cpp:1064
void appendReversedLoopsToWorklist(RangeT &&, SmallPriorityWorklist< Loop *, 4 > &)
Utility that implements appending of loops onto a worklist given a range.
Definition: LoopUtils.cpp:1552
auto successors(const MachineBasicBlock *BB)
Value * createTargetReduction(IRBuilderBase &B, const TargetTransformInfo *TTI, const RecurrenceDescriptor &Desc, Value *Src, PHINode *OrigPhi=nullptr)
Create a generic target reduction using a recurrence descriptor Desc The target is queried to determi...
Definition: LoopUtils.cpp:1102
void initializeLoopPassPass(PassRegistry &)
Manually defined generic "LoopPass" dependency initialization.
Definition: LoopUtils.cpp:189
std::optional< MDNode * > makeFollowupLoopID(MDNode *OrigLoopID, ArrayRef< StringRef > FollowupAttrs, const char *InheritOptionsAttrsPrefix="", bool AlwaysNew=false)
Create a new loop identifier for a loop created from a loop transformation.
Definition: LoopUtils.cpp:263
Interval::succ_iterator succ_begin(Interval *I)
succ_begin/succ_end - define methods so that Intervals may be used just like BasicBlocks can with the...
Definition: Interval.h:99
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:748
char & LCSSAID
Definition: LCSSA.cpp:477
char & LoopSimplifyID
Intrinsic::ID getMinMaxReductionIntrinsicOp(RecurKind RK)
Returns the min/max intrinsic used when expanding a min/max reduction.
Definition: LoopUtils.cpp:896
Value * createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left, Value *Right)
Returns a Min/Max operation corresponding to MinMaxRecurrenceKind.
Definition: LoopUtils.cpp:943
void addStringMetadataToLoop(Loop *TheLoop, const char *MDString, unsigned V=0)
Set input string into loop metadata by keeping other values intact.
Definition: LoopUtils.cpp:214
bool cannotBeMaxInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE, bool Signed)
Returns true if S is defined and never is equal to signed/unsigned max.
Definition: LoopUtils.cpp:1190
TransformationMode hasVectorizeTransformation(const Loop *L)
Definition: LoopUtils.cpp:391
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
Definition: STLExtras.h:2025
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1826
bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
Definition: Local.cpp:398
uint64_t divideNearest(uint64_t Numerator, uint64_t Denominator)
Returns the integer nearest(Numerator / Denominator).
Definition: MathExtras.h:527
SmallVector< Instruction *, 8 > findDefsUsedOutsideOfLoop(Loop *L)
Returns the instructions that use values defined in the loop.
Definition: LoopUtils.cpp:123
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:511
bool isMustProgress(const Loop *L)
Return true if this loop can be assumed to make progress.
Definition: LoopInfo.cpp:1117
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:292
bool isModSet(const ModRefInfo MRI)
Definition: ModRef.h:48
TransformationMode hasUnrollAndJamTransformation(const Loop *L)
Definition: LoopUtils.cpp:373
void deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE, LoopInfo *LI, MemorySSA *MSSA=nullptr)
This function deletes dead loops.
Definition: LoopUtils.cpp:482
Value * getShuffleReduction(IRBuilderBase &Builder, Value *Src, unsigned Op, RecurKind MinMaxKind=RecurKind::None)
Generates a vector reduction using shufflevectors to reduce the value.
Definition: LoopUtils.cpp:984
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
bool hasDisableAllTransformsHint(const Loop *L)
Look for the loop attribute that disables all transformation heuristic.
Definition: LoopUtils.cpp:344
Value * createOrderedReduction(IRBuilderBase &B, const RecurrenceDescriptor &Desc, Value *Src, Value *Start)
Create an ordered reduction intrinsic using the given recurrence descriptor Desc.
Definition: LoopUtils.cpp:1119
cl::opt< unsigned > SCEVCheapExpansionBudget
TransformationMode hasUnrollTransformation(const Loop *L)
Definition: LoopUtils.cpp:352
TransformationMode hasDistributeTransformation(const Loop *L)
Definition: LoopUtils.cpp:427
void breakLoopBackedge(Loop *L, DominatorTree &DT, ScalarEvolution &SE, LoopInfo &LI, MemorySSA *MSSA)
Remove the backedge of the specified loop.
Definition: LoopUtils.cpp:699
void getLoopAnalysisUsage(AnalysisUsage &AU)
Helper to consistently add the set of standard passes to a loop pass's AnalysisUsage.
Definition: LoopUtils.cpp:141
void propagateIRFlags(Value *I, ArrayRef< Value * > VL, Value *OpValue=nullptr, bool IncludeWrapFlags=true)
Get the intersection (logical and) of all of the potential IR flags of each scalar operation (VL) tha...
Definition: LoopUtils.cpp:1131
bool isKnownPositiveInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE)
Returns true if we can prove that S is defined and always positive in loop L.
Definition: LoopUtils.cpp:1165
unsigned changeToUnreachable(Instruction *I, bool PreserveLCSSA=false, DomTreeUpdater *DTU=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Insert an unreachable instruction before the specified instruction, making it and the rest of the cod...
Definition: Local.cpp:2296
bool formLCSSARecursively(Loop &L, const DominatorTree &DT, const LoopInfo *LI)
Put a loop nest into LCSSA form.
Definition: LCSSA.cpp:397
std::optional< int > getOptionalIntLoopAttribute(const Loop *TheLoop, StringRef Name)
Find named metadata for a loop with an integer value.
Definition: LoopInfo.cpp:1088
BasicBlock * SplitBlockPredecessors(BasicBlock *BB, ArrayRef< BasicBlock * > Preds, const char *Suffix, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method introduces at least one new basic block into the function and moves some of the predecess...
CmpInst::Predicate getMinMaxReductionPredicate(RecurKind RK)
Returns the comparison predicate used when expanding a min/max reduction.
Definition: LoopUtils.cpp:915
TransformationMode hasLICMVersioningTransformation(const Loop *L)
Definition: LoopUtils.cpp:437
bool VerifyMemorySSA
Enables verification of MemorySSA.
Definition: MemorySSA.cpp:89
TransformationMode
The mode sets how eager a transformation should be applied.
Definition: LoopUtils.h:270
@ TM_Unspecified
The pass can use heuristics to determine whether a transformation should be applied.
Definition: LoopUtils.h:273
@ TM_SuppressedByUser
The transformation must not be applied.
Definition: LoopUtils.h:293
@ TM_ForcedByUser
The transformation was directed by the user, e.g.
Definition: LoopUtils.h:287
@ TM_Disable
The transformation should not be applied.
Definition: LoopUtils.h:279
@ TM_Enable
The transformation should be applied without considering a cost model.
Definition: LoopUtils.h:276
bool hasDisableLICMTransformsHint(const Loop *L)
Look for the loop attribute that disables the LICM transformation heuristics.
Definition: LoopUtils.cpp:348
Value * addRuntimeChecks(Instruction *Loc, Loop *TheLoop, const SmallVectorImpl< RuntimePointerCheck > &PointerChecks, SCEVExpander &Expander)
Add code that checks at runtime if the accessed arrays in PointerChecks overlap.
Definition: LoopUtils.cpp:1666
RecurKind
These are the kinds of recurrences that we support.
Definition: IVDescriptors.h:35
bool setLoopEstimatedTripCount(Loop *L, unsigned EstimatedTripCount, unsigned EstimatedLoopInvocationWeight)
Set a loop's branch weight metadata to reflect that loop has EstimatedTripCount iterations and Estima...
Definition: LoopUtils.cpp:842
void setProfileInfoAfterUnrolling(Loop *OrigLoop, Loop *UnrolledLoop, Loop *RemainderLoop, uint64_t UF)
Set weights for UnrolledLoop and RemainderLoop based on weights for OrigLoop and the following distri...
Definition: LoopUtils.cpp:1524
bool formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
Ensure that all exit blocks of the loop are dedicated exits.
Definition: LoopUtils.cpp:57
void appendLoopsToWorklist(RangeT &&, SmallPriorityWorklist< Loop *, 4 > &)
Utility that implements appending of loops onto a worklist given a range.
Definition: LoopUtils.cpp:1577
bool isKnownNegativeInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE)
Returns true if we can prove that S is defined and always negative in loop L.
Definition: LoopUtils.cpp:1151
Value * createSelectCmpTargetReduction(IRBuilderBase &B, const TargetTransformInfo *TTI, Value *Src, const RecurrenceDescriptor &Desc, PHINode *OrigPhi)
Create a target reduction of the given vector Src for a reduction of the kind RecurKind::SelectICmp o...
Definition: LoopUtils.cpp:1024
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:184
bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
bool hasIterationCountInvariantInParent(Loop *L, ScalarEvolution &SE)
Check inner loop (L) backedge count is known to be invariant on all iterations of its outer loop.
Definition: LoopUtils.cpp:874
bool isAlmostDeadIV(PHINode *IV, BasicBlock *LatchBlock, Value *Cond)
Return true if the induction variable IV in a Loop whose latch is LatchBlock would become dead if the...
Definition: LoopUtils.cpp:469
auto predecessors(const MachineBasicBlock *BB)
int rewriteLoopExitValues(Loop *L, LoopInfo *LI, TargetLibraryInfo *TLI, ScalarEvolution *SE, const TargetTransformInfo *TTI, SCEVExpander &Rewriter, DominatorTree *DT, ReplaceExitVal ReplaceExitValue, SmallVector< WeakTrackingVH, 16 > &DeadInsts)
If the final value of any expressions that are recurrent in the loop can be computed,...
Definition: LoopUtils.cpp:1312
iterator_range< typename GraphTraits< GraphType >::ChildIteratorType > children(const typename GraphTraits< GraphType >::NodeRef &G)
Definition: GraphTraits.h:123
Value * addDiffRuntimeChecks(Instruction *Loc, ArrayRef< PointerDiffInfo > Checks, SCEVExpander &Expander, function_ref< Value *(IRBuilderBase &, unsigned)> GetVF, unsigned IC)
Definition: LoopUtils.cpp:1721
ReplaceExitVal
Definition: LoopUtils.h:451
@ UnusedIndVarInLoop
Definition: LoopUtils.h:455
@ OnlyCheapRepl
Definition: LoopUtils.h:453
@ AlwaysRepl
Definition: LoopUtils.h:456
BasicBlock * SplitEdge(BasicBlock *From, BasicBlock *To, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the edge connecting the specified blocks, and return the newly created basic block between From...
std::optional< IVConditionInfo > hasPartialIVCondition(const Loop &L, unsigned MSSAThreshold, const MemorySSA &MSSA, AAResults &AA)
Check if the loop header has a conditional branch that is not loop-invariant, because it involves loa...
Definition: LoopUtils.cpp:1760
bool cannotBeMinInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE, bool Signed)
Returns true if S is defined and never is equal to signed/unsigned min.
Definition: LoopUtils.cpp:1179
bool isKnownNonNegativeInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE)
Returns true if we can prove that S is defined and always non-negative in loop L.
Definition: LoopUtils.cpp:1158
Value * getOrderedReduction(IRBuilderBase &Builder, Value *Acc, Value *Src, unsigned Op, RecurKind MinMaxKind=RecurKind::None)
Generates an ordered vector reduction using extracts to reduce the value.
Definition: LoopUtils.cpp:959
MDNode * findOptionMDForLoopID(MDNode *LoopID, StringRef Name)
Find and return the loop attribute node for the attribute Name in LoopID.
Definition: LoopInfo.cpp:1016
Loop * cloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM, LoopInfo *LI, LPPassManager *LPM)
Recursively clone the specified loop and all of its children, mapping the blocks with the specified m...
Definition: LoopUtils.cpp:1594
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
IR Values for the lower and upper bounds of a pointer evolution.
Definition: LoopUtils.cpp:1621
TrackingVH< Value > Start
Definition: LoopUtils.cpp:1622
TrackingVH< Value > End
Definition: LoopUtils.cpp:1623
unsigned Ith
Definition: LoopUtils.cpp:1233
RewritePhi(PHINode *P, unsigned I, const SCEV *Val, Instruction *ExpansionPt, bool H)
Definition: LoopUtils.cpp:1238
const SCEV * ExpansionSCEV
Definition: LoopUtils.cpp:1234
PHINode * PN
Definition: LoopUtils.cpp:1232
Instruction * ExpansionPoint
Definition: LoopUtils.cpp:1235
Struct to hold information about a partially invariant condition.
Definition: LoopUtils.h:529
unsigned AddressSpace
Address space of the involved pointers.
bool NeedsFreeze
Whether the pointer needs to be frozen after expansion, e.g.
const SCEV * High
The SCEV expression which represents the upper bound of all the pointers in this group.
const SCEV * Low
The SCEV expression which represents the lower bound of all the pointers in this group.