LLVM 19.0.0git
LoopUtils.cpp
Go to the documentation of this file.
1//===-- LoopUtils.cpp - Loop Utility functions -------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines common loop utility functions.
10//
11//===----------------------------------------------------------------------===//
12
14#include "llvm/ADT/DenseSet.h"
16#include "llvm/ADT/ScopeExit.h"
17#include "llvm/ADT/SetVector.h"
33#include "llvm/IR/DIBuilder.h"
34#include "llvm/IR/Dominators.h"
37#include "llvm/IR/MDBuilder.h"
38#include "llvm/IR/Module.h"
41#include "llvm/IR/ValueHandle.h"
43#include "llvm/Pass.h"
44#include "llvm/Support/Debug.h"
48
49using namespace llvm;
50using namespace llvm::PatternMatch;
51
52#define DEBUG_TYPE "loop-utils"
53
54static const char *LLVMLoopDisableNonforced = "llvm.loop.disable_nonforced";
55static const char *LLVMLoopDisableLICM = "llvm.licm.disable";
56
58 MemorySSAUpdater *MSSAU,
59 bool PreserveLCSSA) {
60 bool Changed = false;
61
62 // We re-use a vector for the in-loop predecesosrs.
63 SmallVector<BasicBlock *, 4> InLoopPredecessors;
64
65 auto RewriteExit = [&](BasicBlock *BB) {
66 assert(InLoopPredecessors.empty() &&
67 "Must start with an empty predecessors list!");
68 auto Cleanup = make_scope_exit([&] { InLoopPredecessors.clear(); });
69
70 // See if there are any non-loop predecessors of this exit block and
71 // keep track of the in-loop predecessors.
72 bool IsDedicatedExit = true;
73 for (auto *PredBB : predecessors(BB))
74 if (L->contains(PredBB)) {
75 if (isa<IndirectBrInst>(PredBB->getTerminator()))
76 // We cannot rewrite exiting edges from an indirectbr.
77 return false;
78
79 InLoopPredecessors.push_back(PredBB);
80 } else {
81 IsDedicatedExit = false;
82 }
83
84 assert(!InLoopPredecessors.empty() && "Must have *some* loop predecessor!");
85
86 // Nothing to do if this is already a dedicated exit.
87 if (IsDedicatedExit)
88 return false;
89
90 auto *NewExitBB = SplitBlockPredecessors(
91 BB, InLoopPredecessors, ".loopexit", DT, LI, MSSAU, PreserveLCSSA);
92
93 if (!NewExitBB)
95 dbgs() << "WARNING: Can't create a dedicated exit block for loop: "
96 << *L << "\n");
97 else
98 LLVM_DEBUG(dbgs() << "LoopSimplify: Creating dedicated exit block "
99 << NewExitBB->getName() << "\n");
100 return true;
101 };
102
103 // Walk the exit blocks directly rather than building up a data structure for
104 // them, but only visit each one once.
106 for (auto *BB : L->blocks())
107 for (auto *SuccBB : successors(BB)) {
108 // We're looking for exit blocks so skip in-loop successors.
109 if (L->contains(SuccBB))
110 continue;
111
112 // Visit each exit block exactly once.
113 if (!Visited.insert(SuccBB).second)
114 continue;
115
116 Changed |= RewriteExit(SuccBB);
117 }
118
119 return Changed;
120}
121
122/// Returns the instructions that use values defined in the loop.
125
126 for (auto *Block : L->getBlocks())
127 // FIXME: I believe that this could use copy_if if the Inst reference could
128 // be adapted into a pointer.
129 for (auto &Inst : *Block) {
130 auto Users = Inst.users();
131 if (any_of(Users, [&](User *U) {
132 auto *Use = cast<Instruction>(U);
133 return !L->contains(Use->getParent());
134 }))
135 UsedOutside.push_back(&Inst);
136 }
137
138 return UsedOutside;
139}
140
142 // By definition, all loop passes need the LoopInfo analysis and the
143 // Dominator tree it depends on. Because they all participate in the loop
144 // pass manager, they must also preserve these.
149
150 // We must also preserve LoopSimplify and LCSSA. We locally access their IDs
151 // here because users shouldn't directly get them from this header.
152 extern char &LoopSimplifyID;
153 extern char &LCSSAID;
158 // This is used in the LPPassManager to perform LCSSA verification on passes
159 // which preserve lcssa form
162
163 // Loop passes are designed to run inside of a loop pass manager which means
164 // that any function analyses they require must be required by the first loop
165 // pass in the manager (so that it is computed before the loop pass manager
166 // runs) and preserved by all loop pasess in the manager. To make this
167 // reasonably robust, the set needed for most loop passes is maintained here.
168 // If your loop pass requires an analysis not listed here, you will need to
169 // carefully audit the loop pass manager nesting structure that results.
177 // FIXME: When all loop passes preserve MemorySSA, it can be required and
178 // preserved here instead of the individual handling in each pass.
179}
180
181/// Manually defined generic "LoopPass" dependency initialization. This is used
182/// to initialize the exact set of passes from above in \c
183/// getLoopAnalysisUsage. It can be used within a loop pass's initialization
184/// with:
185///
186/// INITIALIZE_PASS_DEPENDENCY(LoopPass)
187///
188/// As-if "LoopPass" were a pass.
192 INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
193 INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass)
200}
201
202/// Create MDNode for input string.
203static MDNode *createStringMetadata(Loop *TheLoop, StringRef Name, unsigned V) {
204 LLVMContext &Context = TheLoop->getHeader()->getContext();
205 Metadata *MDs[] = {
206 MDString::get(Context, Name),
207 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(Context), V))};
208 return MDNode::get(Context, MDs);
209}
210
211/// Set input string into loop metadata by keeping other values intact.
212/// If the string is already in loop metadata update value if it is
213/// different.
214void llvm::addStringMetadataToLoop(Loop *TheLoop, const char *StringMD,
215 unsigned V) {
217 // If the loop already has metadata, retain it.
218 MDNode *LoopID = TheLoop->getLoopID();
219 if (LoopID) {
220 for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
221 MDNode *Node = cast<MDNode>(LoopID->getOperand(i));
222 // If it is of form key = value, try to parse it.
223 if (Node->getNumOperands() == 2) {
224 MDString *S = dyn_cast<MDString>(Node->getOperand(0));
225 if (S && S->getString() == StringMD) {
226 ConstantInt *IntMD =
227 mdconst::extract_or_null<ConstantInt>(Node->getOperand(1));
228 if (IntMD && IntMD->getSExtValue() == V)
229 // It is already in place. Do nothing.
230 return;
231 // We need to update the value, so just skip it here and it will
232 // be added after copying other existed nodes.
233 continue;
234 }
235 }
236 MDs.push_back(Node);
237 }
238 }
239 // Add new metadata.
240 MDs.push_back(createStringMetadata(TheLoop, StringMD, V));
241 // Replace current metadata node with new one.
242 LLVMContext &Context = TheLoop->getHeader()->getContext();
243 MDNode *NewLoopID = MDNode::get(Context, MDs);
244 // Set operand 0 to refer to the loop id itself.
245 NewLoopID->replaceOperandWith(0, NewLoopID);
246 TheLoop->setLoopID(NewLoopID);
247}
248
249std::optional<ElementCount>
251 std::optional<int> Width =
252 getOptionalIntLoopAttribute(TheLoop, "llvm.loop.vectorize.width");
253
254 if (Width) {
255 std::optional<int> IsScalable = getOptionalIntLoopAttribute(
256 TheLoop, "llvm.loop.vectorize.scalable.enable");
257 return ElementCount::get(*Width, IsScalable.value_or(false));
258 }
259
260 return std::nullopt;
261}
262
263std::optional<MDNode *> llvm::makeFollowupLoopID(
264 MDNode *OrigLoopID, ArrayRef<StringRef> FollowupOptions,
265 const char *InheritOptionsExceptPrefix, bool AlwaysNew) {
266 if (!OrigLoopID) {
267 if (AlwaysNew)
268 return nullptr;
269 return std::nullopt;
270 }
271
272 assert(OrigLoopID->getOperand(0) == OrigLoopID);
273
274 bool InheritAllAttrs = !InheritOptionsExceptPrefix;
275 bool InheritSomeAttrs =
276 InheritOptionsExceptPrefix && InheritOptionsExceptPrefix[0] != '\0';
278 MDs.push_back(nullptr);
279
280 bool Changed = false;
281 if (InheritAllAttrs || InheritSomeAttrs) {
282 for (const MDOperand &Existing : drop_begin(OrigLoopID->operands())) {
283 MDNode *Op = cast<MDNode>(Existing.get());
284
285 auto InheritThisAttribute = [InheritSomeAttrs,
286 InheritOptionsExceptPrefix](MDNode *Op) {
287 if (!InheritSomeAttrs)
288 return false;
289
290 // Skip malformatted attribute metadata nodes.
291 if (Op->getNumOperands() == 0)
292 return true;
293 Metadata *NameMD = Op->getOperand(0).get();
294 if (!isa<MDString>(NameMD))
295 return true;
296 StringRef AttrName = cast<MDString>(NameMD)->getString();
297
298 // Do not inherit excluded attributes.
299 return !AttrName.starts_with(InheritOptionsExceptPrefix);
300 };
301
302 if (InheritThisAttribute(Op))
303 MDs.push_back(Op);
304 else
305 Changed = true;
306 }
307 } else {
308 // Modified if we dropped at least one attribute.
309 Changed = OrigLoopID->getNumOperands() > 1;
310 }
311
312 bool HasAnyFollowup = false;
313 for (StringRef OptionName : FollowupOptions) {
314 MDNode *FollowupNode = findOptionMDForLoopID(OrigLoopID, OptionName);
315 if (!FollowupNode)
316 continue;
317
318 HasAnyFollowup = true;
319 for (const MDOperand &Option : drop_begin(FollowupNode->operands())) {
320 MDs.push_back(Option.get());
321 Changed = true;
322 }
323 }
324
325 // Attributes of the followup loop not specified explicity, so signal to the
326 // transformation pass to add suitable attributes.
327 if (!AlwaysNew && !HasAnyFollowup)
328 return std::nullopt;
329
330 // If no attributes were added or remove, the previous loop Id can be reused.
331 if (!AlwaysNew && !Changed)
332 return OrigLoopID;
333
334 // No attributes is equivalent to having no !llvm.loop metadata at all.
335 if (MDs.size() == 1)
336 return nullptr;
337
338 // Build the new loop ID.
339 MDTuple *FollowupLoopID = MDNode::get(OrigLoopID->getContext(), MDs);
340 FollowupLoopID->replaceOperandWith(0, FollowupLoopID);
341 return FollowupLoopID;
342}
343
346}
347
350}
351
353 if (getBooleanLoopAttribute(L, "llvm.loop.unroll.disable"))
354 return TM_SuppressedByUser;
355
356 std::optional<int> Count =
357 getOptionalIntLoopAttribute(L, "llvm.loop.unroll.count");
358 if (Count)
359 return *Count == 1 ? TM_SuppressedByUser : TM_ForcedByUser;
360
361 if (getBooleanLoopAttribute(L, "llvm.loop.unroll.enable"))
362 return TM_ForcedByUser;
363
364 if (getBooleanLoopAttribute(L, "llvm.loop.unroll.full"))
365 return TM_ForcedByUser;
366
368 return TM_Disable;
369
370 return TM_Unspecified;
371}
372
374 if (getBooleanLoopAttribute(L, "llvm.loop.unroll_and_jam.disable"))
375 return TM_SuppressedByUser;
376
377 std::optional<int> Count =
378 getOptionalIntLoopAttribute(L, "llvm.loop.unroll_and_jam.count");
379 if (Count)
380 return *Count == 1 ? TM_SuppressedByUser : TM_ForcedByUser;
381
382 if (getBooleanLoopAttribute(L, "llvm.loop.unroll_and_jam.enable"))
383 return TM_ForcedByUser;
384
386 return TM_Disable;
387
388 return TM_Unspecified;
389}
390
392 std::optional<bool> Enable =
393 getOptionalBoolLoopAttribute(L, "llvm.loop.vectorize.enable");
394
395 if (Enable == false)
396 return TM_SuppressedByUser;
397
398 std::optional<ElementCount> VectorizeWidth =
400 std::optional<int> InterleaveCount =
401 getOptionalIntLoopAttribute(L, "llvm.loop.interleave.count");
402
403 // 'Forcing' vector width and interleave count to one effectively disables
404 // this tranformation.
405 if (Enable == true && VectorizeWidth && VectorizeWidth->isScalar() &&
406 InterleaveCount == 1)
407 return TM_SuppressedByUser;
408
409 if (getBooleanLoopAttribute(L, "llvm.loop.isvectorized"))
410 return TM_Disable;
411
412 if (Enable == true)
413 return TM_ForcedByUser;
414
415 if ((VectorizeWidth && VectorizeWidth->isScalar()) && InterleaveCount == 1)
416 return TM_Disable;
417
418 if ((VectorizeWidth && VectorizeWidth->isVector()) || InterleaveCount > 1)
419 return TM_Enable;
420
422 return TM_Disable;
423
424 return TM_Unspecified;
425}
426
428 if (getBooleanLoopAttribute(L, "llvm.loop.distribute.enable"))
429 return TM_ForcedByUser;
430
432 return TM_Disable;
433
434 return TM_Unspecified;
435}
436
438 if (getBooleanLoopAttribute(L, "llvm.loop.licm_versioning.disable"))
439 return TM_SuppressedByUser;
440
442 return TM_Disable;
443
444 return TM_Unspecified;
445}
446
447/// Does a BFS from a given node to all of its children inside a given loop.
448/// The returned vector of nodes includes the starting point.
452 auto AddRegionToWorklist = [&](DomTreeNode *DTN) {
453 // Only include subregions in the top level loop.
454 BasicBlock *BB = DTN->getBlock();
455 if (CurLoop->contains(BB))
456 Worklist.push_back(DTN);
457 };
458
459 AddRegionToWorklist(N);
460
461 for (size_t I = 0; I < Worklist.size(); I++) {
462 for (DomTreeNode *Child : Worklist[I]->children())
463 AddRegionToWorklist(Child);
464 }
465
466 return Worklist;
467}
468
470 int LatchIdx = PN->getBasicBlockIndex(LatchBlock);
471 assert(LatchIdx != -1 && "LatchBlock is not a case in this PHINode");
472 Value *IncV = PN->getIncomingValue(LatchIdx);
473
474 for (User *U : PN->users())
475 if (U != Cond && U != IncV) return false;
476
477 for (User *U : IncV->users())
478 if (U != Cond && U != PN) return false;
479 return true;
480}
481
482
484 LoopInfo *LI, MemorySSA *MSSA) {
485 assert((!DT || L->isLCSSAForm(*DT)) && "Expected LCSSA!");
486 auto *Preheader = L->getLoopPreheader();
487 assert(Preheader && "Preheader should exist!");
488
489 std::unique_ptr<MemorySSAUpdater> MSSAU;
490 if (MSSA)
491 MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
492
493 // Now that we know the removal is safe, remove the loop by changing the
494 // branch from the preheader to go to the single exit block.
495 //
496 // Because we're deleting a large chunk of code at once, the sequence in which
497 // we remove things is very important to avoid invalidation issues.
498
499 // Tell ScalarEvolution that the loop is deleted. Do this before
500 // deleting the loop so that ScalarEvolution can look at the loop
501 // to determine what it needs to clean up.
502 if (SE) {
503 SE->forgetLoop(L);
505 }
506
507 Instruction *OldTerm = Preheader->getTerminator();
508 assert(!OldTerm->mayHaveSideEffects() &&
509 "Preheader must end with a side-effect-free terminator");
510 assert(OldTerm->getNumSuccessors() == 1 &&
511 "Preheader must have a single successor");
512 // Connect the preheader to the exit block. Keep the old edge to the header
513 // around to perform the dominator tree update in two separate steps
514 // -- #1 insertion of the edge preheader -> exit and #2 deletion of the edge
515 // preheader -> header.
516 //
517 //
518 // 0. Preheader 1. Preheader 2. Preheader
519 // | | | |
520 // V | V |
521 // Header <--\ | Header <--\ | Header <--\
522 // | | | | | | | | | | |
523 // | V | | | V | | | V |
524 // | Body --/ | | Body --/ | | Body --/
525 // V V V V V
526 // Exit Exit Exit
527 //
528 // By doing this is two separate steps we can perform the dominator tree
529 // update without using the batch update API.
530 //
531 // Even when the loop is never executed, we cannot remove the edge from the
532 // source block to the exit block. Consider the case where the unexecuted loop
533 // branches back to an outer loop. If we deleted the loop and removed the edge
534 // coming to this inner loop, this will break the outer loop structure (by
535 // deleting the backedge of the outer loop). If the outer loop is indeed a
536 // non-loop, it will be deleted in a future iteration of loop deletion pass.
537 IRBuilder<> Builder(OldTerm);
538
539 auto *ExitBlock = L->getUniqueExitBlock();
540 DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
541 if (ExitBlock) {
542 assert(ExitBlock && "Should have a unique exit block!");
543 assert(L->hasDedicatedExits() && "Loop should have dedicated exits!");
544
545 Builder.CreateCondBr(Builder.getFalse(), L->getHeader(), ExitBlock);
546 // Remove the old branch. The conditional branch becomes a new terminator.
547 OldTerm->eraseFromParent();
548
549 // Rewrite phis in the exit block to get their inputs from the Preheader
550 // instead of the exiting block.
551 for (PHINode &P : ExitBlock->phis()) {
552 // Set the zero'th element of Phi to be from the preheader and remove all
553 // other incoming values. Given the loop has dedicated exits, all other
554 // incoming values must be from the exiting blocks.
555 int PredIndex = 0;
556 P.setIncomingBlock(PredIndex, Preheader);
557 // Removes all incoming values from all other exiting blocks (including
558 // duplicate values from an exiting block).
559 // Nuke all entries except the zero'th entry which is the preheader entry.
560 P.removeIncomingValueIf([](unsigned Idx) { return Idx != 0; },
561 /* DeletePHIIfEmpty */ false);
562
563 assert((P.getNumIncomingValues() == 1 &&
564 P.getIncomingBlock(PredIndex) == Preheader) &&
565 "Should have exactly one value and that's from the preheader!");
566 }
567
568 if (DT) {
569 DTU.applyUpdates({{DominatorTree::Insert, Preheader, ExitBlock}});
570 if (MSSA) {
571 MSSAU->applyUpdates({{DominatorTree::Insert, Preheader, ExitBlock}},
572 *DT);
573 if (VerifyMemorySSA)
574 MSSA->verifyMemorySSA();
575 }
576 }
577
578 // Disconnect the loop body by branching directly to its exit.
579 Builder.SetInsertPoint(Preheader->getTerminator());
580 Builder.CreateBr(ExitBlock);
581 // Remove the old branch.
582 Preheader->getTerminator()->eraseFromParent();
583 } else {
584 assert(L->hasNoExitBlocks() &&
585 "Loop should have either zero or one exit blocks.");
586
587 Builder.SetInsertPoint(OldTerm);
588 Builder.CreateUnreachable();
589 Preheader->getTerminator()->eraseFromParent();
590 }
591
592 if (DT) {
593 DTU.applyUpdates({{DominatorTree::Delete, Preheader, L->getHeader()}});
594 if (MSSA) {
595 MSSAU->applyUpdates({{DominatorTree::Delete, Preheader, L->getHeader()}},
596 *DT);
597 SmallSetVector<BasicBlock *, 8> DeadBlockSet(L->block_begin(),
598 L->block_end());
599 MSSAU->removeBlocks(DeadBlockSet);
600 if (VerifyMemorySSA)
601 MSSA->verifyMemorySSA();
602 }
603 }
604
605 // Use a map to unique and a vector to guarantee deterministic ordering.
608 llvm::SmallVector<DbgVariableRecord *, 4> DeadDbgVariableRecords;
609
610 if (ExitBlock) {
611 // Given LCSSA form is satisfied, we should not have users of instructions
612 // within the dead loop outside of the loop. However, LCSSA doesn't take
613 // unreachable uses into account. We handle them here.
614 // We could do it after drop all references (in this case all users in the
615 // loop will be already eliminated and we have less work to do but according
616 // to API doc of User::dropAllReferences only valid operation after dropping
617 // references, is deletion. So let's substitute all usages of
618 // instruction from the loop with poison value of corresponding type first.
619 for (auto *Block : L->blocks())
620 for (Instruction &I : *Block) {
621 auto *Poison = PoisonValue::get(I.getType());
622 for (Use &U : llvm::make_early_inc_range(I.uses())) {
623 if (auto *Usr = dyn_cast<Instruction>(U.getUser()))
624 if (L->contains(Usr->getParent()))
625 continue;
626 // If we have a DT then we can check that uses outside a loop only in
627 // unreachable block.
628 if (DT)
630 "Unexpected user in reachable block");
631 U.set(Poison);
632 }
633
634 // RemoveDIs: do the same as below for DbgVariableRecords.
635 if (Block->IsNewDbgInfoFormat) {
637 filterDbgVars(I.getDbgRecordRange()))) {
638 DebugVariable Key(DVR.getVariable(), DVR.getExpression(),
639 DVR.getDebugLoc().get());
640 if (!DeadDebugSet.insert(Key).second)
641 continue;
642 // Unlinks the DVR from it's container, for later insertion.
643 DVR.removeFromParent();
644 DeadDbgVariableRecords.push_back(&DVR);
645 }
646 }
647
648 // For one of each variable encountered, preserve a debug intrinsic (set
649 // to Poison) and transfer it to the loop exit. This terminates any
650 // variable locations that were set during the loop.
651 auto *DVI = dyn_cast<DbgVariableIntrinsic>(&I);
652 if (!DVI)
653 continue;
654 if (!DeadDebugSet.insert(DebugVariable(DVI)).second)
655 continue;
656 DeadDebugInst.push_back(DVI);
657 }
658
659 // After the loop has been deleted all the values defined and modified
660 // inside the loop are going to be unavailable. Values computed in the
661 // loop will have been deleted, automatically causing their debug uses
662 // be be replaced with undef. Loop invariant values will still be available.
663 // Move dbg.values out the loop so that earlier location ranges are still
664 // terminated and loop invariant assignments are preserved.
665 DIBuilder DIB(*ExitBlock->getModule());
666 BasicBlock::iterator InsertDbgValueBefore =
667 ExitBlock->getFirstInsertionPt();
668 assert(InsertDbgValueBefore != ExitBlock->end() &&
669 "There should be a non-PHI instruction in exit block, else these "
670 "instructions will have no parent.");
671
672 for (auto *DVI : DeadDebugInst)
673 DVI->moveBefore(*ExitBlock, InsertDbgValueBefore);
674
675 // Due to the "head" bit in BasicBlock::iterator, we're going to insert
676 // each DbgVariableRecord right at the start of the block, wheras dbg.values
677 // would be repeatedly inserted before the first instruction. To replicate
678 // this behaviour, do it backwards.
679 for (DbgVariableRecord *DVR : llvm::reverse(DeadDbgVariableRecords))
680 ExitBlock->insertDbgRecordBefore(DVR, InsertDbgValueBefore);
681 }
682
683 // Remove the block from the reference counting scheme, so that we can
684 // delete it freely later.
685 for (auto *Block : L->blocks())
686 Block->dropAllReferences();
687
688 if (MSSA && VerifyMemorySSA)
689 MSSA->verifyMemorySSA();
690
691 if (LI) {
692 // Erase the instructions and the blocks without having to worry
693 // about ordering because we already dropped the references.
694 // NOTE: This iteration is safe because erasing the block does not remove
695 // its entry from the loop's block list. We do that in the next section.
696 for (BasicBlock *BB : L->blocks())
697 BB->eraseFromParent();
698
699 // Finally, the blocks from loopinfo. This has to happen late because
700 // otherwise our loop iterators won't work.
701
703 blocks.insert(L->block_begin(), L->block_end());
704 for (BasicBlock *BB : blocks)
705 LI->removeBlock(BB);
706
707 // The last step is to update LoopInfo now that we've eliminated this loop.
708 // Note: LoopInfo::erase remove the given loop and relink its subloops with
709 // its parent. While removeLoop/removeChildLoop remove the given loop but
710 // not relink its subloops, which is what we want.
711 if (Loop *ParentLoop = L->getParentLoop()) {
712 Loop::iterator I = find(*ParentLoop, L);
713 assert(I != ParentLoop->end() && "Couldn't find loop");
714 ParentLoop->removeChildLoop(I);
715 } else {
716 Loop::iterator I = find(*LI, L);
717 assert(I != LI->end() && "Couldn't find loop");
718 LI->removeLoop(I);
719 }
720 LI->destroy(L);
721 }
722}
723
725 LoopInfo &LI, MemorySSA *MSSA) {
726 auto *Latch = L->getLoopLatch();
727 assert(Latch && "multiple latches not yet supported");
728 auto *Header = L->getHeader();
729 Loop *OutermostLoop = L->getOutermostLoop();
730
731 SE.forgetLoop(L);
733
734 std::unique_ptr<MemorySSAUpdater> MSSAU;
735 if (MSSA)
736 MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
737
738 // Update the CFG and domtree. We chose to special case a couple of
739 // of common cases for code quality and test readability reasons.
740 [&]() -> void {
741 if (auto *BI = dyn_cast<BranchInst>(Latch->getTerminator())) {
742 if (!BI->isConditional()) {
743 DomTreeUpdater DTU(&DT, DomTreeUpdater::UpdateStrategy::Eager);
744 (void)changeToUnreachable(BI, /*PreserveLCSSA*/ true, &DTU,
745 MSSAU.get());
746 return;
747 }
748
749 // Conditional latch/exit - note that latch can be shared by inner
750 // and outer loop so the other target doesn't need to an exit
751 if (L->isLoopExiting(Latch)) {
752 // TODO: Generalize ConstantFoldTerminator so that it can be used
753 // here without invalidating LCSSA or MemorySSA. (Tricky case for
754 // LCSSA: header is an exit block of a preceeding sibling loop w/o
755 // dedicated exits.)
756 const unsigned ExitIdx = L->contains(BI->getSuccessor(0)) ? 1 : 0;
757 BasicBlock *ExitBB = BI->getSuccessor(ExitIdx);
758
759 DomTreeUpdater DTU(&DT, DomTreeUpdater::UpdateStrategy::Eager);
760 Header->removePredecessor(Latch, true);
761
762 IRBuilder<> Builder(BI);
763 auto *NewBI = Builder.CreateBr(ExitBB);
764 // Transfer the metadata to the new branch instruction (minus the
765 // loop info since this is no longer a loop)
766 NewBI->copyMetadata(*BI, {LLVMContext::MD_dbg,
767 LLVMContext::MD_annotation});
768
769 BI->eraseFromParent();
770 DTU.applyUpdates({{DominatorTree::Delete, Latch, Header}});
771 if (MSSA)
772 MSSAU->applyUpdates({{DominatorTree::Delete, Latch, Header}}, DT);
773 return;
774 }
775 }
776
777 // General case. By splitting the backedge, and then explicitly making it
778 // unreachable we gracefully handle corner cases such as switch and invoke
779 // termiantors.
780 auto *BackedgeBB = SplitEdge(Latch, Header, &DT, &LI, MSSAU.get());
781
782 DomTreeUpdater DTU(&DT, DomTreeUpdater::UpdateStrategy::Eager);
783 (void)changeToUnreachable(BackedgeBB->getTerminator(),
784 /*PreserveLCSSA*/ true, &DTU, MSSAU.get());
785 }();
786
787 // Erase (and destroy) this loop instance. Handles relinking sub-loops
788 // and blocks within the loop as needed.
789 LI.erase(L);
790
791 // If the loop we broke had a parent, then changeToUnreachable might have
792 // caused a block to be removed from the parent loop (see loop_nest_lcssa
793 // test case in zero-btc.ll for an example), thus changing the parent's
794 // exit blocks. If that happened, we need to rebuild LCSSA on the outermost
795 // loop which might have a had a block removed.
796 if (OutermostLoop != L)
797 formLCSSARecursively(*OutermostLoop, DT, &LI, &SE);
798}
799
800
801/// Checks if \p L has an exiting latch branch. There may also be other
802/// exiting blocks. Returns branch instruction terminating the loop
803/// latch if above check is successful, nullptr otherwise.
805 BasicBlock *Latch = L->getLoopLatch();
806 if (!Latch)
807 return nullptr;
808
809 BranchInst *LatchBR = dyn_cast<BranchInst>(Latch->getTerminator());
810 if (!LatchBR || LatchBR->getNumSuccessors() != 2 || !L->isLoopExiting(Latch))
811 return nullptr;
812
813 assert((LatchBR->getSuccessor(0) == L->getHeader() ||
814 LatchBR->getSuccessor(1) == L->getHeader()) &&
815 "At least one edge out of the latch must go to the header");
816
817 return LatchBR;
818}
819
820/// Return the estimated trip count for any exiting branch which dominates
821/// the loop latch.
822static std::optional<uint64_t> getEstimatedTripCount(BranchInst *ExitingBranch,
823 Loop *L,
824 uint64_t &OrigExitWeight) {
825 // To estimate the number of times the loop body was executed, we want to
826 // know the number of times the backedge was taken, vs. the number of times
827 // we exited the loop.
828 uint64_t LoopWeight, ExitWeight;
829 if (!extractBranchWeights(*ExitingBranch, LoopWeight, ExitWeight))
830 return std::nullopt;
831
832 if (L->contains(ExitingBranch->getSuccessor(1)))
833 std::swap(LoopWeight, ExitWeight);
834
835 if (!ExitWeight)
836 // Don't have a way to return predicated infinite
837 return std::nullopt;
838
839 OrigExitWeight = ExitWeight;
840
841 // Estimated exit count is a ratio of the loop weight by the weight of the
842 // edge exiting the loop, rounded to nearest.
843 uint64_t ExitCount = llvm::divideNearest(LoopWeight, ExitWeight);
844 // Estimated trip count is one plus estimated exit count.
845 return ExitCount + 1;
846}
847
848std::optional<unsigned>
850 unsigned *EstimatedLoopInvocationWeight) {
851 // Currently we take the estimate exit count only from the loop latch,
852 // ignoring other exiting blocks. This can overestimate the trip count
853 // if we exit through another exit, but can never underestimate it.
854 // TODO: incorporate information from other exits
855 if (BranchInst *LatchBranch = getExpectedExitLoopLatchBranch(L)) {
856 uint64_t ExitWeight;
857 if (std::optional<uint64_t> EstTripCount =
858 getEstimatedTripCount(LatchBranch, L, ExitWeight)) {
859 if (EstimatedLoopInvocationWeight)
860 *EstimatedLoopInvocationWeight = ExitWeight;
861 return *EstTripCount;
862 }
863 }
864 return std::nullopt;
865}
866
867bool llvm::setLoopEstimatedTripCount(Loop *L, unsigned EstimatedTripCount,
868 unsigned EstimatedloopInvocationWeight) {
869 // At the moment, we currently support changing the estimate trip count of
870 // the latch branch only. We could extend this API to manipulate estimated
871 // trip counts for any exit.
873 if (!LatchBranch)
874 return false;
875
876 // Calculate taken and exit weights.
877 unsigned LatchExitWeight = 0;
878 unsigned BackedgeTakenWeight = 0;
879
880 if (EstimatedTripCount > 0) {
881 LatchExitWeight = EstimatedloopInvocationWeight;
882 BackedgeTakenWeight = (EstimatedTripCount - 1) * LatchExitWeight;
883 }
884
885 // Make a swap if back edge is taken when condition is "false".
886 if (LatchBranch->getSuccessor(0) != L->getHeader())
887 std::swap(BackedgeTakenWeight, LatchExitWeight);
888
889 MDBuilder MDB(LatchBranch->getContext());
890
891 // Set/Update profile metadata.
892 LatchBranch->setMetadata(
893 LLVMContext::MD_prof,
894 MDB.createBranchWeights(BackedgeTakenWeight, LatchExitWeight));
895
896 return true;
897}
898
900 ScalarEvolution &SE) {
901 Loop *OuterL = InnerLoop->getParentLoop();
902 if (!OuterL)
903 return true;
904
905 // Get the backedge taken count for the inner loop
906 BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
907 const SCEV *InnerLoopBECountSC = SE.getExitCount(InnerLoop, InnerLoopLatch);
908 if (isa<SCEVCouldNotCompute>(InnerLoopBECountSC) ||
909 !InnerLoopBECountSC->getType()->isIntegerTy())
910 return false;
911
912 // Get whether count is invariant to the outer loop
914 SE.getLoopDisposition(InnerLoopBECountSC, OuterL);
916 return false;
917
918 return true;
919}
920
922 switch (RdxID) {
923 case Intrinsic::vector_reduce_fadd:
924 return Instruction::FAdd;
925 case Intrinsic::vector_reduce_fmul:
926 return Instruction::FMul;
927 case Intrinsic::vector_reduce_add:
928 return Instruction::Add;
929 case Intrinsic::vector_reduce_mul:
930 return Instruction::Mul;
931 case Intrinsic::vector_reduce_and:
932 return Instruction::And;
933 case Intrinsic::vector_reduce_or:
934 return Instruction::Or;
935 case Intrinsic::vector_reduce_xor:
936 return Instruction::Xor;
937 case Intrinsic::vector_reduce_smax:
938 case Intrinsic::vector_reduce_smin:
939 case Intrinsic::vector_reduce_umax:
940 case Intrinsic::vector_reduce_umin:
941 return Instruction::ICmp;
942 case Intrinsic::vector_reduce_fmax:
943 case Intrinsic::vector_reduce_fmin:
944 return Instruction::FCmp;
945 default:
946 llvm_unreachable("Unexpected ID");
947 }
948}
949
951 switch (RdxID) {
952 default:
953 llvm_unreachable("Unknown min/max recurrence kind");
954 case Intrinsic::vector_reduce_umin:
955 return Intrinsic::umin;
956 case Intrinsic::vector_reduce_umax:
957 return Intrinsic::umax;
958 case Intrinsic::vector_reduce_smin:
959 return Intrinsic::smin;
960 case Intrinsic::vector_reduce_smax:
961 return Intrinsic::smax;
962 case Intrinsic::vector_reduce_fmin:
963 return Intrinsic::minnum;
964 case Intrinsic::vector_reduce_fmax:
965 return Intrinsic::maxnum;
966 case Intrinsic::vector_reduce_fminimum:
967 return Intrinsic::minimum;
968 case Intrinsic::vector_reduce_fmaximum:
969 return Intrinsic::maximum;
970 }
971}
972
974 switch (RK) {
975 default:
976 llvm_unreachable("Unknown min/max recurrence kind");
977 case RecurKind::UMin:
978 return Intrinsic::umin;
979 case RecurKind::UMax:
980 return Intrinsic::umax;
981 case RecurKind::SMin:
982 return Intrinsic::smin;
983 case RecurKind::SMax:
984 return Intrinsic::smax;
985 case RecurKind::FMin:
986 return Intrinsic::minnum;
987 case RecurKind::FMax:
988 return Intrinsic::maxnum;
989 case RecurKind::FMinimum:
990 return Intrinsic::minimum;
991 case RecurKind::FMaximum:
992 return Intrinsic::maximum;
993 }
994}
995
997 switch (RdxID) {
998 case Intrinsic::vector_reduce_smax:
999 return RecurKind::SMax;
1000 case Intrinsic::vector_reduce_smin:
1001 return RecurKind::SMin;
1002 case Intrinsic::vector_reduce_umax:
1003 return RecurKind::UMax;
1004 case Intrinsic::vector_reduce_umin:
1005 return RecurKind::UMin;
1006 case Intrinsic::vector_reduce_fmax:
1007 return RecurKind::FMax;
1008 case Intrinsic::vector_reduce_fmin:
1009 return RecurKind::FMin;
1010 default:
1011 return RecurKind::None;
1012 }
1013}
1014
1016 switch (RK) {
1017 default:
1018 llvm_unreachable("Unknown min/max recurrence kind");
1019 case RecurKind::UMin:
1020 return CmpInst::ICMP_ULT;
1021 case RecurKind::UMax:
1022 return CmpInst::ICMP_UGT;
1023 case RecurKind::SMin:
1024 return CmpInst::ICMP_SLT;
1025 case RecurKind::SMax:
1026 return CmpInst::ICMP_SGT;
1027 case RecurKind::FMin:
1028 return CmpInst::FCMP_OLT;
1029 case RecurKind::FMax:
1030 return CmpInst::FCMP_OGT;
1031 // We do not add FMinimum/FMaximum recurrence kind here since there is no
1032 // equivalent predicate which compares signed zeroes according to the
1033 // semantics of the intrinsics (llvm.minimum/maximum).
1034 }
1035}
1036
1038 Value *Right) {
1039 Type *Ty = Left->getType();
1040 if (Ty->isIntOrIntVectorTy() ||
1041 (RK == RecurKind::FMinimum || RK == RecurKind::FMaximum)) {
1042 // TODO: Add float minnum/maxnum support when FMF nnan is set.
1044 return Builder.CreateIntrinsic(Ty, Id, {Left, Right}, nullptr,
1045 "rdx.minmax");
1046 }
1048 Value *Cmp = Builder.CreateCmp(Pred, Left, Right, "rdx.minmax.cmp");
1049 Value *Select = Builder.CreateSelect(Cmp, Left, Right, "rdx.minmax.select");
1050 return Select;
1051}
1052
1053// Helper to generate an ordered reduction.
1055 unsigned Op, RecurKind RdxKind) {
1056 unsigned VF = cast<FixedVectorType>(Src->getType())->getNumElements();
1057
1058 // Extract and apply reduction ops in ascending order:
1059 // e.g. ((((Acc + Scl[0]) + Scl[1]) + Scl[2]) + ) ... + Scl[VF-1]
1060 Value *Result = Acc;
1061 for (unsigned ExtractIdx = 0; ExtractIdx != VF; ++ExtractIdx) {
1062 Value *Ext =
1063 Builder.CreateExtractElement(Src, Builder.getInt32(ExtractIdx));
1064
1065 if (Op != Instruction::ICmp && Op != Instruction::FCmp) {
1066 Result = Builder.CreateBinOp((Instruction::BinaryOps)Op, Result, Ext,
1067 "bin.rdx");
1068 } else {
1070 "Invalid min/max");
1071 Result = createMinMaxOp(Builder, RdxKind, Result, Ext);
1072 }
1073 }
1074
1075 return Result;
1076}
1077
1078// Helper to generate a log2 shuffle reduction.
1080 unsigned Op,
1082 RecurKind RdxKind) {
1083 unsigned VF = cast<FixedVectorType>(Src->getType())->getNumElements();
1084 // VF is a power of 2 so we can emit the reduction using log2(VF) shuffles
1085 // and vector ops, reducing the set of values being computed by half each
1086 // round.
1087 assert(isPowerOf2_32(VF) &&
1088 "Reduction emission only supported for pow2 vectors!");
1089 // Note: fast-math-flags flags are controlled by the builder configuration
1090 // and are assumed to apply to all generated arithmetic instructions. Other
1091 // poison generating flags (nsw/nuw/inbounds/inrange/exact) are not part
1092 // of the builder configuration, and since they're not passed explicitly,
1093 // will never be relevant here. Note that it would be generally unsound to
1094 // propagate these from an intrinsic call to the expansion anyways as we/
1095 // change the order of operations.
1096 auto BuildShuffledOp = [&Builder, &Op,
1097 &RdxKind](SmallVectorImpl<int> &ShuffleMask,
1098 Value *&TmpVec) -> void {
1099 Value *Shuf = Builder.CreateShuffleVector(TmpVec, ShuffleMask, "rdx.shuf");
1100 if (Op != Instruction::ICmp && Op != Instruction::FCmp) {
1101 TmpVec = Builder.CreateBinOp((Instruction::BinaryOps)Op, TmpVec, Shuf,
1102 "bin.rdx");
1103 } else {
1105 "Invalid min/max");
1106 TmpVec = createMinMaxOp(Builder, RdxKind, TmpVec, Shuf);
1107 }
1108 };
1109
1110 Value *TmpVec = Src;
1111 if (TargetTransformInfo::ReductionShuffle::Pairwise == RS) {
1112 SmallVector<int, 32> ShuffleMask(VF);
1113 for (unsigned stride = 1; stride < VF; stride <<= 1) {
1114 // Initialise the mask with undef.
1115 std::fill(ShuffleMask.begin(), ShuffleMask.end(), -1);
1116 for (unsigned j = 0; j < VF; j += stride << 1) {
1117 ShuffleMask[j] = j + stride;
1118 }
1119 BuildShuffledOp(ShuffleMask, TmpVec);
1120 }
1121 } else {
1122 SmallVector<int, 32> ShuffleMask(VF);
1123 for (unsigned i = VF; i != 1; i >>= 1) {
1124 // Move the upper half of the vector to the lower half.
1125 for (unsigned j = 0; j != i / 2; ++j)
1126 ShuffleMask[j] = i / 2 + j;
1127
1128 // Fill the rest of the mask with undef.
1129 std::fill(&ShuffleMask[i / 2], ShuffleMask.end(), -1);
1130 BuildShuffledOp(ShuffleMask, TmpVec);
1131 }
1132 }
1133 // The result is in the first element of the vector.
1134 return Builder.CreateExtractElement(TmpVec, Builder.getInt32(0));
1135}
1136
1139 PHINode *OrigPhi) {
1140 assert(
1142 "Unexpected reduction kind");
1143 Value *InitVal = Desc.getRecurrenceStartValue();
1144 Value *NewVal = nullptr;
1145
1146 // First use the original phi to determine the new value we're trying to
1147 // select from in the loop.
1148 SelectInst *SI = nullptr;
1149 for (auto *U : OrigPhi->users()) {
1150 if ((SI = dyn_cast<SelectInst>(U)))
1151 break;
1152 }
1153 assert(SI && "One user of the original phi should be a select");
1154
1155 if (SI->getTrueValue() == OrigPhi)
1156 NewVal = SI->getFalseValue();
1157 else {
1158 assert(SI->getFalseValue() == OrigPhi &&
1159 "At least one input to the select should be the original Phi");
1160 NewVal = SI->getTrueValue();
1161 }
1162
1163 // If any predicate is true it means that we want to select the new value.
1164 Value *AnyOf =
1165 Src->getType()->isVectorTy() ? Builder.CreateOrReduce(Src) : Src;
1166 // The compares in the loop may yield poison, which propagates through the
1167 // bitwise ORs. Freeze it here before the condition is used.
1168 AnyOf = Builder.CreateFreeze(AnyOf);
1169 return Builder.CreateSelect(AnyOf, NewVal, InitVal, "rdx.select");
1170}
1171
1173 RecurKind RdxKind) {
1174 auto *SrcVecEltTy = cast<VectorType>(Src->getType())->getElementType();
1175 switch (RdxKind) {
1176 case RecurKind::Add:
1177 return Builder.CreateAddReduce(Src);
1178 case RecurKind::Mul:
1179 return Builder.CreateMulReduce(Src);
1180 case RecurKind::And:
1181 return Builder.CreateAndReduce(Src);
1182 case RecurKind::Or:
1183 return Builder.CreateOrReduce(Src);
1184 case RecurKind::Xor:
1185 return Builder.CreateXorReduce(Src);
1186 case RecurKind::FMulAdd:
1187 case RecurKind::FAdd:
1188 return Builder.CreateFAddReduce(ConstantFP::getNegativeZero(SrcVecEltTy),
1189 Src);
1190 case RecurKind::FMul:
1191 return Builder.CreateFMulReduce(ConstantFP::get(SrcVecEltTy, 1.0), Src);
1192 case RecurKind::SMax:
1193 return Builder.CreateIntMaxReduce(Src, true);
1194 case RecurKind::SMin:
1195 return Builder.CreateIntMinReduce(Src, true);
1196 case RecurKind::UMax:
1197 return Builder.CreateIntMaxReduce(Src, false);
1198 case RecurKind::UMin:
1199 return Builder.CreateIntMinReduce(Src, false);
1200 case RecurKind::FMax:
1201 return Builder.CreateFPMaxReduce(Src);
1202 case RecurKind::FMin:
1203 return Builder.CreateFPMinReduce(Src);
1204 case RecurKind::FMinimum:
1205 return Builder.CreateFPMinimumReduce(Src);
1206 case RecurKind::FMaximum:
1207 return Builder.CreateFPMaximumReduce(Src);
1208 default:
1209 llvm_unreachable("Unhandled opcode");
1210 }
1211}
1212
1214 const RecurrenceDescriptor &Desc) {
1215 RecurKind Kind = Desc.getRecurrenceKind();
1217 "AnyOf reduction is not supported.");
1218 auto *SrcTy = cast<VectorType>(Src->getType());
1219 Type *SrcEltTy = SrcTy->getElementType();
1220 Value *Iden =
1221 Desc.getRecurrenceIdentity(Kind, SrcEltTy, Desc.getFastMathFlags());
1222 Value *Ops[] = {Iden, Src};
1223 return VBuilder.createSimpleTargetReduction(Kind, SrcTy, Ops);
1224}
1225
1227 const RecurrenceDescriptor &Desc, Value *Src,
1228 PHINode *OrigPhi) {
1229 // TODO: Support in-order reductions based on the recurrence descriptor.
1230 // All ops in the reduction inherit fast-math-flags from the recurrence
1231 // descriptor.
1233 B.setFastMathFlags(Desc.getFastMathFlags());
1234
1235 RecurKind RK = Desc.getRecurrenceKind();
1237 return createAnyOfTargetReduction(B, Src, Desc, OrigPhi);
1238
1239 return createSimpleTargetReduction(B, Src, RK);
1240}
1241
1244 Value *Src, Value *Start) {
1245 assert((Desc.getRecurrenceKind() == RecurKind::FAdd ||
1246 Desc.getRecurrenceKind() == RecurKind::FMulAdd) &&
1247 "Unexpected reduction kind");
1248 assert(Src->getType()->isVectorTy() && "Expected a vector type");
1249 assert(!Start->getType()->isVectorTy() && "Expected a scalar type");
1250
1251 return B.CreateFAddReduce(Start, Src);
1252}
1253
1256 Value *Src, Value *Start) {
1257 assert((Desc.getRecurrenceKind() == RecurKind::FAdd ||
1258 Desc.getRecurrenceKind() == RecurKind::FMulAdd) &&
1259 "Unexpected reduction kind");
1260 assert(Src->getType()->isVectorTy() && "Expected a vector type");
1261 assert(!Start->getType()->isVectorTy() && "Expected a scalar type");
1262
1263 auto *SrcTy = cast<VectorType>(Src->getType());
1264 Value *Ops[] = {Start, Src};
1265 return VBuilder.createSimpleTargetReduction(RecurKind::FAdd, SrcTy, Ops);
1266}
1267
1269 bool IncludeWrapFlags) {
1270 auto *VecOp = dyn_cast<Instruction>(I);
1271 if (!VecOp)
1272 return;
1273 auto *Intersection = (OpValue == nullptr) ? dyn_cast<Instruction>(VL[0])
1274 : dyn_cast<Instruction>(OpValue);
1275 if (!Intersection)
1276 return;
1277 const unsigned Opcode = Intersection->getOpcode();
1278 VecOp->copyIRFlags(Intersection, IncludeWrapFlags);
1279 for (auto *V : VL) {
1280 auto *Instr = dyn_cast<Instruction>(V);
1281 if (!Instr)
1282 continue;
1283 if (OpValue == nullptr || Opcode == Instr->getOpcode())
1284 VecOp->andIRFlags(V);
1285 }
1286}
1287
1288bool llvm::isKnownNegativeInLoop(const SCEV *S, const Loop *L,
1289 ScalarEvolution &SE) {
1290 const SCEV *Zero = SE.getZero(S->getType());
1291 return SE.isAvailableAtLoopEntry(S, L) &&
1292 SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SLT, S, Zero);
1293}
1294
1296 ScalarEvolution &SE) {
1297 const SCEV *Zero = SE.getZero(S->getType());
1298 return SE.isAvailableAtLoopEntry(S, L) &&
1299 SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SGE, S, Zero);
1300}
1301
1302bool llvm::isKnownPositiveInLoop(const SCEV *S, const Loop *L,
1303 ScalarEvolution &SE) {
1304 const SCEV *Zero = SE.getZero(S->getType());
1305 return SE.isAvailableAtLoopEntry(S, L) &&
1306 SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SGT, S, Zero);
1307}
1308
1310 ScalarEvolution &SE) {
1311 const SCEV *Zero = SE.getZero(S->getType());
1312 return SE.isAvailableAtLoopEntry(S, L) &&
1313 SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SLE, S, Zero);
1314}
1315
1317 bool Signed) {
1318 unsigned BitWidth = cast<IntegerType>(S->getType())->getBitWidth();
1321 auto Predicate = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
1322 return SE.isAvailableAtLoopEntry(S, L) &&
1323 SE.isLoopEntryGuardedByCond(L, Predicate, S,
1324 SE.getConstant(Min));
1325}
1326
1328 bool Signed) {
1329 unsigned BitWidth = cast<IntegerType>(S->getType())->getBitWidth();
1332 auto Predicate = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
1333 return SE.isAvailableAtLoopEntry(S, L) &&
1334 SE.isLoopEntryGuardedByCond(L, Predicate, S,
1335 SE.getConstant(Max));
1336}
1337
1338//===----------------------------------------------------------------------===//
1339// rewriteLoopExitValues - Optimize IV users outside the loop.
1340// As a side effect, reduces the amount of IV processing within the loop.
1341//===----------------------------------------------------------------------===//
1342
1343static bool hasHardUserWithinLoop(const Loop *L, const Instruction *I) {
1346 Visited.insert(I);
1347 WorkList.push_back(I);
1348 while (!WorkList.empty()) {
1349 const Instruction *Curr = WorkList.pop_back_val();
1350 // This use is outside the loop, nothing to do.
1351 if (!L->contains(Curr))
1352 continue;
1353 // Do we assume it is a "hard" use which will not be eliminated easily?
1354 if (Curr->mayHaveSideEffects())
1355 return true;
1356 // Otherwise, add all its users to worklist.
1357 for (const auto *U : Curr->users()) {
1358 auto *UI = cast<Instruction>(U);
1359 if (Visited.insert(UI).second)
1360 WorkList.push_back(UI);
1361 }
1362 }
1363 return false;
1364}
1365
1366// Collect information about PHI nodes which can be transformed in
1367// rewriteLoopExitValues.
1369 PHINode *PN; // For which PHI node is this replacement?
1370 unsigned Ith; // For which incoming value?
1371 const SCEV *ExpansionSCEV; // The SCEV of the incoming value we are rewriting.
1372 Instruction *ExpansionPoint; // Where we'd like to expand that SCEV?
1373 bool HighCost; // Is this expansion a high-cost?
1374
1375 RewritePhi(PHINode *P, unsigned I, const SCEV *Val, Instruction *ExpansionPt,
1376 bool H)
1377 : PN(P), Ith(I), ExpansionSCEV(Val), ExpansionPoint(ExpansionPt),
1378 HighCost(H) {}
1379};
1380
1381// Check whether it is possible to delete the loop after rewriting exit
1382// value. If it is possible, ignore ReplaceExitValue and do rewriting
1383// aggressively.
1384static bool canLoopBeDeleted(Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet) {
1385 BasicBlock *Preheader = L->getLoopPreheader();
1386 // If there is no preheader, the loop will not be deleted.
1387 if (!Preheader)
1388 return false;
1389
1390 // In LoopDeletion pass Loop can be deleted when ExitingBlocks.size() > 1.
1391 // We obviate multiple ExitingBlocks case for simplicity.
1392 // TODO: If we see testcase with multiple ExitingBlocks can be deleted
1393 // after exit value rewriting, we can enhance the logic here.
1394 SmallVector<BasicBlock *, 4> ExitingBlocks;
1395 L->getExitingBlocks(ExitingBlocks);
1397 L->getUniqueExitBlocks(ExitBlocks);
1398 if (ExitBlocks.size() != 1 || ExitingBlocks.size() != 1)
1399 return false;
1400
1401 BasicBlock *ExitBlock = ExitBlocks[0];
1402 BasicBlock::iterator BI = ExitBlock->begin();
1403 while (PHINode *P = dyn_cast<PHINode>(BI)) {
1404 Value *Incoming = P->getIncomingValueForBlock(ExitingBlocks[0]);
1405
1406 // If the Incoming value of P is found in RewritePhiSet, we know it
1407 // could be rewritten to use a loop invariant value in transformation
1408 // phase later. Skip it in the loop invariant check below.
1409 bool found = false;
1410 for (const RewritePhi &Phi : RewritePhiSet) {
1411 unsigned i = Phi.Ith;
1412 if (Phi.PN == P && (Phi.PN)->getIncomingValue(i) == Incoming) {
1413 found = true;
1414 break;
1415 }
1416 }
1417
1418 Instruction *I;
1419 if (!found && (I = dyn_cast<Instruction>(Incoming)))
1420 if (!L->hasLoopInvariantOperands(I))
1421 return false;
1422
1423 ++BI;
1424 }
1425
1426 for (auto *BB : L->blocks())
1427 if (llvm::any_of(*BB, [](Instruction &I) {
1428 return I.mayHaveSideEffects();
1429 }))
1430 return false;
1431
1432 return true;
1433}
1434
1435/// Checks if it is safe to call InductionDescriptor::isInductionPHI for \p Phi,
1436/// and returns true if this Phi is an induction phi in the loop. When
1437/// isInductionPHI returns true, \p ID will be also be set by isInductionPHI.
1438static bool checkIsIndPhi(PHINode *Phi, Loop *L, ScalarEvolution *SE,
1440 if (!Phi)
1441 return false;
1442 if (!L->getLoopPreheader())
1443 return false;
1444 if (Phi->getParent() != L->getHeader())
1445 return false;
1446 return InductionDescriptor::isInductionPHI(Phi, L, SE, ID);
1447}
1448
1450 ScalarEvolution *SE,
1451 const TargetTransformInfo *TTI,
1452 SCEVExpander &Rewriter, DominatorTree *DT,
1455 // Check a pre-condition.
1456 assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
1457 "Indvars did not preserve LCSSA!");
1458
1459 SmallVector<BasicBlock*, 8> ExitBlocks;
1460 L->getUniqueExitBlocks(ExitBlocks);
1461
1462 SmallVector<RewritePhi, 8> RewritePhiSet;
1463 // Find all values that are computed inside the loop, but used outside of it.
1464 // Because of LCSSA, these values will only occur in LCSSA PHI Nodes. Scan
1465 // the exit blocks of the loop to find them.
1466 for (BasicBlock *ExitBB : ExitBlocks) {
1467 // If there are no PHI nodes in this exit block, then no values defined
1468 // inside the loop are used on this path, skip it.
1469 PHINode *PN = dyn_cast<PHINode>(ExitBB->begin());
1470 if (!PN) continue;
1471
1472 unsigned NumPreds = PN->getNumIncomingValues();
1473
1474 // Iterate over all of the PHI nodes.
1475 BasicBlock::iterator BBI = ExitBB->begin();
1476 while ((PN = dyn_cast<PHINode>(BBI++))) {
1477 if (PN->use_empty())
1478 continue; // dead use, don't replace it
1479
1480 if (!SE->isSCEVable(PN->getType()))
1481 continue;
1482
1483 // Iterate over all of the values in all the PHI nodes.
1484 for (unsigned i = 0; i != NumPreds; ++i) {
1485 // If the value being merged in is not integer or is not defined
1486 // in the loop, skip it.
1487 Value *InVal = PN->getIncomingValue(i);
1488 if (!isa<Instruction>(InVal))
1489 continue;
1490
1491 // If this pred is for a subloop, not L itself, skip it.
1492 if (LI->getLoopFor(PN->getIncomingBlock(i)) != L)
1493 continue; // The Block is in a subloop, skip it.
1494
1495 // Check that InVal is defined in the loop.
1496 Instruction *Inst = cast<Instruction>(InVal);
1497 if (!L->contains(Inst))
1498 continue;
1499
1500 // Find exit values which are induction variables in the loop, and are
1501 // unused in the loop, with the only use being the exit block PhiNode,
1502 // and the induction variable update binary operator.
1503 // The exit value can be replaced with the final value when it is cheap
1504 // to do so.
1507 PHINode *IndPhi = dyn_cast<PHINode>(Inst);
1508 if (IndPhi) {
1509 if (!checkIsIndPhi(IndPhi, L, SE, ID))
1510 continue;
1511 // This is an induction PHI. Check that the only users are PHI
1512 // nodes, and induction variable update binary operators.
1513 if (llvm::any_of(Inst->users(), [&](User *U) {
1514 if (!isa<PHINode>(U) && !isa<BinaryOperator>(U))
1515 return true;
1516 BinaryOperator *B = dyn_cast<BinaryOperator>(U);
1517 if (B && B != ID.getInductionBinOp())
1518 return true;
1519 return false;
1520 }))
1521 continue;
1522 } else {
1523 // If it is not an induction phi, it must be an induction update
1524 // binary operator with an induction phi user.
1525 BinaryOperator *B = dyn_cast<BinaryOperator>(Inst);
1526 if (!B)
1527 continue;
1528 if (llvm::any_of(Inst->users(), [&](User *U) {
1529 PHINode *Phi = dyn_cast<PHINode>(U);
1530 if (Phi != PN && !checkIsIndPhi(Phi, L, SE, ID))
1531 return true;
1532 return false;
1533 }))
1534 continue;
1535 if (B != ID.getInductionBinOp())
1536 continue;
1537 }
1538 }
1539
1540 // Okay, this instruction has a user outside of the current loop
1541 // and varies predictably *inside* the loop. Evaluate the value it
1542 // contains when the loop exits, if possible. We prefer to start with
1543 // expressions which are true for all exits (so as to maximize
1544 // expression reuse by the SCEVExpander), but resort to per-exit
1545 // evaluation if that fails.
1546 const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop());
1547 if (isa<SCEVCouldNotCompute>(ExitValue) ||
1548 !SE->isLoopInvariant(ExitValue, L) ||
1549 !Rewriter.isSafeToExpand(ExitValue)) {
1550 // TODO: This should probably be sunk into SCEV in some way; maybe a
1551 // getSCEVForExit(SCEV*, L, ExitingBB)? It can be generalized for
1552 // most SCEV expressions and other recurrence types (e.g. shift
1553 // recurrences). Is there existing code we can reuse?
1554 const SCEV *ExitCount = SE->getExitCount(L, PN->getIncomingBlock(i));
1555 if (isa<SCEVCouldNotCompute>(ExitCount))
1556 continue;
1557 if (auto *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Inst)))
1558 if (AddRec->getLoop() == L)
1559 ExitValue = AddRec->evaluateAtIteration(ExitCount, *SE);
1560 if (isa<SCEVCouldNotCompute>(ExitValue) ||
1561 !SE->isLoopInvariant(ExitValue, L) ||
1562 !Rewriter.isSafeToExpand(ExitValue))
1563 continue;
1564 }
1565
1566 // Computing the value outside of the loop brings no benefit if it is
1567 // definitely used inside the loop in a way which can not be optimized
1568 // away. Avoid doing so unless we know we have a value which computes
1569 // the ExitValue already. TODO: This should be merged into SCEV
1570 // expander to leverage its knowledge of existing expressions.
1571 if (ReplaceExitValue != AlwaysRepl && !isa<SCEVConstant>(ExitValue) &&
1572 !isa<SCEVUnknown>(ExitValue) && hasHardUserWithinLoop(L, Inst))
1573 continue;
1574
1575 // Check if expansions of this SCEV would count as being high cost.
1576 bool HighCost = Rewriter.isHighCostExpansion(
1577 ExitValue, L, SCEVCheapExpansionBudget, TTI, Inst);
1578
1579 // Note that we must not perform expansions until after
1580 // we query *all* the costs, because if we perform temporary expansion
1581 // inbetween, one that we might not intend to keep, said expansion
1582 // *may* affect cost calculation of the next SCEV's we'll query,
1583 // and next SCEV may errneously get smaller cost.
1584
1585 // Collect all the candidate PHINodes to be rewritten.
1586 Instruction *InsertPt =
1587 (isa<PHINode>(Inst) || isa<LandingPadInst>(Inst)) ?
1588 &*Inst->getParent()->getFirstInsertionPt() : Inst;
1589 RewritePhiSet.emplace_back(PN, i, ExitValue, InsertPt, HighCost);
1590 }
1591 }
1592 }
1593
1594 // TODO: evaluate whether it is beneficial to change how we calculate
1595 // high-cost: if we have SCEV 'A' which we know we will expand, should we
1596 // calculate the cost of other SCEV's after expanding SCEV 'A', thus
1597 // potentially giving cost bonus to those other SCEV's?
1598
1599 bool LoopCanBeDel = canLoopBeDeleted(L, RewritePhiSet);
1600 int NumReplaced = 0;
1601
1602 // Transformation.
1603 for (const RewritePhi &Phi : RewritePhiSet) {
1604 PHINode *PN = Phi.PN;
1605
1606 // Only do the rewrite when the ExitValue can be expanded cheaply.
1607 // If LoopCanBeDel is true, rewrite exit value aggressively.
1610 !LoopCanBeDel && Phi.HighCost)
1611 continue;
1612
1613 Value *ExitVal = Rewriter.expandCodeFor(
1614 Phi.ExpansionSCEV, Phi.PN->getType(), Phi.ExpansionPoint);
1615
1616 LLVM_DEBUG(dbgs() << "rewriteLoopExitValues: AfterLoopVal = " << *ExitVal
1617 << '\n'
1618 << " LoopVal = " << *(Phi.ExpansionPoint) << "\n");
1619
1620#ifndef NDEBUG
1621 // If we reuse an instruction from a loop which is neither L nor one of
1622 // its containing loops, we end up breaking LCSSA form for this loop by
1623 // creating a new use of its instruction.
1624 if (auto *ExitInsn = dyn_cast<Instruction>(ExitVal))
1625 if (auto *EVL = LI->getLoopFor(ExitInsn->getParent()))
1626 if (EVL != L)
1627 assert(EVL->contains(L) && "LCSSA breach detected!");
1628#endif
1629
1630 NumReplaced++;
1631 Instruction *Inst = cast<Instruction>(PN->getIncomingValue(Phi.Ith));
1632 PN->setIncomingValue(Phi.Ith, ExitVal);
1633 // It's necessary to tell ScalarEvolution about this explicitly so that
1634 // it can walk the def-use list and forget all SCEVs, as it may not be
1635 // watching the PHI itself. Once the new exit value is in place, there
1636 // may not be a def-use connection between the loop and every instruction
1637 // which got a SCEVAddRecExpr for that loop.
1638 SE->forgetValue(PN);
1639
1640 // If this instruction is dead now, delete it. Don't do it now to avoid
1641 // invalidating iterators.
1642 if (isInstructionTriviallyDead(Inst, TLI))
1643 DeadInsts.push_back(Inst);
1644
1645 // Replace PN with ExitVal if that is legal and does not break LCSSA.
1646 if (PN->getNumIncomingValues() == 1 &&
1647 LI->replacementPreservesLCSSAForm(PN, ExitVal)) {
1648 PN->replaceAllUsesWith(ExitVal);
1649 PN->eraseFromParent();
1650 }
1651 }
1652
1653 // The insertion point instruction may have been deleted; clear it out
1654 // so that the rewriter doesn't trip over it later.
1655 Rewriter.clearInsertPoint();
1656 return NumReplaced;
1657}
1658
1659/// Set weights for \p UnrolledLoop and \p RemainderLoop based on weights for
1660/// \p OrigLoop.
1661void llvm::setProfileInfoAfterUnrolling(Loop *OrigLoop, Loop *UnrolledLoop,
1662 Loop *RemainderLoop, uint64_t UF) {
1663 assert(UF > 0 && "Zero unrolled factor is not supported");
1664 assert(UnrolledLoop != RemainderLoop &&
1665 "Unrolled and Remainder loops are expected to distinct");
1666
1667 // Get number of iterations in the original scalar loop.
1668 unsigned OrigLoopInvocationWeight = 0;
1669 std::optional<unsigned> OrigAverageTripCount =
1670 getLoopEstimatedTripCount(OrigLoop, &OrigLoopInvocationWeight);
1671 if (!OrigAverageTripCount)
1672 return;
1673
1674 // Calculate number of iterations in unrolled loop.
1675 unsigned UnrolledAverageTripCount = *OrigAverageTripCount / UF;
1676 // Calculate number of iterations for remainder loop.
1677 unsigned RemainderAverageTripCount = *OrigAverageTripCount % UF;
1678
1679 setLoopEstimatedTripCount(UnrolledLoop, UnrolledAverageTripCount,
1680 OrigLoopInvocationWeight);
1681 setLoopEstimatedTripCount(RemainderLoop, RemainderAverageTripCount,
1682 OrigLoopInvocationWeight);
1683}
1684
1685/// Utility that implements appending of loops onto a worklist.
1686/// Loops are added in preorder (analogous for reverse postorder for trees),
1687/// and the worklist is processed LIFO.
1688template <typename RangeT>
1690 RangeT &&Loops, SmallPriorityWorklist<Loop *, 4> &Worklist) {
1691 // We use an internal worklist to build up the preorder traversal without
1692 // recursion.
1693 SmallVector<Loop *, 4> PreOrderLoops, PreOrderWorklist;
1694
1695 // We walk the initial sequence of loops in reverse because we generally want
1696 // to visit defs before uses and the worklist is LIFO.
1697 for (Loop *RootL : Loops) {
1698 assert(PreOrderLoops.empty() && "Must start with an empty preorder walk.");
1699 assert(PreOrderWorklist.empty() &&
1700 "Must start with an empty preorder walk worklist.");
1701 PreOrderWorklist.push_back(RootL);
1702 do {
1703 Loop *L = PreOrderWorklist.pop_back_val();
1704 PreOrderWorklist.append(L->begin(), L->end());
1705 PreOrderLoops.push_back(L);
1706 } while (!PreOrderWorklist.empty());
1707
1708 Worklist.insert(std::move(PreOrderLoops));
1709 PreOrderLoops.clear();
1710 }
1711}
1712
1713template <typename RangeT>
1717}
1718
1719template void llvm::appendLoopsToWorklist<ArrayRef<Loop *> &>(
1721
1722template void
1723llvm::appendLoopsToWorklist<Loop &>(Loop &L,
1725
1728 appendReversedLoopsToWorklist(LI, Worklist);
1729}
1730
1732 LoopInfo *LI, LPPassManager *LPM) {
1733 Loop &New = *LI->AllocateLoop();
1734 if (PL)
1735 PL->addChildLoop(&New);
1736 else
1737 LI->addTopLevelLoop(&New);
1738
1739 if (LPM)
1740 LPM->addLoop(New);
1741
1742 // Add all of the blocks in L to the new loop.
1743 for (BasicBlock *BB : L->blocks())
1744 if (LI->getLoopFor(BB) == L)
1745 New.addBasicBlockToLoop(cast<BasicBlock>(VM[BB]), *LI);
1746
1747 // Add all of the subloops to the new loop.
1748 for (Loop *I : *L)
1749 cloneLoop(I, &New, VM, LI, LPM);
1750
1751 return &New;
1752}
1753
1754/// IR Values for the lower and upper bounds of a pointer evolution. We
1755/// need to use value-handles because SCEV expansion can invalidate previously
1756/// expanded values. Thus expansion of a pointer can invalidate the bounds for
1757/// a previous one.
1762};
1763
1764/// Expand code for the lower and upper bound of the pointer group \p CG
1765/// in \p TheLoop. \return the values for the bounds.
1767 Loop *TheLoop, Instruction *Loc,
1768 SCEVExpander &Exp, bool HoistRuntimeChecks) {
1769 LLVMContext &Ctx = Loc->getContext();
1770 Type *PtrArithTy = PointerType::get(Ctx, CG->AddressSpace);
1771
1772 Value *Start = nullptr, *End = nullptr;
1773 LLVM_DEBUG(dbgs() << "LAA: Adding RT check for range:\n");
1774 const SCEV *Low = CG->Low, *High = CG->High, *Stride = nullptr;
1775
1776 // If the Low and High values are themselves loop-variant, then we may want
1777 // to expand the range to include those covered by the outer loop as well.
1778 // There is a trade-off here with the advantage being that creating checks
1779 // using the expanded range permits the runtime memory checks to be hoisted
1780 // out of the outer loop. This reduces the cost of entering the inner loop,
1781 // which can be significant for low trip counts. The disadvantage is that
1782 // there is a chance we may now never enter the vectorized inner loop,
1783 // whereas using a restricted range check could have allowed us to enter at
1784 // least once. This is why the behaviour is not currently the default and is
1785 // controlled by the parameter 'HoistRuntimeChecks'.
1786 if (HoistRuntimeChecks && TheLoop->getParentLoop() &&
1787 isa<SCEVAddRecExpr>(High) && isa<SCEVAddRecExpr>(Low)) {
1788 auto *HighAR = cast<SCEVAddRecExpr>(High);
1789 auto *LowAR = cast<SCEVAddRecExpr>(Low);
1790 const Loop *OuterLoop = TheLoop->getParentLoop();
1791 ScalarEvolution &SE = *Exp.getSE();
1792 const SCEV *Recur = LowAR->getStepRecurrence(SE);
1793 if (Recur == HighAR->getStepRecurrence(SE) &&
1794 HighAR->getLoop() == OuterLoop && LowAR->getLoop() == OuterLoop) {
1795 BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
1796 const SCEV *OuterExitCount = SE.getExitCount(OuterLoop, OuterLoopLatch);
1797 if (!isa<SCEVCouldNotCompute>(OuterExitCount) &&
1798 OuterExitCount->getType()->isIntegerTy()) {
1799 const SCEV *NewHigh =
1800 cast<SCEVAddRecExpr>(High)->evaluateAtIteration(OuterExitCount, SE);
1801 if (!isa<SCEVCouldNotCompute>(NewHigh)) {
1802 LLVM_DEBUG(dbgs() << "LAA: Expanded RT check for range to include "
1803 "outer loop in order to permit hoisting\n");
1804 High = NewHigh;
1805 Low = cast<SCEVAddRecExpr>(Low)->getStart();
1806 // If there is a possibility that the stride is negative then we have
1807 // to generate extra checks to ensure the stride is positive.
1808 if (!SE.isKnownNonNegative(
1809 SE.applyLoopGuards(Recur, HighAR->getLoop()))) {
1810 Stride = Recur;
1811 LLVM_DEBUG(dbgs() << "LAA: ... but need to check stride is "
1812 "positive: "
1813 << *Stride << '\n');
1814 }
1815 }
1816 }
1817 }
1818 }
1819
1820 Start = Exp.expandCodeFor(Low, PtrArithTy, Loc);
1821 End = Exp.expandCodeFor(High, PtrArithTy, Loc);
1822 if (CG->NeedsFreeze) {
1823 IRBuilder<> Builder(Loc);
1824 Start = Builder.CreateFreeze(Start, Start->getName() + ".fr");
1825 End = Builder.CreateFreeze(End, End->getName() + ".fr");
1826 }
1827 Value *StrideVal =
1828 Stride ? Exp.expandCodeFor(Stride, Stride->getType(), Loc) : nullptr;
1829 LLVM_DEBUG(dbgs() << "Start: " << *Low << " End: " << *High << "\n");
1830 return {Start, End, StrideVal};
1831}
1832
1833/// Turns a collection of checks into a collection of expanded upper and
1834/// lower bounds for both pointers in the check.
1837 Instruction *Loc, SCEVExpander &Exp, bool HoistRuntimeChecks) {
1839
1840 // Here we're relying on the SCEV Expander's cache to only emit code for the
1841 // same bounds once.
1842 transform(PointerChecks, std::back_inserter(ChecksWithBounds),
1843 [&](const RuntimePointerCheck &Check) {
1844 PointerBounds First = expandBounds(Check.first, L, Loc, Exp,
1846 Second = expandBounds(Check.second, L, Loc, Exp,
1848 return std::make_pair(First, Second);
1849 });
1850
1851 return ChecksWithBounds;
1852}
1853
1855 Instruction *Loc, Loop *TheLoop,
1856 const SmallVectorImpl<RuntimePointerCheck> &PointerChecks,
1857 SCEVExpander &Exp, bool HoistRuntimeChecks) {
1858 // TODO: Move noalias annotation code from LoopVersioning here and share with LV if possible.
1859 // TODO: Pass RtPtrChecking instead of PointerChecks and SE separately, if possible
1860 auto ExpandedChecks =
1861 expandBounds(PointerChecks, TheLoop, Loc, Exp, HoistRuntimeChecks);
1862
1863 LLVMContext &Ctx = Loc->getContext();
1864 IRBuilder<InstSimplifyFolder> ChkBuilder(Ctx,
1865 Loc->getDataLayout());
1866 ChkBuilder.SetInsertPoint(Loc);
1867 // Our instructions might fold to a constant.
1868 Value *MemoryRuntimeCheck = nullptr;
1869
1870 for (const auto &[A, B] : ExpandedChecks) {
1871 // Check if two pointers (A and B) conflict where conflict is computed as:
1872 // start(A) <= end(B) && start(B) <= end(A)
1873
1874 assert((A.Start->getType()->getPointerAddressSpace() ==
1875 B.End->getType()->getPointerAddressSpace()) &&
1876 (B.Start->getType()->getPointerAddressSpace() ==
1877 A.End->getType()->getPointerAddressSpace()) &&
1878 "Trying to bounds check pointers with different address spaces");
1879
1880 // [A|B].Start points to the first accessed byte under base [A|B].
1881 // [A|B].End points to the last accessed byte, plus one.
1882 // There is no conflict when the intervals are disjoint:
1883 // NoConflict = (B.Start >= A.End) || (A.Start >= B.End)
1884 //
1885 // bound0 = (B.Start < A.End)
1886 // bound1 = (A.Start < B.End)
1887 // IsConflict = bound0 & bound1
1888 Value *Cmp0 = ChkBuilder.CreateICmpULT(A.Start, B.End, "bound0");
1889 Value *Cmp1 = ChkBuilder.CreateICmpULT(B.Start, A.End, "bound1");
1890 Value *IsConflict = ChkBuilder.CreateAnd(Cmp0, Cmp1, "found.conflict");
1891 if (A.StrideToCheck) {
1892 Value *IsNegativeStride = ChkBuilder.CreateICmpSLT(
1893 A.StrideToCheck, ConstantInt::get(A.StrideToCheck->getType(), 0),
1894 "stride.check");
1895 IsConflict = ChkBuilder.CreateOr(IsConflict, IsNegativeStride);
1896 }
1897 if (B.StrideToCheck) {
1898 Value *IsNegativeStride = ChkBuilder.CreateICmpSLT(
1899 B.StrideToCheck, ConstantInt::get(B.StrideToCheck->getType(), 0),
1900 "stride.check");
1901 IsConflict = ChkBuilder.CreateOr(IsConflict, IsNegativeStride);
1902 }
1903 if (MemoryRuntimeCheck) {
1904 IsConflict =
1905 ChkBuilder.CreateOr(MemoryRuntimeCheck, IsConflict, "conflict.rdx");
1906 }
1907 MemoryRuntimeCheck = IsConflict;
1908 }
1909
1910 return MemoryRuntimeCheck;
1911}
1912
1914 Instruction *Loc, ArrayRef<PointerDiffInfo> Checks, SCEVExpander &Expander,
1915 function_ref<Value *(IRBuilderBase &, unsigned)> GetVF, unsigned IC) {
1916
1917 LLVMContext &Ctx = Loc->getContext();
1918 IRBuilder<InstSimplifyFolder> ChkBuilder(Ctx,
1919 Loc->getDataLayout());
1920 ChkBuilder.SetInsertPoint(Loc);
1921 // Our instructions might fold to a constant.
1922 Value *MemoryRuntimeCheck = nullptr;
1923
1924 auto &SE = *Expander.getSE();
1925 // Map to keep track of created compares, The key is the pair of operands for
1926 // the compare, to allow detecting and re-using redundant compares.
1928 for (const auto &[SrcStart, SinkStart, AccessSize, NeedsFreeze] : Checks) {
1929 Type *Ty = SinkStart->getType();
1930 // Compute VF * IC * AccessSize.
1931 auto *VFTimesUFTimesSize =
1932 ChkBuilder.CreateMul(GetVF(ChkBuilder, Ty->getScalarSizeInBits()),
1933 ConstantInt::get(Ty, IC * AccessSize));
1934 Value *Diff =
1935 Expander.expandCodeFor(SE.getMinusSCEV(SinkStart, SrcStart), Ty, Loc);
1936
1937 // Check if the same compare has already been created earlier. In that case,
1938 // there is no need to check it again.
1939 Value *IsConflict = SeenCompares.lookup({Diff, VFTimesUFTimesSize});
1940 if (IsConflict)
1941 continue;
1942
1943 IsConflict =
1944 ChkBuilder.CreateICmpULT(Diff, VFTimesUFTimesSize, "diff.check");
1945 SeenCompares.insert({{Diff, VFTimesUFTimesSize}, IsConflict});
1946 if (NeedsFreeze)
1947 IsConflict =
1948 ChkBuilder.CreateFreeze(IsConflict, IsConflict->getName() + ".fr");
1949 if (MemoryRuntimeCheck) {
1950 IsConflict =
1951 ChkBuilder.CreateOr(MemoryRuntimeCheck, IsConflict, "conflict.rdx");
1952 }
1953 MemoryRuntimeCheck = IsConflict;
1954 }
1955
1956 return MemoryRuntimeCheck;
1957}
1958
1959std::optional<IVConditionInfo>
1961 const MemorySSA &MSSA, AAResults &AA) {
1962 auto *TI = dyn_cast<BranchInst>(L.getHeader()->getTerminator());
1963 if (!TI || !TI->isConditional())
1964 return {};
1965
1966 auto *CondI = dyn_cast<Instruction>(TI->getCondition());
1967 // The case with the condition outside the loop should already be handled
1968 // earlier.
1969 // Allow CmpInst and TruncInsts as they may be users of load instructions
1970 // and have potential for partial unswitching
1971 if (!CondI || !isa<CmpInst, TruncInst>(CondI) || !L.contains(CondI))
1972 return {};
1973
1974 SmallVector<Instruction *> InstToDuplicate;
1975 InstToDuplicate.push_back(CondI);
1976
1977 SmallVector<Value *, 4> WorkList;
1978 WorkList.append(CondI->op_begin(), CondI->op_end());
1979
1980 SmallVector<MemoryAccess *, 4> AccessesToCheck;
1981 SmallVector<MemoryLocation, 4> AccessedLocs;
1982 while (!WorkList.empty()) {
1983 Instruction *I = dyn_cast<Instruction>(WorkList.pop_back_val());
1984 if (!I || !L.contains(I))
1985 continue;
1986
1987 // TODO: support additional instructions.
1988 if (!isa<LoadInst>(I) && !isa<GetElementPtrInst>(I))
1989 return {};
1990
1991 // Do not duplicate volatile and atomic loads.
1992 if (auto *LI = dyn_cast<LoadInst>(I))
1993 if (LI->isVolatile() || LI->isAtomic())
1994 return {};
1995
1996 InstToDuplicate.push_back(I);
1997 if (MemoryAccess *MA = MSSA.getMemoryAccess(I)) {
1998 if (auto *MemUse = dyn_cast_or_null<MemoryUse>(MA)) {
1999 // Queue the defining access to check for alias checks.
2000 AccessesToCheck.push_back(MemUse->getDefiningAccess());
2001 AccessedLocs.push_back(MemoryLocation::get(I));
2002 } else {
2003 // MemoryDefs may clobber the location or may be atomic memory
2004 // operations. Bail out.
2005 return {};
2006 }
2007 }
2008 WorkList.append(I->op_begin(), I->op_end());
2009 }
2010
2011 if (InstToDuplicate.empty())
2012 return {};
2013
2014 SmallVector<BasicBlock *, 4> ExitingBlocks;
2015 L.getExitingBlocks(ExitingBlocks);
2016 auto HasNoClobbersOnPath =
2017 [&L, &AA, &AccessedLocs, &ExitingBlocks, &InstToDuplicate,
2018 MSSAThreshold](BasicBlock *Succ, BasicBlock *Header,
2019 SmallVector<MemoryAccess *, 4> AccessesToCheck)
2020 -> std::optional<IVConditionInfo> {
2022 // First, collect all blocks in the loop that are on a patch from Succ
2023 // to the header.
2025 WorkList.push_back(Succ);
2026 WorkList.push_back(Header);
2028 Seen.insert(Header);
2029 Info.PathIsNoop &=
2030 all_of(*Header, [](Instruction &I) { return !I.mayHaveSideEffects(); });
2031
2032 while (!WorkList.empty()) {
2033 BasicBlock *Current = WorkList.pop_back_val();
2034 if (!L.contains(Current))
2035 continue;
2036 const auto &SeenIns = Seen.insert(Current);
2037 if (!SeenIns.second)
2038 continue;
2039
2040 Info.PathIsNoop &= all_of(
2041 *Current, [](Instruction &I) { return !I.mayHaveSideEffects(); });
2042 WorkList.append(succ_begin(Current), succ_end(Current));
2043 }
2044
2045 // Require at least 2 blocks on a path through the loop. This skips
2046 // paths that directly exit the loop.
2047 if (Seen.size() < 2)
2048 return {};
2049
2050 // Next, check if there are any MemoryDefs that are on the path through
2051 // the loop (in the Seen set) and they may-alias any of the locations in
2052 // AccessedLocs. If that is the case, they may modify the condition and
2053 // partial unswitching is not possible.
2054 SmallPtrSet<MemoryAccess *, 4> SeenAccesses;
2055 while (!AccessesToCheck.empty()) {
2056 MemoryAccess *Current = AccessesToCheck.pop_back_val();
2057 auto SeenI = SeenAccesses.insert(Current);
2058 if (!SeenI.second || !Seen.contains(Current->getBlock()))
2059 continue;
2060
2061 // Bail out if exceeded the threshold.
2062 if (SeenAccesses.size() >= MSSAThreshold)
2063 return {};
2064
2065 // MemoryUse are read-only accesses.
2066 if (isa<MemoryUse>(Current))
2067 continue;
2068
2069 // For a MemoryDef, check if is aliases any of the location feeding
2070 // the original condition.
2071 if (auto *CurrentDef = dyn_cast<MemoryDef>(Current)) {
2072 if (any_of(AccessedLocs, [&AA, CurrentDef](MemoryLocation &Loc) {
2073 return isModSet(
2074 AA.getModRefInfo(CurrentDef->getMemoryInst(), Loc));
2075 }))
2076 return {};
2077 }
2078
2079 for (Use &U : Current->uses())
2080 AccessesToCheck.push_back(cast<MemoryAccess>(U.getUser()));
2081 }
2082
2083 // We could also allow loops with known trip counts without mustprogress,
2084 // but ScalarEvolution may not be available.
2085 Info.PathIsNoop &= isMustProgress(&L);
2086
2087 // If the path is considered a no-op so far, check if it reaches a
2088 // single exit block without any phis. This ensures no values from the
2089 // loop are used outside of the loop.
2090 if (Info.PathIsNoop) {
2091 for (auto *Exiting : ExitingBlocks) {
2092 if (!Seen.contains(Exiting))
2093 continue;
2094 for (auto *Succ : successors(Exiting)) {
2095 if (L.contains(Succ))
2096 continue;
2097
2098 Info.PathIsNoop &= Succ->phis().empty() &&
2099 (!Info.ExitForPath || Info.ExitForPath == Succ);
2100 if (!Info.PathIsNoop)
2101 break;
2102 assert((!Info.ExitForPath || Info.ExitForPath == Succ) &&
2103 "cannot have multiple exit blocks");
2104 Info.ExitForPath = Succ;
2105 }
2106 }
2107 }
2108 if (!Info.ExitForPath)
2109 Info.PathIsNoop = false;
2110
2111 Info.InstToDuplicate = InstToDuplicate;
2112 return Info;
2113 };
2114
2115 // If we branch to the same successor, partial unswitching will not be
2116 // beneficial.
2117 if (TI->getSuccessor(0) == TI->getSuccessor(1))
2118 return {};
2119
2120 if (auto Info = HasNoClobbersOnPath(TI->getSuccessor(0), L.getHeader(),
2121 AccessesToCheck)) {
2122 Info->KnownValue = ConstantInt::getTrue(TI->getContext());
2123 return Info;
2124 }
2125 if (auto Info = HasNoClobbersOnPath(TI->getSuccessor(1), L.getHeader(),
2126 AccessesToCheck)) {
2127 Info->KnownValue = ConstantInt::getFalse(TI->getContext());
2128 return Info;
2129 }
2130
2131 return {};
2132}
amdgpu AMDGPU Register Bank Select
This is the interface for LLVM's primary stateless and local alias analysis.
bbsections Prepares for basic block by splitting functions into clusters of basic blocks
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file defines the DenseSet and SmallDenseSet classes.
@ Enable
Definition: DwarfDebug.cpp:87
std::string Name
bool End
Definition: ELF_riscv.cpp:480
#define Check(C,...)
This is the interface for a simple mod/ref and alias analysis over globals.
static const HTTPClientCleanup Cleanup
Definition: HTTPClient.cpp:42
Hexagon Hardware Loops
iv Induction Variable Users
Definition: IVUsers.cpp:48
static cl::opt< ReplaceExitVal > ReplaceExitValue("replexitval", cl::Hidden, cl::init(OnlyCheapRepl), cl::desc("Choose the strategy to replace exit value in IndVarSimplify"), cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"), clEnumValN(OnlyCheapRepl, "cheap", "only replace exit value when the cost is cheap"), clEnumValN(UnusedIndVarInLoop, "unusedindvarinloop", "only replace exit value when it is an unused " "induction variable in the loop and has cheap replacement cost"), clEnumValN(NoHardUse, "noharduse", "only replace exit values when loop def likely dead"), clEnumValN(AlwaysRepl, "always", "always replace exit value whenever possible")))
static cl::opt< bool, true > HoistRuntimeChecks("hoist-runtime-checks", cl::Hidden, cl::desc("Hoist inner loop runtime memory checks to outer loop if possible"), cl::location(VectorizerParams::HoistRuntimeChecks), cl::init(true))
static std::optional< uint64_t > getEstimatedTripCount(BranchInst *ExitingBranch, Loop *L, uint64_t &OrigExitWeight)
Return the estimated trip count for any exiting branch which dominates the loop latch.
Definition: LoopUtils.cpp:822
static bool hasHardUserWithinLoop(const Loop *L, const Instruction *I)
Definition: LoopUtils.cpp:1343
static const char * LLVMLoopDisableLICM
Definition: LoopUtils.cpp:55
static PointerBounds expandBounds(const RuntimeCheckingPtrGroup *CG, Loop *TheLoop, Instruction *Loc, SCEVExpander &Exp, bool HoistRuntimeChecks)
Expand code for the lower and upper bound of the pointer group CG in TheLoop.
Definition: LoopUtils.cpp:1766
static bool canLoopBeDeleted(Loop *L, SmallVector< RewritePhi, 8 > &RewritePhiSet)
Definition: LoopUtils.cpp:1384
static const char * LLVMLoopDisableNonforced
Definition: LoopUtils.cpp:54
static MDNode * createStringMetadata(Loop *TheLoop, StringRef Name, unsigned V)
Create MDNode for input string.
Definition: LoopUtils.cpp:203
static BranchInst * getExpectedExitLoopLatchBranch(Loop *L)
Checks if L has an exiting latch branch.
Definition: LoopUtils.cpp:804
static bool checkIsIndPhi(PHINode *Phi, Loop *L, ScalarEvolution *SE, InductionDescriptor &ID)
Checks if it is safe to call InductionDescriptor::isInductionPHI for Phi, and returns true if this Ph...
Definition: LoopUtils.cpp:1438
#define I(x, y, z)
Definition: MD5.cpp:58
#define H(x, y, z)
Definition: MD5.cpp:57
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
Module.h This file contains the declarations for the Module class.
uint64_t High
#define P(N)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
This file provides a priority worklist.
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This is the interface for a SCEV-based alias analysis.
This file defines the make_scope_exit function, which executes user-defined cleanup logic at scope ex...
This file implements a set that has insertion order iteration characteristics.
static cl::opt< unsigned > MSSAThreshold("simple-loop-unswitch-memoryssa-threshold", cl::desc("Max number of memory uses to explore during " "partial unswitching analysis"), cl::init(100), cl::Hidden)
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
Virtual Register Rewriter
Definition: VirtRegMap.cpp:237
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
ModRefInfo getModRefInfo(const Instruction *I, const std::optional< MemoryLocation > &OptLoc)
Check whether or not an instruction may read or write the optionally specified memory location.
Class for arbitrary precision integers.
Definition: APInt.h:78
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition: APInt.h:186
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition: APInt.h:189
static APInt getMinValue(unsigned numBits)
Gets minimum unsigned value of APInt for a specific bit width.
Definition: APInt.h:196
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition: APInt.h:199
Represent the analysis usage information of a pass.
AnalysisUsage & addRequiredID(const void *ID)
Definition: Pass.cpp:283
AnalysisUsage & addPreservedID(const void *ID)
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
Legacy wrapper pass to provide the BasicAAResult object.
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:438
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:167
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:168
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:229
Conditional or Unconditional Branch instruction.
unsigned getNumSuccessors() const
BasicBlock * getSuccessor(unsigned i) const
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:757
@ ICMP_SLT
signed less than
Definition: InstrTypes.h:786
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition: InstrTypes.h:763
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition: InstrTypes.h:761
@ ICMP_UGT
unsigned greater than
Definition: InstrTypes.h:780
@ ICMP_SGT
signed greater than
Definition: InstrTypes.h:784
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:782
static ConstantAsMetadata * get(Constant *C)
Definition: Metadata.h:528
static Constant * getNegativeZero(Type *Ty)
Definition: Constants.h:307
This is the shared class of boolean and integer constants.
Definition: Constants.h:81
static ConstantInt * getTrue(LLVMContext &Context)
Definition: Constants.cpp:850
static ConstantInt * getFalse(LLVMContext &Context)
Definition: Constants.cpp:857
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
Definition: Constants.h:161
This class represents an Operation in the Expression.
Record of a variable value-assignment, aka a non instruction representation of the dbg....
Identifies a unique instance of a variable.
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:202
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:220
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:317
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
Definition: Dominators.cpp:321
static constexpr ElementCount get(ScalarTy MinVal, bool Scalable)
Definition: TypeSize.h:317
void applyUpdates(ArrayRef< typename DomTreeT::UpdateType > Updates)
Submit updates to all available trees.
Legacy wrapper pass to provide the GlobalsAAResult object.
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:91
CallInst * CreateMulReduce(Value *Src)
Create a vector int mul reduction intrinsic of the source vector.
Definition: IRBuilder.cpp:438
CallInst * CreateFAddReduce(Value *Acc, Value *Src)
Create a sequential vector fadd reduction intrinsic of the source vector.
Definition: IRBuilder.cpp:418
Value * CreateICmpULT(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2262
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2465
UnreachableInst * CreateUnreachable()
Definition: IRBuilder.h:1268
CallInst * CreateAndReduce(Value *Src)
Create a vector int AND reduction intrinsic of the source vector.
Definition: IRBuilder.cpp:442
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, Instruction *FMFSource=nullptr, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Definition: IRBuilder.cpp:933
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Definition: IRBuilder.cpp:1091
CallInst * CreateAddReduce(Value *Src)
Create a vector int add reduction intrinsic of the source vector.
Definition: IRBuilder.cpp:434
Value * CreateFreeze(Value *V, const Twine &Name="")
Definition: IRBuilder.h:2540
CallInst * CreateXorReduce(Value *Src)
Create a vector int XOR reduction intrinsic of the source vector.
Definition: IRBuilder.cpp:450
CallInst * CreateOrReduce(Value *Src)
Create a vector int OR reduction intrinsic of the source vector.
Definition: IRBuilder.cpp:446
CallInst * CreateFPMinReduce(Value *Src)
Create a vector float min reduction intrinsic of the source vector.
Definition: IRBuilder.cpp:470
CallInst * CreateFPMaximumReduce(Value *Src)
Create a vector float maximum reduction intrinsic of the source vector.
Definition: IRBuilder.cpp:474
CallInst * CreateFPMaxReduce(Value *Src)
Create a vector float max reduction intrinsic of the source vector.
Definition: IRBuilder.cpp:466
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition: IRBuilder.h:483
Value * CreateCmp(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2371
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
Definition: IRBuilder.h:1125
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Definition: IRBuilder.h:2499
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1480
CallInst * CreateIntMaxReduce(Value *Src, bool IsSigned=false)
Create a vector integer max reduction intrinsic of the source vector.
Definition: IRBuilder.cpp:454
ConstantInt * getFalse()
Get the constant value for i1 false.
Definition: IRBuilder.h:468
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1502
Value * CreateBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:1671
BranchInst * CreateBr(BasicBlock *Dest)
Create an unconditional 'br label X' instruction.
Definition: IRBuilder.h:1119
Value * CreateICmpSLT(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2278
CallInst * CreateIntMinReduce(Value *Src, bool IsSigned=false)
Create a vector integer min reduction intrinsic of the source vector.
Definition: IRBuilder.cpp:460
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition: IRBuilder.h:177
CallInst * CreateFMulReduce(Value *Acc, Value *Src)
Create a sequential vector fmul reduction intrinsic of the source vector.
Definition: IRBuilder.cpp:426
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1366
CallInst * CreateFPMinimumReduce(Value *Src)
Create a vector float minimum reduction intrinsic of the source vector.
Definition: IRBuilder.cpp:478
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2671
A struct for saving information about induction variables.
static bool isInductionPHI(PHINode *Phi, const Loop *L, ScalarEvolution *SE, InductionDescriptor &D, const SCEV *Expr=nullptr, SmallVectorImpl< Instruction * > *CastsToIgnore=nullptr)
Returns true if Phi is an induction in the loop L.
unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:92
bool mayHaveSideEffects() const LLVM_READONLY
Return true if the instruction may have side effects.
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1635
void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Definition: Instruction.cpp:74
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
void addLoop(Loop &L)
Definition: LoopPass.cpp:77
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
BlockT * getHeader() const
std::vector< Loop * >::const_iterator iterator
LoopT * getParentLoop() const
Return the parent loop if it exists or nullptr for top level loops.
void addTopLevelLoop(LoopT *New)
This adds the specified loop to the collection of top-level loops.
iterator end() const
void removeBlock(BlockT *BB)
This method completely removes BB from all data structures, including all of the Loop objects it is n...
LoopT * AllocateLoop(ArgsTy &&...Args)
LoopT * removeLoop(iterator I)
This removes the specified top-level loop from this loop info object.
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
void destroy(LoopT *L)
Destroy a loop that has been removed from the LoopInfo nest.
The legacy pass manager's analysis pass to compute loop information.
Definition: LoopInfo.h:598
bool replacementPreservesLCSSAForm(Instruction *From, Value *To)
Returns true if replacing From with To everywhere is guaranteed to preserve LCSSA form.
Definition: LoopInfo.h:444
void erase(Loop *L)
Update LoopInfo after removing the last backedge from a loop.
Definition: LoopInfo.cpp:887
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:44
void setLoopID(MDNode *LoopID) const
Set the llvm.loop loop id metadata for this loop.
Definition: LoopInfo.cpp:526
MDNode * getLoopID() const
Return the llvm.loop loop id metadata node for this loop if it is present.
Definition: LoopInfo.cpp:502
MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight, bool IsExpected=false)
Return metadata containing two branch weights.
Definition: MDBuilder.cpp:37
Metadata node.
Definition: Metadata.h:1067
void replaceOperandWith(unsigned I, Metadata *New)
Replace a specific operand.
Definition: Metadata.cpp:1071
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1428
ArrayRef< MDOperand > operands() const
Definition: Metadata.h:1426
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1541
unsigned getNumOperands() const
Return number of MDNode operands.
Definition: Metadata.h:1434
LLVMContext & getContext() const
Definition: Metadata.h:1231
Tracking metadata reference owned by Metadata.
Definition: Metadata.h:889
A single uniqued string.
Definition: Metadata.h:720
StringRef getString() const
Definition: Metadata.cpp:610
static MDString * get(LLVMContext &Context, StringRef Str)
Definition: Metadata.cpp:600
Tuple of metadata.
Definition: Metadata.h:1470
BasicBlock * getBlock() const
Definition: MemorySSA.h:165
Representation for a specific memory location.
static MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
Legacy analysis pass which computes MemorySSA.
Definition: MemorySSA.h:985
Encapsulates MemorySSA, including all data associated with memory accesses.
Definition: MemorySSA.h:701
void verifyMemorySSA(VerificationLevel=VerificationLevel::Fast) const
Verify that MemorySSA is self consistent (IE definitions dominate all uses, uses appear in the right ...
Definition: MemorySSA.cpp:1905
MemoryUseOrDef * getMemoryAccess(const Instruction *I) const
Given a memory Mod/Ref'ing instruction, get the MemorySSA access associated with it.
Definition: MemorySSA.h:719
Root of the metadata hierarchy.
Definition: Metadata.h:62
void setIncomingValue(unsigned i, Value *V)
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
PassRegistry - This class manages the registration and intitialization of the pass subsystem as appli...
Definition: PassRegistry.h:37
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition: Constants.cpp:1852
bool insert(const T &X)
Insert a new element into the PriorityWorklist.
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Definition: IVDescriptors.h:71
static bool isAnyOfRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
static bool isMinMaxRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is any min/max kind.
A global registry used in conjunction with static constructors to make pluggable components (like tar...
Definition: Registry.h:44
Legacy wrapper pass to provide the SCEVAAResult object.
This class uses information about analyze scalars to rewrite expressions in canonical form.
ScalarEvolution * getSE()
Value * expandCodeFor(const SCEV *SH, Type *Ty, BasicBlock::iterator I)
Insert code to directly compute the specified SCEV expression into the program.
This class represents an analyzed expression in the program.
Type * getType() const
Return the LLVM type of this SCEV expression.
The main scalar evolution driver.
bool isKnownNonNegative(const SCEV *S)
Test if the given expression is known to be non-negative.
const SCEV * getSCEVAtScope(const SCEV *S, const Loop *L)
Return a SCEV expression for the specified value at the specified scope in the program.
const SCEV * getZero(Type *Ty)
Return a SCEV for the constant 0 of a specific type.
const SCEV * getConstant(ConstantInt *V)
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
void forgetLoop(const Loop *L)
This method should be called by the client when it has changed a loop in a way that may effect Scalar...
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
LoopDisposition getLoopDisposition(const SCEV *S, const Loop *L)
Return the "disposition" of the given SCEV with respect to the given loop.
bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
void forgetValue(Value *V)
This method should be called by the client when it has changed a value in a way that may effect its v...
void forgetBlockAndLoopDispositions(Value *V=nullptr)
Called when the client has changed the disposition of values in a loop or block.
LoopDisposition
An enum describing the relationship between a SCEV and a loop.
@ LoopInvariant
The SCEV is loop-invariant.
bool isAvailableAtLoopEntry(const SCEV *S, const Loop *L)
Determine if the SCEV can be evaluated at loop's entry.
const SCEV * getExitCount(const Loop *L, const BasicBlock *ExitingBlock, ExitCountKind Kind=Exact)
Return the number of times the backedge executes before the given exit would be taken; if not exactly...
const SCEV * applyLoopGuards(const SCEV *Expr, const Loop *L)
Try to apply information from loop guards for L to Expr.
bool isLoopEntryGuardedByCond(const Loop *L, ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS)
Test whether entry to the loop is protected by a conditional between LHS and RHS.
This class represents the LLVM 'select' instruction.
Implements a dense probed hash-table based set with some number of buckets stored inline.
Definition: DenseSet.h:290
A version of PriorityWorklist that selects small size optimized data structures for the vector and ma...
size_type size() const
Definition: SmallPtrSet.h:94
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:344
bool contains(ConstPtrType Ptr) const
Definition: SmallPtrSet.h:418
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:479
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:370
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:950
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:696
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:250
Provides information about what library functions are available for the current target.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Value handle that tracks a Value across RAUW.
Definition: ValueHandle.h:331
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
Definition: Type.h:234
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static IntegerType * getInt32Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:228
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:534
iterator_range< user_iterator > users()
Definition: Value.h:421
bool use_empty() const
Definition: Value.h:344
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1075
iterator_range< use_iterator > uses()
Definition: Value.h:376
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
Value * createSimpleTargetReduction(RecurKind Kind, Type *ValTy, ArrayRef< Value * > VecOpArray, const Twine &Name=Twine())
Emit a VP reduction intrinsic call for recurrence kind.
std::pair< iterator, bool > insert(const ValueT &V)
Definition: DenseSet.h:206
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
Definition: ilist_node.h:32
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
std::optional< ElementCount > getOptionalElementCountLoopAttribute(const Loop *TheLoop)
Find a combination of metadata ("llvm.loop.vectorize.width" and "llvm.loop.vectorize....
Definition: LoopUtils.cpp:250
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
SmallVector< DomTreeNode *, 16 > collectChildrenInLoop(DomTreeNode *N, const Loop *CurLoop)
Does a BFS from a given node to all of its children inside a given loop.
Definition: LoopUtils.cpp:450
Value * addRuntimeChecks(Instruction *Loc, Loop *TheLoop, const SmallVectorImpl< RuntimePointerCheck > &PointerChecks, SCEVExpander &Expander, bool HoistRuntimeChecks=false)
Add code that checks at runtime if the accessed arrays in PointerChecks overlap.
Definition: LoopUtils.cpp:1854
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1742
std::optional< unsigned > getLoopEstimatedTripCount(Loop *L, unsigned *EstimatedLoopInvocationWeight=nullptr)
Returns a loop's estimated trip count based on branch weight metadata.
Definition: LoopUtils.cpp:849
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1722
Intrinsic::ID getMinMaxReductionIntrinsicOp(Intrinsic::ID RdxID)
Returns the min/max intrinsic used when expanding a min/max reduction.
Definition: LoopUtils.cpp:950
bool getBooleanLoopAttribute(const Loop *TheLoop, StringRef Name)
Returns true if Name is applied to TheLoop and enabled.
Definition: LoopInfo.cpp:1097
std::pair< const RuntimeCheckingPtrGroup *, const RuntimeCheckingPtrGroup * > RuntimePointerCheck
A memcheck which made up of a pair of grouped pointers.
detail::scope_exit< std::decay_t< Callable > > make_scope_exit(Callable &&F)
Definition: ScopeExit.h:59
bool isKnownNonPositiveInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE)
Returns true if we can prove that S is defined and always non-positive in loop L.
Definition: LoopUtils.cpp:1309
Value * createSimpleTargetReduction(IRBuilderBase &B, Value *Src, RecurKind RdxKind)
Create a target reduction of the given vector.
Definition: LoopUtils.cpp:1172
std::optional< bool > getOptionalBoolLoopAttribute(const Loop *TheLoop, StringRef Name)
Definition: LoopInfo.cpp:1079
void appendReversedLoopsToWorklist(RangeT &&, SmallPriorityWorklist< Loop *, 4 > &)
Utility that implements appending of loops onto a worklist given a range.
Definition: LoopUtils.cpp:1689
auto successors(const MachineBasicBlock *BB)
void initializeLoopPassPass(PassRegistry &)
Manually defined generic "LoopPass" dependency initialization.
Definition: LoopUtils.cpp:189
bool formLCSSARecursively(Loop &L, const DominatorTree &DT, const LoopInfo *LI, ScalarEvolution *SE)
Put a loop nest into LCSSA form.
Definition: LCSSA.cpp:425
std::optional< MDNode * > makeFollowupLoopID(MDNode *OrigLoopID, ArrayRef< StringRef > FollowupAttrs, const char *InheritOptionsAttrsPrefix="", bool AlwaysNew=false)
Create a new loop identifier for a loop created from a loop transformation.
Definition: LoopUtils.cpp:263
unsigned getArithmeticReductionInstruction(Intrinsic::ID RdxID)
Returns the arithmetic instruction opcode used when expanding a reduction.
Definition: LoopUtils.cpp:921
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:656
char & LCSSAID
Definition: LCSSA.cpp:507
char & LoopSimplifyID
Value * createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left, Value *Right)
Returns a Min/Max operation corresponding to MinMaxRecurrenceKind.
Definition: LoopUtils.cpp:1037
void addStringMetadataToLoop(Loop *TheLoop, const char *MDString, unsigned V=0)
Set input string into loop metadata by keeping other values intact.
Definition: LoopUtils.cpp:214
bool cannotBeMaxInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE, bool Signed)
Returns true if S is defined and never is equal to signed/unsigned max.
Definition: LoopUtils.cpp:1327
constexpr T divideNearest(U Numerator, V Denominator)
Returns (Numerator / Denominator) rounded by round-half-up.
Definition: MathExtras.h:467
TransformationMode hasVectorizeTransformation(const Loop *L)
Definition: LoopUtils.cpp:391
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
Definition: STLExtras.h:1928
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
Definition: Local.cpp:400
SmallVector< Instruction *, 8 > findDefsUsedOutsideOfLoop(Loop *L)
Returns the instructions that use values defined in the loop.
Definition: LoopUtils.cpp:123
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:419
bool isMustProgress(const Loop *L)
Return true if this loop can be assumed to make progress.
Definition: LoopInfo.cpp:1150
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:291
bool isModSet(const ModRefInfo MRI)
Definition: ModRef.h:48
TransformationMode hasUnrollAndJamTransformation(const Loop *L)
Definition: LoopUtils.cpp:373
void deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE, LoopInfo *LI, MemorySSA *MSSA=nullptr)
This function deletes dead loops.
Definition: LoopUtils.cpp:483
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
bool hasDisableAllTransformsHint(const Loop *L)
Look for the loop attribute that disables all transformation heuristic.
Definition: LoopUtils.cpp:344
Value * createOrderedReduction(IRBuilderBase &B, const RecurrenceDescriptor &Desc, Value *Src, Value *Start)
Create an ordered reduction intrinsic using the given recurrence descriptor Desc.
Definition: LoopUtils.cpp:1242
cl::opt< unsigned > SCEVCheapExpansionBudget
Value * getShuffleReduction(IRBuilderBase &Builder, Value *Src, unsigned Op, TargetTransformInfo::ReductionShuffle RS, RecurKind MinMaxKind=RecurKind::None)
Generates a vector reduction using shufflevectors to reduce the value.
Definition: LoopUtils.cpp:1079
TransformationMode hasUnrollTransformation(const Loop *L)
Definition: LoopUtils.cpp:352
TransformationMode hasDistributeTransformation(const Loop *L)
Definition: LoopUtils.cpp:427
void breakLoopBackedge(Loop *L, DominatorTree &DT, ScalarEvolution &SE, LoopInfo &LI, MemorySSA *MSSA)
Remove the backedge of the specified loop.
Definition: LoopUtils.cpp:724
void getLoopAnalysisUsage(AnalysisUsage &AU)
Helper to consistently add the set of standard passes to a loop pass's AnalysisUsage.
Definition: LoopUtils.cpp:141
void propagateIRFlags(Value *I, ArrayRef< Value * > VL, Value *OpValue=nullptr, bool IncludeWrapFlags=true)
Get the intersection (logical and) of all of the potential IR flags of each scalar operation (VL) tha...
Definition: LoopUtils.cpp:1268
bool isKnownPositiveInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE)
Returns true if we can prove that S is defined and always positive in loop L.
Definition: LoopUtils.cpp:1302
unsigned changeToUnreachable(Instruction *I, bool PreserveLCSSA=false, DomTreeUpdater *DTU=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Insert an unreachable instruction before the specified instruction, making it and the rest of the cod...
Definition: Local.cpp:2837
RNSuccIterator< NodeRef, BlockT, RegionT > succ_begin(NodeRef Node)
std::optional< int > getOptionalIntLoopAttribute(const Loop *TheLoop, StringRef Name)
Find named metadata for a loop with an integer value.
Definition: LoopInfo.cpp:1101
BasicBlock * SplitBlockPredecessors(BasicBlock *BB, ArrayRef< BasicBlock * > Preds, const char *Suffix, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method introduces at least one new basic block into the function and moves some of the predecess...
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
CmpInst::Predicate getMinMaxReductionPredicate(RecurKind RK)
Returns the comparison predicate used when expanding a min/max reduction.
Definition: LoopUtils.cpp:1015
TransformationMode hasLICMVersioningTransformation(const Loop *L)
Definition: LoopUtils.cpp:437
bool VerifyMemorySSA
Enables verification of MemorySSA.
Definition: MemorySSA.cpp:84
TransformationMode
The mode sets how eager a transformation should be applied.
Definition: LoopUtils.h:277
@ TM_Unspecified
The pass can use heuristics to determine whether a transformation should be applied.
Definition: LoopUtils.h:280
@ TM_SuppressedByUser
The transformation must not be applied.
Definition: LoopUtils.h:300
@ TM_ForcedByUser
The transformation was directed by the user, e.g.
Definition: LoopUtils.h:294
@ TM_Disable
The transformation should not be applied.
Definition: LoopUtils.h:286
@ TM_Enable
The transformation should be applied without considering a cost model.
Definition: LoopUtils.h:283
RNSuccIterator< NodeRef, BlockT, RegionT > succ_end(NodeRef Node)
bool hasDisableLICMTransformsHint(const Loop *L)
Look for the loop attribute that disables the LICM transformation heuristics.
Definition: LoopUtils.cpp:348
RecurKind
These are the kinds of recurrences that we support.
Definition: IVDescriptors.h:34
bool setLoopEstimatedTripCount(Loop *L, unsigned EstimatedTripCount, unsigned EstimatedLoopInvocationWeight)
Set a loop's branch weight metadata to reflect that loop has EstimatedTripCount iterations and Estima...
Definition: LoopUtils.cpp:867
void setProfileInfoAfterUnrolling(Loop *OrigLoop, Loop *UnrolledLoop, Loop *RemainderLoop, uint64_t UF)
Set weights for UnrolledLoop and RemainderLoop based on weights for OrigLoop and the following distri...
Definition: LoopUtils.cpp:1661
bool formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
Ensure that all exit blocks of the loop are dedicated exits.
Definition: LoopUtils.cpp:57
DWARFExpression::Operation Op
void appendLoopsToWorklist(RangeT &&, SmallPriorityWorklist< Loop *, 4 > &)
Utility that implements appending of loops onto a worklist given a range.
Definition: LoopUtils.cpp:1714
bool isKnownNegativeInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE)
Returns true if we can prove that S is defined and always negative in loop L.
Definition: LoopUtils.cpp:1288
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
bool hasIterationCountInvariantInParent(Loop *L, ScalarEvolution &SE)
Check inner loop (L) backedge count is known to be invariant on all iterations of its outer loop.
Definition: LoopUtils.cpp:899
bool isAlmostDeadIV(PHINode *IV, BasicBlock *LatchBlock, Value *Cond)
Return true if the induction variable IV in a Loop whose latch is LatchBlock would become dead if the...
Definition: LoopUtils.cpp:469
auto predecessors(const MachineBasicBlock *BB)
int rewriteLoopExitValues(Loop *L, LoopInfo *LI, TargetLibraryInfo *TLI, ScalarEvolution *SE, const TargetTransformInfo *TTI, SCEVExpander &Rewriter, DominatorTree *DT, ReplaceExitVal ReplaceExitValue, SmallVector< WeakTrackingVH, 16 > &DeadInsts)
If the final value of any expressions that are recurrent in the loop can be computed,...
Definition: LoopUtils.cpp:1449
iterator_range< typename GraphTraits< GraphType >::ChildIteratorType > children(const typename GraphTraits< GraphType >::NodeRef &G)
Definition: GraphTraits.h:123
Value * addDiffRuntimeChecks(Instruction *Loc, ArrayRef< PointerDiffInfo > Checks, SCEVExpander &Expander, function_ref< Value *(IRBuilderBase &, unsigned)> GetVF, unsigned IC)
Definition: LoopUtils.cpp:1913
RecurKind getMinMaxReductionRecurKind(Intrinsic::ID RdxID)
Returns the recurence kind used when expanding a min/max reduction.
Definition: LoopUtils.cpp:996
ReplaceExitVal
Definition: LoopUtils.h:464
@ UnusedIndVarInLoop
Definition: LoopUtils.h:468
@ OnlyCheapRepl
Definition: LoopUtils.h:466
@ AlwaysRepl
Definition: LoopUtils.h:469
BasicBlock * SplitEdge(BasicBlock *From, BasicBlock *To, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the edge connecting the specified blocks, and return the newly created basic block between From...
std::optional< IVConditionInfo > hasPartialIVCondition(const Loop &L, unsigned MSSAThreshold, const MemorySSA &MSSA, AAResults &AA)
Check if the loop header has a conditional branch that is not loop-invariant, because it involves loa...
Definition: LoopUtils.cpp:1960
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
bool cannotBeMinInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE, bool Signed)
Returns true if S is defined and never is equal to signed/unsigned min.
Definition: LoopUtils.cpp:1316
bool isKnownNonNegativeInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE)
Returns true if we can prove that S is defined and always non-negative in loop L.
Definition: LoopUtils.cpp:1295
Value * getOrderedReduction(IRBuilderBase &Builder, Value *Acc, Value *Src, unsigned Op, RecurKind MinMaxKind=RecurKind::None)
Generates an ordered vector reduction using extracts to reduce the value.
Definition: LoopUtils.cpp:1054
MDNode * findOptionMDForLoopID(MDNode *LoopID, StringRef Name)
Find and return the loop attribute node for the attribute Name in LoopID.
Definition: LoopInfo.cpp:1029
Value * createTargetReduction(IRBuilderBase &B, const RecurrenceDescriptor &Desc, Value *Src, PHINode *OrigPhi=nullptr)
Create a generic target reduction using a recurrence descriptor Desc The target is queried to determi...
Definition: LoopUtils.cpp:1226
Loop * cloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM, LoopInfo *LI, LPPassManager *LPM)
Recursively clone the specified loop and all of its children, mapping the blocks with the specified m...
Definition: LoopUtils.cpp:1731
Value * createAnyOfTargetReduction(IRBuilderBase &B, Value *Src, const RecurrenceDescriptor &Desc, PHINode *OrigPhi)
Create a target reduction of the given vector Src for a reduction of the kind RecurKind::IAnyOf or Re...
Definition: LoopUtils.cpp:1137
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
IR Values for the lower and upper bounds of a pointer evolution.
Definition: LoopUtils.cpp:1758
TrackingVH< Value > Start
Definition: LoopUtils.cpp:1759
TrackingVH< Value > End
Definition: LoopUtils.cpp:1760
Value * StrideToCheck
Definition: LoopUtils.cpp:1761
unsigned Ith
Definition: LoopUtils.cpp:1370
RewritePhi(PHINode *P, unsigned I, const SCEV *Val, Instruction *ExpansionPt, bool H)
Definition: LoopUtils.cpp:1375
const SCEV * ExpansionSCEV
Definition: LoopUtils.cpp:1371
PHINode * PN
Definition: LoopUtils.cpp:1369
Instruction * ExpansionPoint
Definition: LoopUtils.cpp:1372
Description of the encoding of one expression Op.
Struct to hold information about a partially invariant condition.
Definition: LoopUtils.h:542
Incoming for lane maks phi as machine instruction, incoming register Reg and incoming block Block are...
unsigned AddressSpace
Address space of the involved pointers.
bool NeedsFreeze
Whether the pointer needs to be frozen after expansion, e.g.
const SCEV * High
The SCEV expression which represents the upper bound of all the pointers in this group.
const SCEV * Low
The SCEV expression which represents the lower bound of all the pointers in this group.