LLVM 20.0.0git
LoopUtils.cpp
Go to the documentation of this file.
1//===-- LoopUtils.cpp - Loop Utility functions -------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines common loop utility functions.
10//
11//===----------------------------------------------------------------------===//
12
14#include "llvm/ADT/DenseSet.h"
16#include "llvm/ADT/ScopeExit.h"
17#include "llvm/ADT/SetVector.h"
33#include "llvm/IR/DIBuilder.h"
34#include "llvm/IR/Dominators.h"
37#include "llvm/IR/MDBuilder.h"
38#include "llvm/IR/Module.h"
41#include "llvm/IR/ValueHandle.h"
43#include "llvm/Pass.h"
44#include "llvm/Support/Debug.h"
48
49using namespace llvm;
50using namespace llvm::PatternMatch;
51
52#define DEBUG_TYPE "loop-utils"
53
54static const char *LLVMLoopDisableNonforced = "llvm.loop.disable_nonforced";
55static const char *LLVMLoopDisableLICM = "llvm.licm.disable";
56
58 MemorySSAUpdater *MSSAU,
59 bool PreserveLCSSA) {
60 bool Changed = false;
61
62 // We re-use a vector for the in-loop predecesosrs.
63 SmallVector<BasicBlock *, 4> InLoopPredecessors;
64
65 auto RewriteExit = [&](BasicBlock *BB) {
66 assert(InLoopPredecessors.empty() &&
67 "Must start with an empty predecessors list!");
68 auto Cleanup = make_scope_exit([&] { InLoopPredecessors.clear(); });
69
70 // See if there are any non-loop predecessors of this exit block and
71 // keep track of the in-loop predecessors.
72 bool IsDedicatedExit = true;
73 for (auto *PredBB : predecessors(BB))
74 if (L->contains(PredBB)) {
75 if (isa<IndirectBrInst>(PredBB->getTerminator()))
76 // We cannot rewrite exiting edges from an indirectbr.
77 return false;
78
79 InLoopPredecessors.push_back(PredBB);
80 } else {
81 IsDedicatedExit = false;
82 }
83
84 assert(!InLoopPredecessors.empty() && "Must have *some* loop predecessor!");
85
86 // Nothing to do if this is already a dedicated exit.
87 if (IsDedicatedExit)
88 return false;
89
90 auto *NewExitBB = SplitBlockPredecessors(
91 BB, InLoopPredecessors, ".loopexit", DT, LI, MSSAU, PreserveLCSSA);
92
93 if (!NewExitBB)
95 dbgs() << "WARNING: Can't create a dedicated exit block for loop: "
96 << *L << "\n");
97 else
98 LLVM_DEBUG(dbgs() << "LoopSimplify: Creating dedicated exit block "
99 << NewExitBB->getName() << "\n");
100 return true;
101 };
102
103 // Walk the exit blocks directly rather than building up a data structure for
104 // them, but only visit each one once.
106 for (auto *BB : L->blocks())
107 for (auto *SuccBB : successors(BB)) {
108 // We're looking for exit blocks so skip in-loop successors.
109 if (L->contains(SuccBB))
110 continue;
111
112 // Visit each exit block exactly once.
113 if (!Visited.insert(SuccBB).second)
114 continue;
115
116 Changed |= RewriteExit(SuccBB);
117 }
118
119 return Changed;
120}
121
122/// Returns the instructions that use values defined in the loop.
125
126 for (auto *Block : L->getBlocks())
127 // FIXME: I believe that this could use copy_if if the Inst reference could
128 // be adapted into a pointer.
129 for (auto &Inst : *Block) {
130 auto Users = Inst.users();
131 if (any_of(Users, [&](User *U) {
132 auto *Use = cast<Instruction>(U);
133 return !L->contains(Use->getParent());
134 }))
135 UsedOutside.push_back(&Inst);
136 }
137
138 return UsedOutside;
139}
140
142 // By definition, all loop passes need the LoopInfo analysis and the
143 // Dominator tree it depends on. Because they all participate in the loop
144 // pass manager, they must also preserve these.
149
150 // We must also preserve LoopSimplify and LCSSA. We locally access their IDs
151 // here because users shouldn't directly get them from this header.
152 extern char &LoopSimplifyID;
153 extern char &LCSSAID;
158 // This is used in the LPPassManager to perform LCSSA verification on passes
159 // which preserve lcssa form
162
163 // Loop passes are designed to run inside of a loop pass manager which means
164 // that any function analyses they require must be required by the first loop
165 // pass in the manager (so that it is computed before the loop pass manager
166 // runs) and preserved by all loop pasess in the manager. To make this
167 // reasonably robust, the set needed for most loop passes is maintained here.
168 // If your loop pass requires an analysis not listed here, you will need to
169 // carefully audit the loop pass manager nesting structure that results.
177 // FIXME: When all loop passes preserve MemorySSA, it can be required and
178 // preserved here instead of the individual handling in each pass.
179}
180
181/// Manually defined generic "LoopPass" dependency initialization. This is used
182/// to initialize the exact set of passes from above in \c
183/// getLoopAnalysisUsage. It can be used within a loop pass's initialization
184/// with:
185///
186/// INITIALIZE_PASS_DEPENDENCY(LoopPass)
187///
188/// As-if "LoopPass" were a pass.
192 INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
193 INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass)
200}
201
202/// Create MDNode for input string.
203static MDNode *createStringMetadata(Loop *TheLoop, StringRef Name, unsigned V) {
204 LLVMContext &Context = TheLoop->getHeader()->getContext();
205 Metadata *MDs[] = {
206 MDString::get(Context, Name),
207 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(Context), V))};
208 return MDNode::get(Context, MDs);
209}
210
211/// Set input string into loop metadata by keeping other values intact.
212/// If the string is already in loop metadata update value if it is
213/// different.
214void llvm::addStringMetadataToLoop(Loop *TheLoop, const char *StringMD,
215 unsigned V) {
217 // If the loop already has metadata, retain it.
218 MDNode *LoopID = TheLoop->getLoopID();
219 if (LoopID) {
220 for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
221 MDNode *Node = cast<MDNode>(LoopID->getOperand(i));
222 // If it is of form key = value, try to parse it.
223 if (Node->getNumOperands() == 2) {
224 MDString *S = dyn_cast<MDString>(Node->getOperand(0));
225 if (S && S->getString() == StringMD) {
226 ConstantInt *IntMD =
227 mdconst::extract_or_null<ConstantInt>(Node->getOperand(1));
228 if (IntMD && IntMD->getSExtValue() == V)
229 // It is already in place. Do nothing.
230 return;
231 // We need to update the value, so just skip it here and it will
232 // be added after copying other existed nodes.
233 continue;
234 }
235 }
236 MDs.push_back(Node);
237 }
238 }
239 // Add new metadata.
240 MDs.push_back(createStringMetadata(TheLoop, StringMD, V));
241 // Replace current metadata node with new one.
242 LLVMContext &Context = TheLoop->getHeader()->getContext();
243 MDNode *NewLoopID = MDNode::get(Context, MDs);
244 // Set operand 0 to refer to the loop id itself.
245 NewLoopID->replaceOperandWith(0, NewLoopID);
246 TheLoop->setLoopID(NewLoopID);
247}
248
249std::optional<ElementCount>
251 std::optional<int> Width =
252 getOptionalIntLoopAttribute(TheLoop, "llvm.loop.vectorize.width");
253
254 if (Width) {
255 std::optional<int> IsScalable = getOptionalIntLoopAttribute(
256 TheLoop, "llvm.loop.vectorize.scalable.enable");
257 return ElementCount::get(*Width, IsScalable.value_or(false));
258 }
259
260 return std::nullopt;
261}
262
263std::optional<MDNode *> llvm::makeFollowupLoopID(
264 MDNode *OrigLoopID, ArrayRef<StringRef> FollowupOptions,
265 const char *InheritOptionsExceptPrefix, bool AlwaysNew) {
266 if (!OrigLoopID) {
267 if (AlwaysNew)
268 return nullptr;
269 return std::nullopt;
270 }
271
272 assert(OrigLoopID->getOperand(0) == OrigLoopID);
273
274 bool InheritAllAttrs = !InheritOptionsExceptPrefix;
275 bool InheritSomeAttrs =
276 InheritOptionsExceptPrefix && InheritOptionsExceptPrefix[0] != '\0';
278 MDs.push_back(nullptr);
279
280 bool Changed = false;
281 if (InheritAllAttrs || InheritSomeAttrs) {
282 for (const MDOperand &Existing : drop_begin(OrigLoopID->operands())) {
283 MDNode *Op = cast<MDNode>(Existing.get());
284
285 auto InheritThisAttribute = [InheritSomeAttrs,
286 InheritOptionsExceptPrefix](MDNode *Op) {
287 if (!InheritSomeAttrs)
288 return false;
289
290 // Skip malformatted attribute metadata nodes.
291 if (Op->getNumOperands() == 0)
292 return true;
293 Metadata *NameMD = Op->getOperand(0).get();
294 if (!isa<MDString>(NameMD))
295 return true;
296 StringRef AttrName = cast<MDString>(NameMD)->getString();
297
298 // Do not inherit excluded attributes.
299 return !AttrName.starts_with(InheritOptionsExceptPrefix);
300 };
301
302 if (InheritThisAttribute(Op))
303 MDs.push_back(Op);
304 else
305 Changed = true;
306 }
307 } else {
308 // Modified if we dropped at least one attribute.
309 Changed = OrigLoopID->getNumOperands() > 1;
310 }
311
312 bool HasAnyFollowup = false;
313 for (StringRef OptionName : FollowupOptions) {
314 MDNode *FollowupNode = findOptionMDForLoopID(OrigLoopID, OptionName);
315 if (!FollowupNode)
316 continue;
317
318 HasAnyFollowup = true;
319 for (const MDOperand &Option : drop_begin(FollowupNode->operands())) {
320 MDs.push_back(Option.get());
321 Changed = true;
322 }
323 }
324
325 // Attributes of the followup loop not specified explicity, so signal to the
326 // transformation pass to add suitable attributes.
327 if (!AlwaysNew && !HasAnyFollowup)
328 return std::nullopt;
329
330 // If no attributes were added or remove, the previous loop Id can be reused.
331 if (!AlwaysNew && !Changed)
332 return OrigLoopID;
333
334 // No attributes is equivalent to having no !llvm.loop metadata at all.
335 if (MDs.size() == 1)
336 return nullptr;
337
338 // Build the new loop ID.
339 MDTuple *FollowupLoopID = MDNode::get(OrigLoopID->getContext(), MDs);
340 FollowupLoopID->replaceOperandWith(0, FollowupLoopID);
341 return FollowupLoopID;
342}
343
346}
347
350}
351
353 if (getBooleanLoopAttribute(L, "llvm.loop.unroll.disable"))
354 return TM_SuppressedByUser;
355
356 std::optional<int> Count =
357 getOptionalIntLoopAttribute(L, "llvm.loop.unroll.count");
358 if (Count)
359 return *Count == 1 ? TM_SuppressedByUser : TM_ForcedByUser;
360
361 if (getBooleanLoopAttribute(L, "llvm.loop.unroll.enable"))
362 return TM_ForcedByUser;
363
364 if (getBooleanLoopAttribute(L, "llvm.loop.unroll.full"))
365 return TM_ForcedByUser;
366
368 return TM_Disable;
369
370 return TM_Unspecified;
371}
372
374 if (getBooleanLoopAttribute(L, "llvm.loop.unroll_and_jam.disable"))
375 return TM_SuppressedByUser;
376
377 std::optional<int> Count =
378 getOptionalIntLoopAttribute(L, "llvm.loop.unroll_and_jam.count");
379 if (Count)
380 return *Count == 1 ? TM_SuppressedByUser : TM_ForcedByUser;
381
382 if (getBooleanLoopAttribute(L, "llvm.loop.unroll_and_jam.enable"))
383 return TM_ForcedByUser;
384
386 return TM_Disable;
387
388 return TM_Unspecified;
389}
390
392 std::optional<bool> Enable =
393 getOptionalBoolLoopAttribute(L, "llvm.loop.vectorize.enable");
394
395 if (Enable == false)
396 return TM_SuppressedByUser;
397
398 std::optional<ElementCount> VectorizeWidth =
400 std::optional<int> InterleaveCount =
401 getOptionalIntLoopAttribute(L, "llvm.loop.interleave.count");
402
403 // 'Forcing' vector width and interleave count to one effectively disables
404 // this tranformation.
405 if (Enable == true && VectorizeWidth && VectorizeWidth->isScalar() &&
406 InterleaveCount == 1)
407 return TM_SuppressedByUser;
408
409 if (getBooleanLoopAttribute(L, "llvm.loop.isvectorized"))
410 return TM_Disable;
411
412 if (Enable == true)
413 return TM_ForcedByUser;
414
415 if ((VectorizeWidth && VectorizeWidth->isScalar()) && InterleaveCount == 1)
416 return TM_Disable;
417
418 if ((VectorizeWidth && VectorizeWidth->isVector()) || InterleaveCount > 1)
419 return TM_Enable;
420
422 return TM_Disable;
423
424 return TM_Unspecified;
425}
426
428 if (getBooleanLoopAttribute(L, "llvm.loop.distribute.enable"))
429 return TM_ForcedByUser;
430
432 return TM_Disable;
433
434 return TM_Unspecified;
435}
436
438 if (getBooleanLoopAttribute(L, "llvm.loop.licm_versioning.disable"))
439 return TM_SuppressedByUser;
440
442 return TM_Disable;
443
444 return TM_Unspecified;
445}
446
447/// Does a BFS from a given node to all of its children inside a given loop.
448/// The returned vector of nodes includes the starting point.
452 auto AddRegionToWorklist = [&](DomTreeNode *DTN) {
453 // Only include subregions in the top level loop.
454 BasicBlock *BB = DTN->getBlock();
455 if (CurLoop->contains(BB))
456 Worklist.push_back(DTN);
457 };
458
459 AddRegionToWorklist(N);
460
461 for (size_t I = 0; I < Worklist.size(); I++) {
462 for (DomTreeNode *Child : Worklist[I]->children())
463 AddRegionToWorklist(Child);
464 }
465
466 return Worklist;
467}
468
470 int LatchIdx = PN->getBasicBlockIndex(LatchBlock);
471 assert(LatchIdx != -1 && "LatchBlock is not a case in this PHINode");
472 Value *IncV = PN->getIncomingValue(LatchIdx);
473
474 for (User *U : PN->users())
475 if (U != Cond && U != IncV) return false;
476
477 for (User *U : IncV->users())
478 if (U != Cond && U != PN) return false;
479 return true;
480}
481
482
484 LoopInfo *LI, MemorySSA *MSSA) {
485 assert((!DT || L->isLCSSAForm(*DT)) && "Expected LCSSA!");
486 auto *Preheader = L->getLoopPreheader();
487 assert(Preheader && "Preheader should exist!");
488
489 std::unique_ptr<MemorySSAUpdater> MSSAU;
490 if (MSSA)
491 MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
492
493 // Now that we know the removal is safe, remove the loop by changing the
494 // branch from the preheader to go to the single exit block.
495 //
496 // Because we're deleting a large chunk of code at once, the sequence in which
497 // we remove things is very important to avoid invalidation issues.
498
499 // Tell ScalarEvolution that the loop is deleted. Do this before
500 // deleting the loop so that ScalarEvolution can look at the loop
501 // to determine what it needs to clean up.
502 if (SE) {
503 SE->forgetLoop(L);
505 }
506
507 Instruction *OldTerm = Preheader->getTerminator();
508 assert(!OldTerm->mayHaveSideEffects() &&
509 "Preheader must end with a side-effect-free terminator");
510 assert(OldTerm->getNumSuccessors() == 1 &&
511 "Preheader must have a single successor");
512 // Connect the preheader to the exit block. Keep the old edge to the header
513 // around to perform the dominator tree update in two separate steps
514 // -- #1 insertion of the edge preheader -> exit and #2 deletion of the edge
515 // preheader -> header.
516 //
517 //
518 // 0. Preheader 1. Preheader 2. Preheader
519 // | | | |
520 // V | V |
521 // Header <--\ | Header <--\ | Header <--\
522 // | | | | | | | | | | |
523 // | V | | | V | | | V |
524 // | Body --/ | | Body --/ | | Body --/
525 // V V V V V
526 // Exit Exit Exit
527 //
528 // By doing this is two separate steps we can perform the dominator tree
529 // update without using the batch update API.
530 //
531 // Even when the loop is never executed, we cannot remove the edge from the
532 // source block to the exit block. Consider the case where the unexecuted loop
533 // branches back to an outer loop. If we deleted the loop and removed the edge
534 // coming to this inner loop, this will break the outer loop structure (by
535 // deleting the backedge of the outer loop). If the outer loop is indeed a
536 // non-loop, it will be deleted in a future iteration of loop deletion pass.
537 IRBuilder<> Builder(OldTerm);
538
539 auto *ExitBlock = L->getUniqueExitBlock();
540 DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
541 if (ExitBlock) {
542 assert(ExitBlock && "Should have a unique exit block!");
543 assert(L->hasDedicatedExits() && "Loop should have dedicated exits!");
544
545 Builder.CreateCondBr(Builder.getFalse(), L->getHeader(), ExitBlock);
546 // Remove the old branch. The conditional branch becomes a new terminator.
547 OldTerm->eraseFromParent();
548
549 // Rewrite phis in the exit block to get their inputs from the Preheader
550 // instead of the exiting block.
551 for (PHINode &P : ExitBlock->phis()) {
552 // Set the zero'th element of Phi to be from the preheader and remove all
553 // other incoming values. Given the loop has dedicated exits, all other
554 // incoming values must be from the exiting blocks.
555 int PredIndex = 0;
556 P.setIncomingBlock(PredIndex, Preheader);
557 // Removes all incoming values from all other exiting blocks (including
558 // duplicate values from an exiting block).
559 // Nuke all entries except the zero'th entry which is the preheader entry.
560 P.removeIncomingValueIf([](unsigned Idx) { return Idx != 0; },
561 /* DeletePHIIfEmpty */ false);
562
563 assert((P.getNumIncomingValues() == 1 &&
564 P.getIncomingBlock(PredIndex) == Preheader) &&
565 "Should have exactly one value and that's from the preheader!");
566 }
567
568 if (DT) {
569 DTU.applyUpdates({{DominatorTree::Insert, Preheader, ExitBlock}});
570 if (MSSA) {
571 MSSAU->applyUpdates({{DominatorTree::Insert, Preheader, ExitBlock}},
572 *DT);
573 if (VerifyMemorySSA)
574 MSSA->verifyMemorySSA();
575 }
576 }
577
578 // Disconnect the loop body by branching directly to its exit.
579 Builder.SetInsertPoint(Preheader->getTerminator());
580 Builder.CreateBr(ExitBlock);
581 // Remove the old branch.
582 Preheader->getTerminator()->eraseFromParent();
583 } else {
584 assert(L->hasNoExitBlocks() &&
585 "Loop should have either zero or one exit blocks.");
586
587 Builder.SetInsertPoint(OldTerm);
588 Builder.CreateUnreachable();
589 Preheader->getTerminator()->eraseFromParent();
590 }
591
592 if (DT) {
593 DTU.applyUpdates({{DominatorTree::Delete, Preheader, L->getHeader()}});
594 if (MSSA) {
595 MSSAU->applyUpdates({{DominatorTree::Delete, Preheader, L->getHeader()}},
596 *DT);
597 SmallSetVector<BasicBlock *, 8> DeadBlockSet(L->block_begin(),
598 L->block_end());
599 MSSAU->removeBlocks(DeadBlockSet);
600 if (VerifyMemorySSA)
601 MSSA->verifyMemorySSA();
602 }
603 }
604
605 // Use a map to unique and a vector to guarantee deterministic ordering.
608 llvm::SmallVector<DbgVariableRecord *, 4> DeadDbgVariableRecords;
609
610 if (ExitBlock) {
611 // Given LCSSA form is satisfied, we should not have users of instructions
612 // within the dead loop outside of the loop. However, LCSSA doesn't take
613 // unreachable uses into account. We handle them here.
614 // We could do it after drop all references (in this case all users in the
615 // loop will be already eliminated and we have less work to do but according
616 // to API doc of User::dropAllReferences only valid operation after dropping
617 // references, is deletion. So let's substitute all usages of
618 // instruction from the loop with poison value of corresponding type first.
619 for (auto *Block : L->blocks())
620 for (Instruction &I : *Block) {
621 auto *Poison = PoisonValue::get(I.getType());
622 for (Use &U : llvm::make_early_inc_range(I.uses())) {
623 if (auto *Usr = dyn_cast<Instruction>(U.getUser()))
624 if (L->contains(Usr->getParent()))
625 continue;
626 // If we have a DT then we can check that uses outside a loop only in
627 // unreachable block.
628 if (DT)
630 "Unexpected user in reachable block");
631 U.set(Poison);
632 }
633
634 // RemoveDIs: do the same as below for DbgVariableRecords.
635 if (Block->IsNewDbgInfoFormat) {
637 filterDbgVars(I.getDbgRecordRange()))) {
638 DebugVariable Key(DVR.getVariable(), DVR.getExpression(),
639 DVR.getDebugLoc().get());
640 if (!DeadDebugSet.insert(Key).second)
641 continue;
642 // Unlinks the DVR from it's container, for later insertion.
643 DVR.removeFromParent();
644 DeadDbgVariableRecords.push_back(&DVR);
645 }
646 }
647
648 // For one of each variable encountered, preserve a debug intrinsic (set
649 // to Poison) and transfer it to the loop exit. This terminates any
650 // variable locations that were set during the loop.
651 auto *DVI = dyn_cast<DbgVariableIntrinsic>(&I);
652 if (!DVI)
653 continue;
654 if (!DeadDebugSet.insert(DebugVariable(DVI)).second)
655 continue;
656 DeadDebugInst.push_back(DVI);
657 }
658
659 // After the loop has been deleted all the values defined and modified
660 // inside the loop are going to be unavailable. Values computed in the
661 // loop will have been deleted, automatically causing their debug uses
662 // be be replaced with undef. Loop invariant values will still be available.
663 // Move dbg.values out the loop so that earlier location ranges are still
664 // terminated and loop invariant assignments are preserved.
665 DIBuilder DIB(*ExitBlock->getModule());
666 BasicBlock::iterator InsertDbgValueBefore =
667 ExitBlock->getFirstInsertionPt();
668 assert(InsertDbgValueBefore != ExitBlock->end() &&
669 "There should be a non-PHI instruction in exit block, else these "
670 "instructions will have no parent.");
671
672 for (auto *DVI : DeadDebugInst)
673 DVI->moveBefore(*ExitBlock, InsertDbgValueBefore);
674
675 // Due to the "head" bit in BasicBlock::iterator, we're going to insert
676 // each DbgVariableRecord right at the start of the block, wheras dbg.values
677 // would be repeatedly inserted before the first instruction. To replicate
678 // this behaviour, do it backwards.
679 for (DbgVariableRecord *DVR : llvm::reverse(DeadDbgVariableRecords))
680 ExitBlock->insertDbgRecordBefore(DVR, InsertDbgValueBefore);
681 }
682
683 // Remove the block from the reference counting scheme, so that we can
684 // delete it freely later.
685 for (auto *Block : L->blocks())
686 Block->dropAllReferences();
687
688 if (MSSA && VerifyMemorySSA)
689 MSSA->verifyMemorySSA();
690
691 if (LI) {
692 // Erase the instructions and the blocks without having to worry
693 // about ordering because we already dropped the references.
694 // NOTE: This iteration is safe because erasing the block does not remove
695 // its entry from the loop's block list. We do that in the next section.
696 for (BasicBlock *BB : L->blocks())
697 BB->eraseFromParent();
698
699 // Finally, the blocks from loopinfo. This has to happen late because
700 // otherwise our loop iterators won't work.
701
703 blocks.insert(L->block_begin(), L->block_end());
704 for (BasicBlock *BB : blocks)
705 LI->removeBlock(BB);
706
707 // The last step is to update LoopInfo now that we've eliminated this loop.
708 // Note: LoopInfo::erase remove the given loop and relink its subloops with
709 // its parent. While removeLoop/removeChildLoop remove the given loop but
710 // not relink its subloops, which is what we want.
711 if (Loop *ParentLoop = L->getParentLoop()) {
712 Loop::iterator I = find(*ParentLoop, L);
713 assert(I != ParentLoop->end() && "Couldn't find loop");
714 ParentLoop->removeChildLoop(I);
715 } else {
716 Loop::iterator I = find(*LI, L);
717 assert(I != LI->end() && "Couldn't find loop");
718 LI->removeLoop(I);
719 }
720 LI->destroy(L);
721 }
722}
723
725 LoopInfo &LI, MemorySSA *MSSA) {
726 auto *Latch = L->getLoopLatch();
727 assert(Latch && "multiple latches not yet supported");
728 auto *Header = L->getHeader();
729 Loop *OutermostLoop = L->getOutermostLoop();
730
731 SE.forgetLoop(L);
733
734 std::unique_ptr<MemorySSAUpdater> MSSAU;
735 if (MSSA)
736 MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
737
738 // Update the CFG and domtree. We chose to special case a couple of
739 // of common cases for code quality and test readability reasons.
740 [&]() -> void {
741 if (auto *BI = dyn_cast<BranchInst>(Latch->getTerminator())) {
742 if (!BI->isConditional()) {
743 DomTreeUpdater DTU(&DT, DomTreeUpdater::UpdateStrategy::Eager);
744 (void)changeToUnreachable(BI, /*PreserveLCSSA*/ true, &DTU,
745 MSSAU.get());
746 return;
747 }
748
749 // Conditional latch/exit - note that latch can be shared by inner
750 // and outer loop so the other target doesn't need to an exit
751 if (L->isLoopExiting(Latch)) {
752 // TODO: Generalize ConstantFoldTerminator so that it can be used
753 // here without invalidating LCSSA or MemorySSA. (Tricky case for
754 // LCSSA: header is an exit block of a preceeding sibling loop w/o
755 // dedicated exits.)
756 const unsigned ExitIdx = L->contains(BI->getSuccessor(0)) ? 1 : 0;
757 BasicBlock *ExitBB = BI->getSuccessor(ExitIdx);
758
759 DomTreeUpdater DTU(&DT, DomTreeUpdater::UpdateStrategy::Eager);
760 Header->removePredecessor(Latch, true);
761
762 IRBuilder<> Builder(BI);
763 auto *NewBI = Builder.CreateBr(ExitBB);
764 // Transfer the metadata to the new branch instruction (minus the
765 // loop info since this is no longer a loop)
766 NewBI->copyMetadata(*BI, {LLVMContext::MD_dbg,
767 LLVMContext::MD_annotation});
768
769 BI->eraseFromParent();
770 DTU.applyUpdates({{DominatorTree::Delete, Latch, Header}});
771 if (MSSA)
772 MSSAU->applyUpdates({{DominatorTree::Delete, Latch, Header}}, DT);
773 return;
774 }
775 }
776
777 // General case. By splitting the backedge, and then explicitly making it
778 // unreachable we gracefully handle corner cases such as switch and invoke
779 // termiantors.
780 auto *BackedgeBB = SplitEdge(Latch, Header, &DT, &LI, MSSAU.get());
781
782 DomTreeUpdater DTU(&DT, DomTreeUpdater::UpdateStrategy::Eager);
783 (void)changeToUnreachable(BackedgeBB->getTerminator(),
784 /*PreserveLCSSA*/ true, &DTU, MSSAU.get());
785 }();
786
787 // Erase (and destroy) this loop instance. Handles relinking sub-loops
788 // and blocks within the loop as needed.
789 LI.erase(L);
790
791 // If the loop we broke had a parent, then changeToUnreachable might have
792 // caused a block to be removed from the parent loop (see loop_nest_lcssa
793 // test case in zero-btc.ll for an example), thus changing the parent's
794 // exit blocks. If that happened, we need to rebuild LCSSA on the outermost
795 // loop which might have a had a block removed.
796 if (OutermostLoop != L)
797 formLCSSARecursively(*OutermostLoop, DT, &LI, &SE);
798}
799
800
801/// Checks if \p L has an exiting latch branch. There may also be other
802/// exiting blocks. Returns branch instruction terminating the loop
803/// latch if above check is successful, nullptr otherwise.
805 BasicBlock *Latch = L->getLoopLatch();
806 if (!Latch)
807 return nullptr;
808
809 BranchInst *LatchBR = dyn_cast<BranchInst>(Latch->getTerminator());
810 if (!LatchBR || LatchBR->getNumSuccessors() != 2 || !L->isLoopExiting(Latch))
811 return nullptr;
812
813 assert((LatchBR->getSuccessor(0) == L->getHeader() ||
814 LatchBR->getSuccessor(1) == L->getHeader()) &&
815 "At least one edge out of the latch must go to the header");
816
817 return LatchBR;
818}
819
820/// Return the estimated trip count for any exiting branch which dominates
821/// the loop latch.
822static std::optional<uint64_t> getEstimatedTripCount(BranchInst *ExitingBranch,
823 Loop *L,
824 uint64_t &OrigExitWeight) {
825 // To estimate the number of times the loop body was executed, we want to
826 // know the number of times the backedge was taken, vs. the number of times
827 // we exited the loop.
828 uint64_t LoopWeight, ExitWeight;
829 if (!extractBranchWeights(*ExitingBranch, LoopWeight, ExitWeight))
830 return std::nullopt;
831
832 if (L->contains(ExitingBranch->getSuccessor(1)))
833 std::swap(LoopWeight, ExitWeight);
834
835 if (!ExitWeight)
836 // Don't have a way to return predicated infinite
837 return std::nullopt;
838
839 OrigExitWeight = ExitWeight;
840
841 // Estimated exit count is a ratio of the loop weight by the weight of the
842 // edge exiting the loop, rounded to nearest.
843 uint64_t ExitCount = llvm::divideNearest(LoopWeight, ExitWeight);
844 // Estimated trip count is one plus estimated exit count.
845 return ExitCount + 1;
846}
847
848std::optional<unsigned>
850 unsigned *EstimatedLoopInvocationWeight) {
851 // Currently we take the estimate exit count only from the loop latch,
852 // ignoring other exiting blocks. This can overestimate the trip count
853 // if we exit through another exit, but can never underestimate it.
854 // TODO: incorporate information from other exits
855 if (BranchInst *LatchBranch = getExpectedExitLoopLatchBranch(L)) {
856 uint64_t ExitWeight;
857 if (std::optional<uint64_t> EstTripCount =
858 getEstimatedTripCount(LatchBranch, L, ExitWeight)) {
859 if (EstimatedLoopInvocationWeight)
860 *EstimatedLoopInvocationWeight = ExitWeight;
861 return *EstTripCount;
862 }
863 }
864 return std::nullopt;
865}
866
867bool llvm::setLoopEstimatedTripCount(Loop *L, unsigned EstimatedTripCount,
868 unsigned EstimatedloopInvocationWeight) {
869 // At the moment, we currently support changing the estimate trip count of
870 // the latch branch only. We could extend this API to manipulate estimated
871 // trip counts for any exit.
873 if (!LatchBranch)
874 return false;
875
876 // Calculate taken and exit weights.
877 unsigned LatchExitWeight = 0;
878 unsigned BackedgeTakenWeight = 0;
879
880 if (EstimatedTripCount > 0) {
881 LatchExitWeight = EstimatedloopInvocationWeight;
882 BackedgeTakenWeight = (EstimatedTripCount - 1) * LatchExitWeight;
883 }
884
885 // Make a swap if back edge is taken when condition is "false".
886 if (LatchBranch->getSuccessor(0) != L->getHeader())
887 std::swap(BackedgeTakenWeight, LatchExitWeight);
888
889 MDBuilder MDB(LatchBranch->getContext());
890
891 // Set/Update profile metadata.
892 LatchBranch->setMetadata(
893 LLVMContext::MD_prof,
894 MDB.createBranchWeights(BackedgeTakenWeight, LatchExitWeight));
895
896 return true;
897}
898
900 ScalarEvolution &SE) {
901 Loop *OuterL = InnerLoop->getParentLoop();
902 if (!OuterL)
903 return true;
904
905 // Get the backedge taken count for the inner loop
906 BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
907 const SCEV *InnerLoopBECountSC = SE.getExitCount(InnerLoop, InnerLoopLatch);
908 if (isa<SCEVCouldNotCompute>(InnerLoopBECountSC) ||
909 !InnerLoopBECountSC->getType()->isIntegerTy())
910 return false;
911
912 // Get whether count is invariant to the outer loop
914 SE.getLoopDisposition(InnerLoopBECountSC, OuterL);
916 return false;
917
918 return true;
919}
920
922 switch (RK) {
923 default:
924 llvm_unreachable("Unexpected recurrence kind");
925 case RecurKind::Add:
926 return Intrinsic::vector_reduce_add;
927 case RecurKind::Mul:
928 return Intrinsic::vector_reduce_mul;
929 case RecurKind::And:
930 return Intrinsic::vector_reduce_and;
931 case RecurKind::Or:
932 return Intrinsic::vector_reduce_or;
933 case RecurKind::Xor:
934 return Intrinsic::vector_reduce_xor;
935 case RecurKind::FMulAdd:
936 case RecurKind::FAdd:
937 return Intrinsic::vector_reduce_fadd;
938 case RecurKind::FMul:
939 return Intrinsic::vector_reduce_fmul;
940 case RecurKind::SMax:
941 return Intrinsic::vector_reduce_smax;
942 case RecurKind::SMin:
943 return Intrinsic::vector_reduce_smin;
944 case RecurKind::UMax:
945 return Intrinsic::vector_reduce_umax;
946 case RecurKind::UMin:
947 return Intrinsic::vector_reduce_umin;
948 case RecurKind::FMax:
949 return Intrinsic::vector_reduce_fmax;
950 case RecurKind::FMin:
951 return Intrinsic::vector_reduce_fmin;
952 case RecurKind::FMaximum:
953 return Intrinsic::vector_reduce_fmaximum;
954 case RecurKind::FMinimum:
955 return Intrinsic::vector_reduce_fminimum;
956 }
957}
958
960 switch (RdxID) {
961 case Intrinsic::vector_reduce_fadd:
962 return Instruction::FAdd;
963 case Intrinsic::vector_reduce_fmul:
964 return Instruction::FMul;
965 case Intrinsic::vector_reduce_add:
966 return Instruction::Add;
967 case Intrinsic::vector_reduce_mul:
968 return Instruction::Mul;
969 case Intrinsic::vector_reduce_and:
970 return Instruction::And;
971 case Intrinsic::vector_reduce_or:
972 return Instruction::Or;
973 case Intrinsic::vector_reduce_xor:
974 return Instruction::Xor;
975 case Intrinsic::vector_reduce_smax:
976 case Intrinsic::vector_reduce_smin:
977 case Intrinsic::vector_reduce_umax:
978 case Intrinsic::vector_reduce_umin:
979 return Instruction::ICmp;
980 case Intrinsic::vector_reduce_fmax:
981 case Intrinsic::vector_reduce_fmin:
982 return Instruction::FCmp;
983 default:
984 llvm_unreachable("Unexpected ID");
985 }
986}
987
989 switch (RdxID) {
990 default:
991 llvm_unreachable("Unknown min/max recurrence kind");
992 case Intrinsic::vector_reduce_umin:
993 return Intrinsic::umin;
994 case Intrinsic::vector_reduce_umax:
995 return Intrinsic::umax;
996 case Intrinsic::vector_reduce_smin:
997 return Intrinsic::smin;
998 case Intrinsic::vector_reduce_smax:
999 return Intrinsic::smax;
1000 case Intrinsic::vector_reduce_fmin:
1001 return Intrinsic::minnum;
1002 case Intrinsic::vector_reduce_fmax:
1003 return Intrinsic::maxnum;
1004 case Intrinsic::vector_reduce_fminimum:
1005 return Intrinsic::minimum;
1006 case Intrinsic::vector_reduce_fmaximum:
1007 return Intrinsic::maximum;
1008 }
1009}
1010
1012 switch (RK) {
1013 default:
1014 llvm_unreachable("Unknown min/max recurrence kind");
1015 case RecurKind::UMin:
1016 return Intrinsic::umin;
1017 case RecurKind::UMax:
1018 return Intrinsic::umax;
1019 case RecurKind::SMin:
1020 return Intrinsic::smin;
1021 case RecurKind::SMax:
1022 return Intrinsic::smax;
1023 case RecurKind::FMin:
1024 return Intrinsic::minnum;
1025 case RecurKind::FMax:
1026 return Intrinsic::maxnum;
1027 case RecurKind::FMinimum:
1028 return Intrinsic::minimum;
1029 case RecurKind::FMaximum:
1030 return Intrinsic::maximum;
1031 }
1032}
1033
1035 switch (RdxID) {
1036 case Intrinsic::vector_reduce_smax:
1037 return RecurKind::SMax;
1038 case Intrinsic::vector_reduce_smin:
1039 return RecurKind::SMin;
1040 case Intrinsic::vector_reduce_umax:
1041 return RecurKind::UMax;
1042 case Intrinsic::vector_reduce_umin:
1043 return RecurKind::UMin;
1044 case Intrinsic::vector_reduce_fmax:
1045 return RecurKind::FMax;
1046 case Intrinsic::vector_reduce_fmin:
1047 return RecurKind::FMin;
1048 default:
1049 return RecurKind::None;
1050 }
1051}
1052
1054 switch (RK) {
1055 default:
1056 llvm_unreachable("Unknown min/max recurrence kind");
1057 case RecurKind::UMin:
1058 return CmpInst::ICMP_ULT;
1059 case RecurKind::UMax:
1060 return CmpInst::ICMP_UGT;
1061 case RecurKind::SMin:
1062 return CmpInst::ICMP_SLT;
1063 case RecurKind::SMax:
1064 return CmpInst::ICMP_SGT;
1065 case RecurKind::FMin:
1066 return CmpInst::FCMP_OLT;
1067 case RecurKind::FMax:
1068 return CmpInst::FCMP_OGT;
1069 // We do not add FMinimum/FMaximum recurrence kind here since there is no
1070 // equivalent predicate which compares signed zeroes according to the
1071 // semantics of the intrinsics (llvm.minimum/maximum).
1072 }
1073}
1074
1076 Value *Right) {
1077 Type *Ty = Left->getType();
1078 if (Ty->isIntOrIntVectorTy() ||
1079 (RK == RecurKind::FMinimum || RK == RecurKind::FMaximum)) {
1080 // TODO: Add float minnum/maxnum support when FMF nnan is set.
1082 return Builder.CreateIntrinsic(Ty, Id, {Left, Right}, nullptr,
1083 "rdx.minmax");
1084 }
1086 Value *Cmp = Builder.CreateCmp(Pred, Left, Right, "rdx.minmax.cmp");
1087 Value *Select = Builder.CreateSelect(Cmp, Left, Right, "rdx.minmax.select");
1088 return Select;
1089}
1090
1091// Helper to generate an ordered reduction.
1093 unsigned Op, RecurKind RdxKind) {
1094 unsigned VF = cast<FixedVectorType>(Src->getType())->getNumElements();
1095
1096 // Extract and apply reduction ops in ascending order:
1097 // e.g. ((((Acc + Scl[0]) + Scl[1]) + Scl[2]) + ) ... + Scl[VF-1]
1098 Value *Result = Acc;
1099 for (unsigned ExtractIdx = 0; ExtractIdx != VF; ++ExtractIdx) {
1100 Value *Ext =
1101 Builder.CreateExtractElement(Src, Builder.getInt32(ExtractIdx));
1102
1103 if (Op != Instruction::ICmp && Op != Instruction::FCmp) {
1104 Result = Builder.CreateBinOp((Instruction::BinaryOps)Op, Result, Ext,
1105 "bin.rdx");
1106 } else {
1108 "Invalid min/max");
1109 Result = createMinMaxOp(Builder, RdxKind, Result, Ext);
1110 }
1111 }
1112
1113 return Result;
1114}
1115
1116// Helper to generate a log2 shuffle reduction.
1118 unsigned Op,
1120 RecurKind RdxKind) {
1121 unsigned VF = cast<FixedVectorType>(Src->getType())->getNumElements();
1122 // VF is a power of 2 so we can emit the reduction using log2(VF) shuffles
1123 // and vector ops, reducing the set of values being computed by half each
1124 // round.
1125 assert(isPowerOf2_32(VF) &&
1126 "Reduction emission only supported for pow2 vectors!");
1127 // Note: fast-math-flags flags are controlled by the builder configuration
1128 // and are assumed to apply to all generated arithmetic instructions. Other
1129 // poison generating flags (nsw/nuw/inbounds/inrange/exact) are not part
1130 // of the builder configuration, and since they're not passed explicitly,
1131 // will never be relevant here. Note that it would be generally unsound to
1132 // propagate these from an intrinsic call to the expansion anyways as we/
1133 // change the order of operations.
1134 auto BuildShuffledOp = [&Builder, &Op,
1135 &RdxKind](SmallVectorImpl<int> &ShuffleMask,
1136 Value *&TmpVec) -> void {
1137 Value *Shuf = Builder.CreateShuffleVector(TmpVec, ShuffleMask, "rdx.shuf");
1138 if (Op != Instruction::ICmp && Op != Instruction::FCmp) {
1139 TmpVec = Builder.CreateBinOp((Instruction::BinaryOps)Op, TmpVec, Shuf,
1140 "bin.rdx");
1141 } else {
1143 "Invalid min/max");
1144 TmpVec = createMinMaxOp(Builder, RdxKind, TmpVec, Shuf);
1145 }
1146 };
1147
1148 Value *TmpVec = Src;
1149 if (TargetTransformInfo::ReductionShuffle::Pairwise == RS) {
1150 SmallVector<int, 32> ShuffleMask(VF);
1151 for (unsigned stride = 1; stride < VF; stride <<= 1) {
1152 // Initialise the mask with undef.
1153 std::fill(ShuffleMask.begin(), ShuffleMask.end(), -1);
1154 for (unsigned j = 0; j < VF; j += stride << 1) {
1155 ShuffleMask[j] = j + stride;
1156 }
1157 BuildShuffledOp(ShuffleMask, TmpVec);
1158 }
1159 } else {
1160 SmallVector<int, 32> ShuffleMask(VF);
1161 for (unsigned i = VF; i != 1; i >>= 1) {
1162 // Move the upper half of the vector to the lower half.
1163 for (unsigned j = 0; j != i / 2; ++j)
1164 ShuffleMask[j] = i / 2 + j;
1165
1166 // Fill the rest of the mask with undef.
1167 std::fill(&ShuffleMask[i / 2], ShuffleMask.end(), -1);
1168 BuildShuffledOp(ShuffleMask, TmpVec);
1169 }
1170 }
1171 // The result is in the first element of the vector.
1172 return Builder.CreateExtractElement(TmpVec, Builder.getInt32(0));
1173}
1174
1177 PHINode *OrigPhi) {
1178 assert(
1180 "Unexpected reduction kind");
1181 Value *InitVal = Desc.getRecurrenceStartValue();
1182 Value *NewVal = nullptr;
1183
1184 // First use the original phi to determine the new value we're trying to
1185 // select from in the loop.
1186 SelectInst *SI = nullptr;
1187 for (auto *U : OrigPhi->users()) {
1188 if ((SI = dyn_cast<SelectInst>(U)))
1189 break;
1190 }
1191 assert(SI && "One user of the original phi should be a select");
1192
1193 if (SI->getTrueValue() == OrigPhi)
1194 NewVal = SI->getFalseValue();
1195 else {
1196 assert(SI->getFalseValue() == OrigPhi &&
1197 "At least one input to the select should be the original Phi");
1198 NewVal = SI->getTrueValue();
1199 }
1200
1201 // If any predicate is true it means that we want to select the new value.
1202 Value *AnyOf =
1203 Src->getType()->isVectorTy() ? Builder.CreateOrReduce(Src) : Src;
1204 // The compares in the loop may yield poison, which propagates through the
1205 // bitwise ORs. Freeze it here before the condition is used.
1206 AnyOf = Builder.CreateFreeze(AnyOf);
1207 return Builder.CreateSelect(AnyOf, NewVal, InitVal, "rdx.select");
1208}
1209
1211 RecurKind RdxKind) {
1212 auto *SrcVecEltTy = cast<VectorType>(Src->getType())->getElementType();
1213 switch (RdxKind) {
1214 case RecurKind::Add:
1215 return Builder.CreateAddReduce(Src);
1216 case RecurKind::Mul:
1217 return Builder.CreateMulReduce(Src);
1218 case RecurKind::And:
1219 return Builder.CreateAndReduce(Src);
1220 case RecurKind::Or:
1221 return Builder.CreateOrReduce(Src);
1222 case RecurKind::Xor:
1223 return Builder.CreateXorReduce(Src);
1224 case RecurKind::FMulAdd:
1225 case RecurKind::FAdd:
1226 return Builder.CreateFAddReduce(ConstantFP::getNegativeZero(SrcVecEltTy),
1227 Src);
1228 case RecurKind::FMul:
1229 return Builder.CreateFMulReduce(ConstantFP::get(SrcVecEltTy, 1.0), Src);
1230 case RecurKind::SMax:
1231 return Builder.CreateIntMaxReduce(Src, true);
1232 case RecurKind::SMin:
1233 return Builder.CreateIntMinReduce(Src, true);
1234 case RecurKind::UMax:
1235 return Builder.CreateIntMaxReduce(Src, false);
1236 case RecurKind::UMin:
1237 return Builder.CreateIntMinReduce(Src, false);
1238 case RecurKind::FMax:
1239 return Builder.CreateFPMaxReduce(Src);
1240 case RecurKind::FMin:
1241 return Builder.CreateFPMinReduce(Src);
1242 case RecurKind::FMinimum:
1243 return Builder.CreateFPMinimumReduce(Src);
1244 case RecurKind::FMaximum:
1245 return Builder.CreateFPMaximumReduce(Src);
1246 default:
1247 llvm_unreachable("Unhandled opcode");
1248 }
1249}
1250
1252 const RecurrenceDescriptor &Desc) {
1253 RecurKind Kind = Desc.getRecurrenceKind();
1255 "AnyOf reduction is not supported.");
1257 auto *SrcTy = cast<VectorType>(Src->getType());
1258 Type *SrcEltTy = SrcTy->getElementType();
1259 Value *Iden =
1260 Desc.getRecurrenceIdentity(Kind, SrcEltTy, Desc.getFastMathFlags());
1261 Value *Ops[] = {Iden, Src};
1262 return VBuilder.createSimpleTargetReduction(Id, SrcTy, Ops);
1263}
1264
1266 const RecurrenceDescriptor &Desc, Value *Src,
1267 PHINode *OrigPhi) {
1268 // TODO: Support in-order reductions based on the recurrence descriptor.
1269 // All ops in the reduction inherit fast-math-flags from the recurrence
1270 // descriptor.
1272 B.setFastMathFlags(Desc.getFastMathFlags());
1273
1274 RecurKind RK = Desc.getRecurrenceKind();
1276 return createAnyOfTargetReduction(B, Src, Desc, OrigPhi);
1277
1278 return createSimpleTargetReduction(B, Src, RK);
1279}
1280
1283 Value *Src, Value *Start) {
1284 assert((Desc.getRecurrenceKind() == RecurKind::FAdd ||
1285 Desc.getRecurrenceKind() == RecurKind::FMulAdd) &&
1286 "Unexpected reduction kind");
1287 assert(Src->getType()->isVectorTy() && "Expected a vector type");
1288 assert(!Start->getType()->isVectorTy() && "Expected a scalar type");
1289
1290 return B.CreateFAddReduce(Start, Src);
1291}
1292
1295 Value *Src, Value *Start) {
1296 assert((Desc.getRecurrenceKind() == RecurKind::FAdd ||
1297 Desc.getRecurrenceKind() == RecurKind::FMulAdd) &&
1298 "Unexpected reduction kind");
1299 assert(Src->getType()->isVectorTy() && "Expected a vector type");
1300 assert(!Start->getType()->isVectorTy() && "Expected a scalar type");
1301
1302 Intrinsic::ID Id = getReductionIntrinsicID(RecurKind::FAdd);
1303 auto *SrcTy = cast<VectorType>(Src->getType());
1304 Value *Ops[] = {Start, Src};
1305 return VBuilder.createSimpleTargetReduction(Id, SrcTy, Ops);
1306}
1307
1309 bool IncludeWrapFlags) {
1310 auto *VecOp = dyn_cast<Instruction>(I);
1311 if (!VecOp)
1312 return;
1313 auto *Intersection = (OpValue == nullptr) ? dyn_cast<Instruction>(VL[0])
1314 : dyn_cast<Instruction>(OpValue);
1315 if (!Intersection)
1316 return;
1317 const unsigned Opcode = Intersection->getOpcode();
1318 VecOp->copyIRFlags(Intersection, IncludeWrapFlags);
1319 for (auto *V : VL) {
1320 auto *Instr = dyn_cast<Instruction>(V);
1321 if (!Instr)
1322 continue;
1323 if (OpValue == nullptr || Opcode == Instr->getOpcode())
1324 VecOp->andIRFlags(V);
1325 }
1326}
1327
1328bool llvm::isKnownNegativeInLoop(const SCEV *S, const Loop *L,
1329 ScalarEvolution &SE) {
1330 const SCEV *Zero = SE.getZero(S->getType());
1331 return SE.isAvailableAtLoopEntry(S, L) &&
1332 SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SLT, S, Zero);
1333}
1334
1336 ScalarEvolution &SE) {
1337 const SCEV *Zero = SE.getZero(S->getType());
1338 return SE.isAvailableAtLoopEntry(S, L) &&
1339 SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SGE, S, Zero);
1340}
1341
1342bool llvm::isKnownPositiveInLoop(const SCEV *S, const Loop *L,
1343 ScalarEvolution &SE) {
1344 const SCEV *Zero = SE.getZero(S->getType());
1345 return SE.isAvailableAtLoopEntry(S, L) &&
1346 SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SGT, S, Zero);
1347}
1348
1350 ScalarEvolution &SE) {
1351 const SCEV *Zero = SE.getZero(S->getType());
1352 return SE.isAvailableAtLoopEntry(S, L) &&
1353 SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SLE, S, Zero);
1354}
1355
1357 bool Signed) {
1358 unsigned BitWidth = cast<IntegerType>(S->getType())->getBitWidth();
1361 auto Predicate = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
1362 return SE.isAvailableAtLoopEntry(S, L) &&
1363 SE.isLoopEntryGuardedByCond(L, Predicate, S,
1364 SE.getConstant(Min));
1365}
1366
1368 bool Signed) {
1369 unsigned BitWidth = cast<IntegerType>(S->getType())->getBitWidth();
1372 auto Predicate = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
1373 return SE.isAvailableAtLoopEntry(S, L) &&
1374 SE.isLoopEntryGuardedByCond(L, Predicate, S,
1375 SE.getConstant(Max));
1376}
1377
1378//===----------------------------------------------------------------------===//
1379// rewriteLoopExitValues - Optimize IV users outside the loop.
1380// As a side effect, reduces the amount of IV processing within the loop.
1381//===----------------------------------------------------------------------===//
1382
1383static bool hasHardUserWithinLoop(const Loop *L, const Instruction *I) {
1386 Visited.insert(I);
1387 WorkList.push_back(I);
1388 while (!WorkList.empty()) {
1389 const Instruction *Curr = WorkList.pop_back_val();
1390 // This use is outside the loop, nothing to do.
1391 if (!L->contains(Curr))
1392 continue;
1393 // Do we assume it is a "hard" use which will not be eliminated easily?
1394 if (Curr->mayHaveSideEffects())
1395 return true;
1396 // Otherwise, add all its users to worklist.
1397 for (const auto *U : Curr->users()) {
1398 auto *UI = cast<Instruction>(U);
1399 if (Visited.insert(UI).second)
1400 WorkList.push_back(UI);
1401 }
1402 }
1403 return false;
1404}
1405
1406// Collect information about PHI nodes which can be transformed in
1407// rewriteLoopExitValues.
1409 PHINode *PN; // For which PHI node is this replacement?
1410 unsigned Ith; // For which incoming value?
1411 const SCEV *ExpansionSCEV; // The SCEV of the incoming value we are rewriting.
1412 Instruction *ExpansionPoint; // Where we'd like to expand that SCEV?
1413 bool HighCost; // Is this expansion a high-cost?
1414
1415 RewritePhi(PHINode *P, unsigned I, const SCEV *Val, Instruction *ExpansionPt,
1416 bool H)
1417 : PN(P), Ith(I), ExpansionSCEV(Val), ExpansionPoint(ExpansionPt),
1418 HighCost(H) {}
1419};
1420
1421// Check whether it is possible to delete the loop after rewriting exit
1422// value. If it is possible, ignore ReplaceExitValue and do rewriting
1423// aggressively.
1424static bool canLoopBeDeleted(Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet) {
1425 BasicBlock *Preheader = L->getLoopPreheader();
1426 // If there is no preheader, the loop will not be deleted.
1427 if (!Preheader)
1428 return false;
1429
1430 // In LoopDeletion pass Loop can be deleted when ExitingBlocks.size() > 1.
1431 // We obviate multiple ExitingBlocks case for simplicity.
1432 // TODO: If we see testcase with multiple ExitingBlocks can be deleted
1433 // after exit value rewriting, we can enhance the logic here.
1434 SmallVector<BasicBlock *, 4> ExitingBlocks;
1435 L->getExitingBlocks(ExitingBlocks);
1437 L->getUniqueExitBlocks(ExitBlocks);
1438 if (ExitBlocks.size() != 1 || ExitingBlocks.size() != 1)
1439 return false;
1440
1441 BasicBlock *ExitBlock = ExitBlocks[0];
1442 BasicBlock::iterator BI = ExitBlock->begin();
1443 while (PHINode *P = dyn_cast<PHINode>(BI)) {
1444 Value *Incoming = P->getIncomingValueForBlock(ExitingBlocks[0]);
1445
1446 // If the Incoming value of P is found in RewritePhiSet, we know it
1447 // could be rewritten to use a loop invariant value in transformation
1448 // phase later. Skip it in the loop invariant check below.
1449 bool found = false;
1450 for (const RewritePhi &Phi : RewritePhiSet) {
1451 unsigned i = Phi.Ith;
1452 if (Phi.PN == P && (Phi.PN)->getIncomingValue(i) == Incoming) {
1453 found = true;
1454 break;
1455 }
1456 }
1457
1458 Instruction *I;
1459 if (!found && (I = dyn_cast<Instruction>(Incoming)))
1460 if (!L->hasLoopInvariantOperands(I))
1461 return false;
1462
1463 ++BI;
1464 }
1465
1466 for (auto *BB : L->blocks())
1467 if (llvm::any_of(*BB, [](Instruction &I) {
1468 return I.mayHaveSideEffects();
1469 }))
1470 return false;
1471
1472 return true;
1473}
1474
1475/// Checks if it is safe to call InductionDescriptor::isInductionPHI for \p Phi,
1476/// and returns true if this Phi is an induction phi in the loop. When
1477/// isInductionPHI returns true, \p ID will be also be set by isInductionPHI.
1478static bool checkIsIndPhi(PHINode *Phi, Loop *L, ScalarEvolution *SE,
1480 if (!Phi)
1481 return false;
1482 if (!L->getLoopPreheader())
1483 return false;
1484 if (Phi->getParent() != L->getHeader())
1485 return false;
1486 return InductionDescriptor::isInductionPHI(Phi, L, SE, ID);
1487}
1488
1490 ScalarEvolution *SE,
1491 const TargetTransformInfo *TTI,
1492 SCEVExpander &Rewriter, DominatorTree *DT,
1495 // Check a pre-condition.
1496 assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
1497 "Indvars did not preserve LCSSA!");
1498
1499 SmallVector<BasicBlock*, 8> ExitBlocks;
1500 L->getUniqueExitBlocks(ExitBlocks);
1501
1502 SmallVector<RewritePhi, 8> RewritePhiSet;
1503 // Find all values that are computed inside the loop, but used outside of it.
1504 // Because of LCSSA, these values will only occur in LCSSA PHI Nodes. Scan
1505 // the exit blocks of the loop to find them.
1506 for (BasicBlock *ExitBB : ExitBlocks) {
1507 // If there are no PHI nodes in this exit block, then no values defined
1508 // inside the loop are used on this path, skip it.
1509 PHINode *PN = dyn_cast<PHINode>(ExitBB->begin());
1510 if (!PN) continue;
1511
1512 unsigned NumPreds = PN->getNumIncomingValues();
1513
1514 // Iterate over all of the PHI nodes.
1515 BasicBlock::iterator BBI = ExitBB->begin();
1516 while ((PN = dyn_cast<PHINode>(BBI++))) {
1517 if (PN->use_empty())
1518 continue; // dead use, don't replace it
1519
1520 if (!SE->isSCEVable(PN->getType()))
1521 continue;
1522
1523 // Iterate over all of the values in all the PHI nodes.
1524 for (unsigned i = 0; i != NumPreds; ++i) {
1525 // If the value being merged in is not integer or is not defined
1526 // in the loop, skip it.
1527 Value *InVal = PN->getIncomingValue(i);
1528 if (!isa<Instruction>(InVal))
1529 continue;
1530
1531 // If this pred is for a subloop, not L itself, skip it.
1532 if (LI->getLoopFor(PN->getIncomingBlock(i)) != L)
1533 continue; // The Block is in a subloop, skip it.
1534
1535 // Check that InVal is defined in the loop.
1536 Instruction *Inst = cast<Instruction>(InVal);
1537 if (!L->contains(Inst))
1538 continue;
1539
1540 // Find exit values which are induction variables in the loop, and are
1541 // unused in the loop, with the only use being the exit block PhiNode,
1542 // and the induction variable update binary operator.
1543 // The exit value can be replaced with the final value when it is cheap
1544 // to do so.
1547 PHINode *IndPhi = dyn_cast<PHINode>(Inst);
1548 if (IndPhi) {
1549 if (!checkIsIndPhi(IndPhi, L, SE, ID))
1550 continue;
1551 // This is an induction PHI. Check that the only users are PHI
1552 // nodes, and induction variable update binary operators.
1553 if (llvm::any_of(Inst->users(), [&](User *U) {
1554 if (!isa<PHINode>(U) && !isa<BinaryOperator>(U))
1555 return true;
1556 BinaryOperator *B = dyn_cast<BinaryOperator>(U);
1557 if (B && B != ID.getInductionBinOp())
1558 return true;
1559 return false;
1560 }))
1561 continue;
1562 } else {
1563 // If it is not an induction phi, it must be an induction update
1564 // binary operator with an induction phi user.
1565 BinaryOperator *B = dyn_cast<BinaryOperator>(Inst);
1566 if (!B)
1567 continue;
1568 if (llvm::any_of(Inst->users(), [&](User *U) {
1569 PHINode *Phi = dyn_cast<PHINode>(U);
1570 if (Phi != PN && !checkIsIndPhi(Phi, L, SE, ID))
1571 return true;
1572 return false;
1573 }))
1574 continue;
1575 if (B != ID.getInductionBinOp())
1576 continue;
1577 }
1578 }
1579
1580 // Okay, this instruction has a user outside of the current loop
1581 // and varies predictably *inside* the loop. Evaluate the value it
1582 // contains when the loop exits, if possible. We prefer to start with
1583 // expressions which are true for all exits (so as to maximize
1584 // expression reuse by the SCEVExpander), but resort to per-exit
1585 // evaluation if that fails.
1586 const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop());
1587 if (isa<SCEVCouldNotCompute>(ExitValue) ||
1588 !SE->isLoopInvariant(ExitValue, L) ||
1589 !Rewriter.isSafeToExpand(ExitValue)) {
1590 // TODO: This should probably be sunk into SCEV in some way; maybe a
1591 // getSCEVForExit(SCEV*, L, ExitingBB)? It can be generalized for
1592 // most SCEV expressions and other recurrence types (e.g. shift
1593 // recurrences). Is there existing code we can reuse?
1594 const SCEV *ExitCount = SE->getExitCount(L, PN->getIncomingBlock(i));
1595 if (isa<SCEVCouldNotCompute>(ExitCount))
1596 continue;
1597 if (auto *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Inst)))
1598 if (AddRec->getLoop() == L)
1599 ExitValue = AddRec->evaluateAtIteration(ExitCount, *SE);
1600 if (isa<SCEVCouldNotCompute>(ExitValue) ||
1601 !SE->isLoopInvariant(ExitValue, L) ||
1602 !Rewriter.isSafeToExpand(ExitValue))
1603 continue;
1604 }
1605
1606 // Computing the value outside of the loop brings no benefit if it is
1607 // definitely used inside the loop in a way which can not be optimized
1608 // away. Avoid doing so unless we know we have a value which computes
1609 // the ExitValue already. TODO: This should be merged into SCEV
1610 // expander to leverage its knowledge of existing expressions.
1611 if (ReplaceExitValue != AlwaysRepl && !isa<SCEVConstant>(ExitValue) &&
1612 !isa<SCEVUnknown>(ExitValue) && hasHardUserWithinLoop(L, Inst))
1613 continue;
1614
1615 // Check if expansions of this SCEV would count as being high cost.
1616 bool HighCost = Rewriter.isHighCostExpansion(
1617 ExitValue, L, SCEVCheapExpansionBudget, TTI, Inst);
1618
1619 // Note that we must not perform expansions until after
1620 // we query *all* the costs, because if we perform temporary expansion
1621 // inbetween, one that we might not intend to keep, said expansion
1622 // *may* affect cost calculation of the next SCEV's we'll query,
1623 // and next SCEV may errneously get smaller cost.
1624
1625 // Collect all the candidate PHINodes to be rewritten.
1626 Instruction *InsertPt =
1627 (isa<PHINode>(Inst) || isa<LandingPadInst>(Inst)) ?
1628 &*Inst->getParent()->getFirstInsertionPt() : Inst;
1629 RewritePhiSet.emplace_back(PN, i, ExitValue, InsertPt, HighCost);
1630 }
1631 }
1632 }
1633
1634 // TODO: evaluate whether it is beneficial to change how we calculate
1635 // high-cost: if we have SCEV 'A' which we know we will expand, should we
1636 // calculate the cost of other SCEV's after expanding SCEV 'A', thus
1637 // potentially giving cost bonus to those other SCEV's?
1638
1639 bool LoopCanBeDel = canLoopBeDeleted(L, RewritePhiSet);
1640 int NumReplaced = 0;
1641
1642 // Transformation.
1643 for (const RewritePhi &Phi : RewritePhiSet) {
1644 PHINode *PN = Phi.PN;
1645
1646 // Only do the rewrite when the ExitValue can be expanded cheaply.
1647 // If LoopCanBeDel is true, rewrite exit value aggressively.
1650 !LoopCanBeDel && Phi.HighCost)
1651 continue;
1652
1653 Value *ExitVal = Rewriter.expandCodeFor(
1654 Phi.ExpansionSCEV, Phi.PN->getType(), Phi.ExpansionPoint);
1655
1656 LLVM_DEBUG(dbgs() << "rewriteLoopExitValues: AfterLoopVal = " << *ExitVal
1657 << '\n'
1658 << " LoopVal = " << *(Phi.ExpansionPoint) << "\n");
1659
1660#ifndef NDEBUG
1661 // If we reuse an instruction from a loop which is neither L nor one of
1662 // its containing loops, we end up breaking LCSSA form for this loop by
1663 // creating a new use of its instruction.
1664 if (auto *ExitInsn = dyn_cast<Instruction>(ExitVal))
1665 if (auto *EVL = LI->getLoopFor(ExitInsn->getParent()))
1666 if (EVL != L)
1667 assert(EVL->contains(L) && "LCSSA breach detected!");
1668#endif
1669
1670 NumReplaced++;
1671 Instruction *Inst = cast<Instruction>(PN->getIncomingValue(Phi.Ith));
1672 PN->setIncomingValue(Phi.Ith, ExitVal);
1673 // It's necessary to tell ScalarEvolution about this explicitly so that
1674 // it can walk the def-use list and forget all SCEVs, as it may not be
1675 // watching the PHI itself. Once the new exit value is in place, there
1676 // may not be a def-use connection between the loop and every instruction
1677 // which got a SCEVAddRecExpr for that loop.
1678 SE->forgetValue(PN);
1679
1680 // If this instruction is dead now, delete it. Don't do it now to avoid
1681 // invalidating iterators.
1682 if (isInstructionTriviallyDead(Inst, TLI))
1683 DeadInsts.push_back(Inst);
1684
1685 // Replace PN with ExitVal if that is legal and does not break LCSSA.
1686 if (PN->getNumIncomingValues() == 1 &&
1687 LI->replacementPreservesLCSSAForm(PN, ExitVal)) {
1688 PN->replaceAllUsesWith(ExitVal);
1689 PN->eraseFromParent();
1690 }
1691 }
1692
1693 // The insertion point instruction may have been deleted; clear it out
1694 // so that the rewriter doesn't trip over it later.
1695 Rewriter.clearInsertPoint();
1696 return NumReplaced;
1697}
1698
1699/// Set weights for \p UnrolledLoop and \p RemainderLoop based on weights for
1700/// \p OrigLoop.
1701void llvm::setProfileInfoAfterUnrolling(Loop *OrigLoop, Loop *UnrolledLoop,
1702 Loop *RemainderLoop, uint64_t UF) {
1703 assert(UF > 0 && "Zero unrolled factor is not supported");
1704 assert(UnrolledLoop != RemainderLoop &&
1705 "Unrolled and Remainder loops are expected to distinct");
1706
1707 // Get number of iterations in the original scalar loop.
1708 unsigned OrigLoopInvocationWeight = 0;
1709 std::optional<unsigned> OrigAverageTripCount =
1710 getLoopEstimatedTripCount(OrigLoop, &OrigLoopInvocationWeight);
1711 if (!OrigAverageTripCount)
1712 return;
1713
1714 // Calculate number of iterations in unrolled loop.
1715 unsigned UnrolledAverageTripCount = *OrigAverageTripCount / UF;
1716 // Calculate number of iterations for remainder loop.
1717 unsigned RemainderAverageTripCount = *OrigAverageTripCount % UF;
1718
1719 setLoopEstimatedTripCount(UnrolledLoop, UnrolledAverageTripCount,
1720 OrigLoopInvocationWeight);
1721 setLoopEstimatedTripCount(RemainderLoop, RemainderAverageTripCount,
1722 OrigLoopInvocationWeight);
1723}
1724
1725/// Utility that implements appending of loops onto a worklist.
1726/// Loops are added in preorder (analogous for reverse postorder for trees),
1727/// and the worklist is processed LIFO.
1728template <typename RangeT>
1730 RangeT &&Loops, SmallPriorityWorklist<Loop *, 4> &Worklist) {
1731 // We use an internal worklist to build up the preorder traversal without
1732 // recursion.
1733 SmallVector<Loop *, 4> PreOrderLoops, PreOrderWorklist;
1734
1735 // We walk the initial sequence of loops in reverse because we generally want
1736 // to visit defs before uses and the worklist is LIFO.
1737 for (Loop *RootL : Loops) {
1738 assert(PreOrderLoops.empty() && "Must start with an empty preorder walk.");
1739 assert(PreOrderWorklist.empty() &&
1740 "Must start with an empty preorder walk worklist.");
1741 PreOrderWorklist.push_back(RootL);
1742 do {
1743 Loop *L = PreOrderWorklist.pop_back_val();
1744 PreOrderWorklist.append(L->begin(), L->end());
1745 PreOrderLoops.push_back(L);
1746 } while (!PreOrderWorklist.empty());
1747
1748 Worklist.insert(std::move(PreOrderLoops));
1749 PreOrderLoops.clear();
1750 }
1751}
1752
1753template <typename RangeT>
1757}
1758
1759template void llvm::appendLoopsToWorklist<ArrayRef<Loop *> &>(
1761
1762template void
1763llvm::appendLoopsToWorklist<Loop &>(Loop &L,
1765
1768 appendReversedLoopsToWorklist(LI, Worklist);
1769}
1770
1772 LoopInfo *LI, LPPassManager *LPM) {
1773 Loop &New = *LI->AllocateLoop();
1774 if (PL)
1775 PL->addChildLoop(&New);
1776 else
1777 LI->addTopLevelLoop(&New);
1778
1779 if (LPM)
1780 LPM->addLoop(New);
1781
1782 // Add all of the blocks in L to the new loop.
1783 for (BasicBlock *BB : L->blocks())
1784 if (LI->getLoopFor(BB) == L)
1785 New.addBasicBlockToLoop(cast<BasicBlock>(VM[BB]), *LI);
1786
1787 // Add all of the subloops to the new loop.
1788 for (Loop *I : *L)
1789 cloneLoop(I, &New, VM, LI, LPM);
1790
1791 return &New;
1792}
1793
1794/// IR Values for the lower and upper bounds of a pointer evolution. We
1795/// need to use value-handles because SCEV expansion can invalidate previously
1796/// expanded values. Thus expansion of a pointer can invalidate the bounds for
1797/// a previous one.
1802};
1803
1804/// Expand code for the lower and upper bound of the pointer group \p CG
1805/// in \p TheLoop. \return the values for the bounds.
1807 Loop *TheLoop, Instruction *Loc,
1808 SCEVExpander &Exp, bool HoistRuntimeChecks) {
1809 LLVMContext &Ctx = Loc->getContext();
1810 Type *PtrArithTy = PointerType::get(Ctx, CG->AddressSpace);
1811
1812 Value *Start = nullptr, *End = nullptr;
1813 LLVM_DEBUG(dbgs() << "LAA: Adding RT check for range:\n");
1814 const SCEV *Low = CG->Low, *High = CG->High, *Stride = nullptr;
1815
1816 // If the Low and High values are themselves loop-variant, then we may want
1817 // to expand the range to include those covered by the outer loop as well.
1818 // There is a trade-off here with the advantage being that creating checks
1819 // using the expanded range permits the runtime memory checks to be hoisted
1820 // out of the outer loop. This reduces the cost of entering the inner loop,
1821 // which can be significant for low trip counts. The disadvantage is that
1822 // there is a chance we may now never enter the vectorized inner loop,
1823 // whereas using a restricted range check could have allowed us to enter at
1824 // least once. This is why the behaviour is not currently the default and is
1825 // controlled by the parameter 'HoistRuntimeChecks'.
1826 if (HoistRuntimeChecks && TheLoop->getParentLoop() &&
1827 isa<SCEVAddRecExpr>(High) && isa<SCEVAddRecExpr>(Low)) {
1828 auto *HighAR = cast<SCEVAddRecExpr>(High);
1829 auto *LowAR = cast<SCEVAddRecExpr>(Low);
1830 const Loop *OuterLoop = TheLoop->getParentLoop();
1831 ScalarEvolution &SE = *Exp.getSE();
1832 const SCEV *Recur = LowAR->getStepRecurrence(SE);
1833 if (Recur == HighAR->getStepRecurrence(SE) &&
1834 HighAR->getLoop() == OuterLoop && LowAR->getLoop() == OuterLoop) {
1835 BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
1836 const SCEV *OuterExitCount = SE.getExitCount(OuterLoop, OuterLoopLatch);
1837 if (!isa<SCEVCouldNotCompute>(OuterExitCount) &&
1838 OuterExitCount->getType()->isIntegerTy()) {
1839 const SCEV *NewHigh =
1840 cast<SCEVAddRecExpr>(High)->evaluateAtIteration(OuterExitCount, SE);
1841 if (!isa<SCEVCouldNotCompute>(NewHigh)) {
1842 LLVM_DEBUG(dbgs() << "LAA: Expanded RT check for range to include "
1843 "outer loop in order to permit hoisting\n");
1844 High = NewHigh;
1845 Low = cast<SCEVAddRecExpr>(Low)->getStart();
1846 // If there is a possibility that the stride is negative then we have
1847 // to generate extra checks to ensure the stride is positive.
1848 if (!SE.isKnownNonNegative(
1849 SE.applyLoopGuards(Recur, HighAR->getLoop()))) {
1850 Stride = Recur;
1851 LLVM_DEBUG(dbgs() << "LAA: ... but need to check stride is "
1852 "positive: "
1853 << *Stride << '\n');
1854 }
1855 }
1856 }
1857 }
1858 }
1859
1860 Start = Exp.expandCodeFor(Low, PtrArithTy, Loc);
1861 End = Exp.expandCodeFor(High, PtrArithTy, Loc);
1862 if (CG->NeedsFreeze) {
1863 IRBuilder<> Builder(Loc);
1864 Start = Builder.CreateFreeze(Start, Start->getName() + ".fr");
1865 End = Builder.CreateFreeze(End, End->getName() + ".fr");
1866 }
1867 Value *StrideVal =
1868 Stride ? Exp.expandCodeFor(Stride, Stride->getType(), Loc) : nullptr;
1869 LLVM_DEBUG(dbgs() << "Start: " << *Low << " End: " << *High << "\n");
1870 return {Start, End, StrideVal};
1871}
1872
1873/// Turns a collection of checks into a collection of expanded upper and
1874/// lower bounds for both pointers in the check.
1877 Instruction *Loc, SCEVExpander &Exp, bool HoistRuntimeChecks) {
1879
1880 // Here we're relying on the SCEV Expander's cache to only emit code for the
1881 // same bounds once.
1882 transform(PointerChecks, std::back_inserter(ChecksWithBounds),
1883 [&](const RuntimePointerCheck &Check) {
1884 PointerBounds First = expandBounds(Check.first, L, Loc, Exp,
1886 Second = expandBounds(Check.second, L, Loc, Exp,
1888 return std::make_pair(First, Second);
1889 });
1890
1891 return ChecksWithBounds;
1892}
1893
1895 Instruction *Loc, Loop *TheLoop,
1896 const SmallVectorImpl<RuntimePointerCheck> &PointerChecks,
1897 SCEVExpander &Exp, bool HoistRuntimeChecks) {
1898 // TODO: Move noalias annotation code from LoopVersioning here and share with LV if possible.
1899 // TODO: Pass RtPtrChecking instead of PointerChecks and SE separately, if possible
1900 auto ExpandedChecks =
1901 expandBounds(PointerChecks, TheLoop, Loc, Exp, HoistRuntimeChecks);
1902
1903 LLVMContext &Ctx = Loc->getContext();
1904 IRBuilder ChkBuilder(Ctx, InstSimplifyFolder(Loc->getDataLayout()));
1905 ChkBuilder.SetInsertPoint(Loc);
1906 // Our instructions might fold to a constant.
1907 Value *MemoryRuntimeCheck = nullptr;
1908
1909 for (const auto &[A, B] : ExpandedChecks) {
1910 // Check if two pointers (A and B) conflict where conflict is computed as:
1911 // start(A) <= end(B) && start(B) <= end(A)
1912
1913 assert((A.Start->getType()->getPointerAddressSpace() ==
1914 B.End->getType()->getPointerAddressSpace()) &&
1915 (B.Start->getType()->getPointerAddressSpace() ==
1916 A.End->getType()->getPointerAddressSpace()) &&
1917 "Trying to bounds check pointers with different address spaces");
1918
1919 // [A|B].Start points to the first accessed byte under base [A|B].
1920 // [A|B].End points to the last accessed byte, plus one.
1921 // There is no conflict when the intervals are disjoint:
1922 // NoConflict = (B.Start >= A.End) || (A.Start >= B.End)
1923 //
1924 // bound0 = (B.Start < A.End)
1925 // bound1 = (A.Start < B.End)
1926 // IsConflict = bound0 & bound1
1927 Value *Cmp0 = ChkBuilder.CreateICmpULT(A.Start, B.End, "bound0");
1928 Value *Cmp1 = ChkBuilder.CreateICmpULT(B.Start, A.End, "bound1");
1929 Value *IsConflict = ChkBuilder.CreateAnd(Cmp0, Cmp1, "found.conflict");
1930 if (A.StrideToCheck) {
1931 Value *IsNegativeStride = ChkBuilder.CreateICmpSLT(
1932 A.StrideToCheck, ConstantInt::get(A.StrideToCheck->getType(), 0),
1933 "stride.check");
1934 IsConflict = ChkBuilder.CreateOr(IsConflict, IsNegativeStride);
1935 }
1936 if (B.StrideToCheck) {
1937 Value *IsNegativeStride = ChkBuilder.CreateICmpSLT(
1938 B.StrideToCheck, ConstantInt::get(B.StrideToCheck->getType(), 0),
1939 "stride.check");
1940 IsConflict = ChkBuilder.CreateOr(IsConflict, IsNegativeStride);
1941 }
1942 if (MemoryRuntimeCheck) {
1943 IsConflict =
1944 ChkBuilder.CreateOr(MemoryRuntimeCheck, IsConflict, "conflict.rdx");
1945 }
1946 MemoryRuntimeCheck = IsConflict;
1947 }
1948
1949 return MemoryRuntimeCheck;
1950}
1951
1953 Instruction *Loc, ArrayRef<PointerDiffInfo> Checks, SCEVExpander &Expander,
1954 function_ref<Value *(IRBuilderBase &, unsigned)> GetVF, unsigned IC) {
1955
1956 LLVMContext &Ctx = Loc->getContext();
1957 IRBuilder ChkBuilder(Ctx, InstSimplifyFolder(Loc->getDataLayout()));
1958 ChkBuilder.SetInsertPoint(Loc);
1959 // Our instructions might fold to a constant.
1960 Value *MemoryRuntimeCheck = nullptr;
1961
1962 auto &SE = *Expander.getSE();
1963 // Map to keep track of created compares, The key is the pair of operands for
1964 // the compare, to allow detecting and re-using redundant compares.
1966 for (const auto &[SrcStart, SinkStart, AccessSize, NeedsFreeze] : Checks) {
1967 Type *Ty = SinkStart->getType();
1968 // Compute VF * IC * AccessSize.
1969 auto *VFTimesUFTimesSize =
1970 ChkBuilder.CreateMul(GetVF(ChkBuilder, Ty->getScalarSizeInBits()),
1971 ConstantInt::get(Ty, IC * AccessSize));
1972 Value *Diff =
1973 Expander.expandCodeFor(SE.getMinusSCEV(SinkStart, SrcStart), Ty, Loc);
1974
1975 // Check if the same compare has already been created earlier. In that case,
1976 // there is no need to check it again.
1977 Value *IsConflict = SeenCompares.lookup({Diff, VFTimesUFTimesSize});
1978 if (IsConflict)
1979 continue;
1980
1981 IsConflict =
1982 ChkBuilder.CreateICmpULT(Diff, VFTimesUFTimesSize, "diff.check");
1983 SeenCompares.insert({{Diff, VFTimesUFTimesSize}, IsConflict});
1984 if (NeedsFreeze)
1985 IsConflict =
1986 ChkBuilder.CreateFreeze(IsConflict, IsConflict->getName() + ".fr");
1987 if (MemoryRuntimeCheck) {
1988 IsConflict =
1989 ChkBuilder.CreateOr(MemoryRuntimeCheck, IsConflict, "conflict.rdx");
1990 }
1991 MemoryRuntimeCheck = IsConflict;
1992 }
1993
1994 return MemoryRuntimeCheck;
1995}
1996
1997std::optional<IVConditionInfo>
1999 const MemorySSA &MSSA, AAResults &AA) {
2000 auto *TI = dyn_cast<BranchInst>(L.getHeader()->getTerminator());
2001 if (!TI || !TI->isConditional())
2002 return {};
2003
2004 auto *CondI = dyn_cast<Instruction>(TI->getCondition());
2005 // The case with the condition outside the loop should already be handled
2006 // earlier.
2007 // Allow CmpInst and TruncInsts as they may be users of load instructions
2008 // and have potential for partial unswitching
2009 if (!CondI || !isa<CmpInst, TruncInst>(CondI) || !L.contains(CondI))
2010 return {};
2011
2012 SmallVector<Instruction *> InstToDuplicate;
2013 InstToDuplicate.push_back(CondI);
2014
2015 SmallVector<Value *, 4> WorkList;
2016 WorkList.append(CondI->op_begin(), CondI->op_end());
2017
2018 SmallVector<MemoryAccess *, 4> AccessesToCheck;
2019 SmallVector<MemoryLocation, 4> AccessedLocs;
2020 while (!WorkList.empty()) {
2021 Instruction *I = dyn_cast<Instruction>(WorkList.pop_back_val());
2022 if (!I || !L.contains(I))
2023 continue;
2024
2025 // TODO: support additional instructions.
2026 if (!isa<LoadInst>(I) && !isa<GetElementPtrInst>(I))
2027 return {};
2028
2029 // Do not duplicate volatile and atomic loads.
2030 if (auto *LI = dyn_cast<LoadInst>(I))
2031 if (LI->isVolatile() || LI->isAtomic())
2032 return {};
2033
2034 InstToDuplicate.push_back(I);
2035 if (MemoryAccess *MA = MSSA.getMemoryAccess(I)) {
2036 if (auto *MemUse = dyn_cast_or_null<MemoryUse>(MA)) {
2037 // Queue the defining access to check for alias checks.
2038 AccessesToCheck.push_back(MemUse->getDefiningAccess());
2039 AccessedLocs.push_back(MemoryLocation::get(I));
2040 } else {
2041 // MemoryDefs may clobber the location or may be atomic memory
2042 // operations. Bail out.
2043 return {};
2044 }
2045 }
2046 WorkList.append(I->op_begin(), I->op_end());
2047 }
2048
2049 if (InstToDuplicate.empty())
2050 return {};
2051
2052 SmallVector<BasicBlock *, 4> ExitingBlocks;
2053 L.getExitingBlocks(ExitingBlocks);
2054 auto HasNoClobbersOnPath =
2055 [&L, &AA, &AccessedLocs, &ExitingBlocks, &InstToDuplicate,
2056 MSSAThreshold](BasicBlock *Succ, BasicBlock *Header,
2057 SmallVector<MemoryAccess *, 4> AccessesToCheck)
2058 -> std::optional<IVConditionInfo> {
2060 // First, collect all blocks in the loop that are on a patch from Succ
2061 // to the header.
2063 WorkList.push_back(Succ);
2064 WorkList.push_back(Header);
2066 Seen.insert(Header);
2067 Info.PathIsNoop &=
2068 all_of(*Header, [](Instruction &I) { return !I.mayHaveSideEffects(); });
2069
2070 while (!WorkList.empty()) {
2071 BasicBlock *Current = WorkList.pop_back_val();
2072 if (!L.contains(Current))
2073 continue;
2074 const auto &SeenIns = Seen.insert(Current);
2075 if (!SeenIns.second)
2076 continue;
2077
2078 Info.PathIsNoop &= all_of(
2079 *Current, [](Instruction &I) { return !I.mayHaveSideEffects(); });
2080 WorkList.append(succ_begin(Current), succ_end(Current));
2081 }
2082
2083 // Require at least 2 blocks on a path through the loop. This skips
2084 // paths that directly exit the loop.
2085 if (Seen.size() < 2)
2086 return {};
2087
2088 // Next, check if there are any MemoryDefs that are on the path through
2089 // the loop (in the Seen set) and they may-alias any of the locations in
2090 // AccessedLocs. If that is the case, they may modify the condition and
2091 // partial unswitching is not possible.
2092 SmallPtrSet<MemoryAccess *, 4> SeenAccesses;
2093 while (!AccessesToCheck.empty()) {
2094 MemoryAccess *Current = AccessesToCheck.pop_back_val();
2095 auto SeenI = SeenAccesses.insert(Current);
2096 if (!SeenI.second || !Seen.contains(Current->getBlock()))
2097 continue;
2098
2099 // Bail out if exceeded the threshold.
2100 if (SeenAccesses.size() >= MSSAThreshold)
2101 return {};
2102
2103 // MemoryUse are read-only accesses.
2104 if (isa<MemoryUse>(Current))
2105 continue;
2106
2107 // For a MemoryDef, check if is aliases any of the location feeding
2108 // the original condition.
2109 if (auto *CurrentDef = dyn_cast<MemoryDef>(Current)) {
2110 if (any_of(AccessedLocs, [&AA, CurrentDef](MemoryLocation &Loc) {
2111 return isModSet(
2112 AA.getModRefInfo(CurrentDef->getMemoryInst(), Loc));
2113 }))
2114 return {};
2115 }
2116
2117 for (Use &U : Current->uses())
2118 AccessesToCheck.push_back(cast<MemoryAccess>(U.getUser()));
2119 }
2120
2121 // We could also allow loops with known trip counts without mustprogress,
2122 // but ScalarEvolution may not be available.
2123 Info.PathIsNoop &= isMustProgress(&L);
2124
2125 // If the path is considered a no-op so far, check if it reaches a
2126 // single exit block without any phis. This ensures no values from the
2127 // loop are used outside of the loop.
2128 if (Info.PathIsNoop) {
2129 for (auto *Exiting : ExitingBlocks) {
2130 if (!Seen.contains(Exiting))
2131 continue;
2132 for (auto *Succ : successors(Exiting)) {
2133 if (L.contains(Succ))
2134 continue;
2135
2136 Info.PathIsNoop &= Succ->phis().empty() &&
2137 (!Info.ExitForPath || Info.ExitForPath == Succ);
2138 if (!Info.PathIsNoop)
2139 break;
2140 assert((!Info.ExitForPath || Info.ExitForPath == Succ) &&
2141 "cannot have multiple exit blocks");
2142 Info.ExitForPath = Succ;
2143 }
2144 }
2145 }
2146 if (!Info.ExitForPath)
2147 Info.PathIsNoop = false;
2148
2149 Info.InstToDuplicate = InstToDuplicate;
2150 return Info;
2151 };
2152
2153 // If we branch to the same successor, partial unswitching will not be
2154 // beneficial.
2155 if (TI->getSuccessor(0) == TI->getSuccessor(1))
2156 return {};
2157
2158 if (auto Info = HasNoClobbersOnPath(TI->getSuccessor(0), L.getHeader(),
2159 AccessesToCheck)) {
2160 Info->KnownValue = ConstantInt::getTrue(TI->getContext());
2161 return Info;
2162 }
2163 if (auto Info = HasNoClobbersOnPath(TI->getSuccessor(1), L.getHeader(),
2164 AccessesToCheck)) {
2165 Info->KnownValue = ConstantInt::getFalse(TI->getContext());
2166 return Info;
2167 }
2168
2169 return {};
2170}
@ Poison
amdgpu AMDGPU Register Bank Select
This is the interface for LLVM's primary stateless and local alias analysis.
bbsections Prepares for basic block by splitting functions into clusters of basic blocks
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file defines the DenseSet and SmallDenseSet classes.
@ Enable
Definition: DwarfDebug.cpp:87
std::string Name
bool End
Definition: ELF_riscv.cpp:480
#define Check(C,...)
This is the interface for a simple mod/ref and alias analysis over globals.
static const HTTPClientCleanup Cleanup
Definition: HTTPClient.cpp:42
Hexagon Hardware Loops
iv Induction Variable Users
Definition: IVUsers.cpp:48
static cl::opt< ReplaceExitVal > ReplaceExitValue("replexitval", cl::Hidden, cl::init(OnlyCheapRepl), cl::desc("Choose the strategy to replace exit value in IndVarSimplify"), cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"), clEnumValN(OnlyCheapRepl, "cheap", "only replace exit value when the cost is cheap"), clEnumValN(UnusedIndVarInLoop, "unusedindvarinloop", "only replace exit value when it is an unused " "induction variable in the loop and has cheap replacement cost"), clEnumValN(NoHardUse, "noharduse", "only replace exit values when loop def likely dead"), clEnumValN(AlwaysRepl, "always", "always replace exit value whenever possible")))
static cl::opt< bool, true > HoistRuntimeChecks("hoist-runtime-checks", cl::Hidden, cl::desc("Hoist inner loop runtime memory checks to outer loop if possible"), cl::location(VectorizerParams::HoistRuntimeChecks), cl::init(true))
static std::optional< uint64_t > getEstimatedTripCount(BranchInst *ExitingBranch, Loop *L, uint64_t &OrigExitWeight)
Return the estimated trip count for any exiting branch which dominates the loop latch.
Definition: LoopUtils.cpp:822
static bool hasHardUserWithinLoop(const Loop *L, const Instruction *I)
Definition: LoopUtils.cpp:1383
static const char * LLVMLoopDisableLICM
Definition: LoopUtils.cpp:55
static PointerBounds expandBounds(const RuntimeCheckingPtrGroup *CG, Loop *TheLoop, Instruction *Loc, SCEVExpander &Exp, bool HoistRuntimeChecks)
Expand code for the lower and upper bound of the pointer group CG in TheLoop.
Definition: LoopUtils.cpp:1806
static bool canLoopBeDeleted(Loop *L, SmallVector< RewritePhi, 8 > &RewritePhiSet)
Definition: LoopUtils.cpp:1424
static const char * LLVMLoopDisableNonforced
Definition: LoopUtils.cpp:54
static MDNode * createStringMetadata(Loop *TheLoop, StringRef Name, unsigned V)
Create MDNode for input string.
Definition: LoopUtils.cpp:203
static BranchInst * getExpectedExitLoopLatchBranch(Loop *L)
Checks if L has an exiting latch branch.
Definition: LoopUtils.cpp:804
static bool checkIsIndPhi(PHINode *Phi, Loop *L, ScalarEvolution *SE, InductionDescriptor &ID)
Checks if it is safe to call InductionDescriptor::isInductionPHI for Phi, and returns true if this Ph...
Definition: LoopUtils.cpp:1478
#define I(x, y, z)
Definition: MD5.cpp:58
#define H(x, y, z)
Definition: MD5.cpp:57
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
Module.h This file contains the declarations for the Module class.
uint64_t High
#define P(N)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
This file provides a priority worklist.
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This is the interface for a SCEV-based alias analysis.
This file defines the make_scope_exit function, which executes user-defined cleanup logic at scope ex...
This file implements a set that has insertion order iteration characteristics.
static cl::opt< unsigned > MSSAThreshold("simple-loop-unswitch-memoryssa-threshold", cl::desc("Max number of memory uses to explore during " "partial unswitching analysis"), cl::init(100), cl::Hidden)
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
Virtual Register Rewriter
Definition: VirtRegMap.cpp:237
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
ModRefInfo getModRefInfo(const Instruction *I, const std::optional< MemoryLocation > &OptLoc)
Check whether or not an instruction may read or write the optionally specified memory location.
Class for arbitrary precision integers.
Definition: APInt.h:78
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition: APInt.h:184
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition: APInt.h:187
static APInt getMinValue(unsigned numBits)
Gets minimum unsigned value of APInt for a specific bit width.
Definition: APInt.h:194
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition: APInt.h:197
Represent the analysis usage information of a pass.
AnalysisUsage & addRequiredID(const void *ID)
Definition: Pass.cpp:270
AnalysisUsage & addPreservedID(const void *ID)
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
Legacy wrapper pass to provide the BasicAAResult object.
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:448
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:177
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:168
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:239
Conditional or Unconditional Branch instruction.
unsigned getNumSuccessors() const
BasicBlock * getSuccessor(unsigned i) const
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:757
@ ICMP_SLT
signed less than
Definition: InstrTypes.h:786
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition: InstrTypes.h:763
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition: InstrTypes.h:761
@ ICMP_UGT
unsigned greater than
Definition: InstrTypes.h:780
@ ICMP_SGT
signed greater than
Definition: InstrTypes.h:784
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:782
static ConstantAsMetadata * get(Constant *C)
Definition: Metadata.h:528
static Constant * getNegativeZero(Type *Ty)
Definition: Constants.h:307
This is the shared class of boolean and integer constants.
Definition: Constants.h:81
static ConstantInt * getTrue(LLVMContext &Context)
Definition: Constants.cpp:850
static ConstantInt * getFalse(LLVMContext &Context)
Definition: Constants.cpp:857
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
Definition: Constants.h:161
This class represents an Operation in the Expression.
Record of a variable value-assignment, aka a non instruction representation of the dbg....
Identifies a unique instance of a variable.
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:194
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:211
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:317
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
Definition: Dominators.cpp:321
static constexpr ElementCount get(ScalarTy MinVal, bool Scalable)
Definition: TypeSize.h:317
void applyUpdates(ArrayRef< typename DomTreeT::UpdateType > Updates)
Submit updates to all available trees.
Legacy wrapper pass to provide the GlobalsAAResult object.
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:91
CallInst * CreateMulReduce(Value *Src)
Create a vector int mul reduction intrinsic of the source vector.
Definition: IRBuilder.cpp:438
CallInst * CreateFAddReduce(Value *Acc, Value *Src)
Create a sequential vector fadd reduction intrinsic of the source vector.
Definition: IRBuilder.cpp:418
Value * CreateICmpULT(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2277
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2480
UnreachableInst * CreateUnreachable()
Definition: IRBuilder.h:1280
CallInst * CreateAndReduce(Value *Src)
Create a vector int AND reduction intrinsic of the source vector.
Definition: IRBuilder.cpp:442
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, Instruction *FMFSource=nullptr, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Definition: IRBuilder.cpp:933
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Definition: IRBuilder.cpp:1091
CallInst * CreateAddReduce(Value *Src)
Create a vector int add reduction intrinsic of the source vector.
Definition: IRBuilder.cpp:434
Value * CreateFreeze(Value *V, const Twine &Name="")
Definition: IRBuilder.h:2555
CallInst * CreateXorReduce(Value *Src)
Create a vector int XOR reduction intrinsic of the source vector.
Definition: IRBuilder.cpp:450
CallInst * CreateOrReduce(Value *Src)
Create a vector int OR reduction intrinsic of the source vector.
Definition: IRBuilder.cpp:446
CallInst * CreateFPMinReduce(Value *Src)
Create a vector float min reduction intrinsic of the source vector.
Definition: IRBuilder.cpp:470
CallInst * CreateFPMaximumReduce(Value *Src)
Create a vector float maximum reduction intrinsic of the source vector.
Definition: IRBuilder.cpp:474
CallInst * CreateFPMaxReduce(Value *Src)
Create a vector float max reduction intrinsic of the source vector.
Definition: IRBuilder.cpp:466
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition: IRBuilder.h:483
Value * CreateCmp(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2386
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
Definition: IRBuilder.h:1137
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Definition: IRBuilder.h:2514
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1492
CallInst * CreateIntMaxReduce(Value *Src, bool IsSigned=false)
Create a vector integer max reduction intrinsic of the source vector.
Definition: IRBuilder.cpp:454
ConstantInt * getFalse()
Get the constant value for i1 false.
Definition: IRBuilder.h:468
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1514
Value * CreateBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:1683
BranchInst * CreateBr(BasicBlock *Dest)
Create an unconditional 'br label X' instruction.
Definition: IRBuilder.h:1131
Value * CreateICmpSLT(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2293
CallInst * CreateIntMinReduce(Value *Src, bool IsSigned=false)
Create a vector integer min reduction intrinsic of the source vector.
Definition: IRBuilder.cpp:460
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition: IRBuilder.h:177
CallInst * CreateFMulReduce(Value *Acc, Value *Src)
Create a sequential vector fmul reduction intrinsic of the source vector.
Definition: IRBuilder.cpp:426
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1378
CallInst * CreateFPMinimumReduce(Value *Src)
Create a vector float minimum reduction intrinsic of the source vector.
Definition: IRBuilder.cpp:478
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2686
A struct for saving information about induction variables.
static bool isInductionPHI(PHINode *Phi, const Loop *L, ScalarEvolution *SE, InductionDescriptor &D, const SCEV *Expr=nullptr, SmallVectorImpl< Instruction * > *CastsToIgnore=nullptr)
Returns true if Phi is an induction in the loop L.
InstSimplifyFolder - Use InstructionSimplify to fold operations to existing values.
unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:92
bool mayHaveSideEffects() const LLVM_READONLY
Return true if the instruction may have side effects.
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1642
void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Definition: Instruction.cpp:74
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
void addLoop(Loop &L)
Definition: LoopPass.cpp:77
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
BlockT * getHeader() const
std::vector< Loop * >::const_iterator iterator
LoopT * getParentLoop() const
Return the parent loop if it exists or nullptr for top level loops.
void addTopLevelLoop(LoopT *New)
This adds the specified loop to the collection of top-level loops.
iterator end() const
void removeBlock(BlockT *BB)
This method completely removes BB from all data structures, including all of the Loop objects it is n...
LoopT * AllocateLoop(ArgsTy &&...Args)
LoopT * removeLoop(iterator I)
This removes the specified top-level loop from this loop info object.
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
void destroy(LoopT *L)
Destroy a loop that has been removed from the LoopInfo nest.
The legacy pass manager's analysis pass to compute loop information.
Definition: LoopInfo.h:593
bool replacementPreservesLCSSAForm(Instruction *From, Value *To)
Returns true if replacing From with To everywhere is guaranteed to preserve LCSSA form.
Definition: LoopInfo.h:439
void erase(Loop *L)
Update LoopInfo after removing the last backedge from a loop.
Definition: LoopInfo.cpp:887
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:39
void setLoopID(MDNode *LoopID) const
Set the llvm.loop loop id metadata for this loop.
Definition: LoopInfo.cpp:526
MDNode * getLoopID() const
Return the llvm.loop loop id metadata node for this loop if it is present.
Definition: LoopInfo.cpp:502
MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight, bool IsExpected=false)
Return metadata containing two branch weights.
Definition: MDBuilder.cpp:37
Metadata node.
Definition: Metadata.h:1069
void replaceOperandWith(unsigned I, Metadata *New)
Replace a specific operand.
Definition: Metadata.cpp:1077
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1430
ArrayRef< MDOperand > operands() const
Definition: Metadata.h:1428
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1542
unsigned getNumOperands() const
Return number of MDNode operands.
Definition: Metadata.h:1436
LLVMContext & getContext() const
Definition: Metadata.h:1233
Tracking metadata reference owned by Metadata.
Definition: Metadata.h:891
A single uniqued string.
Definition: Metadata.h:720
StringRef getString() const
Definition: Metadata.cpp:616
static MDString * get(LLVMContext &Context, StringRef Str)
Definition: Metadata.cpp:606
Tuple of metadata.
Definition: Metadata.h:1472
BasicBlock * getBlock() const
Definition: MemorySSA.h:161
Representation for a specific memory location.
static MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
Legacy analysis pass which computes MemorySSA.
Definition: MemorySSA.h:981
Encapsulates MemorySSA, including all data associated with memory accesses.
Definition: MemorySSA.h:697
void verifyMemorySSA(VerificationLevel=VerificationLevel::Fast) const
Verify that MemorySSA is self consistent (IE definitions dominate all uses, uses appear in the right ...
Definition: MemorySSA.cpp:1905
MemoryUseOrDef * getMemoryAccess(const Instruction *I) const
Given a memory Mod/Ref'ing instruction, get the MemorySSA access associated with it.
Definition: MemorySSA.h:715
Root of the metadata hierarchy.
Definition: Metadata.h:62
void setIncomingValue(unsigned i, Value *V)
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
PassRegistry - This class manages the registration and intitialization of the pass subsystem as appli...
Definition: PassRegistry.h:37
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition: Constants.cpp:1852
bool insert(const T &X)
Insert a new element into the PriorityWorklist.
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Definition: IVDescriptors.h:70
static bool isAnyOfRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
static bool isMinMaxRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is any min/max kind.
A global registry used in conjunction with static constructors to make pluggable components (like tar...
Definition: Registry.h:44
Legacy wrapper pass to provide the SCEVAAResult object.
This class uses information about analyze scalars to rewrite expressions in canonical form.
ScalarEvolution * getSE()
Value * expandCodeFor(const SCEV *SH, Type *Ty, BasicBlock::iterator I)
Insert code to directly compute the specified SCEV expression into the program.
This class represents an analyzed expression in the program.
Type * getType() const
Return the LLVM type of this SCEV expression.
The main scalar evolution driver.
bool isKnownNonNegative(const SCEV *S)
Test if the given expression is known to be non-negative.
const SCEV * getSCEVAtScope(const SCEV *S, const Loop *L)
Return a SCEV expression for the specified value at the specified scope in the program.
const SCEV * getZero(Type *Ty)
Return a SCEV for the constant 0 of a specific type.
const SCEV * getConstant(ConstantInt *V)
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
void forgetLoop(const Loop *L)
This method should be called by the client when it has changed a loop in a way that may effect Scalar...
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
LoopDisposition getLoopDisposition(const SCEV *S, const Loop *L)
Return the "disposition" of the given SCEV with respect to the given loop.
bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
void forgetValue(Value *V)
This method should be called by the client when it has changed a value in a way that may effect its v...
void forgetBlockAndLoopDispositions(Value *V=nullptr)
Called when the client has changed the disposition of values in a loop or block.
LoopDisposition
An enum describing the relationship between a SCEV and a loop.
@ LoopInvariant
The SCEV is loop-invariant.
bool isAvailableAtLoopEntry(const SCEV *S, const Loop *L)
Determine if the SCEV can be evaluated at loop's entry.
const SCEV * getExitCount(const Loop *L, const BasicBlock *ExitingBlock, ExitCountKind Kind=Exact)
Return the number of times the backedge executes before the given exit would be taken; if not exactly...
const SCEV * applyLoopGuards(const SCEV *Expr, const Loop *L)
Try to apply information from loop guards for L to Expr.
bool isLoopEntryGuardedByCond(const Loop *L, ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS)
Test whether entry to the loop is protected by a conditional between LHS and RHS.
This class represents the LLVM 'select' instruction.
Implements a dense probed hash-table based set with some number of buckets stored inline.
Definition: DenseSet.h:290
A version of PriorityWorklist that selects small size optimized data structures for the vector and ma...
size_type size() const
Definition: SmallPtrSet.h:95
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:367
bool contains(ConstPtrType Ptr) const
Definition: SmallPtrSet.h:441
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:502
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:370
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:950
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:696
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:250
Provides information about what library functions are available for the current target.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Value handle that tracks a Value across RAUW.
Definition: ValueHandle.h:331
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
Definition: Type.h:230
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static IntegerType * getInt32Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:224
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:534
iterator_range< user_iterator > users()
Definition: Value.h:421
bool use_empty() const
Definition: Value.h:344
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1075
iterator_range< use_iterator > uses()
Definition: Value.h:376
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
Value * createSimpleTargetReduction(Intrinsic::ID RdxID, Type *ValTy, ArrayRef< Value * > VecOpArray, const Twine &Name=Twine())
Emit a VP reduction intrinsic call for recurrence kind.
std::pair< iterator, bool > insert(const ValueT &V)
Definition: DenseSet.h:206
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
Definition: ilist_node.h:32
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
std::optional< ElementCount > getOptionalElementCountLoopAttribute(const Loop *TheLoop)
Find a combination of metadata ("llvm.loop.vectorize.width" and "llvm.loop.vectorize....
Definition: LoopUtils.cpp:250
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
SmallVector< DomTreeNode *, 16 > collectChildrenInLoop(DomTreeNode *N, const Loop *CurLoop)
Does a BFS from a given node to all of its children inside a given loop.
Definition: LoopUtils.cpp:450
Value * addRuntimeChecks(Instruction *Loc, Loop *TheLoop, const SmallVectorImpl< RuntimePointerCheck > &PointerChecks, SCEVExpander &Expander, bool HoistRuntimeChecks=false)
Add code that checks at runtime if the accessed arrays in PointerChecks overlap.
Definition: LoopUtils.cpp:1894
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1742
std::optional< unsigned > getLoopEstimatedTripCount(Loop *L, unsigned *EstimatedLoopInvocationWeight=nullptr)
Returns a loop's estimated trip count based on branch weight metadata.
Definition: LoopUtils.cpp:849
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1722
Intrinsic::ID getMinMaxReductionIntrinsicOp(Intrinsic::ID RdxID)
Returns the min/max intrinsic used when expanding a min/max reduction.
Definition: LoopUtils.cpp:988
bool getBooleanLoopAttribute(const Loop *TheLoop, StringRef Name)
Returns true if Name is applied to TheLoop and enabled.
Definition: LoopInfo.cpp:1097
std::pair< const RuntimeCheckingPtrGroup *, const RuntimeCheckingPtrGroup * > RuntimePointerCheck
A memcheck which made up of a pair of grouped pointers.
detail::scope_exit< std::decay_t< Callable > > make_scope_exit(Callable &&F)
Definition: ScopeExit.h:59
bool isKnownNonPositiveInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE)
Returns true if we can prove that S is defined and always non-positive in loop L.
Definition: LoopUtils.cpp:1349
Value * createSimpleTargetReduction(IRBuilderBase &B, Value *Src, RecurKind RdxKind)
Create a target reduction of the given vector.
Definition: LoopUtils.cpp:1210
std::optional< bool > getOptionalBoolLoopAttribute(const Loop *TheLoop, StringRef Name)
Definition: LoopInfo.cpp:1079
void appendReversedLoopsToWorklist(RangeT &&, SmallPriorityWorklist< Loop *, 4 > &)
Utility that implements appending of loops onto a worklist given a range.
Definition: LoopUtils.cpp:1729
auto successors(const MachineBasicBlock *BB)
void initializeLoopPassPass(PassRegistry &)
Manually defined generic "LoopPass" dependency initialization.
Definition: LoopUtils.cpp:189
bool formLCSSARecursively(Loop &L, const DominatorTree &DT, const LoopInfo *LI, ScalarEvolution *SE)
Put a loop nest into LCSSA form.
Definition: LCSSA.cpp:465
std::optional< MDNode * > makeFollowupLoopID(MDNode *OrigLoopID, ArrayRef< StringRef > FollowupAttrs, const char *InheritOptionsAttrsPrefix="", bool AlwaysNew=false)
Create a new loop identifier for a loop created from a loop transformation.
Definition: LoopUtils.cpp:263
unsigned getArithmeticReductionInstruction(Intrinsic::ID RdxID)
Returns the arithmetic instruction opcode used when expanding a reduction.
Definition: LoopUtils.cpp:959
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:656
char & LCSSAID
Definition: LCSSA.cpp:542
char & LoopSimplifyID
Value * createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left, Value *Right)
Returns a Min/Max operation corresponding to MinMaxRecurrenceKind.
Definition: LoopUtils.cpp:1075
void addStringMetadataToLoop(Loop *TheLoop, const char *MDString, unsigned V=0)
Set input string into loop metadata by keeping other values intact.
Definition: LoopUtils.cpp:214
bool cannotBeMaxInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE, bool Signed)
Returns true if S is defined and never is equal to signed/unsigned max.
Definition: LoopUtils.cpp:1367
constexpr T divideNearest(U Numerator, V Denominator)
Returns (Numerator / Denominator) rounded by round-half-up.
Definition: MathExtras.h:467
TransformationMode hasVectorizeTransformation(const Loop *L)
Definition: LoopUtils.cpp:391
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
Definition: STLExtras.h:1935
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
Definition: Local.cpp:400
SmallVector< Instruction *, 8 > findDefsUsedOutsideOfLoop(Loop *L)
Returns the instructions that use values defined in the loop.
Definition: LoopUtils.cpp:123
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:419
constexpr Intrinsic::ID getReductionIntrinsicID(RecurKind RK)
Returns the llvm.vector.reduce intrinsic that corresponds to the recurrence kind.
Definition: LoopUtils.cpp:921
bool isMustProgress(const Loop *L)
Return true if this loop can be assumed to make progress.
Definition: LoopInfo.cpp:1150
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:291
bool isModSet(const ModRefInfo MRI)
Definition: ModRef.h:48
TransformationMode hasUnrollAndJamTransformation(const Loop *L)
Definition: LoopUtils.cpp:373
void deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE, LoopInfo *LI, MemorySSA *MSSA=nullptr)
This function deletes dead loops.
Definition: LoopUtils.cpp:483
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
bool hasDisableAllTransformsHint(const Loop *L)
Look for the loop attribute that disables all transformation heuristic.
Definition: LoopUtils.cpp:344
Value * createOrderedReduction(IRBuilderBase &B, const RecurrenceDescriptor &Desc, Value *Src, Value *Start)
Create an ordered reduction intrinsic using the given recurrence descriptor Desc.
Definition: LoopUtils.cpp:1281
cl::opt< unsigned > SCEVCheapExpansionBudget
Value * getShuffleReduction(IRBuilderBase &Builder, Value *Src, unsigned Op, TargetTransformInfo::ReductionShuffle RS, RecurKind MinMaxKind=RecurKind::None)
Generates a vector reduction using shufflevectors to reduce the value.
Definition: LoopUtils.cpp:1117
TransformationMode hasUnrollTransformation(const Loop *L)
Definition: LoopUtils.cpp:352
TransformationMode hasDistributeTransformation(const Loop *L)
Definition: LoopUtils.cpp:427
void breakLoopBackedge(Loop *L, DominatorTree &DT, ScalarEvolution &SE, LoopInfo &LI, MemorySSA *MSSA)
Remove the backedge of the specified loop.
Definition: LoopUtils.cpp:724
void getLoopAnalysisUsage(AnalysisUsage &AU)
Helper to consistently add the set of standard passes to a loop pass's AnalysisUsage.
Definition: LoopUtils.cpp:141
void propagateIRFlags(Value *I, ArrayRef< Value * > VL, Value *OpValue=nullptr, bool IncludeWrapFlags=true)
Get the intersection (logical and) of all of the potential IR flags of each scalar operation (VL) tha...
Definition: LoopUtils.cpp:1308
bool isKnownPositiveInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE)
Returns true if we can prove that S is defined and always positive in loop L.
Definition: LoopUtils.cpp:1342
unsigned changeToUnreachable(Instruction *I, bool PreserveLCSSA=false, DomTreeUpdater *DTU=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Insert an unreachable instruction before the specified instruction, making it and the rest of the cod...
Definition: Local.cpp:2837
RNSuccIterator< NodeRef, BlockT, RegionT > succ_begin(NodeRef Node)
std::optional< int > getOptionalIntLoopAttribute(const Loop *TheLoop, StringRef Name)
Find named metadata for a loop with an integer value.
Definition: LoopInfo.cpp:1101
BasicBlock * SplitBlockPredecessors(BasicBlock *BB, ArrayRef< BasicBlock * > Preds, const char *Suffix, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method introduces at least one new basic block into the function and moves some of the predecess...
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
CmpInst::Predicate getMinMaxReductionPredicate(RecurKind RK)
Returns the comparison predicate used when expanding a min/max reduction.
Definition: LoopUtils.cpp:1053
TransformationMode hasLICMVersioningTransformation(const Loop *L)
Definition: LoopUtils.cpp:437
bool VerifyMemorySSA
Enables verification of MemorySSA.
Definition: MemorySSA.cpp:84
TransformationMode
The mode sets how eager a transformation should be applied.
Definition: LoopUtils.h:277
@ TM_Unspecified
The pass can use heuristics to determine whether a transformation should be applied.
Definition: LoopUtils.h:280
@ TM_SuppressedByUser
The transformation must not be applied.
Definition: LoopUtils.h:300
@ TM_ForcedByUser
The transformation was directed by the user, e.g.
Definition: LoopUtils.h:294
@ TM_Disable
The transformation should not be applied.
Definition: LoopUtils.h:286
@ TM_Enable
The transformation should be applied without considering a cost model.
Definition: LoopUtils.h:283
RNSuccIterator< NodeRef, BlockT, RegionT > succ_end(NodeRef Node)
bool hasDisableLICMTransformsHint(const Loop *L)
Look for the loop attribute that disables the LICM transformation heuristics.
Definition: LoopUtils.cpp:348
RecurKind
These are the kinds of recurrences that we support.
Definition: IVDescriptors.h:33
bool setLoopEstimatedTripCount(Loop *L, unsigned EstimatedTripCount, unsigned EstimatedLoopInvocationWeight)
Set a loop's branch weight metadata to reflect that loop has EstimatedTripCount iterations and Estima...
Definition: LoopUtils.cpp:867
void setProfileInfoAfterUnrolling(Loop *OrigLoop, Loop *UnrolledLoop, Loop *RemainderLoop, uint64_t UF)
Set weights for UnrolledLoop and RemainderLoop based on weights for OrigLoop and the following distri...
Definition: LoopUtils.cpp:1701
bool formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
Ensure that all exit blocks of the loop are dedicated exits.
Definition: LoopUtils.cpp:57
DWARFExpression::Operation Op
void appendLoopsToWorklist(RangeT &&, SmallPriorityWorklist< Loop *, 4 > &)
Utility that implements appending of loops onto a worklist given a range.
Definition: LoopUtils.cpp:1754
bool isKnownNegativeInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE)
Returns true if we can prove that S is defined and always negative in loop L.
Definition: LoopUtils.cpp:1328
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
bool hasIterationCountInvariantInParent(Loop *L, ScalarEvolution &SE)
Check inner loop (L) backedge count is known to be invariant on all iterations of its outer loop.
Definition: LoopUtils.cpp:899
bool isAlmostDeadIV(PHINode *IV, BasicBlock *LatchBlock, Value *Cond)
Return true if the induction variable IV in a Loop whose latch is LatchBlock would become dead if the...
Definition: LoopUtils.cpp:469
auto predecessors(const MachineBasicBlock *BB)
int rewriteLoopExitValues(Loop *L, LoopInfo *LI, TargetLibraryInfo *TLI, ScalarEvolution *SE, const TargetTransformInfo *TTI, SCEVExpander &Rewriter, DominatorTree *DT, ReplaceExitVal ReplaceExitValue, SmallVector< WeakTrackingVH, 16 > &DeadInsts)
If the final value of any expressions that are recurrent in the loop can be computed,...
Definition: LoopUtils.cpp:1489
iterator_range< typename GraphTraits< GraphType >::ChildIteratorType > children(const typename GraphTraits< GraphType >::NodeRef &G)
Definition: GraphTraits.h:149
Value * addDiffRuntimeChecks(Instruction *Loc, ArrayRef< PointerDiffInfo > Checks, SCEVExpander &Expander, function_ref< Value *(IRBuilderBase &, unsigned)> GetVF, unsigned IC)
Definition: LoopUtils.cpp:1952
RecurKind getMinMaxReductionRecurKind(Intrinsic::ID RdxID)
Returns the recurence kind used when expanding a min/max reduction.
Definition: LoopUtils.cpp:1034
ReplaceExitVal
Definition: LoopUtils.h:468
@ UnusedIndVarInLoop
Definition: LoopUtils.h:472
@ OnlyCheapRepl
Definition: LoopUtils.h:470
@ AlwaysRepl
Definition: LoopUtils.h:473
BasicBlock * SplitEdge(BasicBlock *From, BasicBlock *To, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the edge connecting the specified blocks, and return the newly created basic block between From...
std::optional< IVConditionInfo > hasPartialIVCondition(const Loop &L, unsigned MSSAThreshold, const MemorySSA &MSSA, AAResults &AA)
Check if the loop header has a conditional branch that is not loop-invariant, because it involves loa...
Definition: LoopUtils.cpp:1998
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
bool cannotBeMinInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE, bool Signed)
Returns true if S is defined and never is equal to signed/unsigned min.
Definition: LoopUtils.cpp:1356
bool isKnownNonNegativeInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE)
Returns true if we can prove that S is defined and always non-negative in loop L.
Definition: LoopUtils.cpp:1335
Value * getOrderedReduction(IRBuilderBase &Builder, Value *Acc, Value *Src, unsigned Op, RecurKind MinMaxKind=RecurKind::None)
Generates an ordered vector reduction using extracts to reduce the value.
Definition: LoopUtils.cpp:1092
MDNode * findOptionMDForLoopID(MDNode *LoopID, StringRef Name)
Find and return the loop attribute node for the attribute Name in LoopID.
Definition: LoopInfo.cpp:1029
Value * createTargetReduction(IRBuilderBase &B, const RecurrenceDescriptor &Desc, Value *Src, PHINode *OrigPhi=nullptr)
Create a generic target reduction using a recurrence descriptor Desc The target is queried to determi...
Definition: LoopUtils.cpp:1265
Loop * cloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM, LoopInfo *LI, LPPassManager *LPM)
Recursively clone the specified loop and all of its children, mapping the blocks with the specified m...
Definition: LoopUtils.cpp:1771
Value * createAnyOfTargetReduction(IRBuilderBase &B, Value *Src, const RecurrenceDescriptor &Desc, PHINode *OrigPhi)
Create a target reduction of the given vector Src for a reduction of the kind RecurKind::IAnyOf or Re...
Definition: LoopUtils.cpp:1175
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
IR Values for the lower and upper bounds of a pointer evolution.
Definition: LoopUtils.cpp:1798
TrackingVH< Value > Start
Definition: LoopUtils.cpp:1799
TrackingVH< Value > End
Definition: LoopUtils.cpp:1800
Value * StrideToCheck
Definition: LoopUtils.cpp:1801
unsigned Ith
Definition: LoopUtils.cpp:1410
RewritePhi(PHINode *P, unsigned I, const SCEV *Val, Instruction *ExpansionPt, bool H)
Definition: LoopUtils.cpp:1415
const SCEV * ExpansionSCEV
Definition: LoopUtils.cpp:1411
PHINode * PN
Definition: LoopUtils.cpp:1409
Instruction * ExpansionPoint
Definition: LoopUtils.cpp:1412
Description of the encoding of one expression Op.
Struct to hold information about a partially invariant condition.
Definition: LoopUtils.h:546
Incoming for lane maks phi as machine instruction, incoming register Reg and incoming block Block are...
unsigned AddressSpace
Address space of the involved pointers.
bool NeedsFreeze
Whether the pointer needs to be frozen after expansion, e.g.
const SCEV * High
The SCEV expression which represents the upper bound of all the pointers in this group.
const SCEV * Low
The SCEV expression which represents the lower bound of all the pointers in this group.