LLVM 22.0.0git
LoopUtils.cpp
Go to the documentation of this file.
1//===-- LoopUtils.cpp - Loop Utility functions -------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines common loop utility functions.
10//
11//===----------------------------------------------------------------------===//
12
14#include "llvm/ADT/DenseSet.h"
16#include "llvm/ADT/ScopeExit.h"
17#include "llvm/ADT/SetVector.h"
33#include "llvm/IR/DIBuilder.h"
34#include "llvm/IR/Dominators.h"
37#include "llvm/IR/MDBuilder.h"
38#include "llvm/IR/Module.h"
41#include "llvm/IR/ValueHandle.h"
43#include "llvm/Pass.h"
45#include "llvm/Support/Debug.h"
49
50using namespace llvm;
51using namespace llvm::PatternMatch;
52
53#define DEBUG_TYPE "loop-utils"
54
55static const char *LLVMLoopDisableNonforced = "llvm.loop.disable_nonforced";
56static const char *LLVMLoopDisableLICM = "llvm.licm.disable";
57
// NOTE(review): the opening of this definition was lost in extraction (the
// embedded numbering jumps from 57 to 59); the visible parameters and body
// match llvm::formDedicatedExitBlocks, which rewrites each exit block of a
// loop so that all of its predecessors are inside the loop ("dedicated"
// exits). Returns true if the CFG was changed — TODO confirm against the
// original source.
59 MemorySSAUpdater *MSSAU,
60 bool PreserveLCSSA) {
61 bool Changed = false;
62
63 // We reuse a vector for the in-loop predecessors.
64 SmallVector<BasicBlock *, 4> InLoopPredecessors;
65
// Rewrites one candidate exit block; returns true iff a new dedicated exit
// block was split off.
66 auto RewriteExit = [&](BasicBlock *BB) {
67 assert(InLoopPredecessors.empty() &&
68 "Must start with an empty predecessors list!");
69 auto Cleanup = make_scope_exit([&] { InLoopPredecessors.clear(); });
70
71 // See if there are any non-loop predecessors of this exit block and
72 // keep track of the in-loop predecessors.
73 bool IsDedicatedExit = true;
74 for (auto *PredBB : predecessors(BB))
75 if (L->contains(PredBB)) {
76 if (isa<IndirectBrInst>(PredBB->getTerminator()))
77 // We cannot rewrite exiting edges from an indirectbr.
78 return false;
79
80 InLoopPredecessors.push_back(PredBB);
81 } else {
82 IsDedicatedExit = false;
83 }
84
85 assert(!InLoopPredecessors.empty() && "Must have *some* loop predecessor!");
86
87 // Nothing to do if this is already a dedicated exit.
88 if (IsDedicatedExit)
89 return false;
90
91 auto *NewExitBB = SplitBlockPredecessors(
92 BB, InLoopPredecessors, ".loopexit", DT, LI, MSSAU, PreserveLCSSA);
93
94 if (!NewExitBB)
// NOTE(review): a line is missing from this extraction here (numbering jumps
// 94 -> 96); presumably the LLVM_DEBUG( opener for the message below.
96 dbgs() << "WARNING: Can't create a dedicated exit block for loop: "
97 << *L << "\n");
98 else
99 LLVM_DEBUG(dbgs() << "LoopSimplify: Creating dedicated exit block "
100 << NewExitBB->getName() << "\n");
101 return true;
102 };
103
104 // Walk the exit blocks directly rather than building up a data structure for
105 // them, but only visit each one once.
// NOTE(review): the declaration of the 'Visited' set is missing from this
// extraction (numbering jumps 105 -> 107).
107 for (auto *BB : L->blocks())
108 for (auto *SuccBB : successors(BB)) {
109 // We're looking for exit blocks so skip in-loop successors.
110 if (L->contains(SuccBB))
111 continue;
112
113 // Visit each exit block exactly once.
114 if (!Visited.insert(SuccBB).second)
115 continue;
116
117 Changed |= RewriteExit(SuccBB);
118 }
119
120 return Changed;
121}
122
123/// Returns the instructions that use values defined in the loop.
// NOTE(review): the function signature and the declaration of 'UsedOutside'
// are missing from this extraction (numbering jumps 123 -> 126/127). The
// visible body collects every in-loop instruction that has at least one user
// outside the loop.
126
127 for (auto *Block : L->getBlocks())
128 // FIXME: I believe that this could use copy_if if the Inst reference could
129 // be adapted into a pointer.
130 for (auto &Inst : *Block) {
131 auto Users = Inst.users();
// An instruction escapes the loop if any user lives in a block the loop
// does not contain.
132 if (any_of(Users, [&](User *U) {
133 auto *Use = cast<Instruction>(U);
134 return !L->contains(Use->getParent());
135 }))
136 UsedOutside.push_back(&Inst);
137 }
138
139 return UsedOutside;
140}
141
// NOTE(review): this is the body of llvm::getLoopAnalysisUsage(AnalysisUsage&);
// the signature and all of the AU.addRequired/addPreserved calls were dropped
// by the extraction (numbering gaps 146-149, 155-158, 161-162, 171-177). Only
// the explanatory comments and the two local ID declarations survive.
143 // By definition, all loop passes need the LoopInfo analysis and the
144 // Dominator tree it depends on. Because they all participate in the loop
145 // pass manager, they must also preserve these.
150
151 // We must also preserve LoopSimplify and LCSSA. We locally access their IDs
152 // here because users shouldn't directly get them from this header.
153 extern char &LoopSimplifyID;
154 extern char &LCSSAID;
159 // This is used in the LPPassManager to perform LCSSA verification on passes
160 // which preserve lcssa form
163
164 // Loop passes are designed to run inside of a loop pass manager which means
165 // that any function analyses they require must be required by the first loop
166 // pass in the manager (so that it is computed before the loop pass manager
167 // runs) and preserved by all loop passes in the manager. To make this
168 // reasonably robust, the set needed for most loop passes is maintained here.
169 // If your loop pass requires an analysis not listed here, you will need to
170 // carefully audit the loop pass manager nesting structure that results.
178 // FIXME: When all loop passes preserve MemorySSA, it can be required and
179 // preserved here instead of the individual handling in each pass.
180}
181
182/// Manually defined generic "LoopPass" dependency initialization. This is used
183/// to initialize the exact set of passes from above in \c
184/// getLoopAnalysisUsage. It can be used within a loop pass's initialization
185/// with:
186///
187/// INITIALIZE_PASS_DEPENDENCY(LoopPass)
188///
189/// As-if "LoopPass" were a pass.
202
203/// Create MDNode for input string.
204static MDNode *createStringMetadata(Loop *TheLoop, StringRef Name, unsigned V) {
205 LLVMContext &Context = TheLoop->getHeader()->getContext();
206 Metadata *MDs[] = {
207 MDString::get(Context, Name),
208 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(Context), V))};
209 return MDNode::get(Context, MDs);
210}
211
212/// Set input string into loop metadata by keeping other values intact.
213/// If the string is already in loop metadata update value if it is
214/// different.
215void llvm::addStringMetadataToLoop(Loop *TheLoop, const char *StringMD,
216 unsigned V) {
// NOTE(review): the declaration of the 'MDs' vector (original line 217) is
// missing from this extraction.
218 // If the loop already has metadata, retain it.
219 MDNode *LoopID = TheLoop->getLoopID();
220 if (LoopID) {
// Operand 0 is the self-referencing loop id; copy operands from index 1 on.
221 for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
222 MDNode *Node = cast<MDNode>(LoopID->getOperand(i));
223 // If it is of form key = value, try to parse it.
224 if (Node->getNumOperands() == 2) {
225 MDString *S = dyn_cast<MDString>(Node->getOperand(0));
226 if (S && S->getString() == StringMD) {
// NOTE(review): the initializer extracting IntMD from the node's second
// operand (original line 228) is missing from this extraction.
227 ConstantInt *IntMD =
229 if (IntMD && IntMD->getSExtValue() == V)
230 // It is already in place. Do nothing.
231 return;
232 // We need to update the value, so just skip it here and it will
233 // be added after copying other existing nodes.
234 continue;
235 }
236 }
237 MDs.push_back(Node);
238 }
239 }
240 // Add new metadata.
241 MDs.push_back(createStringMetadata(TheLoop, StringMD, V));
242 // Replace current metadata node with new one.
243 LLVMContext &Context = TheLoop->getHeader()->getContext();
244 MDNode *NewLoopID = MDNode::get(Context, MDs);
245 // Set operand 0 to refer to the loop id itself.
246 NewLoopID->replaceOperandWith(0, NewLoopID);
247 TheLoop->setLoopID(NewLoopID);
248}
249
// Reads "llvm.loop.vectorize.width" (and the companion scalable flag) from
// the loop's metadata and combines them into an ElementCount; returns
// std::nullopt when no width attribute is present.
// NOTE(review): the line carrying the function name (original line 251,
// presumably llvm::getOptionalElementCountLoopAttribute) is missing from
// this extraction.
250std::optional<ElementCount>
252 std::optional<int> Width =
253 getOptionalIntLoopAttribute(TheLoop, "llvm.loop.vectorize.width");
254
255 if (Width) {
// The scalable flag defaults to false (fixed-width) when absent.
256 std::optional<int> IsScalable = getOptionalIntLoopAttribute(
257 TheLoop, "llvm.loop.vectorize.scalable.enable");
258 return ElementCount::get(*Width, IsScalable.value_or(false));
259 }
260
261 return std::nullopt;
262}
263
// Builds the loop-id metadata for a followup loop produced by a transform:
// optionally inherits attributes from the original loop id (minus an excluded
// prefix), then appends the operands of each requested followup option node.
// Returns nullptr for "no metadata", std::nullopt for "let the pass decide",
// or the (possibly reused) loop id otherwise.
264std::optional<MDNode *> llvm::makeFollowupLoopID(
265 MDNode *OrigLoopID, ArrayRef<StringRef> FollowupOptions,
266 const char *InheritOptionsExceptPrefix, bool AlwaysNew) {
267 if (!OrigLoopID) {
268 if (AlwaysNew)
269 return nullptr;
270 return std::nullopt;
271 }
272
// Loop ids are self-referential: operand 0 points back at the node itself.
273 assert(OrigLoopID->getOperand(0) == OrigLoopID);
274
275 bool InheritAllAttrs = !InheritOptionsExceptPrefix;
276 bool InheritSomeAttrs =
277 InheritOptionsExceptPrefix && InheritOptionsExceptPrefix[0] != '\0';
// NOTE(review): the declaration of the 'MDs' vector (original line 278) is
// missing from this extraction. The placeholder for the self-reference is
// pushed first and patched at the end.
279 MDs.push_back(nullptr);
280
281 bool Changed = false;
282 if (InheritAllAttrs || InheritSomeAttrs) {
283 for (const MDOperand &Existing : drop_begin(OrigLoopID->operands())) {
284 MDNode *Op = cast<MDNode>(Existing.get());
285
286 auto InheritThisAttribute = [InheritSomeAttrs,
287 InheritOptionsExceptPrefix](MDNode *Op) {
288 if (!InheritSomeAttrs)
289 return false;
290
291 // Skip malformatted attribute metadata nodes.
292 if (Op->getNumOperands() == 0)
293 return true;
294 Metadata *NameMD = Op->getOperand(0).get();
295 if (!isa<MDString>(NameMD))
296 return true;
297 StringRef AttrName = cast<MDString>(NameMD)->getString();
298
299 // Do not inherit excluded attributes.
300 return !AttrName.starts_with(InheritOptionsExceptPrefix);
301 };
302
303 if (InheritThisAttribute(Op))
304 MDs.push_back(Op);
305 else
306 Changed = true;
307 }
308 } else {
309 // Modified if we dropped at least one attribute.
310 Changed = OrigLoopID->getNumOperands() > 1;
311 }
312
313 bool HasAnyFollowup = false;
314 for (StringRef OptionName : FollowupOptions) {
315 MDNode *FollowupNode = findOptionMDForLoopID(OrigLoopID, OptionName);
316 if (!FollowupNode)
317 continue;
318
319 HasAnyFollowup = true;
// Splice in the followup node's operands (skipping its name operand).
320 for (const MDOperand &Option : drop_begin(FollowupNode->operands())) {
321 MDs.push_back(Option.get());
322 Changed = true;
323 }
324 }
325
326 // Attributes of the followup loop not specified explicitly, so signal to the
327 // transformation pass to add suitable attributes.
328 if (!AlwaysNew && !HasAnyFollowup)
329 return std::nullopt;
330
331 // If no attributes were added or removed, the previous loop Id can be reused.
332 if (!AlwaysNew && !Changed)
333 return OrigLoopID;
334
335 // No attributes is equivalent to having no !llvm.loop metadata at all.
336 if (MDs.size() == 1)
337 return nullptr;
338
339 // Build the new loop ID.
340 MDTuple *FollowupLoopID = MDNode::get(OrigLoopID->getContext(), MDs);
341 FollowupLoopID->replaceOperandWith(0, FollowupLoopID);
342 return FollowupLoopID;
343}
344
348
352
// Body of the unroll transformation-mode query: classifies the loop's
// "llvm.loop.unroll.*" metadata into a TransformationMode.
// NOTE(review): the function signature (original line 353) and two interior
// lines (360: return for the explicit-count case; 368: the
// disable-all-transforms check guarding the TM_Disable return) are missing
// from this extraction.
354 if (getBooleanLoopAttribute(L, "llvm.loop.unroll.disable"))
355 return TM_SuppressedByUser;
356
357 std::optional<int> Count =
358 getOptionalIntLoopAttribute(L, "llvm.loop.unroll.count");
359 if (Count)
361
362 if (getBooleanLoopAttribute(L, "llvm.loop.unroll.enable"))
363 return TM_ForcedByUser;
364
365 if (getBooleanLoopAttribute(L, "llvm.loop.unroll.full"))
366 return TM_ForcedByUser;
367
369 return TM_Disable;
370
371 return TM_Unspecified;
372}
373
// Body of the unroll-and-jam transformation-mode query; mirrors the plain
// unroll query above for the "llvm.loop.unroll_and_jam.*" attributes.
// NOTE(review): the function signature (original line 374) and two interior
// lines (381: return for the explicit-count case; 386: the
// disable-all-transforms check) are missing from this extraction.
375 if (getBooleanLoopAttribute(L, "llvm.loop.unroll_and_jam.disable"))
376 return TM_SuppressedByUser;
377
378 std::optional<int> Count =
379 getOptionalIntLoopAttribute(L, "llvm.loop.unroll_and_jam.count");
380 if (Count)
382
383 if (getBooleanLoopAttribute(L, "llvm.loop.unroll_and_jam.enable"))
384 return TM_ForcedByUser;
385
387 return TM_Disable;
388
389 return TM_Unspecified;
390}
391
// Body of the vectorize transformation-mode query: combines
// "llvm.loop.vectorize.enable", the requested width/interleave count, and the
// "isvectorized" marker into a TransformationMode.
// NOTE(review): the function signature (original line 392), the initializer
// on line 400, and the disable-all-transforms check (line 422) are missing
// from this extraction.
393 std::optional<bool> Enable =
394 getOptionalBoolLoopAttribute(L, "llvm.loop.vectorize.enable");
395
396 if (Enable == false)
397 return TM_SuppressedByUser;
398
399 std::optional<ElementCount> VectorizeWidth =
401 std::optional<int> InterleaveCount =
402 getOptionalIntLoopAttribute(L, "llvm.loop.interleave.count");
403
404 // 'Forcing' vector width and interleave count to one effectively disables
405 // this transformation.
406 if (Enable == true && VectorizeWidth && VectorizeWidth->isScalar() &&
407 InterleaveCount == 1)
408 return TM_SuppressedByUser;
409
// A loop already marked vectorized must not be vectorized again.
410 if (getBooleanLoopAttribute(L, "llvm.loop.isvectorized"))
411 return TM_Disable;
412
413 if (Enable == true)
414 return TM_ForcedByUser;
415
416 if ((VectorizeWidth && VectorizeWidth->isScalar()) && InterleaveCount == 1)
417 return TM_Disable;
418
419 if ((VectorizeWidth && VectorizeWidth->isVector()) || InterleaveCount > 1)
420 return TM_Enable;
421
423 return TM_Disable;
424
425 return TM_Unspecified;
426}
427
// Body of the loop-distribution transformation-mode query.
// NOTE(review): the function signature (original line 428) and the
// disable-all-transforms check (line 432) are missing from this extraction.
429 if (getBooleanLoopAttribute(L, "llvm.loop.distribute.enable"))
430 return TM_ForcedByUser;
431
433 return TM_Disable;
434
435 return TM_Unspecified;
436}
437
// Body of the LICM-versioning transformation-mode query.
// NOTE(review): the function signature (original line 438) and the
// disable-all-transforms check (line 442) are missing from this extraction.
439 if (getBooleanLoopAttribute(L, "llvm.loop.licm_versioning.disable"))
440 return TM_SuppressedByUser;
441
443 return TM_Disable;
444
445 return TM_Unspecified;
446}
447
448/// Does a BFS from a given node to all of its children inside a given loop.
449/// The returned vector of basic blocks includes the starting point.
// NOTE(review): the first line of the signature (original line 450, carrying
// the return type and function name) and the declaration of 'Worklist'
// (line 453) are missing from this extraction.
451 DomTreeNode *N,
452 const Loop *CurLoop) {
453 auto AddRegionToWorklist = [&](DomTreeNode *DTN) {
454 // Only include subregions in the top level loop.
455 BasicBlock *BB = DTN->getBlock();
456 if (CurLoop->contains(BB))
457 Worklist.push_back(DTN->getBlock());
458 };
459
460 AddRegionToWorklist(N);
461
// BFS: Worklist grows while we iterate, so compare against size() each step.
462 for (size_t I = 0; I < Worklist.size(); I++) {
463 for (DomTreeNode *Child : DT->getNode(Worklist[I])->children())
464 AddRegionToWorklist(Child);
465 }
466
467 return Worklist;
468}
470
// NOTE(review): this function's signature (original line 471) was lost in
// extraction. The visible body checks that a PHI node and its latch-incoming
// value are used only by each other and by 'Cond' — i.e. the induction
// variable has no other uses. 'Cond' appears to be a parameter or enclosing
// capture; confirm against the original source.
472 int LatchIdx = PN->getBasicBlockIndex(LatchBlock);
473 assert(LatchIdx != -1 && "LatchBlock is not a case in this PHINode");
474 Value *IncV = PN->getIncomingValue(LatchIdx);
475
// The PHI may only feed the loop condition and its own increment.
476 for (User *U : PN->users())
477 if (U != Cond && U != IncV) return false;
478
// The increment may only feed the loop condition and the PHI.
479 for (User *U : IncV->users())
480 if (U != Cond && U != PN) return false;
481 return true;
482}
483
// Body of llvm::deleteDeadLoop: removes a loop whose computation is known to
// be unused by redirecting the preheader to the (unique) exit block, patching
// exit PHIs, preserving debug records, and erasing the loop's blocks while
// keeping DT, LoopInfo, SCEV, and MemorySSA consistent.
// NOTE(review): the signature line (original line 485) and a few interior
// lines (506, 608, 630, 690 per the embedded numbering gaps) are missing
// from this extraction.
486 LoopInfo *LI, MemorySSA *MSSA) {
487 assert((!DT || L->isLCSSAForm(*DT)) && "Expected LCSSA!");
488 auto *Preheader = L->getLoopPreheader();
489 assert(Preheader && "Preheader should exist!");
490
491 std::unique_ptr<MemorySSAUpdater> MSSAU;
492 if (MSSA)
493 MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
494
495 // Now that we know the removal is safe, remove the loop by changing the
496 // branch from the preheader to go to the single exit block.
497 //
498 // Because we're deleting a large chunk of code at once, the sequence in which
499 // we remove things is very important to avoid invalidation issues.
500
501 // Tell ScalarEvolution that the loop is deleted. Do this before
502 // deleting the loop so that ScalarEvolution can look at the loop
503 // to determine what it needs to clean up.
504 if (SE) {
505 SE->forgetLoop(L);
507 }
508
509 Instruction *OldTerm = Preheader->getTerminator();
510 assert(!OldTerm->mayHaveSideEffects() &&
511 "Preheader must end with a side-effect-free terminator");
512 assert(OldTerm->getNumSuccessors() == 1 &&
513 "Preheader must have a single successor");
514 // Connect the preheader to the exit block. Keep the old edge to the header
515 // around to perform the dominator tree update in two separate steps
516 // -- #1 insertion of the edge preheader -> exit and #2 deletion of the edge
517 // preheader -> header.
518 //
519 //
520 // 0. Preheader 1. Preheader 2. Preheader
521 // | | | |
522 // V | V |
523 // Header <--\ | Header <--\ | Header <--\
524 // | | | | | | | | | | |
525 // | V | | | V | | | V |
526 // | Body --/ | | Body --/ | | Body --/
527 // V V V V V
528 // Exit Exit Exit
529 //
530 // By doing this in two separate steps we can perform the dominator tree
531 // update without using the batch update API.
532 //
533 // Even when the loop is never executed, we cannot remove the edge from the
534 // source block to the exit block. Consider the case where the unexecuted loop
535 // branches back to an outer loop. If we deleted the loop and removed the edge
536 // coming to this inner loop, this will break the outer loop structure (by
537 // deleting the backedge of the outer loop). If the outer loop is indeed a
538 // non-loop, it will be deleted in a future iteration of loop deletion pass.
539 IRBuilder<> Builder(OldTerm);
540
541 auto *ExitBlock = L->getUniqueExitBlock();
542 DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
543 if (ExitBlock) {
544 assert(ExitBlock && "Should have a unique exit block!");
545 assert(L->hasDedicatedExits() && "Loop should have dedicated exits!");
546
// The false-branch keeps the old preheader->header edge alive for step #1.
547 Builder.CreateCondBr(Builder.getFalse(), L->getHeader(), ExitBlock);
548 // Remove the old branch. The conditional branch becomes a new terminator.
549 OldTerm->eraseFromParent();
550
551 // Rewrite phis in the exit block to get their inputs from the Preheader
552 // instead of the exiting block.
553 for (PHINode &P : ExitBlock->phis()) {
554 // Set the zero'th element of Phi to be from the preheader and remove all
555 // other incoming values. Given the loop has dedicated exits, all other
556 // incoming values must be from the exiting blocks.
557 int PredIndex = 0;
558 P.setIncomingBlock(PredIndex, Preheader);
559 // Removes all incoming values from all other exiting blocks (including
560 // duplicate values from an exiting block).
561 // Nuke all entries except the zero'th entry which is the preheader entry.
562 P.removeIncomingValueIf([](unsigned Idx) { return Idx != 0; },
563 /* DeletePHIIfEmpty */ false);
564
565 assert((P.getNumIncomingValues() == 1 &&
566 P.getIncomingBlock(PredIndex) == Preheader) &&
567 "Should have exactly one value and that's from the preheader!");
568 }
569
570 if (DT) {
571 DTU.applyUpdates({{DominatorTree::Insert, Preheader, ExitBlock}});
572 if (MSSA) {
573 MSSAU->applyUpdates({{DominatorTree::Insert, Preheader, ExitBlock}},
574 *DT);
575 if (VerifyMemorySSA)
576 MSSA->verifyMemorySSA();
577 }
578 }
579
580 // Disconnect the loop body by branching directly to its exit.
581 Builder.SetInsertPoint(Preheader->getTerminator());
582 Builder.CreateBr(ExitBlock);
583 // Remove the old branch.
584 Preheader->getTerminator()->eraseFromParent();
585 } else {
586 assert(L->hasNoExitBlocks() &&
587 "Loop should have either zero or one exit blocks.");
588
// With no exit at all, control can never leave: the preheader becomes
// unreachable-terminated.
589 Builder.SetInsertPoint(OldTerm);
590 Builder.CreateUnreachable();
591 Preheader->getTerminator()->eraseFromParent();
592 }
593
594 if (DT) {
595 DTU.applyUpdates({{DominatorTree::Delete, Preheader, L->getHeader()}});
596 if (MSSA) {
597 MSSAU->applyUpdates({{DominatorTree::Delete, Preheader, L->getHeader()}},
598 *DT);
599 SmallSetVector<BasicBlock *, 8> DeadBlockSet(L->block_begin(),
600 L->block_end());
601 MSSAU->removeBlocks(DeadBlockSet);
602 if (VerifyMemorySSA)
603 MSSA->verifyMemorySSA();
604 }
605 }
606
607 // Use a map to unique and a vector to guarantee deterministic ordering.
// NOTE(review): the declaration of 'DeadDebugSet' (original line 608) is
// missing from this extraction.
609 llvm::SmallVector<DbgVariableRecord *, 4> DeadDbgVariableRecords;
610
611 if (ExitBlock) {
612 // Given LCSSA form is satisfied, we should not have users of instructions
613 // within the dead loop outside of the loop. However, LCSSA doesn't take
614 // unreachable uses into account. We handle them here.
615 // We could do it after drop all references (in this case all users in the
616 // loop will be already eliminated and we have less work to do but according
617 // to API doc of User::dropAllReferences only valid operation after dropping
618 // references, is deletion. So let's substitute all usages of
619 // instruction from the loop with poison value of corresponding type first.
620 for (auto *Block : L->blocks())
621 for (Instruction &I : *Block) {
622 auto *Poison = PoisonValue::get(I.getType());
623 for (Use &U : llvm::make_early_inc_range(I.uses())) {
624 if (auto *Usr = dyn_cast<Instruction>(U.getUser()))
625 if (L->contains(Usr->getParent()))
626 continue;
627 // If we have a DT then we can check that uses outside a loop only in
628 // unreachable block.
629 if (DT)
// NOTE(review): the assert condition (original line 630) is missing from
// this extraction; only its message string survives below.
631 "Unexpected user in reachable block");
632 U.set(Poison);
633 }
634
635 // For one of each variable encountered, preserve a debug record (set
636 // to Poison) and transfer it to the loop exit. This terminates any
637 // variable locations that were set during the loop.
638 for (DbgVariableRecord &DVR :
639 llvm::make_early_inc_range(filterDbgVars(I.getDbgRecordRange()))) {
640 DebugVariable Key(DVR.getVariable(), DVR.getExpression(),
641 DVR.getDebugLoc().get());
642 if (!DeadDebugSet.insert(Key).second)
643 continue;
644 // Unlinks the DVR from it's container, for later insertion.
645 DVR.removeFromParent();
646 DeadDbgVariableRecords.push_back(&DVR);
647 }
648 }
649
650 // After the loop has been deleted all the values defined and modified
651 // inside the loop are going to be unavailable. Values computed in the
652 // loop will have been deleted, automatically causing their debug uses
653 // to be replaced with undef. Loop invariant values will still be available.
654 // Move dbg.values out the loop so that earlier location ranges are still
655 // terminated and loop invariant assignments are preserved.
656 DIBuilder DIB(*ExitBlock->getModule());
657 BasicBlock::iterator InsertDbgValueBefore =
658 ExitBlock->getFirstInsertionPt();
659 assert(InsertDbgValueBefore != ExitBlock->end() &&
660 "There should be a non-PHI instruction in exit block, else these "
661 "instructions will have no parent.");
662
663 // Due to the "head" bit in BasicBlock::iterator, we're going to insert
664 // each DbgVariableRecord right at the start of the block, whereas dbg.values
665 // would be repeatedly inserted before the first instruction. To replicate
666 // this behaviour, do it backwards.
667 for (DbgVariableRecord *DVR : llvm::reverse(DeadDbgVariableRecords))
668 ExitBlock->insertDbgRecordBefore(DVR, InsertDbgValueBefore);
669 }
670
671 // Remove the block from the reference counting scheme, so that we can
672 // delete it freely later.
673 for (auto *Block : L->blocks())
674 Block->dropAllReferences();
675
676 if (MSSA && VerifyMemorySSA)
677 MSSA->verifyMemorySSA();
678
679 if (LI) {
680 // Erase the instructions and the blocks without having to worry
681 // about ordering because we already dropped the references.
682 // NOTE: This iteration is safe because erasing the block does not remove
683 // its entry from the loop's block list. We do that in the next section.
684 for (BasicBlock *BB : L->blocks())
685 BB->eraseFromParent();
686
687 // Finally, the blocks from loopinfo. This has to happen late because
688 // otherwise our loop iterators won't work.
689
// NOTE(review): the declaration of 'blocks' (original line 690) is missing
// from this extraction.
691 for (BasicBlock *BB : blocks)
692 LI->removeBlock(BB);
693
694 // The last step is to update LoopInfo now that we've eliminated this loop.
695 // Note: LoopInfo::erase remove the given loop and relink its subloops with
696 // its parent. While removeLoop/removeChildLoop remove the given loop but
697 // not relink its subloops, which is what we want.
698 if (Loop *ParentLoop = L->getParentLoop()) {
699 Loop::iterator I = find(*ParentLoop, L);
700 assert(I != ParentLoop->end() && "Couldn't find loop");
701 ParentLoop->removeChildLoop(I);
702 } else {
703 Loop::iterator I = find(*LI, L);
704 assert(I != LI->end() && "Couldn't find loop");
705 LI->removeLoop(I);
706 }
707 LI->destroy(L);
708 }
709}
710
// Body of llvm::breakLoopBackedge: turns the loop's single latch->header
// backedge into dead code (unconditional branch to the exit, or an
// unreachable), erases the loop from LoopInfo, and restores LCSSA on the
// enclosing outermost loop if needed.
// NOTE(review): the first signature line (original line 711) and one interior
// line (719) are missing from this extraction.
712 LoopInfo &LI, MemorySSA *MSSA) {
713 auto *Latch = L->getLoopLatch();
714 assert(Latch && "multiple latches not yet supported");
715 auto *Header = L->getHeader();
716 Loop *OutermostLoop = L->getOutermostLoop();
717
718 SE.forgetLoop(L);
720
721 std::unique_ptr<MemorySSAUpdater> MSSAU;
722 if (MSSA)
723 MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
724
725 // Update the CFG and domtree. We chose to special case a couple of
726 // of common cases for code quality and test readability reasons.
727 [&]() -> void {
728 if (auto *BI = dyn_cast<BranchInst>(Latch->getTerminator())) {
729 if (!BI->isConditional()) {
// Unconditional latch branch: the whole latch terminator is dead.
730 DomTreeUpdater DTU(&DT, DomTreeUpdater::UpdateStrategy::Eager);
731 (void)changeToUnreachable(BI, /*PreserveLCSSA*/ true, &DTU,
732 MSSAU.get());
733 return;
734 }
735
736 // Conditional latch/exit - note that latch can be shared by inner
737 // and outer loop so the other target doesn't need to be an exit
738 if (L->isLoopExiting(Latch)) {
739 // TODO: Generalize ConstantFoldTerminator so that it can be used
740 // here without invalidating LCSSA or MemorySSA. (Tricky case for
741 // LCSSA: header is an exit block of a preceding sibling loop w/o
742 // dedicated exits.)
743 const unsigned ExitIdx = L->contains(BI->getSuccessor(0)) ? 1 : 0;
744 BasicBlock *ExitBB = BI->getSuccessor(ExitIdx);
745
746 DomTreeUpdater DTU(&DT, DomTreeUpdater::UpdateStrategy::Eager);
747 Header->removePredecessor(Latch, true);
748
// Replace the conditional branch with an unconditional jump to the exit.
749 IRBuilder<> Builder(BI);
750 auto *NewBI = Builder.CreateBr(ExitBB);
751 // Transfer the metadata to the new branch instruction (minus the
752 // loop info since this is no longer a loop)
753 NewBI->copyMetadata(*BI, {LLVMContext::MD_dbg,
754 LLVMContext::MD_annotation});
755
756 BI->eraseFromParent();
757 DTU.applyUpdates({{DominatorTree::Delete, Latch, Header}});
758 if (MSSA)
759 MSSAU->applyUpdates({{DominatorTree::Delete, Latch, Header}}, DT);
760 return;
761 }
762 }
763
764 // General case. By splitting the backedge, and then explicitly making it
765 // unreachable we gracefully handle corner cases such as switch and invoke
766 // terminators.
767 auto *BackedgeBB = SplitEdge(Latch, Header, &DT, &LI, MSSAU.get());
768
769 DomTreeUpdater DTU(&DT, DomTreeUpdater::UpdateStrategy::Eager);
770 (void)changeToUnreachable(BackedgeBB->getTerminator(),
771 /*PreserveLCSSA*/ true, &DTU, MSSAU.get());
772 }();
773
774 // Erase (and destroy) this loop instance. Handles relinking sub-loops
775 // and blocks within the loop as needed.
776 LI.erase(L);
777
778 // If the loop we broke had a parent, then changeToUnreachable might have
779 // caused a block to be removed from the parent loop (see loop_nest_lcssa
780 // test case in zero-btc.ll for an example), thus changing the parent's
781 // exit blocks. If that happened, we need to rebuild LCSSA on the outermost
782 // loop which might have a had a block removed.
783 if (OutermostLoop != L)
784 formLCSSARecursively(*OutermostLoop, DT, &LI, &SE);
785}
786
787
788/// Checks if \p L has an exiting latch branch. There may also be other
789/// exiting blocks. Returns branch instruction terminating the loop
790/// latch if above check is successful, nullptr otherwise.
// NOTE(review): the function signature (original line 791) is missing from
// this extraction.
792 BasicBlock *Latch = L->getLoopLatch();
793 if (!Latch)
794 return nullptr;
795
// Require a two-way conditional branch where one edge exits the loop.
796 BranchInst *LatchBR = dyn_cast<BranchInst>(Latch->getTerminator());
797 if (!LatchBR || LatchBR->getNumSuccessors() != 2 || !L->isLoopExiting(Latch))
798 return nullptr;
799
800 assert((LatchBR->getSuccessor(0) == L->getHeader() ||
801 LatchBR->getSuccessor(1) == L->getHeader()) &&
802 "At least one edge out of the latch must go to the header");
803
804 return LatchBR;
805}
806
/// Lightweight wrapper used to print a loop together with its enclosing
/// function in debug output (consumed by an operator<< defined in this file).
807struct DbgLoop {
808 const Loop *L;
809 explicit DbgLoop(const Loop *L) : L(L) {}
810};
811
812#ifndef NDEBUG
// Prints "function <name> <loop>" for DbgLoop wrappers in LLVM_DEBUG output.
// NOTE(review): the operator's signature (original line 813) is missing from
// this extraction.
814 OS << "function ";
815 D.L->getHeader()->getParent()->printAsOperand(OS, /*PrintType=*/false);
816 return OS << " " << *D.L;
817}
818#endif // NDEBUG
819
820static std::optional<unsigned> estimateLoopTripCount(Loop *L) {
821 // Currently we take the estimate exit count only from the loop latch,
822 // ignoring other exiting blocks. This can overestimate the trip count
823 // if we exit through another exit, but can never underestimate it.
824 // TODO: incorporate information from other exits
825 BranchInst *ExitingBranch = getExpectedExitLoopLatchBranch(L);
826 if (!ExitingBranch) {
827 LLVM_DEBUG(dbgs() << "estimateLoopTripCount: Failed to find exiting "
828 << "latch branch of required form in " << DbgLoop(L)
829 << "\n");
830 return std::nullopt;
831 }
832
833 // To estimate the number of times the loop body was executed, we want to
834 // know the number of times the backedge was taken, vs. the number of times
835 // we exited the loop.
836 uint64_t LoopWeight, ExitWeight;
837 if (!extractBranchWeights(*ExitingBranch, LoopWeight, ExitWeight)) {
838 LLVM_DEBUG(dbgs() << "estimateLoopTripCount: Failed to extract branch "
839 << "weights for " << DbgLoop(L) << "\n");
840 return std::nullopt;
841 }
842
843 if (L->contains(ExitingBranch->getSuccessor(1)))
844 std::swap(LoopWeight, ExitWeight);
845
846 if (!ExitWeight) {
847 // Don't have a way to return predicated infinite
848 LLVM_DEBUG(dbgs() << "estimateLoopTripCount: Failed because of zero exit "
849 << "probability for " << DbgLoop(L) << "\n");
850 return std::nullopt;
851 }
852
853 // Estimated exit count is a ratio of the loop weight by the weight of the
854 // edge exiting the loop, rounded to nearest.
855 uint64_t ExitCount = llvm::divideNearest(LoopWeight, ExitWeight);
856
857 // When ExitCount + 1 would wrap in unsigned, saturate at UINT_MAX.
858 if (ExitCount >= std::numeric_limits<unsigned>::max())
859 return std::numeric_limits<unsigned>::max();
860
861 // Estimated trip count is one plus estimated exit count.
862 uint64_t TC = ExitCount + 1;
863 LLVM_DEBUG(dbgs() << "estimateLoopTripCount: Estimated trip count of " << TC
864 << " for " << DbgLoop(L) << "\n");
865 return TC;
866}
867
// Returns the loop's estimated trip count, preferring the
// llvm.loop.estimated_trip_count metadata and falling back to latch branch
// weights (estimateLoopTripCount). Optionally reports the exit-edge weight
// via *EstimatedLoopInvocationWeight.
// NOTE(review): the middle signature line (original line 869, carrying the
// function name and Loop *L parameter) is missing from this extraction.
868std::optional<unsigned>
870 unsigned *EstimatedLoopInvocationWeight) {
871 // If EstimatedLoopInvocationWeight, we do not support this loop if
872 // getExpectedExitLoopLatchBranch returns nullptr.
873 //
874 // FIXME: Also, this is a stop-gap solution for nested loops. It avoids
875 // mistaking LLVMLoopEstimatedTripCount metadata to be for an outer loop when
876 // it was created for an inner loop. The problem is that loop metadata is
877 // attached to the branch instruction in the loop latch block, but that can be
878 // shared by the loops. A solution is to attach loop metadata to loop headers
879 // instead, but that would be a large change to LLVM.
880 //
881 // Until that happens, we work around the problem as follows.
882 // getExpectedExitLoopLatchBranch (which also guards
883 // setLoopEstimatedTripCount) returns nullptr for a loop unless the loop has
884 // one latch and that latch has exactly two successors one of which is an exit
885 // from the loop. If the latch is shared by nested loops, then that condition
886 // might hold for the inner loop but cannot hold for the outer loop:
887 // - Because the latch is shared, it must have at least two successors: the
888 // inner loop header and the outer loop header, which is also an exit for
889 // the inner loop. That satisfies the condition for the inner loop.
890 // - To satisfy the condition for the outer loop, the latch must have a third
891 // successor that is an exit for the outer loop. But that violates the
892 // condition for both loops.
893 BranchInst *ExitingBranch = getExpectedExitLoopLatchBranch(L);
894 if (!ExitingBranch)
895 return std::nullopt;
896
897 // If requested, either compute *EstimatedLoopInvocationWeight or return
898 // nullopt if cannot.
899 //
900 // TODO: Eventually, once all passes have migrated away from setting branch
901 // weights to indicate estimated trip counts, this function will drop the
902 // EstimatedLoopInvocationWeight parameter.
903 if (EstimatedLoopInvocationWeight) {
904 uint64_t LoopWeight = 0, ExitWeight = 0; // Inits expected to be unused.
905 if (!extractBranchWeights(*ExitingBranch, LoopWeight, ExitWeight))
906 return std::nullopt;
907 if (L->contains(ExitingBranch->getSuccessor(1)))
908 std::swap(LoopWeight, ExitWeight);
909 if (!ExitWeight)
910 return std::nullopt;
911 *EstimatedLoopInvocationWeight = ExitWeight;
912 }
913
914 // Return the estimated trip count from metadata unless the metadata is
915 // missing or has no value.
916 //
917 // Some passes set llvm.loop.estimated_trip_count to 0. For example, after
918 // peeling 10 or more iterations from a loop with an estimated trip count of
919 // 10, llvm.loop.estimated_trip_count becomes 0 on the remaining loop. It
920 // indicates that, each time execution reaches the peeled iterations,
921 // execution is estimated to exit them without reaching the remaining loop's
922 // header.
923 //
924 // Even if the probability of reaching a loop's header is low, if it is
925 // reached, it is the start of an iteration. Consequently, some passes
926 // historically assume that llvm::getLoopEstimatedTripCount always returns a
927 // positive count or std::nullopt. Thus, return std::nullopt when
928 // llvm.loop.estimated_trip_count is 0.
// NOTE(review): the 'if' line reading the metadata into TC (original line
// 929) is missing from this extraction.
930 LLVM_DEBUG(dbgs() << "getLoopEstimatedTripCount: "
931 << LLVMLoopEstimatedTripCount << " metadata has trip "
932 << "count of " << *TC
933 << (*TC == 0 ? " (returning std::nullopt)" : "")
934 << " for " << DbgLoop(L) << "\n");
935 return *TC == 0 ? std::nullopt : std::optional(*TC);
936 }
937
938 // Estimate the trip count from latch branch weights.
939 return estimateLoopTripCount(L);
940}
941
// Body of llvm::setLoopEstimatedTripCount: records EstimatedTripCount on the
// loop (metadata) and, when an invocation weight is supplied, also re-derives
// the latch branch weights to match. Returns false when the loop's latch
// branch is not of the supported form.
// NOTE(review): the first signature line (original line 942), the line
// fetching LatchBranch (950), and the metadata-setting call (955) are
// missing from this extraction.
943 Loop *L, unsigned EstimatedTripCount,
944 std::optional<unsigned> EstimatedloopInvocationWeight) {
945 // If EstimatedLoopInvocationWeight, we do not support this loop if
946 // getExpectedExitLoopLatchBranch returns nullptr.
947 //
948 // FIXME: See comments in getLoopEstimatedTripCount for why this is required
949 // here regardless of EstimatedLoopInvocationWeight.
951 if (!LatchBranch)
952 return false;
953
954 // Set the metadata.
956
957 // At the moment, we currently support changing the estimated trip count in
958 // the latch branch's branch weights only. We could extend this API to
959 // manipulate estimated trip counts for any exit.
960 //
961 // TODO: Eventually, once all passes have migrated away from setting branch
962 // weights to indicate estimated trip counts, we will not set branch weights
963 // here at all.
964 if (!EstimatedloopInvocationWeight)
965 return true;
966
967 // Calculate taken and exit weights.
968 unsigned LatchExitWeight = 0;
969 unsigned BackedgeTakenWeight = 0;
970
971 if (EstimatedTripCount != 0) {
972 LatchExitWeight = *EstimatedloopInvocationWeight;
973 BackedgeTakenWeight = (EstimatedTripCount - 1) * LatchExitWeight;
974 }
975
976 // Make a swap if back edge is taken when condition is "false".
977 if (LatchBranch->getSuccessor(0) != L->getHeader())
978 std::swap(BackedgeTakenWeight, LatchExitWeight);
979
980 // Set/Update profile metadata.
981 setBranchWeights(*LatchBranch, {BackedgeTakenWeight, LatchExitWeight},
982 /*IsExpected=*/false);
983
984 return true;
985}
986
// NOTE(review): the extracted source dropped this function's signature and two
// lines (presumably `BranchInst *LatchBranch = getExpectedExitLoopLatchBranch(L);`
// and an early return for the null case, by analogy with the setter below) —
// restore from the original file before compiling.
 989 if (!LatchBranch)
// Successor 0 being the header means "branch taken" == "stay in the loop".
 991 bool FirstTargetIsLoop = LatchBranch->getSuccessor(0) == L->getHeader();
 992 return getBranchProbability(LatchBranch, FirstTargetIsLoop);
 993}
994
// NOTE(review): the signature and the LatchBranch initialization were dropped
// by the extraction; only the tail survives. The visible code sets the
// backedge-taken probability via the latch branch's weights and fails (false)
// when no latch branch is available.
 997 if (!LatchBranch)
 998 return false;
// Orient the probability toward whichever successor is the loop header.
 999 bool FirstTargetIsLoop = LatchBranch->getSuccessor(0) == L->getHeader();
 1000 return setBranchProbability(LatchBranch, P, FirstTargetIsLoop);
 1001}
1002
1004 bool ForFirstTarget) {
1005 if (B->getNumSuccessors() != 2)
1007 uint64_t Weight0, Weight1;
1008 if (!extractBranchWeights(*B, Weight0, Weight1))
1010 uint64_t Denominator = Weight0 + Weight1;
1011 if (Denominator == 0)
1013 if (!ForFirstTarget)
1014 std::swap(Weight0, Weight1);
1015 return BranchProbability::getBranchProbability(Weight0, Denominator);
1016}
1017
1019 bool ForFirstTarget) {
1020 if (B->getNumSuccessors() != 2)
1021 return false;
1022 BranchProbability Prob0 = P;
1023 BranchProbability Prob1 = P.getCompl();
1024 if (!ForFirstTarget)
1025 std::swap(Prob0, Prob1);
1026 setBranchWeights(*B, {Prob0.getNumerator(), Prob1.getNumerator()},
1027 /*IsExpected=*/false);
1028 return true;
1029}
1030
1032 ScalarEvolution &SE) {
1033 Loop *OuterL = InnerLoop->getParentLoop();
1034 if (!OuterL)
1035 return true;
1036
1037 // Get the backedge taken count for the inner loop
1038 BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
1039 const SCEV *InnerLoopBECountSC = SE.getExitCount(InnerLoop, InnerLoopLatch);
1040 if (isa<SCEVCouldNotCompute>(InnerLoopBECountSC) ||
1041 !InnerLoopBECountSC->getType()->isIntegerTy())
1042 return false;
1043
1044 // Get whether count is invariant to the outer loop
1046 SE.getLoopDisposition(InnerLoopBECountSC, OuterL);
1048 return false;
1049
1050 return true;
1051}
1052
// Map a RecurKind to the matching llvm.vector.reduce.* intrinsic ID.
// NOTE(review): the extraction dropped the signature (line 1053) and several
// hyperlinked case labels (1057, 1088, 1090, 1092, 1094). The orphaned returns
// near the bottom presumably belong to FMaximum / FMinimum / FMaximumNum /
// FMinimumNum — confirm against the original file; the label at 1057 cannot
// be inferred from here.
 1054 switch (RK) {
 1055 default:
 1056 llvm_unreachable("Unexpected recurrence kind");
 1058 case RecurKind::Sub:
 1059 case RecurKind::Add:
 1060 return Intrinsic::vector_reduce_add;
 1061 case RecurKind::Mul:
 1062 return Intrinsic::vector_reduce_mul;
 1063 case RecurKind::And:
 1064 return Intrinsic::vector_reduce_and;
 1065 case RecurKind::Or:
 1066 return Intrinsic::vector_reduce_or;
 1067 case RecurKind::Xor:
 1068 return Intrinsic::vector_reduce_xor;
 1069 case RecurKind::FMulAdd:
 1070 case RecurKind::FAdd:
 1071 return Intrinsic::vector_reduce_fadd;
 1072 case RecurKind::FMul:
 1073 return Intrinsic::vector_reduce_fmul;
 1074 case RecurKind::SMax:
 1075 return Intrinsic::vector_reduce_smax;
 1076 case RecurKind::SMin:
 1077 return Intrinsic::vector_reduce_smin;
 1078 case RecurKind::UMax:
 1079 return Intrinsic::vector_reduce_umax;
 1080 case RecurKind::UMin:
 1081 return Intrinsic::vector_reduce_umin;
 1082 case RecurKind::FMax:
 1083 case RecurKind::FMaxNum:
 1084 return Intrinsic::vector_reduce_fmax;
 1085 case RecurKind::FMin:
 1086 case RecurKind::FMinNum:
 1087 return Intrinsic::vector_reduce_fmin;
 1089 return Intrinsic::vector_reduce_fmaximum;
 1091 return Intrinsic::vector_reduce_fminimum;
 1093 return Intrinsic::vector_reduce_fmax;
 1095 return Intrinsic::vector_reduce_fmin;
 1096 }
 1097}
1098
// Map a scalar min/max intrinsic (umin/umax/smin/smax) to the corresponding
// llvm.vector.reduce.* intrinsic.
// NOTE(review): the function signature (line 1099) was dropped by the
// extraction, so the exact name/return type must be taken from the original
// file; the body shows it takes an Intrinsic::ID parameter named IID.
 1100 switch (IID) {
 1101 default:
 1102 llvm_unreachable("Unexpected intrinsic id");
 1103 case Intrinsic::umin:
 1104 return Intrinsic::vector_reduce_umin;
 1105 case Intrinsic::umax:
 1106 return Intrinsic::vector_reduce_umax;
 1107 case Intrinsic::smin:
 1108 return Intrinsic::vector_reduce_smin;
 1109 case Intrinsic::smax:
 1110 return Intrinsic::vector_reduce_smax;
 1111 }
 1112}
1113
1114// This is the inverse to getReductionForBinop
1116 switch (RdxID) {
1117 case Intrinsic::vector_reduce_fadd:
1118 return Instruction::FAdd;
1119 case Intrinsic::vector_reduce_fmul:
1120 return Instruction::FMul;
1121 case Intrinsic::vector_reduce_add:
1122 return Instruction::Add;
1123 case Intrinsic::vector_reduce_mul:
1124 return Instruction::Mul;
1125 case Intrinsic::vector_reduce_and:
1126 return Instruction::And;
1127 case Intrinsic::vector_reduce_or:
1128 return Instruction::Or;
1129 case Intrinsic::vector_reduce_xor:
1130 return Instruction::Xor;
1131 case Intrinsic::vector_reduce_smax:
1132 case Intrinsic::vector_reduce_smin:
1133 case Intrinsic::vector_reduce_umax:
1134 case Intrinsic::vector_reduce_umin:
1135 return Instruction::ICmp;
1136 case Intrinsic::vector_reduce_fmax:
1137 case Intrinsic::vector_reduce_fmin:
1138 return Instruction::FCmp;
1139 default:
1140 llvm_unreachable("Unexpected ID");
1141 }
1142}
1143
1144// This is the inverse to getArithmeticReductionInstruction
1146 switch (Opc) {
1147 default:
1148 break;
1149 case Instruction::Add:
1150 return Intrinsic::vector_reduce_add;
1151 case Instruction::Mul:
1152 return Intrinsic::vector_reduce_mul;
1153 case Instruction::And:
1154 return Intrinsic::vector_reduce_and;
1155 case Instruction::Or:
1156 return Intrinsic::vector_reduce_or;
1157 case Instruction::Xor:
1158 return Intrinsic::vector_reduce_xor;
1159 }
1161}
1162
1164 switch (RdxID) {
1165 default:
1166 llvm_unreachable("Unknown min/max recurrence kind");
1167 case Intrinsic::vector_reduce_umin:
1168 return Intrinsic::umin;
1169 case Intrinsic::vector_reduce_umax:
1170 return Intrinsic::umax;
1171 case Intrinsic::vector_reduce_smin:
1172 return Intrinsic::smin;
1173 case Intrinsic::vector_reduce_smax:
1174 return Intrinsic::smax;
1175 case Intrinsic::vector_reduce_fmin:
1176 return Intrinsic::minnum;
1177 case Intrinsic::vector_reduce_fmax:
1178 return Intrinsic::maxnum;
1179 case Intrinsic::vector_reduce_fminimum:
1180 return Intrinsic::minimum;
1181 case Intrinsic::vector_reduce_fmaximum:
1182 return Intrinsic::maximum;
1183 }
1184}
1185
1187 switch (RK) {
1188 default:
1189 llvm_unreachable("Unknown min/max recurrence kind");
1190 case RecurKind::UMin:
1191 return Intrinsic::umin;
1192 case RecurKind::UMax:
1193 return Intrinsic::umax;
1194 case RecurKind::SMin:
1195 return Intrinsic::smin;
1196 case RecurKind::SMax:
1197 return Intrinsic::smax;
1198 case RecurKind::FMin:
1199 case RecurKind::FMinNum:
1200 return Intrinsic::minnum;
1201 case RecurKind::FMax:
1202 case RecurKind::FMaxNum:
1203 return Intrinsic::maxnum;
1205 return Intrinsic::minimum;
1207 return Intrinsic::maximum;
1209 return Intrinsic::minimumnum;
1211 return Intrinsic::maximumnum;
1212 }
1213}
1214
1216 switch (RdxID) {
1217 case Intrinsic::vector_reduce_smax:
1218 return RecurKind::SMax;
1219 case Intrinsic::vector_reduce_smin:
1220 return RecurKind::SMin;
1221 case Intrinsic::vector_reduce_umax:
1222 return RecurKind::UMax;
1223 case Intrinsic::vector_reduce_umin:
1224 return RecurKind::UMin;
1225 case Intrinsic::vector_reduce_fmax:
1226 return RecurKind::FMax;
1227 case Intrinsic::vector_reduce_fmin:
1228 return RecurKind::FMin;
1229 default:
1230 return RecurKind::None;
1231 }
1232}
1233
1235 switch (RK) {
1236 default:
1237 llvm_unreachable("Unknown min/max recurrence kind");
1238 case RecurKind::UMin:
1239 return CmpInst::ICMP_ULT;
1240 case RecurKind::UMax:
1241 return CmpInst::ICMP_UGT;
1242 case RecurKind::SMin:
1243 return CmpInst::ICMP_SLT;
1244 case RecurKind::SMax:
1245 return CmpInst::ICMP_SGT;
1246 case RecurKind::FMin:
1247 return CmpInst::FCMP_OLT;
1248 case RecurKind::FMax:
1249 return CmpInst::FCMP_OGT;
1250 // We do not add FMinimum/FMaximum recurrence kind here since there is no
1251 // equivalent predicate which compares signed zeroes according to the
1252 // semantics of the intrinsics (llvm.minimum/maximum).
1253 }
1254}
1255
1257 Value *Right) {
1258 Type *Ty = Left->getType();
1259 if (Ty->isIntOrIntVectorTy() ||
1260 (RK == RecurKind::FMinNum || RK == RecurKind::FMaxNum ||
1264 return Builder.CreateIntrinsic(Ty, Id, {Left, Right}, nullptr,
1265 "rdx.minmax");
1266 }
1268 Value *Cmp = Builder.CreateCmp(Pred, Left, Right, "rdx.minmax.cmp");
1269 Value *Select = Builder.CreateSelect(Cmp, Left, Right, "rdx.minmax.select");
1270 return Select;
1271}
1272
 1273// Helper to generate an ordered reduction.
// NOTE(review): the first signature line (1274) and the first line of the
// min/max assert (1289) were dropped by the extraction — restore from the
// original file. The visible parameters are an accumulator Acc, a fixed-width
// vector Src, a binary opcode Op, and a RecurKind for the cmp/select path.
 1275 unsigned Op, RecurKind RdxKind) {
 1276 unsigned VF = cast<FixedVectorType>(Src->getType())->getNumElements();
 1277
 1278 // Extract and apply reduction ops in ascending order:
 1279 // e.g. ((((Acc + Scl[0]) + Scl[1]) + Scl[2]) + ) ... + Scl[VF-1]
 1280 Value *Result = Acc;
 1281 for (unsigned ExtractIdx = 0; ExtractIdx != VF; ++ExtractIdx) {
 1282 Value *Ext =
 1283 Builder.CreateExtractElement(Src, Builder.getInt32(ExtractIdx));
 1284
 1285 if (Op != Instruction::ICmp && Op != Instruction::FCmp) {
 1286 Result = Builder.CreateBinOp((Instruction::BinaryOps)Op, Result, Ext,
 1287 "bin.rdx");
 1288 } else {
 1290 "Invalid min/max");
 1291 Result = createMinMaxOp(Builder, RdxKind, Result, Ext);
 1292 }
 1293 }
 1294
 1295 return Result;
 1296}
1297
 1298// Helper to generate a log2 shuffle reduction.
// NOTE(review): the extraction dropped the signature lines (1299/1301 — the
// latter presumably a TargetTransformInfo::ReductionShuffle parameter), the
// first line of the lambda's min/max assert (1324), and the branch condition
// at 1331 selecting between the two shuffle strategies below — restore from
// the original file.
 1300 unsigned Op,
 1302 RecurKind RdxKind) {
 1303 unsigned VF = cast<FixedVectorType>(Src->getType())->getNumElements();
 1304 // VF is a power of 2 so we can emit the reduction using log2(VF) shuffles
 1305 // and vector ops, reducing the set of values being computed by half each
 1306 // round.
 1307 assert(isPowerOf2_32(VF) &&
 1308 "Reduction emission only supported for pow2 vectors!");
 1309 // Note: fast-math-flags flags are controlled by the builder configuration
 1310 // and are assumed to apply to all generated arithmetic instructions. Other
 1311 // poison generating flags (nsw/nuw/inbounds/inrange/exact) are not part
 1312 // of the builder configuration, and since they're not passed explicitly,
 1313 // will never be relevant here. Note that it would be generally unsound to
 1314 // propagate these from an intrinsic call to the expansion anyways as we/
 1315 // change the order of operations.
 1316 auto BuildShuffledOp = [&Builder, &Op,
 1317 &RdxKind](SmallVectorImpl<int> &ShuffleMask,
 1318 Value *&TmpVec) -> void {
 1319 Value *Shuf = Builder.CreateShuffleVector(TmpVec, ShuffleMask, "rdx.shuf");
 1320 if (Op != Instruction::ICmp && Op != Instruction::FCmp) {
 1321 TmpVec = Builder.CreateBinOp((Instruction::BinaryOps)Op, TmpVec, Shuf,
 1322 "bin.rdx");
 1323 } else {
 1325 "Invalid min/max");
 1326 TmpVec = createMinMaxOp(Builder, RdxKind, TmpVec, Shuf);
 1327 }
 1328 };
 1329
 1330 Value *TmpVec = Src;
 1332 SmallVector<int, 32> ShuffleMask(VF);
 1333 for (unsigned stride = 1; stride < VF; stride <<= 1) {
 1334 // Initialise the mask with undef.
 1335 llvm::fill(ShuffleMask, -1);
 1336 for (unsigned j = 0; j < VF; j += stride << 1) {
 1337 ShuffleMask[j] = j + stride;
 1338 }
 1339 BuildShuffledOp(ShuffleMask, TmpVec);
 1340 }
 1341 } else {
 1342 SmallVector<int, 32> ShuffleMask(VF);
 1343 for (unsigned i = VF; i != 1; i >>= 1) {
 1344 // Move the upper half of the vector to the lower half.
 1345 for (unsigned j = 0; j != i / 2; ++j)
 1346 ShuffleMask[j] = i / 2 + j;
 1347
 1348 // Fill the rest of the mask with undef.
 1349 std::fill(&ShuffleMask[i / 2], ShuffleMask.end(), -1);
 1350 BuildShuffledOp(ShuffleMask, TmpVec);
 1351 }
 1352 }
 1353 // The result is in the first element of the vector.
 1354 return Builder.CreateExtractElement(TmpVec, Builder.getInt32(0));
 1355}
1356
// Lower an "any-of" reduction: OR-reduce the predicate vector Src, then select
// between the in-loop value and the initial value of the recurrence.
// NOTE(review): the first signature line (1357) was dropped by the extraction;
// the visible parameters are Src, InitVal, and the original scalar PHI.
 1358 Value *InitVal, PHINode *OrigPhi) {
 1359 Value *NewVal = nullptr;
 1360
 1361 // First use the original phi to determine the new value we're trying to
 1362 // select from in the loop.
 1363 SelectInst *SI = nullptr;
 1364 for (auto *U : OrigPhi->users()) {
 1365 if ((SI = dyn_cast<SelectInst>(U)))
 1366 break;
 1367 }
 1368 assert(SI && "One user of the original phi should be a select");
 1369
 1370 if (SI->getTrueValue() == OrigPhi)
 1371 NewVal = SI->getFalseValue();
 1372 else {
 1373 assert(SI->getFalseValue() == OrigPhi &&
 1374 "At least one input to the select should be the original Phi");
 1375 NewVal = SI->getTrueValue();
 1376 }
 1377
 1378 // If any predicate is true it means that we want to select the new value.
 1379 Value *AnyOf =
 1380 Src->getType()->isVectorTy() ? Builder.CreateOrReduce(Src) : Src;
 1381 // The compares in the loop may yield poison, which propagates through the
 1382 // bitwise ORs. Freeze it here before the condition is used.
 1383 AnyOf = Builder.CreateFreeze(AnyOf);
 1384 return Builder.CreateSelect(AnyOf, NewVal, InitVal, "rdx.select");
 1385}
1386
// Lower a FindLastIV/FindFirstIV-style reduction: int min/max-reduce the
// candidate-index vector, then fall back to Start when the reduction still
// holds the sentinel (i.e. no lane matched).
// NOTE(review): the first signature line (1387) was dropped by the extraction.
 1388 RecurKind RdxKind, Value *Start,
 1389 Value *Sentinel) {
 1390 bool IsSigned = RecurrenceDescriptor::isSignedRecurrenceKind(RdxKind);
 1391 bool IsMaxRdx = RecurrenceDescriptor::isFindLastIVRecurrenceKind(RdxKind);
 1392 Value *MaxRdx = Src->getType()->isVectorTy()
 1393 ? (IsMaxRdx ? Builder.CreateIntMaxReduce(Src, IsSigned)
 1394 : Builder.CreateIntMinReduce(Src, IsSigned))
 1395 : Src;
 1396 // Correct the final reduction result back to the start value if the maximum
 1397 // reduction is sentinel value.
 1398 Value *Cmp =
 1399 Builder.CreateCmp(CmpInst::ICMP_NE, MaxRdx, Sentinel, "rdx.select.cmp");
 1400 return Builder.CreateSelect(Cmp, MaxRdx, Start, "rdx.select");
 1401}
1402
// Return the identity constant for a reduction intrinsic over element type Ty,
// honoring the given fast-math flags for the FP cases.
// NOTE(review): the first signature line (1403) and the ScalarID
// initialization at 1425 (presumably via getMinMaxReductionIntrinsicOp(RdxID))
// were dropped by the extraction — restore from the original file.
 1404 FastMathFlags Flags) {
 1405 bool Negative = false;
 1406 switch (RdxID) {
 1407 default:
 1408 llvm_unreachable("Expecting a reduction intrinsic");
 1409 case Intrinsic::vector_reduce_add:
 1410 case Intrinsic::vector_reduce_mul:
 1411 case Intrinsic::vector_reduce_or:
 1412 case Intrinsic::vector_reduce_xor:
 1413 case Intrinsic::vector_reduce_and:
 1414 case Intrinsic::vector_reduce_fadd:
 1415 case Intrinsic::vector_reduce_fmul: {
 1416 unsigned Opc = getArithmeticReductionInstruction(RdxID);
 1417 return ConstantExpr::getBinOpIdentity(Opc, Ty, false,
 1418 Flags.noSignedZeros());
 1419 }
 1420 case Intrinsic::vector_reduce_umax:
 1421 case Intrinsic::vector_reduce_umin:
 1422 case Intrinsic::vector_reduce_smin:
 1423 case Intrinsic::vector_reduce_smax: {
 1425 return ConstantExpr::getIntrinsicIdentity(ScalarID, Ty);
 1426 }
 1427 case Intrinsic::vector_reduce_fmax:
 1428 case Intrinsic::vector_reduce_fmaximum:
 1429 Negative = true;
 1430 [[fallthrough]];
 1431 case Intrinsic::vector_reduce_fmin:
 1432 case Intrinsic::vector_reduce_fminimum: {
 1433 bool PropagatesNaN = RdxID == Intrinsic::vector_reduce_fminimum ||
 1434 RdxID == Intrinsic::vector_reduce_fmaximum;
 1435 const fltSemantics &Semantics = Ty->getFltSemantics();
 1436 return (!Flags.noNaNs() && !PropagatesNaN)
 1437 ? ConstantFP::getQNaN(Ty, Negative)
 1438 : !Flags.noInfs()
 1439 ? ConstantFP::getInfinity(Ty, Negative)
 1440 : ConstantFP::get(Ty, APFloat::getLargest(Semantics, Negative));
 1441 }
 1442 }
 1443}
1444
// Return the identity value for a RecurKind by delegating to
// getReductionIdentity for the matching reduction intrinsic.
// NOTE(review): the signature (1445) and the RdxID initialization at 1449
// (presumably getReductionIntrinsicID(K)) were dropped by the extraction.
 1446 assert((!(K == RecurKind::FMin || K == RecurKind::FMax) ||
 1447 (FMF.noNaNs() && FMF.noSignedZeros())) &&
 1448 "nnan, nsz is expected to be set for FP min/max reduction.");
 1450 return getReductionIdentity(RdxID, Tp, FMF);
 1451}
1452
// Emit an unordered reduction of Src for the given RecurKind using the
// corresponding llvm.vector.reduce.* intrinsic; FAdd/FMul use the identity as
// the scalar start value.
// NOTE(review): the first signature line (1453), the case label at 1461, and
// the case labels 1476-1479 (between FMaxNum and the unary-intrinsic return —
// presumably the FMinimum/FMaximum/FMinimumNum/FMaximumNum kinds) were
// dropped by the extraction — restore from the original file.
 1454 RecurKind RdxKind) {
 1455 auto *SrcVecEltTy = cast<VectorType>(Src->getType())->getElementType();
 1456 auto getIdentity = [&]() {
 1457 return getRecurrenceIdentity(RdxKind, SrcVecEltTy,
 1458 Builder.getFastMathFlags());
 1459 };
 1460 switch (RdxKind) {
 1462 case RecurKind::Sub:
 1463 case RecurKind::Add:
 1464 case RecurKind::Mul:
 1465 case RecurKind::And:
 1466 case RecurKind::Or:
 1467 case RecurKind::Xor:
 1468 case RecurKind::SMax:
 1469 case RecurKind::SMin:
 1470 case RecurKind::UMax:
 1471 case RecurKind::UMin:
 1472 case RecurKind::FMax:
 1473 case RecurKind::FMin:
 1474 case RecurKind::FMinNum:
 1475 case RecurKind::FMaxNum:
 1480 return Builder.CreateUnaryIntrinsic(getReductionIntrinsicID(RdxKind), Src);
 1481 case RecurKind::FMulAdd:
 1482 case RecurKind::FAdd:
 1483 return Builder.CreateFAddReduce(getIdentity(), Src);
 1484 case RecurKind::FMul:
 1485 return Builder.CreateFMulReduce(getIdentity(), Src);
 1486 default:
 1487 llvm_unreachable("Unhandled opcode");
 1488 }
 1489}
1490
// Vector-predicated (VP) variant: emit the reduction as a vp.reduce.*
// intrinsic with mask and explicit vector length, seeding it with the
// recurrence identity.
// NOTE(review): the extraction dropped the first signature line (1491), two
// assert lines (1493-1494, whose message survives at 1495), the Id
// initialization (1496), and the first line of the VPID assert (1498) —
// restore from the original file.
 1492 RecurKind Kind, Value *Mask, Value *EVL) {
 1495 "AnyOf and FindIV reductions are not supported.");
 1497 auto VPID = VPIntrinsic::getForIntrinsic(Id);
 1499 "No VPIntrinsic for this reduction");
 1500 auto *EltTy = cast<VectorType>(Src->getType())->getElementType();
 1501 Value *Iden = getRecurrenceIdentity(Kind, EltTy, Builder.getFastMathFlags());
 1502 Value *Ops[] = {Iden, Src, Mask, EVL};
 1503 return Builder.CreateIntrinsic(EltTy, VPID, Ops);
 1504}
1505
// Emit a strict (ordered) FP reduction: a sequential fadd reduction of Src
// starting from the scalar Start value.
// NOTE(review): the first signature line (1506) was dropped by the extraction.
 1507 Value *Src, Value *Start) {
 1508 assert((Kind == RecurKind::FAdd || Kind == RecurKind::FMulAdd) &&
 1509 "Unexpected reduction kind");
 1510 assert(Src->getType()->isVectorTy() && "Expected a vector type");
 1511 assert(!Start->getType()->isVectorTy() && "Expected a scalar type");
 1512
 1513 return B.CreateFAddReduce(Start, Src);
 1514}
1515
// Vector-predicated variant of the ordered reduction: vp.reduce.fadd with
// mask and explicit vector length, seeded with the scalar Start value.
// NOTE(review): the extraction dropped the first signature line (1516), the Id
// initialization (1524), and the first line of the VPID assert (1526) —
// restore from the original file.
 1517 Value *Src, Value *Start, Value *Mask,
 1518 Value *EVL) {
 1519 assert((Kind == RecurKind::FAdd || Kind == RecurKind::FMulAdd) &&
 1520 "Unexpected reduction kind");
 1521 assert(Src->getType()->isVectorTy() && "Expected a vector type");
 1522 assert(!Start->getType()->isVectorTy() && "Expected a scalar type");
 1523
 1525 auto VPID = VPIntrinsic::getForIntrinsic(Id);
 1527 "No VPIntrinsic for this reduction");
 1528 auto *EltTy = cast<VectorType>(Src->getType())->getElementType();
 1529 Value *Ops[] = {Start, Src, Mask, EVL};
 1530 return Builder.CreateIntrinsic(EltTy, VPID, Ops);
 1531}
1532
// Intersect the IR flags (fast-math, wrap flags, etc.) of the scalar values in
// VL into the vectorized instruction I. When OpValue is non-null, only values
// sharing OpValue's opcode participate in the intersection.
// NOTE(review): the first signature line (1533) was dropped by the extraction.
 1534 bool IncludeWrapFlags) {
 1535 auto *VecOp = dyn_cast<Instruction>(I);
 1536 if (!VecOp)
 1537 return;
 1538 auto *Intersection = (OpValue == nullptr) ? dyn_cast<Instruction>(VL[0])
 1539 : dyn_cast<Instruction>(OpValue);
 1540 if (!Intersection)
 1541 return;
 1542 const unsigned Opcode = Intersection->getOpcode();
 1543 VecOp->copyIRFlags(Intersection, IncludeWrapFlags);
 1544 for (auto *V : VL) {
 1545 auto *Instr = dyn_cast<Instruction>(V);
 1546 if (!Instr)
 1547 continue;
 1548 if (OpValue == nullptr || Opcode == Instr->getOpcode())
 1549 VecOp->andIRFlags(V);
 1550 }
 1551}
1552
1553bool llvm::isKnownNegativeInLoop(const SCEV *S, const Loop *L,
1554 ScalarEvolution &SE) {
1555 const SCEV *Zero = SE.getZero(S->getType());
1556 return SE.isAvailableAtLoopEntry(S, L) &&
1558}
1559
1561 ScalarEvolution &SE) {
1562 const SCEV *Zero = SE.getZero(S->getType());
1563 return SE.isAvailableAtLoopEntry(S, L) &&
1565}
1566
1567bool llvm::isKnownPositiveInLoop(const SCEV *S, const Loop *L,
1568 ScalarEvolution &SE) {
1569 const SCEV *Zero = SE.getZero(S->getType());
1570 return SE.isAvailableAtLoopEntry(S, L) &&
1572}
1573
1575 ScalarEvolution &SE) {
1576 const SCEV *Zero = SE.getZero(S->getType());
1577 return SE.isAvailableAtLoopEntry(S, L) &&
1579}
1580
1582 bool Signed) {
1583 unsigned BitWidth = cast<IntegerType>(S->getType())->getBitWidth();
1586 auto Predicate = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
1587 return SE.isAvailableAtLoopEntry(S, L) &&
1588 SE.isLoopEntryGuardedByCond(L, Predicate, S,
1589 SE.getConstant(Min));
1590}
1591
1593 bool Signed) {
1594 unsigned BitWidth = cast<IntegerType>(S->getType())->getBitWidth();
1597 auto Predicate = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
1598 return SE.isAvailableAtLoopEntry(S, L) &&
1599 SE.isLoopEntryGuardedByCond(L, Predicate, S,
1600 SE.getConstant(Max));
1601}
1602
1603//===----------------------------------------------------------------------===//
1604// rewriteLoopExitValues - Optimize IV users outside the loop.
1605// As a side effect, reduces the amount of IV processing within the loop.
1606//===----------------------------------------------------------------------===//
1607
1608static bool hasHardUserWithinLoop(const Loop *L, const Instruction *I) {
1611 Visited.insert(I);
1612 WorkList.push_back(I);
1613 while (!WorkList.empty()) {
1614 const Instruction *Curr = WorkList.pop_back_val();
1615 // This use is outside the loop, nothing to do.
1616 if (!L->contains(Curr))
1617 continue;
1618 // Do we assume it is a "hard" use which will not be eliminated easily?
1619 if (Curr->mayHaveSideEffects())
1620 return true;
1621 // Otherwise, add all its users to worklist.
1622 for (const auto *U : Curr->users()) {
1623 auto *UI = cast<Instruction>(U);
1624 if (Visited.insert(UI).second)
1625 WorkList.push_back(UI);
1626 }
1627 }
1628 return false;
1629}
1630
1631// Collect information about PHI nodes which can be transformed in
1632// rewriteLoopExitValues.
1634 PHINode *PN; // For which PHI node is this replacement?
1635 unsigned Ith; // For which incoming value?
1636 const SCEV *ExpansionSCEV; // The SCEV of the incoming value we are rewriting.
1637 Instruction *ExpansionPoint; // Where we'd like to expand that SCEV?
1638 bool HighCost; // Is this expansion a high-cost?
1639
1640 RewritePhi(PHINode *P, unsigned I, const SCEV *Val, Instruction *ExpansionPt,
1641 bool H)
1642 : PN(P), Ith(I), ExpansionSCEV(Val), ExpansionPoint(ExpansionPt),
1643 HighCost(H) {}
1644};
1645
1646// Check whether it is possible to delete the loop after rewriting exit
1647// value. If it is possible, ignore ReplaceExitValue and do rewriting
1648// aggressively.
1649static bool canLoopBeDeleted(Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet) {
1650 BasicBlock *Preheader = L->getLoopPreheader();
1651 // If there is no preheader, the loop will not be deleted.
1652 if (!Preheader)
1653 return false;
1654
1655 // In LoopDeletion pass Loop can be deleted when ExitingBlocks.size() > 1.
1656 // We obviate multiple ExitingBlocks case for simplicity.
1657 // TODO: If we see testcase with multiple ExitingBlocks can be deleted
1658 // after exit value rewriting, we can enhance the logic here.
1659 SmallVector<BasicBlock *, 4> ExitingBlocks;
1660 L->getExitingBlocks(ExitingBlocks);
1662 L->getUniqueExitBlocks(ExitBlocks);
1663 if (ExitBlocks.size() != 1 || ExitingBlocks.size() != 1)
1664 return false;
1665
1666 BasicBlock *ExitBlock = ExitBlocks[0];
1667 BasicBlock::iterator BI = ExitBlock->begin();
1668 while (PHINode *P = dyn_cast<PHINode>(BI)) {
1669 Value *Incoming = P->getIncomingValueForBlock(ExitingBlocks[0]);
1670
1671 // If the Incoming value of P is found in RewritePhiSet, we know it
1672 // could be rewritten to use a loop invariant value in transformation
1673 // phase later. Skip it in the loop invariant check below.
1674 bool found = false;
1675 for (const RewritePhi &Phi : RewritePhiSet) {
1676 unsigned i = Phi.Ith;
1677 if (Phi.PN == P && (Phi.PN)->getIncomingValue(i) == Incoming) {
1678 found = true;
1679 break;
1680 }
1681 }
1682
1683 Instruction *I;
1684 if (!found && (I = dyn_cast<Instruction>(Incoming)))
1685 if (!L->hasLoopInvariantOperands(I))
1686 return false;
1687
1688 ++BI;
1689 }
1690
1691 for (auto *BB : L->blocks())
1692 if (llvm::any_of(*BB, [](Instruction &I) {
1693 return I.mayHaveSideEffects();
1694 }))
1695 return false;
1696
1697 return true;
1698}
1699
1700/// Checks if it is safe to call InductionDescriptor::isInductionPHI for \p Phi,
1701/// and returns true if this Phi is an induction phi in the loop. When
1702/// isInductionPHI returns true, \p ID will be also be set by isInductionPHI.
1703static bool checkIsIndPhi(PHINode *Phi, Loop *L, ScalarEvolution *SE,
1705 if (!Phi)
1706 return false;
1707 if (!L->getLoopPreheader())
1708 return false;
1709 if (Phi->getParent() != L->getHeader())
1710 return false;
1711 return InductionDescriptor::isInductionPHI(Phi, L, SE, ID);
1712}
1713
// rewriteLoopExitValues: replace LCSSA exit-phi uses of in-loop values with
// SCEV-expanded loop-invariant exit values, returning the number of phis
// rewritten.
// NOTE(review): the extraction dropped several hyperlinked lines: the
// signature's first line (1714) and its tail (1718-1719), two lines before the
// IndPhi check (1770-1771 — presumably the ReplaceExitValue guard and the
// InductionDescriptor declaration), the BinaryOperator initialization (1790),
// and the cheap-replacement condition head (1873-1874). Restore from the
// original file before compiling.
 1715 ScalarEvolution *SE,
 1716 const TargetTransformInfo *TTI,
 1717 SCEVExpander &Rewriter, DominatorTree *DT,
 1720 // Check a pre-condition.
 1721 assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
 1722 "Indvars did not preserve LCSSA!");
 1723
 1724 SmallVector<BasicBlock*, 8> ExitBlocks;
 1725 L->getUniqueExitBlocks(ExitBlocks);
 1726
 1727 SmallVector<RewritePhi, 8> RewritePhiSet;
 1728 // Find all values that are computed inside the loop, but used outside of it.
 1729 // Because of LCSSA, these values will only occur in LCSSA PHI Nodes. Scan
 1730 // the exit blocks of the loop to find them.
 1731 for (BasicBlock *ExitBB : ExitBlocks) {
 1732 // If there are no PHI nodes in this exit block, then no values defined
 1733 // inside the loop are used on this path, skip it.
 1734 PHINode *PN = dyn_cast<PHINode>(ExitBB->begin());
 1735 if (!PN) continue;
 1736
 1737 unsigned NumPreds = PN->getNumIncomingValues();
 1738
 1739 // Iterate over all of the PHI nodes.
 1740 BasicBlock::iterator BBI = ExitBB->begin();
 1741 while ((PN = dyn_cast<PHINode>(BBI++))) {
 1742 if (PN->use_empty())
 1743 continue; // dead use, don't replace it
 1744
 1745 if (!SE->isSCEVable(PN->getType()))
 1746 continue;
 1747
 1748 // Iterate over all of the values in all the PHI nodes.
 1749 for (unsigned i = 0; i != NumPreds; ++i) {
 1750 // If the value being merged in is not integer or is not defined
 1751 // in the loop, skip it.
 1752 Value *InVal = PN->getIncomingValue(i);
 1753 if (!isa<Instruction>(InVal))
 1754 continue;
 1755
 1756 // If this pred is for a subloop, not L itself, skip it.
 1757 if (LI->getLoopFor(PN->getIncomingBlock(i)) != L)
 1758 continue; // The Block is in a subloop, skip it.
 1759
 1760 // Check that InVal is defined in the loop.
 1761 Instruction *Inst = cast<Instruction>(InVal);
 1762 if (!L->contains(Inst))
 1763 continue;
 1764
 1765 // Find exit values which are induction variables in the loop, and are
 1766 // unused in the loop, with the only use being the exit block PhiNode,
 1767 // and the induction variable update binary operator.
 1768 // The exit value can be replaced with the final value when it is cheap
 1769 // to do so.
 1772 PHINode *IndPhi = dyn_cast<PHINode>(Inst);
 1773 if (IndPhi) {
 1774 if (!checkIsIndPhi(IndPhi, L, SE, ID))
 1775 continue;
 1776 // This is an induction PHI. Check that the only users are PHI
 1777 // nodes, and induction variable update binary operators.
 1778 if (llvm::any_of(Inst->users(), [&](User *U) {
 1779 if (!isa<PHINode>(U) && !isa<BinaryOperator>(U))
 1780 return true;
 1781 BinaryOperator *B = dyn_cast<BinaryOperator>(U);
 1782 if (B && B != ID.getInductionBinOp())
 1783 return true;
 1784 return false;
 1785 }))
 1786 continue;
 1787 } else {
 1788 // If it is not an induction phi, it must be an induction update
 1789 // binary operator with an induction phi user.
 1791 if (!B)
 1792 continue;
 1793 if (llvm::any_of(Inst->users(), [&](User *U) {
 1794 PHINode *Phi = dyn_cast<PHINode>(U);
 1795 if (Phi != PN && !checkIsIndPhi(Phi, L, SE, ID))
 1796 return true;
 1797 return false;
 1798 }))
 1799 continue;
 1800 if (B != ID.getInductionBinOp())
 1801 continue;
 1802 }
 1803 }
 1804
 1805 // Okay, this instruction has a user outside of the current loop
 1806 // and varies predictably *inside* the loop. Evaluate the value it
 1807 // contains when the loop exits, if possible. We prefer to start with
 1808 // expressions which are true for all exits (so as to maximize
 1809 // expression reuse by the SCEVExpander), but resort to per-exit
 1810 // evaluation if that fails.
 1811 const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop());
 1812 if (isa<SCEVCouldNotCompute>(ExitValue) ||
 1813 !SE->isLoopInvariant(ExitValue, L) ||
 1814 !Rewriter.isSafeToExpand(ExitValue)) {
 1815 // TODO: This should probably be sunk into SCEV in some way; maybe a
 1816 // getSCEVForExit(SCEV*, L, ExitingBB)? It can be generalized for
 1817 // most SCEV expressions and other recurrence types (e.g. shift
 1818 // recurrences). Is there existing code we can reuse?
 1819 const SCEV *ExitCount = SE->getExitCount(L, PN->getIncomingBlock(i));
 1820 if (isa<SCEVCouldNotCompute>(ExitCount))
 1821 continue;
 1822 if (auto *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Inst)))
 1823 if (AddRec->getLoop() == L)
 1824 ExitValue = AddRec->evaluateAtIteration(ExitCount, *SE);
 1825 if (isa<SCEVCouldNotCompute>(ExitValue) ||
 1826 !SE->isLoopInvariant(ExitValue, L) ||
 1827 !Rewriter.isSafeToExpand(ExitValue))
 1828 continue;
 1829 }
 1830
 1831 // Computing the value outside of the loop brings no benefit if it is
 1832 // definitely used inside the loop in a way which can not be optimized
 1833 // away. Avoid doing so unless we know we have a value which computes
 1834 // the ExitValue already. TODO: This should be merged into SCEV
 1835 // expander to leverage its knowledge of existing expressions.
 1836 if (ReplaceExitValue != AlwaysRepl && !isa<SCEVConstant>(ExitValue) &&
 1837 !isa<SCEVUnknown>(ExitValue) && hasHardUserWithinLoop(L, Inst))
 1838 continue;
 1839
 1840 // Check if expansions of this SCEV would count as being high cost.
 1841 bool HighCost = Rewriter.isHighCostExpansion(
 1842 ExitValue, L, SCEVCheapExpansionBudget, TTI, Inst);
 1843
 1844 // Note that we must not perform expansions until after
 1845 // we query *all* the costs, because if we perform temporary expansion
 1846 // inbetween, one that we might not intend to keep, said expansion
 1847 // *may* affect cost calculation of the next SCEV's we'll query,
 1848 // and next SCEV may errneously get smaller cost.
 1849
 1850 // Collect all the candidate PHINodes to be rewritten.
 1851 Instruction *InsertPt =
 1852 (isa<PHINode>(Inst) || isa<LandingPadInst>(Inst)) ?
 1853 &*Inst->getParent()->getFirstInsertionPt() : Inst;
 1854 RewritePhiSet.emplace_back(PN, i, ExitValue, InsertPt, HighCost);
 1855 }
 1856 }
 1857 }
 1858
 1859 // TODO: evaluate whether it is beneficial to change how we calculate
 1860 // high-cost: if we have SCEV 'A' which we know we will expand, should we
 1861 // calculate the cost of other SCEV's after expanding SCEV 'A', thus
 1862 // potentially giving cost bonus to those other SCEV's?
 1863
 1864 bool LoopCanBeDel = canLoopBeDeleted(L, RewritePhiSet);
 1865 int NumReplaced = 0;
 1866
 1867 // Transformation.
 1868 for (const RewritePhi &Phi : RewritePhiSet) {
 1869 PHINode *PN = Phi.PN;
 1870
 1871 // Only do the rewrite when the ExitValue can be expanded cheaply.
 1872 // If LoopCanBeDel is true, rewrite exit value aggressively.
 1875 !LoopCanBeDel && Phi.HighCost)
 1876 continue;
 1877
 1878 Value *ExitVal = Rewriter.expandCodeFor(
 1879 Phi.ExpansionSCEV, Phi.PN->getType(), Phi.ExpansionPoint);
 1880
 1881 LLVM_DEBUG(dbgs() << "rewriteLoopExitValues: AfterLoopVal = " << *ExitVal
 1882 << '\n'
 1883 << " LoopVal = " << *(Phi.ExpansionPoint) << "\n");
 1884
 1885#ifndef NDEBUG
 1886 // If we reuse an instruction from a loop which is neither L nor one of
 1887 // its containing loops, we end up breaking LCSSA form for this loop by
 1888 // creating a new use of its instruction.
 1889 if (auto *ExitInsn = dyn_cast<Instruction>(ExitVal))
 1890 if (auto *EVL = LI->getLoopFor(ExitInsn->getParent()))
 1891 if (EVL != L)
 1892 assert(EVL->contains(L) && "LCSSA breach detected!");
 1893#endif
 1894
 1895 NumReplaced++;
 1896 Instruction *Inst = cast<Instruction>(PN->getIncomingValue(Phi.Ith));
 1897 PN->setIncomingValue(Phi.Ith, ExitVal);
 1898 // It's necessary to tell ScalarEvolution about this explicitly so that
 1899 // it can walk the def-use list and forget all SCEVs, as it may not be
 1900 // watching the PHI itself. Once the new exit value is in place, there
 1901 // may not be a def-use connection between the loop and every instruction
 1902 // which got a SCEVAddRecExpr for that loop.
 1903 SE->forgetValue(PN);
 1904
 1905 // If this instruction is dead now, delete it. Don't do it now to avoid
 1906 // invalidating iterators.
 1907 if (isInstructionTriviallyDead(Inst, TLI))
 1908 DeadInsts.push_back(Inst);
 1909
 1910 // Replace PN with ExitVal if that is legal and does not break LCSSA.
 1911 if (PN->getNumIncomingValues() == 1 &&
 1912 LI->replacementPreservesLCSSAForm(PN, ExitVal)) {
 1913 PN->replaceAllUsesWith(ExitVal);
 1914 PN->eraseFromParent();
 1915 }
 1916 }
 1917
 1918 // The insertion point instruction may have been deleted; clear it out
 1919 // so that the rewriter doesn't trip over it later.
 1920 Rewriter.clearInsertPoint();
 1921 return NumReplaced;
 1922}
1923
// NOTE(review): this text is a doxygen rendering of LoopUtils.cpp; each line
// starts with the original file's line number and the dump dropped some lines.
 1924/// Utility that implements appending of loops onto a worklist.
 1925/// Loops are added in preorder (analogous for reverse postorder for trees),
 1926/// and the worklist is processed LIFO.
 1927template <typename RangeT>
// NOTE(review): original line 1928 (the appendLoopsToWorklist signature head)
// is missing from this dump; only the trailing parameter line survives.
 1929    RangeT &&Loops, SmallPriorityWorklist<Loop *, 4> &Worklist) {
 1930  // We use an internal worklist to build up the preorder traversal without
 1931  // recursion.
 1932  SmallVector<Loop *, 4> PreOrderLoops, PreOrderWorklist;
 1933
 1934  // We walk the initial sequence of loops in reverse because we generally want
 1935  // to visit defs before uses and the worklist is LIFO.
 1936  for (Loop *RootL : Loops) {
 1937    assert(PreOrderLoops.empty() && "Must start with an empty preorder walk.");
 1938    assert(PreOrderWorklist.empty() &&
 1939           "Must start with an empty preorder walk worklist.");
// Iterative preorder walk: pop a loop, append its subloops for later
// processing, and record the loop itself.
 1940    PreOrderWorklist.push_back(RootL);
 1941    do {
 1942      Loop *L = PreOrderWorklist.pop_back_val();
 1943      PreOrderWorklist.append(L->begin(), L->end());
 1944      PreOrderLoops.push_back(L);
 1945    } while (!PreOrderWorklist.empty());
 1946
// Flush this root's preorder batch into the caller's worklist; clear() keeps
// the scratch vector's capacity for the next root.
 1947    Worklist.insert(std::move(PreOrderLoops));
 1948    PreOrderLoops.clear();
 1949  }
 1950}
1951
1952template <typename RangeT>
1956}
1957
1958template LLVM_EXPORT_TEMPLATE void
1961
1962template LLVM_EXPORT_TEMPLATE void
1965
// Clone the loop nest rooted at L into a fresh Loop object, mapping blocks
// through VM. Recurses into subloops; returns the new loop.
// NOTE(review): original line 1971 (the head of the cloneLoop signature,
// presumably `Loop *llvm::cloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM,`
// — TODO confirm against the real source) is missing from this dump.
 1970
 1972 LoopInfo *LI, LPPassManager *LPM) {
 1973 Loop &New = *LI->AllocateLoop();
// Register the clone under its parent loop, or as a top-level loop when PL is
// null; optionally queue it with the legacy loop pass manager.
 1974 if (PL)
 1975 PL->addChildLoop(&New);
 1976 else
 1977 LI->addTopLevelLoop(&New);
 1978
 1979 if (LPM)
 1980 LPM->addLoop(New);
 1981
 1982 // Add all of the blocks in L to the new loop.
// Only blocks owned directly by L are added here; blocks belonging to
// subloops are handled by the recursive cloneLoop calls below.
 1983 for (BasicBlock *BB : L->blocks())
 1984 if (LI->getLoopFor(BB) == L)
 1985 New.addBasicBlockToLoop(cast<BasicBlock>(VM[BB]), *LI);
 1986
 1987 // Add all of the subloops to the new loop.
 1988 for (Loop *I : *L)
 1989 cloneLoop(I, &New, VM, LI, LPM);
 1990
 1991 return &New;
 1992}
1993
1994/// IR Values for the lower and upper bounds of a pointer evolution. We
1995/// need to use value-handles because SCEV expansion can invalidate previously
1996/// expanded values. Thus expansion of a pointer can invalidate the bounds for
1997/// a previous one.
2003
 2004/// Expand code for the lower and upper bound of the pointer group \p CG
 2005/// in \p TheLoop. \return the values for the bounds.
// NOTE(review): original line 2006 (the expandBounds signature head, per the
// index below: `static PointerBounds expandBounds(const RuntimeCheckingPtrGroup
// *CG,` — TODO confirm) is missing from this dump.
 2007 Loop *TheLoop, Instruction *Loc,
 2008 SCEVExpander &Exp, bool HoistRuntimeChecks) {
 2009 LLVMContext &Ctx = Loc->getContext();
 2010 Type *PtrArithTy = PointerType::get(Ctx, CG->AddressSpace);
 2011
 2012 Value *Start = nullptr, *End = nullptr;
 2013 LLVM_DEBUG(dbgs() << "LAA: Adding RT check for range:\n");
 2014 const SCEV *Low = CG->Low, *High = CG->High, *Stride = nullptr;
 2015
 2016 // If the Low and High values are themselves loop-variant, then we may want
 2017 // to expand the range to include those covered by the outer loop as well.
 2018 // There is a trade-off here with the advantage being that creating checks
 2019 // using the expanded range permits the runtime memory checks to be hoisted
 2020 // out of the outer loop. This reduces the cost of entering the inner loop,
 2021 // which can be significant for low trip counts. The disadvantage is that
 2022 // there is a chance we may now never enter the vectorized inner loop,
 2023 // whereas using a restricted range check could have allowed us to enter at
 2024 // least once. This is why the behaviour is not currently the default and is
 2025 // controlled by the parameter 'HoistRuntimeChecks'.
 2026 if (HoistRuntimeChecks && TheLoop->getParentLoop() &&
// NOTE(review): original line 2027 (the tail of this condition; presumably the
// isa<SCEVAddRecExpr> checks that justify the casts below — TODO confirm) is
// missing from this dump.
 2028 auto *HighAR = cast<SCEVAddRecExpr>(High);
 2029 auto *LowAR = cast<SCEVAddRecExpr>(Low);
 2030 const Loop *OuterLoop = TheLoop->getParentLoop();
 2031 ScalarEvolution &SE = *Exp.getSE();
 2032 const SCEV *Recur = LowAR->getStepRecurrence(SE);
// Hoisting is only attempted when both bounds recur over the outer loop with
// the same step, and the outer loop's latch exit count is computable.
 2033 if (Recur == HighAR->getStepRecurrence(SE) &&
 2034 HighAR->getLoop() == OuterLoop && LowAR->getLoop() == OuterLoop) {
 2035 BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
 2036 const SCEV *OuterExitCount = SE.getExitCount(OuterLoop, OuterLoopLatch);
 2037 if (!isa<SCEVCouldNotCompute>(OuterExitCount) &&
 2038 OuterExitCount->getType()->isIntegerTy()) {
// Widen High to its value on the outer loop's final iteration, and drop Low
// to the recurrence start, so one check covers every outer iteration.
 2039 const SCEV *NewHigh =
 2040 cast<SCEVAddRecExpr>(High)->evaluateAtIteration(OuterExitCount, SE);
 2041 if (!isa<SCEVCouldNotCompute>(NewHigh)) {
 2042 LLVM_DEBUG(dbgs() << "LAA: Expanded RT check for range to include "
 2043 "outer loop in order to permit hoisting\n");
 2044 High = NewHigh;
 2045 Low = cast<SCEVAddRecExpr>(Low)->getStart();
 2046 // If there is a possibility that the stride is negative then we have
 2047 // to generate extra checks to ensure the stride is positive.
 2048 if (!SE.isKnownNonNegative(
 2049 SE.applyLoopGuards(Recur, HighAR->getLoop()))) {
 2050 Stride = Recur;
 2051 LLVM_DEBUG(dbgs() << "LAA: ... but need to check stride is "
 2052 "positive: "
 2053 << *Stride << '\n');
 2054 }
 2055 }
 2056 }
 2057 }
 2058 }
 2059
// Materialize the (possibly widened) bounds as pointer-typed IR at Loc;
// freeze them when the group may contain poison/undef.
 2060 Start = Exp.expandCodeFor(Low, PtrArithTy, Loc);
 2061 End = Exp.expandCodeFor(High, PtrArithTy, Loc);
 2062 if (CG->NeedsFreeze) {
 2063 IRBuilder<> Builder(Loc);
 2064 Start = Builder.CreateFreeze(Start, Start->getName() + ".fr");
 2065 End = Builder.CreateFreeze(End, End->getName() + ".fr");
 2066 }
 2067 Value *StrideVal =
 2068 Stride ? Exp.expandCodeFor(Stride, Stride->getType(), Loc) : nullptr;
 2069 LLVM_DEBUG(dbgs() << "Start: " << *Low << " End: " << *High << "\n");
 2070 return {Start, End, StrideVal};
 2071}
2072
 2073/// Turns a collection of checks into a collection of expanded upper and
 2074/// lower bounds for both pointers in the check.
// NOTE(review): original lines 2075-2078 (this overload's signature and,
// presumably, the declaration of the ChecksWithBounds result vector — TODO
// confirm) are missing from this dump, as are 2085/2087 (the trailing
// HoistRuntimeChecks arguments of the two expandBounds calls).
 2079
 2080 // Here we're relying on the SCEV Expander's cache to only emit code for the
 2081 // same bounds once.
// Expand each pointer-pair check into a pair of PointerBounds via the
// single-group expandBounds above.
 2082 transform(PointerChecks, std::back_inserter(ChecksWithBounds),
 2083 [&](const RuntimePointerCheck &Check) {
 2084 PointerBounds First = expandBounds(Check.first, L, Loc, Exp,
 2086 Second = expandBounds(Check.second, L, Loc, Exp,
 2088 return std::make_pair(First, Second);
 2089 });
 2090
 2091 return ChecksWithBounds;
 2092}
// Emit IR that evaluates all pointer-overlap runtime checks at Loc and
// returns a single i1 that is true when any pair of ranges conflicts
// (nullptr when everything folded away).
// NOTE(review): original line 2094 (the addRuntimeChecks signature head,
// presumably `Value *llvm::addRuntimeChecks(` — TODO confirm) is missing
// from this dump.
 2093
 2095 Instruction *Loc, Loop *TheLoop,
 2096 const SmallVectorImpl<RuntimePointerCheck> &PointerChecks,
 2097 SCEVExpander &Exp, bool HoistRuntimeChecks) {
 2098 // TODO: Move noalias annotation code from LoopVersioning here and share with LV if possible.
 2099 // TODO: Pass RtPtrChecking instead of PointerChecks and SE separately, if possible
 2100 auto ExpandedChecks =
 2101 expandBounds(PointerChecks, TheLoop, Loc, Exp, HoistRuntimeChecks);
 2102
 2103 LLVMContext &Ctx = Loc->getContext();
// InstSimplifyFolder lets trivially-foldable checks collapse to constants as
// they are built.
 2104 IRBuilder ChkBuilder(Ctx, InstSimplifyFolder(Loc->getDataLayout()));
 2105 ChkBuilder.SetInsertPoint(Loc);
 2106 // Our instructions might fold to a constant.
 2107 Value *MemoryRuntimeCheck = nullptr;
 2108
 2109 for (const auto &[A, B] : ExpandedChecks) {
 2110 // Check if two pointers (A and B) conflict where conflict is computed as:
 2111 // start(A) <= end(B) && start(B) <= end(A)
 2112
 2113 assert((A.Start->getType()->getPointerAddressSpace() ==
 2114 B.End->getType()->getPointerAddressSpace()) &&
 2115 (B.Start->getType()->getPointerAddressSpace() ==
 2116 A.End->getType()->getPointerAddressSpace()) &&
 2117 "Trying to bounds check pointers with different address spaces");
 2118
 2119 // [A|B].Start points to the first accessed byte under base [A|B].
 2120 // [A|B].End points to the last accessed byte, plus one.
 2121 // There is no conflict when the intervals are disjoint:
 2122 // NoConflict = (B.Start >= A.End) || (A.Start >= B.End)
 2123 //
 2124 // bound0 = (B.Start < A.End)
 2125 // bound1 = (A.Start < B.End)
 2126 // IsConflict = bound0 & bound1
// NOTE(review): the labels above are swapped relative to the IR names below
// ("bound0" actually computes A.Start < B.End); the conjunction is symmetric,
// so the emitted check is equivalent.
 2127 Value *Cmp0 = ChkBuilder.CreateICmpULT(A.Start, B.End, "bound0");
 2128 Value *Cmp1 = ChkBuilder.CreateICmpULT(B.Start, A.End, "bound1");
 2129 Value *IsConflict = ChkBuilder.CreateAnd(Cmp0, Cmp1, "found.conflict");
// The interval overlap test above assumes a non-negative stride; when a
// group carries StrideToCheck, also fail the check on a negative stride.
 2130 if (A.StrideToCheck) {
 2131 Value *IsNegativeStride = ChkBuilder.CreateICmpSLT(
 2132 A.StrideToCheck, ConstantInt::get(A.StrideToCheck->getType(), 0),
 2133 "stride.check");
 2134 IsConflict = ChkBuilder.CreateOr(IsConflict, IsNegativeStride);
 2135 }
 2136 if (B.StrideToCheck) {
 2137 Value *IsNegativeStride = ChkBuilder.CreateICmpSLT(
 2138 B.StrideToCheck, ConstantInt::get(B.StrideToCheck->getType(), 0),
 2139 "stride.check");
 2140 IsConflict = ChkBuilder.CreateOr(IsConflict, IsNegativeStride);
 2141 }
// Reduce all per-pair results into one conflict flag.
 2142 if (MemoryRuntimeCheck) {
 2143 IsConflict =
 2144 ChkBuilder.CreateOr(MemoryRuntimeCheck, IsConflict, "conflict.rdx");
 2145 }
 2146 MemoryRuntimeCheck = IsConflict;
 2147 }
 2148
 2149 Exp.eraseDeadInstructions(MemoryRuntimeCheck);
 2150 return MemoryRuntimeCheck;
 2151}
// Emit the cheaper "difference" form of the runtime checks: for each
// src/sink pair, conflict iff (SinkStart - SrcStart) < VF * IC * AccessSize.
// Returns a single i1 OR-reduction, or nullptr if nothing was emitted.
// NOTE(review): original lines 2153-2154 (the addDiffRuntimeChecks signature
// head with the Loc, Checks and Expander parameters — TODO confirm) are
// missing from this dump.
 2152
 2155 function_ref<Value *(IRBuilderBase &, unsigned)> GetVF, unsigned IC) {
 2156
 2157 LLVMContext &Ctx = Loc->getContext();
 2158 IRBuilder ChkBuilder(Ctx, InstSimplifyFolder(Loc->getDataLayout()));
 2159 ChkBuilder.SetInsertPoint(Loc);
 2160 // Our instructions might fold to a constant.
 2161 Value *MemoryRuntimeCheck = nullptr;
 2162
 2163 auto &SE = *Expander.getSE();
 2164 // Map to keep track of created compares. The key is the pair of operands for
 2165 // the compare, to allow detecting and re-using redundant compares.
// NOTE(review): original line 2166 (the declaration of SeenCompares, used
// below — presumably a DenseMap keyed on the operand pair, TODO confirm) is
// missing from this dump.
 2167 for (const auto &[SrcStart, SinkStart, AccessSize, NeedsFreeze] : Checks) {
 2168 Type *Ty = SinkStart->getType();
 2169 // Compute VF * IC * AccessSize.
 2170 auto *VFTimesICTimesSize =
 2171 ChkBuilder.CreateMul(GetVF(ChkBuilder, Ty->getScalarSizeInBits()),
 2172 ConstantInt::get(Ty, IC * AccessSize));
 2173 Value *Diff =
 2174 Expander.expandCodeFor(SE.getMinusSCEV(SinkStart, SrcStart), Ty, Loc);
 2175
 2176 // Check if the same compare has already been created earlier. In that case,
 2177 // there is no need to check it again.
 2178 Value *IsConflict = SeenCompares.lookup({Diff, VFTimesICTimesSize});
 2179 if (IsConflict)
 2180 continue;
 2181
 2182 IsConflict =
 2183 ChkBuilder.CreateICmpULT(Diff, VFTimesICTimesSize, "diff.check");
 2184 SeenCompares.insert({{Diff, VFTimesICTimesSize}, IsConflict});
 2185 if (NeedsFreeze)
 2186 IsConflict =
 2187 ChkBuilder.CreateFreeze(IsConflict, IsConflict->getName() + ".fr");
// OR-reduce the per-pair results into one conflict flag.
 2188 if (MemoryRuntimeCheck) {
 2189 IsConflict =
 2190 ChkBuilder.CreateOr(MemoryRuntimeCheck, IsConflict, "conflict.rdx");
 2191 }
 2192 MemoryRuntimeCheck = IsConflict;
 2193 }
 2194
 2195 Expander.eraseDeadInstructions(MemoryRuntimeCheck);
 2196 return MemoryRuntimeCheck;
 2197}
2198
// Decide whether the loop-header branch condition can be partially
// unswitched: the condition (a Cmp/Trunc fed only by duplicatable
// instructions and non-clobbered loads) is invariant along one in-loop path
// back to the header. On success returns the instructions to duplicate, the
// known condition value for that path, and whether the path is a no-op.
 2199std::optional<IVConditionInfo>
// NOTE(review): original line 2200 (the hasPartialIVCondition signature head,
// presumably `llvm::hasPartialIVCondition(const Loop &L, unsigned
// MSSAThreshold,` — TODO confirm) is missing from this dump.
 2201 const MemorySSA &MSSA, AAResults &AA) {
 2202 auto *TI = dyn_cast<BranchInst>(L.getHeader()->getTerminator());
 2203 if (!TI || !TI->isConditional())
 2204 return {};
 2205
 2206 auto *CondI = dyn_cast<Instruction>(TI->getCondition());
 2207 // The case with the condition outside the loop should already be handled
 2208 // earlier.
 2209 // Allow CmpInst and TruncInsts as they may be users of load instructions
 2210 // and have potential for partial unswitching
 2211 if (!CondI || !isa<CmpInst, TruncInst>(CondI) || !L.contains(CondI))
 2212 return {};
 2213
 2214 SmallVector<Instruction *> InstToDuplicate;
 2215 InstToDuplicate.push_back(CondI);
 2216
 2217 SmallVector<Value *, 4> WorkList;
 2218 WorkList.append(CondI->op_begin(), CondI->op_end());
 2219
// Walk the condition's in-loop operand tree, collecting instructions to
// duplicate and the memory locations the condition depends on.
 2220 SmallVector<MemoryAccess *, 4> AccessesToCheck;
 2221 SmallVector<MemoryLocation, 4> AccessedLocs;
 2222 while (!WorkList.empty()) {
// NOTE(review): original line 2223 (popping the worklist into an Instruction*
// `I`, presumably via dyn_cast — TODO confirm) is missing from this dump.
 2224 if (!I || !L.contains(I))
 2225 continue;
 2226
 2227 // TODO: support additional instructions.
// NOTE(review): original line 2228 (the instruction-kind filter guarding this
// bail-out — TODO confirm) is missing from this dump.
 2229 return {};
 2230
 2231 // Do not duplicate volatile and atomic loads.
 2232 if (auto *LI = dyn_cast<LoadInst>(I))
 2233 if (LI->isVolatile() || LI->isAtomic())
 2234 return {};
 2235
 2236 InstToDuplicate.push_back(I);
 2237 if (MemoryAccess *MA = MSSA.getMemoryAccess(I)) {
 2238 if (auto *MemUse = dyn_cast_or_null<MemoryUse>(MA)) {
 2239 // Queue the defining access to check for alias checks.
 2240 AccessesToCheck.push_back(MemUse->getDefiningAccess());
 2241 AccessedLocs.push_back(MemoryLocation::get(I));
 2242 } else {
 2243 // MemoryDefs may clobber the location or may be atomic memory
 2244 // operations. Bail out.
 2245 return {};
 2246 }
 2247 }
 2248 WorkList.append(I->op_begin(), I->op_end());
 2249 }
 2250
 2251 if (InstToDuplicate.empty())
 2252 return {};
 2253
 2254 SmallVector<BasicBlock *, 4> ExitingBlocks;
 2255 L.getExitingBlocks(ExitingBlocks);
// For a given header successor, check that no MemoryDef on the path from
// that successor back to the header may clobber the accessed locations, and
// classify whether the path is a side-effect-free no-op.
 2256 auto HasNoClobbersOnPath =
 2257 [&L, &AA, &AccessedLocs, &ExitingBlocks, &InstToDuplicate,
 2258 MSSAThreshold](BasicBlock *Succ, BasicBlock *Header,
 2259 SmallVector<MemoryAccess *, 4> AccessesToCheck)
 2260 -> std::optional<IVConditionInfo> {
 2261 IVConditionInfo Info;
 2262 // First, collect all blocks in the loop that are on a path from Succ
 2263 // to the header.
// NOTE(review): original lines 2264 and 2267 (the local WorkList and Seen
// declarations used below — TODO confirm exact types) are missing from this
// dump.
 2265 WorkList.push_back(Succ);
 2266 WorkList.push_back(Header);
 2268 Seen.insert(Header);
 2269 Info.PathIsNoop &=
 2270 all_of(*Header, [](Instruction &I) { return !I.mayHaveSideEffects(); });
 2271
 2272 while (!WorkList.empty()) {
 2273 BasicBlock *Current = WorkList.pop_back_val();
 2274 if (!L.contains(Current))
 2275 continue;
 2276 const auto &SeenIns = Seen.insert(Current);
 2277 if (!SeenIns.second)
 2278 continue;
 2279
 2280 Info.PathIsNoop &= all_of(
 2281 *Current, [](Instruction &I) { return !I.mayHaveSideEffects(); });
 2282 WorkList.append(succ_begin(Current), succ_end(Current));
 2283 }
 2284
 2285 // Require at least 2 blocks on a path through the loop. This skips
 2286 // paths that directly exit the loop.
 2287 if (Seen.size() < 2)
 2288 return {};
 2289
 2290 // Next, check if there are any MemoryDefs that are on the path through
 2291 // the loop (in the Seen set) and they may-alias any of the locations in
 2292 // AccessedLocs. If that is the case, they may modify the condition and
 2293 // partial unswitching is not possible.
 2294 SmallPtrSet<MemoryAccess *, 4> SeenAccesses;
 2295 while (!AccessesToCheck.empty()) {
 2296 MemoryAccess *Current = AccessesToCheck.pop_back_val();
 2297 auto SeenI = SeenAccesses.insert(Current);
 2298 if (!SeenI.second || !Seen.contains(Current->getBlock()))
 2299 continue;
 2300
 2301 // Bail out if exceeded the threshold.
 2302 if (SeenAccesses.size() >= MSSAThreshold)
 2303 return {};
 2304
 2305 // MemoryUse are read-only accesses.
 2306 if (isa<MemoryUse>(Current))
 2307 continue;
 2308
 2309 // For a MemoryDef, check if it aliases any of the locations feeding
 2310 // the original condition.
 2311 if (auto *CurrentDef = dyn_cast<MemoryDef>(Current)) {
 2312 if (any_of(AccessedLocs, [&AA, CurrentDef](MemoryLocation &Loc) {
 2313 return isModSet(
 2314 AA.getModRefInfo(CurrentDef->getMemoryInst(), Loc));
 2315 }))
 2316 return {};
 2317 }
 2318
// Follow uses of this access to continue the MemorySSA walk.
 2319 for (Use &U : Current->uses())
 2320 AccessesToCheck.push_back(cast<MemoryAccess>(U.getUser()));
 2321 }
 2322
 2323 // We could also allow loops with known trip counts without mustprogress,
 2324 // but ScalarEvolution may not be available.
 2325 Info.PathIsNoop &= isMustProgress(&L);
 2326
 2327 // If the path is considered a no-op so far, check if it reaches a
 2328 // single exit block without any phis. This ensures no values from the
 2329 // loop are used outside of the loop.
 2330 if (Info.PathIsNoop) {
 2331 for (auto *Exiting : ExitingBlocks) {
 2332 if (!Seen.contains(Exiting))
 2333 continue;
 2334 for (auto *Succ : successors(Exiting)) {
 2335 if (L.contains(Succ))
 2336 continue;
 2337
 2338 Info.PathIsNoop &= Succ->phis().empty() &&
 2339 (!Info.ExitForPath || Info.ExitForPath == Succ);
 2340 if (!Info.PathIsNoop)
 2341 break;
 2342 assert((!Info.ExitForPath || Info.ExitForPath == Succ) &&
 2343 "cannot have multiple exit blocks");
 2344 Info.ExitForPath = Succ;
 2345 }
 2346 }
 2347 }
 2348 if (!Info.ExitForPath)
 2349 Info.PathIsNoop = false;
 2350
 2351 Info.InstToDuplicate = InstToDuplicate;
 2352 return Info;
 2353 };
 2354
 2355 // If we branch to the same successor, partial unswitching will not be
 2356 // beneficial.
 2357 if (TI->getSuccessor(0) == TI->getSuccessor(1))
 2358 return {};
 2359
// Try each successor; the surviving one fixes the condition's known value
// (true for successor 0, false for successor 1) along the duplicated path.
 2360 if (auto Info = HasNoClobbersOnPath(TI->getSuccessor(0), L.getHeader(),
 2361 AccessesToCheck)) {
 2362 Info->KnownValue = ConstantInt::getTrue(TI->getContext());
 2363 return Info;
 2364 }
 2365 if (auto Info = HasNoClobbersOnPath(TI->getSuccessor(1), L.getHeader(),
 2366 AccessesToCheck)) {
 2367 Info->KnownValue = ConstantInt::getFalse(TI->getContext());
 2368 return Info;
 2369 }
 2370
 2371 return {};
 2372}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Register Bank Select
This is the interface for LLVM's primary stateless and local alias analysis.
bbsections Prepares for basic block by splitting functions into clusters of basic blocks
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define LLVM_EXPORT_TEMPLATE
Definition Compiler.h:215
This file defines the DenseSet and SmallDenseSet classes.
#define Check(C,...)
This is the interface for a simple mod/ref and alias analysis over globals.
static const HTTPClientCleanup Cleanup
Hexagon Hardware Loops
Module.h This file contains the declarations for the Module class.
iv Induction Variable Users
Definition IVUsers.cpp:48
static cl::opt< ReplaceExitVal > ReplaceExitValue("replexitval", cl::Hidden, cl::init(OnlyCheapRepl), cl::desc("Choose the strategy to replace exit value in IndVarSimplify"), cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"), clEnumValN(OnlyCheapRepl, "cheap", "only replace exit value when the cost is cheap"), clEnumValN(UnusedIndVarInLoop, "unusedindvarinloop", "only replace exit value when it is an unused " "induction variable in the loop and has cheap replacement cost"), clEnumValN(NoHardUse, "noharduse", "only replace exit values when loop def likely dead"), clEnumValN(AlwaysRepl, "always", "always replace exit value whenever possible")))
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static cl::opt< bool, true > HoistRuntimeChecks("hoist-runtime-checks", cl::Hidden, cl::desc("Hoist inner loop runtime memory checks to outer loop if possible"), cl::location(VectorizerParams::HoistRuntimeChecks), cl::init(true))
static bool hasHardUserWithinLoop(const Loop *L, const Instruction *I)
static const char * LLVMLoopDisableLICM
Definition LoopUtils.cpp:56
static PointerBounds expandBounds(const RuntimeCheckingPtrGroup *CG, Loop *TheLoop, Instruction *Loc, SCEVExpander &Exp, bool HoistRuntimeChecks)
Expand code for the lower and upper bound of the pointer group CG in TheLoop.
static bool canLoopBeDeleted(Loop *L, SmallVector< RewritePhi, 8 > &RewritePhiSet)
static const char * LLVMLoopDisableNonforced
Definition LoopUtils.cpp:55
static MDNode * createStringMetadata(Loop *TheLoop, StringRef Name, unsigned V)
Create MDNode for input string.
static BranchInst * getExpectedExitLoopLatchBranch(Loop *L)
Checks if L has an exiting latch branch.
static std::optional< unsigned > estimateLoopTripCount(Loop *L)
static bool checkIsIndPhi(PHINode *Phi, Loop *L, ScalarEvolution *SE, InductionDescriptor &ID)
Checks if it is safe to call InductionDescriptor::isInductionPHI for Phi, and returns true if this Ph...
#define I(x, y, z)
Definition MD5.cpp:57
#define H(x, y, z)
Definition MD5.cpp:56
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
uint64_t High
#define P(N)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
This file provides a priority worklist.
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
This is the interface for a SCEV-based alias analysis.
This file defines the make_scope_exit function, which executes user-defined cleanup logic at scope ex...
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
#define LLVM_DEBUG(...)
Definition Debug.h:114
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
static APFloat getLargest(const fltSemantics &Sem, bool Negative=false)
Returns the largest finite number in the given semantics.
Definition APFloat.h:1120
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition APInt.h:207
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition APInt.h:210
static APInt getMinValue(unsigned numBits)
Gets minimum unsigned value of APInt for a specific bit width.
Definition APInt.h:217
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition APInt.h:220
Represent the analysis usage information of a pass.
LLVM_ABI AnalysisUsage & addRequiredID(const void *ID)
Definition Pass.cpp:284
AnalysisUsage & addPreservedID(const void *ID)
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
Legacy wrapper pass to provide the BasicAAResult object.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
iterator begin()
Instruction iterator methods.
Definition BasicBlock.h:459
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition BasicBlock.h:233
Conditional or Unconditional Branch instruction.
unsigned getNumSuccessors() const
BasicBlock * getSuccessor(unsigned i) const
static LLVM_ABI BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
static BranchProbability getUnknown()
uint32_t getNumerator() const
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
@ ICMP_SLT
signed less than
Definition InstrTypes.h:705
@ ICMP_SLE
signed less or equal
Definition InstrTypes.h:706
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition InstrTypes.h:682
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition InstrTypes.h:680
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:699
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:703
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:701
@ ICMP_NE
not equal
Definition InstrTypes.h:698
@ ICMP_SGE
signed greater or equal
Definition InstrTypes.h:704
static ConstantAsMetadata * get(Constant *C)
Definition Metadata.h:536
static LLVM_ABI Constant * getIntrinsicIdentity(Intrinsic::ID, Type *Ty)
static LLVM_ABI Constant * getBinOpIdentity(unsigned Opcode, Type *Ty, bool AllowRHSConstant=false, bool NSZ=false)
Return the identity constant for a binary opcode.
static LLVM_ABI Constant * getInfinity(Type *Ty, bool Negative=false)
static LLVM_ABI Constant * getQNaN(Type *Ty, bool Negative=false, APInt *Payload=nullptr)
This is the shared class of boolean and integer constants.
Definition Constants.h:87
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static LLVM_ABI ConstantInt * getFalse(LLVMContext &Context)
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
Definition Constants.h:169
Record of a variable value-assignment, aka a non instruction representation of the dbg....
Identifies a unique instance of a variable.
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition DenseMap.h:205
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:241
iterator_range< iterator > children()
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
Legacy analysis pass which computes a DominatorTree.
Definition Dominators.h:321
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:164
LLVM_ABI bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
static constexpr ElementCount get(ScalarTy MinVal, bool Scalable)
Definition TypeSize.h:315
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:22
bool noSignedZeros() const
Definition FMF.h:67
bool noNaNs() const
Definition FMF.h:65
void applyUpdates(ArrayRef< UpdateT > Updates)
Submit updates to all available trees.
Legacy wrapper pass to provide the GlobalsAAResult object.
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
Value * CreateICmpULT(Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2348
Value * CreateFreeze(Value *V, const Twine &Name="")
Definition IRBuilder.h:2645
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:1551
Value * CreateICmpSLT(Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2364
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition IRBuilder.h:207
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="", bool IsDisjoint=false)
Definition IRBuilder.h:1573
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition IRBuilder.h:1437
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2788
A struct for saving information about induction variables.
static LLVM_ABI bool isInductionPHI(PHINode *Phi, const Loop *L, ScalarEvolution *SE, InductionDescriptor &D, const SCEV *Expr=nullptr, SmallVectorImpl< Instruction * > *CastsToIgnore=nullptr)
Returns true if Phi is an induction in the loop L.
InstSimplifyFolder - Use InstructionSimplify to fold operations to existing values.
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI bool mayHaveSideEffects() const LLVM_READONLY
Return true if the instruction may have side effects.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
void addLoop(Loop &L)
Definition LoopPass.cpp:77
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
typename std::vector< Loop * >::const_iterator iterator
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
BlockT * getHeader() const
LoopT * getParentLoop() const
Return the parent loop if it exists or nullptr for top level loops.
void addTopLevelLoop(LoopT *New)
This adds the specified loop to the collection of top-level loops.
iterator end() const
void removeBlock(BlockT *BB)
This method completely removes BB from all data structures, including all of the Loop objects it is n...
LoopT * AllocateLoop(ArgsTy &&...Args)
LoopT * removeLoop(iterator I)
This removes the specified top-level loop from this loop info object.
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
void destroy(LoopT *L)
Destroy a loop that has been removed from the LoopInfo nest.
The legacy pass manager's analysis pass to compute loop information.
Definition LoopInfo.h:596
bool replacementPreservesLCSSAForm(Instruction *From, Value *To)
Returns true if replacing From with To everywhere is guaranteed to preserve LCSSA form.
Definition LoopInfo.h:441
LLVM_ABI void erase(Loop *L)
Update LoopInfo after removing the last backedge from a loop.
Definition LoopInfo.cpp:887
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
void setLoopID(MDNode *LoopID) const
Set the llvm.loop loop id metadata for this loop.
Definition LoopInfo.cpp:526
MDNode * getLoopID() const
Return the llvm.loop loop id metadata node for this loop if it is present.
Definition LoopInfo.cpp:502
Metadata node.
Definition Metadata.h:1078
LLVM_ABI void replaceOperandWith(unsigned I, Metadata *New)
Replace a specific operand.
const MDOperand & getOperand(unsigned I) const
Definition Metadata.h:1442
ArrayRef< MDOperand > operands() const
Definition Metadata.h:1440
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1569
unsigned getNumOperands() const
Return number of MDNode operands.
Definition Metadata.h:1448
LLVMContext & getContext() const
Definition Metadata.h:1242
Tracking metadata reference owned by Metadata.
Definition Metadata.h:900
A single uniqued string.
Definition Metadata.h:721
LLVM_ABI StringRef getString() const
Definition Metadata.cpp:618
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
Definition Metadata.cpp:608
Tuple of metadata.
Definition Metadata.h:1497
BasicBlock * getBlock() const
Definition MemorySSA.h:162
Representation for a specific memory location.
static LLVM_ABI MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
Legacy analysis pass which computes MemorySSA.
Definition MemorySSA.h:993
Encapsulates MemorySSA, including all data associated with memory accesses.
Definition MemorySSA.h:702
LLVM_ABI void verifyMemorySSA(VerificationLevel=VerificationLevel::Fast) const
Verify that MemorySSA is self consistent (IE definitions dominate all uses, uses appear in the right ...
MemoryUseOrDef * getMemoryAccess(const Instruction *I) const
Given a memory Mod/Ref'ing instruction, get the MemorySSA access associated with it.
Definition MemorySSA.h:720
Root of the metadata hierarchy.
Definition Metadata.h:64
void setIncomingValue(unsigned i, Value *V)
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
PassRegistry - This class manages the registration and intitialization of the pass subsystem as appli...
static LLVM_ABI PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
bool insert(const T &X)
Insert a new element into the PriorityWorklist.
static bool isSignedRecurrenceKind(RecurKind Kind)
Returns true if recurrece kind is a signed redux kind.
static bool isAnyOfRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
static bool isFindLastIVRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
static bool isFindIVRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
static bool isMinMaxRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is any min/max kind.
A global registry used in conjunction with static constructors to make pluggable components (like tar...
Definition Registry.h:44
Legacy wrapper pass to provide the SCEVAAResult object.
This class uses information about analyze scalars to rewrite expressions in canonical form.
ScalarEvolution * getSE()
LLVM_ABI Value * expandCodeFor(const SCEV *SH, Type *Ty, BasicBlock::iterator I)
Insert code to directly compute the specified SCEV expression into the program.
void eraseDeadInstructions(Value *Root)
Remove inserted instructions that are dead, e.g.
This class represents an analyzed expression in the program.
LLVM_ABI Type * getType() const
Return the LLVM type of this SCEV expression.
The main scalar evolution driver.
LLVM_ABI bool isKnownNonNegative(const SCEV *S)
Test if the given expression is known to be non-negative.
LLVM_ABI bool isLoopEntryGuardedByCond(const Loop *L, CmpPredicate Pred, const SCEV *LHS, const SCEV *RHS)
Test whether entry to the loop is protected by a conditional between LHS and RHS.
LLVM_ABI const SCEV * getSCEVAtScope(const SCEV *S, const Loop *L)
Return a SCEV expression for the specified value at the specified scope in the program.
const SCEV * getZero(Type *Ty)
Return a SCEV for the constant 0 of a specific type.
LLVM_ABI const SCEV * getConstant(ConstantInt *V)
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI void forgetLoop(const Loop *L)
This method should be called by the client when it has changed a loop in a way that may effect Scalar...
LLVM_ABI bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
LLVM_ABI LoopDisposition getLoopDisposition(const SCEV *S, const Loop *L)
Return the "disposition" of the given SCEV with respect to the given loop.
LLVM_ABI bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
LLVM_ABI void forgetValue(Value *V)
This method should be called by the client when it has changed a value in a way that may effect its v...
LLVM_ABI void forgetBlockAndLoopDispositions(Value *V=nullptr)
Called when the client has changed the disposition of values in a loop or block.
LoopDisposition
An enum describing the relationship between a SCEV and a loop.
@ LoopInvariant
The SCEV is loop-invariant.
LLVM_ABI bool isAvailableAtLoopEntry(const SCEV *S, const Loop *L)
Determine if the SCEV can be evaluated at loop's entry.
LLVM_ABI const SCEV * getExitCount(const Loop *L, const BasicBlock *ExitingBlock, ExitCountKind Kind=Exact)
Return the number of times the backedge executes before the given exit would be taken; if not exactly...
LLVM_ABI const SCEV * applyLoopGuards(const SCEV *Expr, const Loop *L)
Try to apply information from loop guards for L to Expr.
This class represents the LLVM 'select' instruction.
Implements a dense probed hash-table based set with some number of buckets stored inline.
Definition DenseSet.h:291
A version of PriorityWorklist that selects small size optimized data structures for the vector and ma...
size_type size() const
Definition SmallPtrSet.h:99
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
Definition SetVector.h:339
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:261
Provides information about what library functions are available for the current target.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Value handle that tracks a Value across RAUW.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:273
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
Definition Type.cpp:296
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
static LLVM_ABI Intrinsic::ID getForIntrinsic(Intrinsic::ID Id)
The llvm.vp.
static LLVM_ABI bool isVPReduction(Intrinsic::ID ID)
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:546
iterator_range< user_iterator > users()
Definition Value.h:426
bool use_empty() const
Definition Value.h:346
iterator_range< use_iterator > uses()
Definition Value.h:380
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
std::pair< iterator, bool > insert(const ValueT &V)
Definition DenseSet.h:202
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
Definition ilist_node.h:34
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Abstract Attribute helper functions.
Definition Attributor.h:165
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract_or_null(Y &&MD)
Extract a Value from Metadata, allowing null.
Definition Metadata.h:682
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
LLVM_ABI Value * createSimpleReduction(IRBuilderBase &B, Value *Src, RecurKind RdxKind)
Create a reduction of the given vector.
LLVM_ABI std::optional< ElementCount > getOptionalElementCountLoopAttribute(const Loop *TheLoop)
Find a combination of metadata ("llvm.loop.vectorize.width" and "llvm.loop.vectorize....
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
Definition Threading.h:280
LLVM_ABI Value * addRuntimeChecks(Instruction *Loc, Loop *TheLoop, const SmallVectorImpl< RuntimePointerCheck > &PointerChecks, SCEVExpander &Expander, bool HoistRuntimeChecks=false)
Add code that checks at runtime if the accessed arrays in PointerChecks overlap.
LLVM_ABI Value * createFindLastIVReduction(IRBuilderBase &B, Value *Src, RecurKind RdxKind, Value *Start, Value *Sentinel)
Create a reduction of the given vector Src for a reduction of the kind RecurKind::FindLastIV.
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1751
void fill(R &&Range, T &&Value)
Provide wrappers to std::fill which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1745
LLVM_ABI std::optional< unsigned > getLoopEstimatedTripCount(Loop *L, unsigned *EstimatedLoopInvocationWeight=nullptr)
Return either:
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1725
LLVM_ABI Intrinsic::ID getMinMaxReductionIntrinsicOp(Intrinsic::ID RdxID)
Returns the min/max intrinsic used when expanding a min/max reduction.
LLVM_ABI bool getBooleanLoopAttribute(const Loop *TheLoop, StringRef Name)
Returns true if Name is applied to TheLoop and enabled.
std::pair< const RuntimeCheckingPtrGroup *, const RuntimeCheckingPtrGroup * > RuntimePointerCheck
A memcheck which made up of a pair of grouped pointers.
detail::scope_exit< std::decay_t< Callable > > make_scope_exit(Callable &&F)
Definition ScopeExit.h:59
LLVM_ABI bool isKnownNonPositiveInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE)
Returns true if we can prove that S is defined and always non-positive in loop L.
LLVM_ABI std::optional< bool > getOptionalBoolLoopAttribute(const Loop *TheLoop, StringRef Name)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
void appendReversedLoopsToWorklist(RangeT &&, SmallPriorityWorklist< Loop *, 4 > &)
Utility that implements appending of loops onto a worklist given a range.
auto successors(const MachineBasicBlock *BB)
BranchProbability getBranchProbability(BranchInst *B, bool ForFirstTarget)
Based on branch weight metadata, return either:
LLVM_ABI void initializeLoopPassPass(PassRegistry &)
Manually defined generic "LoopPass" dependency initialization.
constexpr from_range_t from_range
LLVM_ABI bool formLCSSARecursively(Loop &L, const DominatorTree &DT, const LoopInfo *LI, ScalarEvolution *SE)
Put a loop nest into LCSSA form.
Definition LCSSA.cpp:449
LLVM_ABI Value * getReductionIdentity(Intrinsic::ID RdxID, Type *Ty, FastMathFlags FMF)
Given information about an @llvm.vector.reduce.
LLVM_ABI std::optional< MDNode * > makeFollowupLoopID(MDNode *OrigLoopID, ArrayRef< StringRef > FollowupAttrs, const char *InheritOptionsAttrsPrefix="", bool AlwaysNew=false)
Create a new loop identifier for a loop created from a loop transformation.
LLVM_ABI unsigned getArithmeticReductionInstruction(Intrinsic::ID RdxID)
Returns the arithmetic instruction opcode used when expanding a reduction.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:632
LLVM_ABI char & LCSSAID
Definition LCSSA.cpp:526
LLVM_ABI char & LoopSimplifyID
LLVM_ABI Value * createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left, Value *Right)
Returns a Min/Max operation corresponding to MinMaxRecurrenceKind.
LLVM_ABI SmallVector< BasicBlock *, 16 > collectChildrenInLoop(DominatorTree *DT, DomTreeNode *N, const Loop *CurLoop)
Does a BFS from a given node to all of its children inside a given loop.
LLVM_ABI void addStringMetadataToLoop(Loop *TheLoop, const char *MDString, unsigned V=0)
Set input string into loop metadata by keeping other values intact.
LLVM_ABI bool cannotBeMaxInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE, bool Signed)
Returns true if S is defined and never is equal to signed/unsigned max.
LLVM_ABI void setBranchWeights(Instruction &I, ArrayRef< uint32_t > Weights, bool IsExpected, bool ElideAllZero=false)
Create a new branch_weights metadata node and add or overwrite a prof metadata reference to instructi...
DomTreeNodeBase< BasicBlock > DomTreeNode
Definition Dominators.h:94
constexpr T divideNearest(U Numerator, V Denominator)
Returns (Numerator / Denominator) rounded by round-half-up.
Definition MathExtras.h:458
LLVM_ABI TransformationMode hasVectorizeTransformation(const Loop *L)
bool setBranchProbability(BranchInst *B, BranchProbability P, bool ForFirstTarget)
Set branch weight metadata for B to indicate that P and 1 - P are the probabilities of control flowin...
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
Definition STLExtras.h:1968
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1732
LLVM_ABI bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
Definition Local.cpp:402
LLVM_ABI SmallVector< Instruction *, 8 > findDefsUsedOutsideOfLoop(Loop *L)
Returns the instructions that use values defined in the loop.
auto reverse(ContainerTy &&C)
Definition STLExtras.h:406
LLVM_ABI constexpr Intrinsic::ID getReductionIntrinsicID(RecurKind RK)
Returns the llvm.vector.reduce intrinsic that corresponds to the recurrence kind.
LLVM_ABI bool isMustProgress(const Loop *L)
Return true if this loop can be assumed to make progress.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
bool isModSet(const ModRefInfo MRI)
Definition ModRef.h:49
LLVM_ABI TransformationMode hasUnrollAndJamTransformation(const Loop *L)
LLVM_ABI void deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE, LoopInfo *LI, MemorySSA *MSSA=nullptr)
This function deletes dead loops.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI bool hasDisableAllTransformsHint(const Loop *L)
Look for the loop attribute that disables all transformation heuristic.
LLVM_TEMPLATE_ABI void appendLoopsToWorklist(RangeT &&, SmallPriorityWorklist< Loop *, 4 > &)
Utility that implements appending of loops onto a worklist given a range.
LLVM_ABI cl::opt< unsigned > SCEVCheapExpansionBudget
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
LLVM_ABI Value * getShuffleReduction(IRBuilderBase &Builder, Value *Src, unsigned Op, TargetTransformInfo::ReductionShuffle RS, RecurKind MinMaxKind=RecurKind::None)
Generates a vector reduction using shufflevectors to reduce the value.
LLVM_ABI TransformationMode hasUnrollTransformation(const Loop *L)
BranchProbability getLoopProbability(Loop *L)
Based on branch weight metadata, return either:
LLVM_ABI TransformationMode hasDistributeTransformation(const Loop *L)
LLVM_ABI void breakLoopBackedge(Loop *L, DominatorTree &DT, ScalarEvolution &SE, LoopInfo &LI, MemorySSA *MSSA)
Remove the backedge of the specified loop.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ABI void getLoopAnalysisUsage(AnalysisUsage &AU)
Helper to consistently add the set of standard passes to a loop pass's AnalysisUsage.
LLVM_ABI void propagateIRFlags(Value *I, ArrayRef< Value * > VL, Value *OpValue=nullptr, bool IncludeWrapFlags=true)
Get the intersection (logical and) of all of the potential IR flags of each scalar operation (VL) tha...
LLVM_ABI bool isKnownPositiveInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE)
Returns true if we can prove that S is defined and always positive in loop L.
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
LLVM_ABI unsigned changeToUnreachable(Instruction *I, bool PreserveLCSSA=false, DomTreeUpdater *DTU=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Insert an unreachable instruction before the specified instruction, making it and the rest of the cod...
Definition Local.cpp:2513
RNSuccIterator< NodeRef, BlockT, RegionT > succ_begin(NodeRef Node)
LLVM_ABI std::optional< int > getOptionalIntLoopAttribute(const Loop *TheLoop, StringRef Name)
Find named metadata for a loop with an integer value.
bool setLoopProbability(Loop *L, BranchProbability P)
Set branch weight metadata for the latch of L to indicate that, at the end of any iteration,...
LLVM_ABI BasicBlock * SplitBlockPredecessors(BasicBlock *BB, ArrayRef< BasicBlock * > Preds, const char *Suffix, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method introduces at least one new basic block into the function and moves some of the predecess...
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Definition ModRef.h:74
TargetTransformInfo TTI
LLVM_ABI CmpInst::Predicate getMinMaxReductionPredicate(RecurKind RK)
Returns the comparison predicate used when expanding a min/max reduction.
LLVM_ABI TransformationMode hasLICMVersioningTransformation(const Loop *L)
LLVM_ABI bool VerifyMemorySSA
Enables verification of MemorySSA.
Definition MemorySSA.cpp:84
TransformationMode
The mode sets how eager a transformation should be applied.
Definition LoopUtils.h:284
@ TM_Unspecified
The pass can use heuristics to determine whether a transformation should be applied.
Definition LoopUtils.h:287
@ TM_SuppressedByUser
The transformation must not be applied.
Definition LoopUtils.h:307
@ TM_ForcedByUser
The transformation was directed by the user, e.g.
Definition LoopUtils.h:301
@ TM_Disable
The transformation should not be applied.
Definition LoopUtils.h:293
@ TM_Enable
The transformation should be applied without considering a cost model.
Definition LoopUtils.h:290
RNSuccIterator< NodeRef, BlockT, RegionT > succ_end(NodeRef Node)
LLVM_ABI bool hasDisableLICMTransformsHint(const Loop *L)
Look for the loop attribute that disables the LICM transformation heuristics.
template LLVM_TEMPLATE_ABI void appendLoopsToWorklist< Loop & >(Loop &L, SmallPriorityWorklist< Loop *, 4 > &Worklist)
LLVM_ABI Intrinsic::ID getReductionForBinop(Instruction::BinaryOps Opc)
Returns the reduction intrinsic id corresponding to the binary operation.
RecurKind
These are the kinds of recurrences that we support.
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
@ FMinimumNum
FP min with llvm.minimumnum semantics.
@ Or
Bitwise or logical OR of integers.
@ FMinimum
FP min with llvm.minimum semantics.
@ FMaxNum
FP max with llvm.maxnum semantics including NaNs.
@ Mul
Product of integers.
@ None
Not a recurrence.
@ AnyOf
AnyOf reduction with select(cmp(),x,y) where one of (x,y) is loop invariant, and both x and y are int...
@ Xor
Bitwise or logical XOR of integers.
@ FMax
FP max implemented in terms of select(cmp()).
@ FMaximum
FP max with llvm.maximum semantics.
@ FMulAdd
Sum of float products with llvm.fmuladd(a * b + sum).
@ FMul
Product of floats.
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ And
Bitwise or logical AND of integers.
@ SMin
Signed integer min implemented in terms of select(cmp()).
@ FMin
FP min implemented in terms of select(cmp()).
@ FMinNum
FP min with llvm.minnum semantics including NaNs.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
@ AddChainWithSubs
A chain of adds and subs.
@ FAdd
Sum of floats.
@ FMaximumNum
FP max with llvm.maximumnum semantics.
@ UMax
Unsigned integer max implemented in terms of select(cmp()).
LLVM_ABI Value * getRecurrenceIdentity(RecurKind K, Type *Tp, FastMathFlags FMF)
Given information about an recurrence kind, return the identity for the @llvm.vector....
LLVM_ABI bool formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
Ensure that all exit blocks of the loop are dedicated exits.
Definition LoopUtils.cpp:58
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
LLVM_ABI bool isKnownNegativeInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE)
Returns true if we can prove that S is defined and always negative in loop L.
constexpr unsigned BitWidth
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
LLVM_ABI bool setLoopEstimatedTripCount(Loop *L, unsigned EstimatedTripCount, std::optional< unsigned > EstimatedLoopInvocationWeight=std::nullopt)
Set llvm.loop.estimated_trip_count with the value EstimatedTripCount in the loop metadata of L.
LLVM_ABI bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
LLVM_ABI const char * LLVMLoopEstimatedTripCount
Profile-based loop metadata that should be accessed only by using llvm::getLoopEstimatedTripCount and...
LLVM_ABI bool hasIterationCountInvariantInParent(Loop *L, ScalarEvolution &SE)
Check inner loop (L) backedge count is known to be invariant on all iterations of its outer loop.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
static cl::opt< unsigned > MSSAThreshold("simple-loop-unswitch-memoryssa-threshold", cl::desc("Max number of memory uses to explore during " "partial unswitching analysis"), cl::init(100), cl::Hidden)
LLVM_ABI bool isAlmostDeadIV(PHINode *IV, BasicBlock *LatchBlock, Value *Cond)
Return true if the induction variable IV in a Loop whose latch is LatchBlock would become dead if the...
auto predecessors(const MachineBasicBlock *BB)
LLVM_ABI int rewriteLoopExitValues(Loop *L, LoopInfo *LI, TargetLibraryInfo *TLI, ScalarEvolution *SE, const TargetTransformInfo *TTI, SCEVExpander &Rewriter, DominatorTree *DT, ReplaceExitVal ReplaceExitValue, SmallVector< WeakTrackingVH, 16 > &DeadInsts)
If the final value of any expressions that are recurrent in the loop can be computed,...
LLVM_ABI Value * createOrderedReduction(IRBuilderBase &B, RecurKind RdxKind, Value *Src, Value *Start)
Create an ordered reduction intrinsic using the given recurrence kind RdxKind.
LLVM_ABI Value * addDiffRuntimeChecks(Instruction *Loc, ArrayRef< PointerDiffInfo > Checks, SCEVExpander &Expander, function_ref< Value *(IRBuilderBase &, unsigned)> GetVF, unsigned IC)
LLVM_ABI RecurKind getMinMaxReductionRecurKind(Intrinsic::ID RdxID)
Returns the recurrence kind used when expanding a min/max reduction.
ReplaceExitVal
Definition LoopUtils.h:554
@ UnusedIndVarInLoop
Definition LoopUtils.h:558
@ OnlyCheapRepl
Definition LoopUtils.h:556
@ AlwaysRepl
Definition LoopUtils.h:559
LLVM_ABI BasicBlock * SplitEdge(BasicBlock *From, BasicBlock *To, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the edge connecting the specified blocks, and return the newly created basic block between From...
LLVM_ABI std::optional< IVConditionInfo > hasPartialIVCondition(const Loop &L, unsigned MSSAThreshold, const MemorySSA &MSSA, AAResults &AA)
Check if the loop header has a conditional branch that is not loop-invariant, because it involves loa...
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
LLVM_ABI Value * createAnyOfReduction(IRBuilderBase &B, Value *Src, Value *InitVal, PHINode *OrigPhi)
Create a reduction of the given vector Src for a reduction of kind RecurKind::AnyOf.
LLVM_ABI bool cannotBeMinInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE, bool Signed)
Returns true if S is defined and never is equal to signed/unsigned min.
LLVM_ABI bool isKnownNonNegativeInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE)
Returns true if we can prove that S is defined and always non-negative in loop L.
LLVM_ABI Value * getOrderedReduction(IRBuilderBase &Builder, Value *Acc, Value *Src, unsigned Op, RecurKind MinMaxKind=RecurKind::None)
Generates an ordered vector reduction using extracts to reduce the value.
LLVM_ABI MDNode * findOptionMDForLoopID(MDNode *LoopID, StringRef Name)
Find and return the loop attribute node for the attribute Name in LoopID.
LLVM_ABI Intrinsic::ID getMinMaxReductionIntrinsicID(Intrinsic::ID IID)
Returns the llvm.vector.reduce min/max intrinsic that corresponds to the intrinsic op.
@ Enable
Enable colors.
Definition WithColor.h:47
LLVM_ABI Loop * cloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM, LoopInfo *LI, LPPassManager *LPM)
Recursively clone the specified loop and all of its children, mapping the blocks with the specified m...
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:869
#define N
DbgLoop(const Loop *L)
const Loop * L
IR Values for the lower and upper bounds of a pointer evolution.
TrackingVH< Value > Start
TrackingVH< Value > End
Value * StrideToCheck
unsigned Ith
RewritePhi(PHINode *P, unsigned I, const SCEV *Val, Instruction *ExpansionPt, bool H)
const SCEV * ExpansionSCEV
PHINode * PN
Instruction * ExpansionPoint
Struct to hold information about a partially invariant condition.
Definition LoopUtils.h:626
Incoming for lane maks phi as machine instruction, incoming register Reg and incoming block Block are...
unsigned AddressSpace
Address space of the involved pointers.
bool NeedsFreeze
Whether the pointer needs to be frozen after expansion, e.g.
const SCEV * High
The SCEV expression which represents the upper bound of all the pointers in this group.
const SCEV * Low
The SCEV expression which represents the lower bound of all the pointers in this group.