LLVM 19.0.0git
HardwareLoops.cpp
Go to the documentation of this file.
1//===-- HardwareLoops.cpp - Target Independent Hardware Loops --*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// Insert hardware loop intrinsics into loops which are deemed profitable by
10/// the target, by querying TargetTransformInfo. A hardware loop comprises
11/// two intrinsics: one, outside the loop, to set the loop iteration count and
12/// another, in the exit block, to decrement the counter. The decremented value
13/// can either be carried through the loop via a phi or handled in some opaque
14/// way by the target.
15///
16//===----------------------------------------------------------------------===//
17
19#include "llvm/ADT/Statistic.h"
27#include "llvm/CodeGen/Passes.h"
28#include "llvm/IR/BasicBlock.h"
29#include "llvm/IR/Constants.h"
30#include "llvm/IR/Dominators.h"
31#include "llvm/IR/IRBuilder.h"
34#include "llvm/IR/Value.h"
36#include "llvm/Pass.h"
37#include "llvm/PassRegistry.h"
39#include "llvm/Support/Debug.h"
45
46#define DEBUG_TYPE "hardware-loops"
47
48#define HW_LOOPS_NAME "Hardware Loop Insertion"
49
50using namespace llvm;
51
// Hidden command-line flags for the hardware-loops pass. The legacy pass only
// consults a flag when it actually occurred on the command line (see the
// getNumOccurrences() checks in HardwareLoopsLegacy::runOnFunction).

// -force-hardware-loops: insert the intrinsics regardless of the target's
// profitability answer.
52static cl::opt<bool>
53ForceHardwareLoops("force-hardware-loops", cl::Hidden, cl::init(false),
54 cl::desc("Force hardware loops intrinsics to be inserted"));
55
// -force-hardware-loop-phi: carry the counter through a phi in the loop.
56static cl::opt<bool>
58 "force-hardware-loop-phi", cl::Hidden, cl::init(false),
59 cl::desc("Force hardware loop counter to be updated through a phi"));
60
// -force-nested-hardware-loop: allow a hardware loop inside another one.
61static cl::opt<bool>
62ForceNestedLoop("force-nested-hardware-loop", cl::Hidden, cl::init(false),
63 cl::desc("Force allowance of nested hardware loops"));
64
// -hardware-loop-decrement: override the per-iteration decrement (default 1).
66LoopDecrement("hardware-loop-decrement", cl::Hidden, cl::init(1),
67 cl::desc("Set the loop decrement value"));
68
// -hardware-loop-counter-bitwidth: override the counter width (default 32).
70CounterBitWidth("hardware-loop-counter-bitwidth", cl::Hidden, cl::init(32),
71 cl::desc("Set the loop counter bitwidth"));
72
// -force-hardware-loop-guard: request the loop-guard ("test and set") form of
// the setup intrinsic.
73static cl::opt<bool>
75 "force-hardware-loop-guard", cl::Hidden, cl::init(false),
76 cl::desc("Force generation of loop guard intrinsic"));
77
78STATISTIC(NumHWLoops, "Number of loops converted to hardware loops");
79
80#ifndef NDEBUG
81static void debugHWLoopFailure(const StringRef DebugMsg,
82 Instruction *I) {
83 dbgs() << "HWLoops: " << DebugMsg;
84 if (I)
85 dbgs() << ' ' << *I;
86 else
87 dbgs() << '.';
88 dbgs() << '\n';
89}
90#endif
91
// Build the analysis remark used to explain why a loop was not converted.
// By default the remark is attached to the loop header and the loop's start
// location.
94 Value *CodeRegion = L->getHeader();
95 DebugLoc DL = L->getStartLoc();
96
// When a specific instruction is blamed, attach the remark to its block
// instead.
97 if (I) {
98 CodeRegion = I->getParent();
99 // If there is no debug location attached to the instruction, revert back to
100 // using the loop's.
101 if (I->getDebugLoc())
102 DL = I->getDebugLoc();
103 }
104
// Every remark starts with the same prefix; callers stream the specific
// reason after it.
105 OptimizationRemarkAnalysis R(DEBUG_TYPE, RemarkName, DL, CodeRegion);
106 R << "hardware-loop not created: ";
107 return R;
108}
109
110namespace {
111
// Emit an optimization remark, tagged ORETag, saying that TheLoop was not
// turned into a hardware loop because of Msg. I optionally names the
// instruction responsible; it defaults to null.
112 void reportHWLoopFailure(const StringRef Msg, const StringRef ORETag,
113 OptimizationRemarkEmitter *ORE, Loop *TheLoop, Instruction *I = nullptr) {
// The remark text is the common prefix from createHWLoopAnalysis followed by
// Msg.
115 ORE->emit(createHWLoopAnalysis(ORETag, TheLoop, I) << Msg);
116 }
117
118 using TTI = TargetTransformInfo;
119
// Legacy pass manager entry point. runOnFunction collects the required
// analyses and hands the actual work to HardwareLoopsImpl.
120 class HardwareLoopsLegacy : public FunctionPass {
121 public:
// Pass identification; defined (as 0) after the anonymous namespace.
122 static char ID;
123
124 HardwareLoopsLegacy() : FunctionPass(ID) {
126 }
127
// Returns the result of HardwareLoopsImpl::run, or false when the function
// is skipped.
128 bool runOnFunction(Function &F) override;
129
// Declares the analyses this pass depends on and preserves.
130 void getAnalysisUsage(AnalysisUsage &AU) const override {
141 }
142 };
143
// Pass-manager independent driver: holds the analyses and options and walks
// the function's loops, converting those that qualify. Used by both the
// legacy pass and the new pass manager entry point.
144 class HardwareLoopsImpl {
145 public:
146 HardwareLoopsImpl(ScalarEvolution &SE, LoopInfo &LI, bool PreserveLCSSA,
147 DominatorTree &DT, const DataLayout &DL,
151 : SE(SE), LI(LI), PreserveLCSSA(PreserveLCSSA), DT(DT), DL(DL), TTI(TTI),
152 TLI(TLI), AC(AC), ORE(ORE), Opts(Opts) { }
153
// Visit every outermost loop of F; returns MadeChange.
154 bool run(Function &F);
155
156 private:
157 // Try to convert the given Loop into a hardware loop.
158 bool TryConvertLoop(Loop *L, LLVMContext &Ctx);
159
160 // Given that the target believes the loop to be profitable, try to
161 // convert it.
162 bool TryConvertLoop(HardwareLoopInfo &HWLoopInfo);
163
164 ScalarEvolution &SE;
165 LoopInfo &LI;
// Forwarded to InsertPreheaderForLoop when a preheader has to be created.
166 bool PreserveLCSSA;
167 DominatorTree &DT;
168 const DataLayout &DL;
// May be null: the legacy pass only obtains it if it is available.
170 TargetLibraryInfo *TLI = nullptr;
171 AssumptionCache &AC;
// Accumulates across all loops visited by run().
174 bool MadeChange = false;
175 };
176
// Performs the rewrite of one loop: expands the iteration count, inserts the
// setup and decrement intrinsics, and retargets the exit branch. Constructed
// from a HardwareLoopInfo; Create() is the only public action.
177 class HardwareLoop {
178 // Expand the trip count scev into a value that we can use.
179 Value *InitLoopCount();
180
181 // Insert the set_loop_iteration intrinsic.
182 Value *InsertIterationSetup(Value *LoopCountInit);
183
184 // Insert the loop_decrement intrinsic.
185 void InsertLoopDec();
186
187 // Insert the loop_decrement_reg intrinsic.
188 Instruction *InsertLoopRegDec(Value *EltsRem);
189
190 // If the target requires the counter value to be updated in the loop,
191 // insert a phi to hold the value. The intended purpose is for use by
192 // loop_decrement_reg.
193 PHINode *InsertPHICounter(Value *NumElts, Value *EltsRem);
194
195 // Create a new cmp, that checks the returned value of loop_decrement*,
196 // and update the exit branch to use it.
197 void UpdateBranch(Value *EltsRem);
198
199 public:
200 HardwareLoop(HardwareLoopInfo &Info, ScalarEvolution &SE,
201 const DataLayout &DL,
203 HardwareLoopOptions &Opts) :
204 SE(SE), DL(DL), ORE(ORE), Opts(Opts), L(Info.L), M(L->getHeader()->getModule()),
205 ExitCount(Info.ExitCount),
206 CountType(Info.CountType),
207 ExitBranch(Info.ExitBranch),
209 UsePHICounter(Info.CounterInReg),
210 UseLoopGuard(Info.PerformEntryTest) { }
211
// Run the whole conversion; returns early (after emitting a remark) when the
// loop count cannot be expanded safely.
212 void Create();
213
214 private:
215 ScalarEvolution &SE;
216 const DataLayout &DL;
217 OptimizationRemarkEmitter *ORE = nullptr;
219 Loop *L = nullptr;
// Module of the loop header; used to look up intrinsic declarations.
220 Module *M = nullptr;
// Rewritten by InitLoopCount to the zero-extended exit count plus one.
221 const SCEV *ExitCount = nullptr;
222 Type *CountType = nullptr;
// Branch whose condition is replaced by the decrement intrinsic / compare.
223 BranchInst *ExitBranch = nullptr;
224 Value *LoopDecrement = nullptr;
// Initialised from HardwareLoopInfo::CounterInReg.
225 bool UsePHICounter = false;
// Initialised from HardwareLoopInfo::PerformEntryTest; may be cleared by
// InitLoopCount.
226 bool UseLoopGuard = false;
// Block that receives the setup intrinsic; chosen by InitLoopCount.
227 BasicBlock *BeginBB = nullptr;
228 };
229}
230
231char HardwareLoopsLegacy::ID = 0;
232
// Legacy pass manager entry point: gather the analyses, translate the
// command-line flags into options, and run the shared implementation.
// Returns false without doing anything when the function is skipped.
233bool HardwareLoopsLegacy::runOnFunction(Function &F) {
234 if (skipFunction(F))
235 return false;
236
237 LLVM_DEBUG(dbgs() << "HWLoops: Running on " << F.getName() << "\n");
238
239 auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
240 auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
241 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
242 auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
243 auto &DL = F.getParent()->getDataLayout();
244 auto *ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
// TargetLibraryInfo is optional here, so TLI may end up null.
245 auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
246 auto *TLI = TLIP ? &TLIP->getTLI(F) : nullptr;
247 auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
248 bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
249
// Each flag is only consulted when it was given on the command line.
// NOTE(review): the statement guarded by each check is not visible here;
// presumably it calls the matching HardwareLoopOptions setter - confirm.
251 if (ForceHardwareLoops.getNumOccurrences())
253 if (ForceHardwareLoopPHI.getNumOccurrences())
255 if (ForceNestedLoop.getNumOccurrences())
257 if (ForceGuardLoopEntry.getNumOccurrences())
259 if (LoopDecrement.getNumOccurrences())
261 if (CounterBitWidth.getNumOccurrences())
263
264 HardwareLoopsImpl Impl(SE, LI, PreserveLCSSA, DT, DL, TTI, TLI, AC, ORE,
265 Opts);
266 return Impl.run(F);
267}
268
// New pass manager entry point: fetch the analyses from the analysis manager
// and run the shared implementation.
271 auto &LI = AM.getResult<LoopAnalysis>(F);
272 auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
273 auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
274 auto &TTI = AM.getResult<TargetIRAnalysis>(F);
275 auto *TLI = &AM.getResult<TargetLibraryAnalysis>(F);
276 auto &AC = AM.getResult<AssumptionAnalysis>(F);
278 auto &DL = F.getParent()->getDataLayout();
279
// PreserveLCSSA is always passed as true on this path.
280 HardwareLoopsImpl Impl(SE, LI, true, DT, DL, TTI, TLI, AC, ORE, Opts);
281 bool Changed = Impl.run(F);
// Nothing was converted, so every analysis is still valid.
282 if (!Changed)
283 return PreservedAnalyses::all();
284
290 return PA;
291}
292
293bool HardwareLoopsImpl::run(Function &F) {
294 LLVMContext &Ctx = F.getParent()->getContext();
295 for (Loop *L : LI)
296 if (L->isOutermost())
297 TryConvertLoop(L, Ctx);
298 return MadeChange;
299}
300
301// Return true if the search should stop, which will be when an inner loop is
302// converted and the parent loop doesn't support containing a hardware loop.
303bool HardwareLoopsImpl::TryConvertLoop(Loop *L, LLVMContext &Ctx) {
304 // Process nested loops first.
305 bool AnyChanged = false;
306 for (Loop *SL : *L)
307 AnyChanged |= TryConvertLoop(SL, Ctx);
308 if (AnyChanged) {
309 reportHWLoopFailure("nested hardware-loops not supported", "HWLoopNested",
310 ORE, L);
311 return true; // Stop search.
312 }
313
314 LLVM_DEBUG(dbgs() << "HWLoops: Loop " << L->getHeader()->getName() << "\n");
315
316 HardwareLoopInfo HWLoopInfo(L);
317 if (!HWLoopInfo.canAnalyze(LI)) {
318 reportHWLoopFailure("cannot analyze loop, irreducible control flow",
319 "HWLoopCannotAnalyze", ORE, L);
320 return false;
321 }
322
323 if (!Opts.Force &&
324 !TTI.isHardwareLoopProfitable(L, SE, AC, TLI, HWLoopInfo)) {
325 reportHWLoopFailure("it's not profitable to create a hardware-loop",
326 "HWLoopNotProfitable", ORE, L);
327 return false;
328 }
329
330 // Allow overriding of the counter width and loop decrement value.
331 if (Opts.Bitwidth.has_value()) {
332 HWLoopInfo.CountType = IntegerType::get(Ctx, Opts.Bitwidth.value());
333 }
334
335 if (Opts.Decrement.has_value())
336 HWLoopInfo.LoopDecrement =
337 ConstantInt::get(HWLoopInfo.CountType, Opts.Decrement.value());
338
339 MadeChange |= TryConvertLoop(HWLoopInfo);
340 return MadeChange && (!HWLoopInfo.IsNestingLegal && !Opts.ForceNested);
341}
342
343bool HardwareLoopsImpl::TryConvertLoop(HardwareLoopInfo &HWLoopInfo) {
344
345 Loop *L = HWLoopInfo.L;
346 LLVM_DEBUG(dbgs() << "HWLoops: Try to convert profitable loop: " << *L);
347
348 if (!HWLoopInfo.isHardwareLoopCandidate(SE, LI, DT, Opts.getForceNested(),
349 Opts.getForcePhi())) {
350 // TODO: there can be many reasons a loop is not considered a
351 // candidate, so we should let isHardwareLoopCandidate fill in the
352 // reason and then report a better message here.
353 reportHWLoopFailure("loop is not a candidate", "HWLoopNoCandidate", ORE, L);
354 return false;
355 }
356
357 assert(
358 (HWLoopInfo.ExitBlock && HWLoopInfo.ExitBranch && HWLoopInfo.ExitCount) &&
359 "Hardware Loop must have set exit info.");
360
361 BasicBlock *Preheader = L->getLoopPreheader();
362
363 // If we don't have a preheader, then insert one.
364 if (!Preheader)
365 Preheader = InsertPreheaderForLoop(L, &DT, &LI, nullptr, PreserveLCSSA);
366 if (!Preheader)
367 return false;
368
369 HardwareLoop HWLoop(HWLoopInfo, SE, DL, ORE, Opts);
370 HWLoop.Create();
371 ++NumHWLoops;
372 return true;
373}
374
// Perform the conversion: expand the iteration count, insert the setup
// intrinsic, then insert the decrement in either its phi-carried or its
// opaque form. Emits a remark and returns without further changes when the
// count cannot be expanded safely.
375void HardwareLoop::Create() {
376 LLVM_DEBUG(dbgs() << "HWLoops: Converting loop..\n");
377
378 Value *LoopCountInit = InitLoopCount();
379 if (!LoopCountInit) {
380 reportHWLoopFailure("could not safely create a loop count expression",
381 "HWLoopNotSafe", ORE, L);
382 return;
383 }
384
385 Value *Setup = InsertIterationSetup(LoopCountInit);
386
// NOTE(review): Opts.ForcePhi is a std::optional<bool>, so this tests whether
// it was set, not its value. InsertIterationSetup uses the same expression;
// any fix must change both together.
387 if (UsePHICounter || Opts.ForcePhi) {
// The decrement is created first with a placeholder operand, because the phi
// that feeds it also needs the decrement as its latch value; operand 0 is
// then patched to the phi.
388 Instruction *LoopDec = InsertLoopRegDec(LoopCountInit);
389 Value *EltsRem = InsertPHICounter(Setup, LoopDec);
390 LoopDec->setOperand(0, EltsRem);
391 UpdateBranch(LoopDec);
392 } else
393 InsertLoopDec();
394
395 // Run through the basic blocks of the loop and see if any of them have dead
396 // PHIs that can be removed.
// NOTE(review): the statement forming this loop's body is not visible here;
// presumably a DeleteDeadPHIs call on each block - confirm against upstream.
397 for (auto *I : L->blocks())
399}
400
401static bool CanGenerateTest(Loop *L, Value *Count) {
402 BasicBlock *Preheader = L->getLoopPreheader();
403 if (!Preheader->getSinglePredecessor())
404 return false;
405
406 BasicBlock *Pred = Preheader->getSinglePredecessor();
407 if (!isa<BranchInst>(Pred->getTerminator()))
408 return false;
409
410 auto *BI = cast<BranchInst>(Pred->getTerminator());
411 if (BI->isUnconditional() || !isa<ICmpInst>(BI->getCondition()))
412 return false;
413
414 // Check that the icmp is checking for equality of Count and zero and that
415 // a non-zero value results in entering the loop.
416 auto ICmp = cast<ICmpInst>(BI->getCondition());
417 LLVM_DEBUG(dbgs() << " - Found condition: " << *ICmp << "\n");
418 if (!ICmp->isEquality())
419 return false;
420
421 auto IsCompareZero = [](ICmpInst *ICmp, Value *Count, unsigned OpIdx) {
422 if (auto *Const = dyn_cast<ConstantInt>(ICmp->getOperand(OpIdx)))
423 return Const->isZero() && ICmp->getOperand(OpIdx ^ 1) == Count;
424 return false;
425 };
426
427 // Check if Count is a zext.
428 Value *CountBefZext =
429 isa<ZExtInst>(Count) ? cast<ZExtInst>(Count)->getOperand(0) : nullptr;
430
431 if (!IsCompareZero(ICmp, Count, 0) && !IsCompareZero(ICmp, Count, 1) &&
432 !IsCompareZero(ICmp, CountBefZext, 0) &&
433 !IsCompareZero(ICmp, CountBefZext, 1))
434 return false;
435
436 unsigned SuccIdx = ICmp->getPredicate() == ICmpInst::ICMP_NE ? 0 : 1;
437 if (BI->getSuccessor(SuccIdx) != Preheader)
438 return false;
439
440 return true;
441}
442
443Value *HardwareLoop::InitLoopCount() {
444 LLVM_DEBUG(dbgs() << "HWLoops: Initialising loop counter value:\n");
445 // Can we replace a conditional branch with an intrinsic that sets the
446 // loop counter and tests that is not zero?
447
448 SCEVExpander SCEVE(SE, DL, "loopcnt");
449 if (!ExitCount->getType()->isPointerTy() &&
450 ExitCount->getType() != CountType)
451 ExitCount = SE.getZeroExtendExpr(ExitCount, CountType);
452
453 ExitCount = SE.getAddExpr(ExitCount, SE.getOne(CountType));
454
455 // If we're trying to use the 'test and set' form of the intrinsic, we need
456 // to replace a conditional branch that is controlling entry to the loop. It
457 // is likely (guaranteed?) that the preheader has an unconditional branch to
458 // the loop header, so also check if it has a single predecessor.
459 if (SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, ExitCount,
460 SE.getZero(ExitCount->getType()))) {
461 LLVM_DEBUG(dbgs() << " - Attempting to use test.set counter.\n");
462 if (Opts.ForceGuard)
463 UseLoopGuard = true;
464 } else
465 UseLoopGuard = false;
466
467 BasicBlock *BB = L->getLoopPreheader();
468 if (UseLoopGuard && BB->getSinglePredecessor() &&
469 cast<BranchInst>(BB->getTerminator())->isUnconditional()) {
470 BasicBlock *Predecessor = BB->getSinglePredecessor();
471 // If it's not safe to create a while loop then don't force it and create a
472 // do-while loop instead
473 if (!SCEVE.isSafeToExpandAt(ExitCount, Predecessor->getTerminator()))
474 UseLoopGuard = false;
475 else
476 BB = Predecessor;
477 }
478
479 if (!SCEVE.isSafeToExpandAt(ExitCount, BB->getTerminator())) {
480 LLVM_DEBUG(dbgs() << "- Bailing, unsafe to expand ExitCount "
481 << *ExitCount << "\n");
482 return nullptr;
483 }
484
485 Value *Count = SCEVE.expandCodeFor(ExitCount, CountType,
486 BB->getTerminator());
487
488 // FIXME: We've expanded Count where we hope to insert the counter setting
489 // intrinsic. But, in the case of the 'test and set' form, we may fallback to
490 // the just 'set' form and in which case the insertion block is most likely
491 // different. It means there will be instruction(s) in a block that possibly
492 // aren't needed. The isLoopEntryGuardedByCond is trying to avoid this issue,
493 // but it's doesn't appear to work in all cases.
494
495 UseLoopGuard = UseLoopGuard && CanGenerateTest(L, Count);
496 BeginBB = UseLoopGuard ? BB : L->getLoopPreheader();
497 LLVM_DEBUG(dbgs() << " - Loop Count: " << *Count << "\n"
498 << " - Expanded Count in " << BB->getName() << "\n"
499 << " - Will insert set counter intrinsic into: "
500 << BeginBB->getName() << "\n");
501 return Count;
502}
503
504Value* HardwareLoop::InsertIterationSetup(Value *LoopCountInit) {
505 IRBuilder<> Builder(BeginBB->getTerminator());
506 if (BeginBB->getParent()->getAttributes().hasFnAttr(Attribute::StrictFP))
507 Builder.setIsFPConstrained(true);
508 Type *Ty = LoopCountInit->getType();
509 bool UsePhi = UsePHICounter || Opts.ForcePhi;
510 Intrinsic::ID ID = UseLoopGuard
511 ? (UsePhi ? Intrinsic::test_start_loop_iterations
512 : Intrinsic::test_set_loop_iterations)
513 : (UsePhi ? Intrinsic::start_loop_iterations
514 : Intrinsic::set_loop_iterations);
515 Function *LoopIter = Intrinsic::getDeclaration(M, ID, Ty);
516 Value *LoopSetup = Builder.CreateCall(LoopIter, LoopCountInit);
517
518 // Use the return value of the intrinsic to control the entry of the loop.
519 if (UseLoopGuard) {
520 assert((isa<BranchInst>(BeginBB->getTerminator()) &&
521 cast<BranchInst>(BeginBB->getTerminator())->isConditional()) &&
522 "Expected conditional branch");
523
524 Value *SetCount =
525 UsePhi ? Builder.CreateExtractValue(LoopSetup, 1) : LoopSetup;
526 auto *LoopGuard = cast<BranchInst>(BeginBB->getTerminator());
527 LoopGuard->setCondition(SetCount);
528 if (LoopGuard->getSuccessor(0) != L->getLoopPreheader())
529 LoopGuard->swapSuccessors();
530 }
531 LLVM_DEBUG(dbgs() << "HWLoops: Inserted loop counter: " << *LoopSetup
532 << "\n");
533 if (UsePhi && UseLoopGuard)
534 LoopSetup = Builder.CreateExtractValue(LoopSetup, 0);
535 return !UsePhi ? LoopCountInit : LoopSetup;
536}
537
// Insert the loop_decrement intrinsic in front of the exit branch and make
// the branch test its result. Used when the counter is not carried through a
// phi.
538void HardwareLoop::InsertLoopDec() {
539 IRBuilder<> CondBuilder(ExitBranch);
// Match the builder's FP mode to a strictfp enclosing function.
540 if (ExitBranch->getParent()->getParent()->getAttributes().hasFnAttr(
541 Attribute::StrictFP))
542 CondBuilder.setIsFPConstrained(true);
543
// The intrinsic is overloaded on the type of the decrement amount.
544 Function *DecFunc =
545 Intrinsic::getDeclaration(M, Intrinsic::loop_decrement,
546 LoopDecrement->getType());
547 Value *Ops[] = { LoopDecrement };
548 Value *NewCond = CondBuilder.CreateCall(DecFunc, Ops);
549 Value *OldCond = ExitBranch->getCondition();
550 ExitBranch->setCondition(NewCond);
551
552 // The false branch must exit the loop.
553 if (!L->contains(ExitBranch->getSuccessor(0)))
554 ExitBranch->swapSuccessors();
555
556 // The old condition may be dead now, and may have even created a dead PHI
557 // (the original induction variable).
// NOTE(review): the statement that consumes OldCond is not visible here;
// presumably RecursivelyDeleteTriviallyDeadInstructions - confirm.
559
560 LLVM_DEBUG(dbgs() << "HWLoops: Inserted loop dec: " << *NewCond << "\n");
561}
562
563Instruction* HardwareLoop::InsertLoopRegDec(Value *EltsRem) {
564 IRBuilder<> CondBuilder(ExitBranch);
565 if (ExitBranch->getParent()->getParent()->getAttributes().hasFnAttr(
566 Attribute::StrictFP))
567 CondBuilder.setIsFPConstrained(true);
568
569 Function *DecFunc =
570 Intrinsic::getDeclaration(M, Intrinsic::loop_decrement_reg,
571 { EltsRem->getType() });
572 Value *Ops[] = { EltsRem, LoopDecrement };
573 Value *Call = CondBuilder.CreateCall(DecFunc, Ops);
574
575 LLVM_DEBUG(dbgs() << "HWLoops: Inserted loop dec: " << *Call << "\n");
576 return cast<Instruction>(Call);
577}
578
579PHINode* HardwareLoop::InsertPHICounter(Value *NumElts, Value *EltsRem) {
580 BasicBlock *Preheader = L->getLoopPreheader();
581 BasicBlock *Header = L->getHeader();
582 BasicBlock *Latch = ExitBranch->getParent();
583 IRBuilder<> Builder(Header, Header->getFirstNonPHIIt());
584 PHINode *Index = Builder.CreatePHI(NumElts->getType(), 2);
585 Index->addIncoming(NumElts, Preheader);
586 Index->addIncoming(EltsRem, Latch);
587 LLVM_DEBUG(dbgs() << "HWLoops: PHI Counter: " << *Index << "\n");
588 return Index;
589}
590
// Make the exit branch test "EltsRem != 0", where EltsRem is the value
// returned by the decrement intrinsic. Used on the phi-counter path.
591void HardwareLoop::UpdateBranch(Value *EltsRem) {
592 IRBuilder<> CondBuilder(ExitBranch);
593 Value *NewCond =
594 CondBuilder.CreateICmpNE(EltsRem, ConstantInt::get(EltsRem->getType(), 0));
595 Value *OldCond = ExitBranch->getCondition();
596 ExitBranch->setCondition(NewCond);
597
598 // The false branch must exit the loop.
599 if (!L->contains(ExitBranch->getSuccessor(0)))
600 ExitBranch->swapSuccessors();
601
602 // The old condition may be dead now, and may have even created a dead PHI
603 // (the original induction variable).
// NOTE(review): the statement that consumes OldCond is not visible here;
// presumably RecursivelyDeleteTriviallyDeadInstructions - confirm.
605}
606
607INITIALIZE_PASS_BEGIN(HardwareLoopsLegacy, DEBUG_TYPE, HW_LOOPS_NAME, false, false)
613
// Factory for the legacy pass manager: returns a newly allocated
// HardwareLoopsLegacy pass.
614FunctionPass *llvm::createHardwareLoopsLegacyPass() { return new HardwareLoopsLegacy(); }
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
This file contains the declarations for the subclasses of Constant, which represent the different fla...
#define LLVM_DEBUG(X)
Definition: Debug.h:101
#define DEBUG_TYPE
static cl::opt< bool > ForceNestedLoop("force-nested-hardware-loop", cl::Hidden, cl::init(false), cl::desc("Force allowance of nested hardware loops"))
#define HW_LOOPS_NAME
static cl::opt< unsigned > CounterBitWidth("hardware-loop-counter-bitwidth", cl::Hidden, cl::init(32), cl::desc("Set the loop counter bitwidth"))
static OptimizationRemarkAnalysis createHWLoopAnalysis(StringRef RemarkName, Loop *L, Instruction *I)
static cl::opt< bool > ForceGuardLoopEntry("force-hardware-loop-guard", cl::Hidden, cl::init(false), cl::desc("Force generation of loop guard intrinsic"))
static void debugHWLoopFailure(const StringRef DebugMsg, Instruction *I)
static cl::opt< unsigned > LoopDecrement("hardware-loop-decrement", cl::Hidden, cl::init(1), cl::desc("Set the loop decrement value"))
static cl::opt< bool > ForceHardwareLoops("force-hardware-loops", cl::Hidden, cl::init(false), cl::desc("Force hardware loops intrinsics to be inserted"))
#define DEBUG_TYPE
static bool CanGenerateTest(Loop *L, Value *Count)
static cl::opt< bool > ForceHardwareLoopPHI("force-hardware-loop-phi", cl::Hidden, cl::init(false), cl::desc("Force hardware loop counter to be updated through a phi"))
Defines an IR pass for the creation of hardware loops.
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:59
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
This pass exposes codegen information to IR-level passes.
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:321
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:473
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
A function analysis which provides an AssumptionCache.
An immutable pass that tracks lazily created AssumptionCache objects.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
Definition: BasicBlock.h:60
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:452
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:206
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:221
Conditional or Unconditional Branch instruction.
Analysis pass which computes BranchProbabilityInfo.
Legacy analysis pass which computes BranchProbabilityInfo.
@ ICMP_NE
not equal
Definition: InstrTypes.h:1015
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:1105
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
A debug info location.
Definition: DebugLoc.h:33
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:279
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:317
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:311
virtual bool runOnFunction(Function &F)=0
runOnFunction - Virtual method overriden by subclasses to do the per-function processing of the pass.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
This instruction compares its operands according to the predicate given to the constructor.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2666
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:278
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
Analysis pass that exposes the LoopInfo for a function.
Definition: LoopInfo.h:566
The legacy pass manager's analysis pass to compute loop information.
Definition: LoopInfo.h:593
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:44
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
Diagnostic information for optimization analysis remarks.
OptimizationRemarkEmitter legacy analysis pass.
The optimization diagnostic interface.
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
Definition: Pass.cpp:98
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:109
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:115
void preserve()
Mark an analysis as preserved.
Definition: Analysis.h:129
This class uses information about analyze scalars to rewrite expressions in canonical form.
This class represents an analyzed expression in the program.
Analysis pass that exposes the ScalarEvolution for a function.
The main scalar evolution driver.
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
Analysis pass providing the TargetTransformInfo.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
Wrapper pass for TargetTransformInfo.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *LibInfo, HardwareLoopInfo &HWLoopInfo) const
Query the target whether it would be profitable to convert the given loop into a hardware loop.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
void setOperand(unsigned i, Value *Val)
Definition: User.h:174
Value * getOperand(unsigned i) const
Definition: User.h:169
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1461
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
BasicBlock * InsertPreheaderForLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
InsertPreheaderForLoop - Once we discover that a loop doesn't have a preheader, this method is called...
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition: Local.cpp:539
char & LCSSAID
Definition: LCSSA.cpp:507
bool DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Examine each PHI in the given block and delete it if it is dead.
void initializeHardwareLoopsLegacyPass(PassRegistry &)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
FunctionPass * createHardwareLoopsLegacyPass()
Create Hardware Loop pass.
Attributes of a target dependent hardware loop.
bool isHardwareLoopCandidate(ScalarEvolution &SE, LoopInfo &LI, DominatorTree &DT, bool ForceNestedLoop=false, bool ForceHardwareLoopPHI=false)
std::optional< bool > Force
Definition: HardwareLoops.h:24
HardwareLoopOptions & setForceNested(bool Force)
Definition: HardwareLoops.h:45
std::optional< bool > ForceGuard
Definition: HardwareLoops.h:27
std::optional< unsigned > Decrement
Definition: HardwareLoops.h:22
HardwareLoopOptions & setDecrement(unsigned Count)
Definition: HardwareLoops.h:29
HardwareLoopOptions & setForceGuard(bool Force)
Definition: HardwareLoops.h:49
HardwareLoopOptions & setForce(bool Force)
Definition: HardwareLoops.h:37
HardwareLoopOptions & setCounterBitwidth(unsigned Width)
Definition: HardwareLoops.h:33
std::optional< unsigned > Bitwidth
Definition: HardwareLoops.h:23
HardwareLoopOptions & setForcePhi(bool Force)
Definition: HardwareLoops.h:41
std::optional< bool > ForcePhi
Definition: HardwareLoops.h:25
std::optional< bool > ForceNested
Definition: HardwareLoops.h:26