LLVM 20.0.0git
HardwareLoops.cpp
Go to the documentation of this file.
1//===-- HardwareLoops.cpp - Target Independent Hardware Loops --*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// Insert hardware loop intrinsics into loops which are deemed profitable by
10/// the target, by querying TargetTransformInfo. A hardware loop comprises
11/// two intrinsics: one, outside the loop, to set the loop iteration count and
12/// another, in the exit block, to decrement the counter. The decremented value
13/// can either be carried through the loop via a phi or handled in some opaque
14/// way by the target.
15///
16//===----------------------------------------------------------------------===//
17
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Value.h"
#include "llvm/Pass.h"
#include "llvm/PassRegistry.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
44
45#define DEBUG_TYPE "hardware-loops"
46
47#define HW_LOOPS_NAME "Hardware Loop Insertion"
48
49using namespace llvm;
50
51static cl::opt<bool>
52ForceHardwareLoops("force-hardware-loops", cl::Hidden, cl::init(false),
53 cl::desc("Force hardware loops intrinsics to be inserted"));
54
55static cl::opt<bool>
57 "force-hardware-loop-phi", cl::Hidden, cl::init(false),
58 cl::desc("Force hardware loop counter to be updated through a phi"));
59
60static cl::opt<bool>
61ForceNestedLoop("force-nested-hardware-loop", cl::Hidden, cl::init(false),
62 cl::desc("Force allowance of nested hardware loops"));
63
65LoopDecrement("hardware-loop-decrement", cl::Hidden, cl::init(1),
66 cl::desc("Set the loop decrement value"));
67
69CounterBitWidth("hardware-loop-counter-bitwidth", cl::Hidden, cl::init(32),
70 cl::desc("Set the loop counter bitwidth"));
71
72static cl::opt<bool>
74 "force-hardware-loop-guard", cl::Hidden, cl::init(false),
75 cl::desc("Force generation of loop guard intrinsic"));
76
77STATISTIC(NumHWLoops, "Number of loops converted to hardware loops");
78
79#ifndef NDEBUG
80static void debugHWLoopFailure(const StringRef DebugMsg,
81 Instruction *I) {
82 dbgs() << "HWLoops: " << DebugMsg;
83 if (I)
84 dbgs() << ' ' << *I;
85 else
86 dbgs() << '.';
87 dbgs() << '\n';
88}
89#endif
90
93 Value *CodeRegion = L->getHeader();
94 DebugLoc DL = L->getStartLoc();
95
96 if (I) {
97 CodeRegion = I->getParent();
98 // If there is no debug location attached to the instruction, revert back to
99 // using the loop's.
100 if (I->getDebugLoc())
101 DL = I->getDebugLoc();
102 }
103
104 OptimizationRemarkAnalysis R(DEBUG_TYPE, RemarkName, DL, CodeRegion);
105 R << "hardware-loop not created: ";
106 return R;
107}
108
109namespace {
110
111 void reportHWLoopFailure(const StringRef Msg, const StringRef ORETag,
112 OptimizationRemarkEmitter *ORE, Loop *TheLoop, Instruction *I = nullptr) {
114 ORE->emit(createHWLoopAnalysis(ORETag, TheLoop, I) << Msg);
115 }
116
117 using TTI = TargetTransformInfo;
118
119 class HardwareLoopsLegacy : public FunctionPass {
120 public:
121 static char ID;
122
123 HardwareLoopsLegacy() : FunctionPass(ID) {
125 }
126
127 bool runOnFunction(Function &F) override;
128
129 void getAnalysisUsage(AnalysisUsage &AU) const override {
140 }
141 };
142
143 class HardwareLoopsImpl {
144 public:
145 HardwareLoopsImpl(ScalarEvolution &SE, LoopInfo &LI, bool PreserveLCSSA,
146 DominatorTree &DT, const DataLayout &DL,
150 : SE(SE), LI(LI), PreserveLCSSA(PreserveLCSSA), DT(DT), DL(DL), TTI(TTI),
151 TLI(TLI), AC(AC), ORE(ORE), Opts(Opts) { }
152
153 bool run(Function &F);
154
155 private:
156 // Try to convert the given Loop into a hardware loop.
157 bool TryConvertLoop(Loop *L, LLVMContext &Ctx);
158
159 // Given that the target believes the loop to be profitable, try to
160 // convert it.
161 bool TryConvertLoop(HardwareLoopInfo &HWLoopInfo);
162
163 ScalarEvolution &SE;
164 LoopInfo &LI;
165 bool PreserveLCSSA;
166 DominatorTree &DT;
167 const DataLayout &DL;
169 TargetLibraryInfo *TLI = nullptr;
170 AssumptionCache &AC;
173 bool MadeChange = false;
174 };
175
176 class HardwareLoop {
177 // Expand the trip count scev into a value that we can use.
178 Value *InitLoopCount();
179
180 // Insert the set_loop_iteration intrinsic.
181 Value *InsertIterationSetup(Value *LoopCountInit);
182
183 // Insert the loop_decrement intrinsic.
184 void InsertLoopDec();
185
186 // Insert the loop_decrement_reg intrinsic.
187 Instruction *InsertLoopRegDec(Value *EltsRem);
188
189 // If the target requires the counter value to be updated in the loop,
190 // insert a phi to hold the value. The intended purpose is for use by
191 // loop_decrement_reg.
192 PHINode *InsertPHICounter(Value *NumElts, Value *EltsRem);
193
194 // Create a new cmp, that checks the returned value of loop_decrement*,
195 // and update the exit branch to use it.
196 void UpdateBranch(Value *EltsRem);
197
198 public:
199 HardwareLoop(HardwareLoopInfo &Info, ScalarEvolution &SE,
200 const DataLayout &DL,
202 HardwareLoopOptions &Opts) :
203 SE(SE), DL(DL), ORE(ORE), Opts(Opts), L(Info.L), M(L->getHeader()->getModule()),
204 ExitCount(Info.ExitCount),
205 CountType(Info.CountType),
206 ExitBranch(Info.ExitBranch),
208 UsePHICounter(Info.CounterInReg),
209 UseLoopGuard(Info.PerformEntryTest) { }
210
211 void Create();
212
213 private:
214 ScalarEvolution &SE;
215 const DataLayout &DL;
216 OptimizationRemarkEmitter *ORE = nullptr;
218 Loop *L = nullptr;
219 Module *M = nullptr;
220 const SCEV *ExitCount = nullptr;
221 Type *CountType = nullptr;
222 BranchInst *ExitBranch = nullptr;
223 Value *LoopDecrement = nullptr;
224 bool UsePHICounter = false;
225 bool UseLoopGuard = false;
226 BasicBlock *BeginBB = nullptr;
227 };
228}
229
230char HardwareLoopsLegacy::ID = 0;
231
232bool HardwareLoopsLegacy::runOnFunction(Function &F) {
233 if (skipFunction(F))
234 return false;
235
236 LLVM_DEBUG(dbgs() << "HWLoops: Running on " << F.getName() << "\n");
237
238 auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
239 auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
240 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
241 auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
242 auto &DL = F.getDataLayout();
243 auto *ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
244 auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
245 auto *TLI = TLIP ? &TLIP->getTLI(F) : nullptr;
246 auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
247 bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
248
250 if (ForceHardwareLoops.getNumOccurrences())
252 if (ForceHardwareLoopPHI.getNumOccurrences())
254 if (ForceNestedLoop.getNumOccurrences())
256 if (ForceGuardLoopEntry.getNumOccurrences())
258 if (LoopDecrement.getNumOccurrences())
260 if (CounterBitWidth.getNumOccurrences())
262
263 HardwareLoopsImpl Impl(SE, LI, PreserveLCSSA, DT, DL, TTI, TLI, AC, ORE,
264 Opts);
265 return Impl.run(F);
266}
267
270 auto &LI = AM.getResult<LoopAnalysis>(F);
271 auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
272 auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
273 auto &TTI = AM.getResult<TargetIRAnalysis>(F);
274 auto *TLI = &AM.getResult<TargetLibraryAnalysis>(F);
275 auto &AC = AM.getResult<AssumptionAnalysis>(F);
277 auto &DL = F.getDataLayout();
278
279 HardwareLoopsImpl Impl(SE, LI, true, DT, DL, TTI, TLI, AC, ORE, Opts);
280 bool Changed = Impl.run(F);
281 if (!Changed)
282 return PreservedAnalyses::all();
283
289 return PA;
290}
291
292bool HardwareLoopsImpl::run(Function &F) {
293 LLVMContext &Ctx = F.getContext();
294 for (Loop *L : LI)
295 if (L->isOutermost())
296 TryConvertLoop(L, Ctx);
297 return MadeChange;
298}
299
300// Return true if the search should stop, which will be when an inner loop is
301// converted and the parent loop doesn't support containing a hardware loop.
302bool HardwareLoopsImpl::TryConvertLoop(Loop *L, LLVMContext &Ctx) {
303 // Process nested loops first.
304 bool AnyChanged = false;
305 for (Loop *SL : *L)
306 AnyChanged |= TryConvertLoop(SL, Ctx);
307 if (AnyChanged) {
308 reportHWLoopFailure("nested hardware-loops not supported", "HWLoopNested",
309 ORE, L);
310 return true; // Stop search.
311 }
312
313 LLVM_DEBUG(dbgs() << "HWLoops: Loop " << L->getHeader()->getName() << "\n");
314
315 HardwareLoopInfo HWLoopInfo(L);
316 if (!HWLoopInfo.canAnalyze(LI)) {
317 reportHWLoopFailure("cannot analyze loop, irreducible control flow",
318 "HWLoopCannotAnalyze", ORE, L);
319 return false;
320 }
321
322 if (!Opts.Force &&
323 !TTI.isHardwareLoopProfitable(L, SE, AC, TLI, HWLoopInfo)) {
324 reportHWLoopFailure("it's not profitable to create a hardware-loop",
325 "HWLoopNotProfitable", ORE, L);
326 return false;
327 }
328
329 // Allow overriding of the counter width and loop decrement value.
330 if (Opts.Bitwidth.has_value()) {
331 HWLoopInfo.CountType = IntegerType::get(Ctx, Opts.Bitwidth.value());
332 }
333
334 if (Opts.Decrement.has_value())
335 HWLoopInfo.LoopDecrement =
336 ConstantInt::get(HWLoopInfo.CountType, Opts.Decrement.value());
337
338 MadeChange |= TryConvertLoop(HWLoopInfo);
339 return MadeChange && (!HWLoopInfo.IsNestingLegal && !Opts.ForceNested);
340}
341
342bool HardwareLoopsImpl::TryConvertLoop(HardwareLoopInfo &HWLoopInfo) {
343
344 Loop *L = HWLoopInfo.L;
345 LLVM_DEBUG(dbgs() << "HWLoops: Try to convert profitable loop: " << *L);
346
347 if (!HWLoopInfo.isHardwareLoopCandidate(SE, LI, DT, Opts.getForceNested(),
348 Opts.getForcePhi())) {
349 // TODO: there can be many reasons a loop is not considered a
350 // candidate, so we should let isHardwareLoopCandidate fill in the
351 // reason and then report a better message here.
352 reportHWLoopFailure("loop is not a candidate", "HWLoopNoCandidate", ORE, L);
353 return false;
354 }
355
356 assert(
357 (HWLoopInfo.ExitBlock && HWLoopInfo.ExitBranch && HWLoopInfo.ExitCount) &&
358 "Hardware Loop must have set exit info.");
359
360 BasicBlock *Preheader = L->getLoopPreheader();
361
362 // If we don't have a preheader, then insert one.
363 if (!Preheader)
364 Preheader = InsertPreheaderForLoop(L, &DT, &LI, nullptr, PreserveLCSSA);
365 if (!Preheader)
366 return false;
367
368 HardwareLoop HWLoop(HWLoopInfo, SE, DL, ORE, Opts);
369 HWLoop.Create();
370 ++NumHWLoops;
371 return true;
372}
373
374void HardwareLoop::Create() {
375 LLVM_DEBUG(dbgs() << "HWLoops: Converting loop..\n");
376
377 Value *LoopCountInit = InitLoopCount();
378 if (!LoopCountInit) {
379 reportHWLoopFailure("could not safely create a loop count expression",
380 "HWLoopNotSafe", ORE, L);
381 return;
382 }
383
384 Value *Setup = InsertIterationSetup(LoopCountInit);
385
386 if (UsePHICounter || Opts.ForcePhi) {
387 Instruction *LoopDec = InsertLoopRegDec(LoopCountInit);
388 Value *EltsRem = InsertPHICounter(Setup, LoopDec);
389 LoopDec->setOperand(0, EltsRem);
390 UpdateBranch(LoopDec);
391 } else
392 InsertLoopDec();
393
394 // Run through the basic blocks of the loop and see if any of them have dead
395 // PHIs that can be removed.
396 for (auto *I : L->blocks())
398}
399
400static bool CanGenerateTest(Loop *L, Value *Count) {
401 BasicBlock *Preheader = L->getLoopPreheader();
402 if (!Preheader->getSinglePredecessor())
403 return false;
404
405 BasicBlock *Pred = Preheader->getSinglePredecessor();
406 if (!isa<BranchInst>(Pred->getTerminator()))
407 return false;
408
409 auto *BI = cast<BranchInst>(Pred->getTerminator());
410 if (BI->isUnconditional() || !isa<ICmpInst>(BI->getCondition()))
411 return false;
412
413 // Check that the icmp is checking for equality of Count and zero and that
414 // a non-zero value results in entering the loop.
415 auto ICmp = cast<ICmpInst>(BI->getCondition());
416 LLVM_DEBUG(dbgs() << " - Found condition: " << *ICmp << "\n");
417 if (!ICmp->isEquality())
418 return false;
419
420 auto IsCompareZero = [](ICmpInst *ICmp, Value *Count, unsigned OpIdx) {
421 if (auto *Const = dyn_cast<ConstantInt>(ICmp->getOperand(OpIdx)))
422 return Const->isZero() && ICmp->getOperand(OpIdx ^ 1) == Count;
423 return false;
424 };
425
426 // Check if Count is a zext.
427 Value *CountBefZext =
428 isa<ZExtInst>(Count) ? cast<ZExtInst>(Count)->getOperand(0) : nullptr;
429
430 if (!IsCompareZero(ICmp, Count, 0) && !IsCompareZero(ICmp, Count, 1) &&
431 !IsCompareZero(ICmp, CountBefZext, 0) &&
432 !IsCompareZero(ICmp, CountBefZext, 1))
433 return false;
434
435 unsigned SuccIdx = ICmp->getPredicate() == ICmpInst::ICMP_NE ? 0 : 1;
436 if (BI->getSuccessor(SuccIdx) != Preheader)
437 return false;
438
439 return true;
440}
441
442Value *HardwareLoop::InitLoopCount() {
443 LLVM_DEBUG(dbgs() << "HWLoops: Initialising loop counter value:\n");
444 // Can we replace a conditional branch with an intrinsic that sets the
445 // loop counter and tests that is not zero?
446
447 SCEVExpander SCEVE(SE, DL, "loopcnt");
448 if (!ExitCount->getType()->isPointerTy() &&
449 ExitCount->getType() != CountType)
450 ExitCount = SE.getZeroExtendExpr(ExitCount, CountType);
451
452 ExitCount = SE.getAddExpr(ExitCount, SE.getOne(CountType));
453
454 // If we're trying to use the 'test and set' form of the intrinsic, we need
455 // to replace a conditional branch that is controlling entry to the loop. It
456 // is likely (guaranteed?) that the preheader has an unconditional branch to
457 // the loop header, so also check if it has a single predecessor.
458 if (SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, ExitCount,
459 SE.getZero(ExitCount->getType()))) {
460 LLVM_DEBUG(dbgs() << " - Attempting to use test.set counter.\n");
461 if (Opts.ForceGuard)
462 UseLoopGuard = true;
463 } else
464 UseLoopGuard = false;
465
466 BasicBlock *BB = L->getLoopPreheader();
467 if (UseLoopGuard && BB->getSinglePredecessor() &&
468 cast<BranchInst>(BB->getTerminator())->isUnconditional()) {
469 BasicBlock *Predecessor = BB->getSinglePredecessor();
470 // If it's not safe to create a while loop then don't force it and create a
471 // do-while loop instead
472 if (!SCEVE.isSafeToExpandAt(ExitCount, Predecessor->getTerminator()))
473 UseLoopGuard = false;
474 else
475 BB = Predecessor;
476 }
477
478 if (!SCEVE.isSafeToExpandAt(ExitCount, BB->getTerminator())) {
479 LLVM_DEBUG(dbgs() << "- Bailing, unsafe to expand ExitCount "
480 << *ExitCount << "\n");
481 return nullptr;
482 }
483
484 Value *Count = SCEVE.expandCodeFor(ExitCount, CountType,
485 BB->getTerminator());
486
487 // FIXME: We've expanded Count where we hope to insert the counter setting
488 // intrinsic. But, in the case of the 'test and set' form, we may fallback to
489 // the just 'set' form and in which case the insertion block is most likely
490 // different. It means there will be instruction(s) in a block that possibly
491 // aren't needed. The isLoopEntryGuardedByCond is trying to avoid this issue,
492 // but it's doesn't appear to work in all cases.
493
494 UseLoopGuard = UseLoopGuard && CanGenerateTest(L, Count);
495 BeginBB = UseLoopGuard ? BB : L->getLoopPreheader();
496 LLVM_DEBUG(dbgs() << " - Loop Count: " << *Count << "\n"
497 << " - Expanded Count in " << BB->getName() << "\n"
498 << " - Will insert set counter intrinsic into: "
499 << BeginBB->getName() << "\n");
500 return Count;
501}
502
503Value* HardwareLoop::InsertIterationSetup(Value *LoopCountInit) {
504 IRBuilder<> Builder(BeginBB->getTerminator());
505 if (BeginBB->getParent()->getAttributes().hasFnAttr(Attribute::StrictFP))
506 Builder.setIsFPConstrained(true);
507 Type *Ty = LoopCountInit->getType();
508 bool UsePhi = UsePHICounter || Opts.ForcePhi;
509 Intrinsic::ID ID = UseLoopGuard
510 ? (UsePhi ? Intrinsic::test_start_loop_iterations
511 : Intrinsic::test_set_loop_iterations)
512 : (UsePhi ? Intrinsic::start_loop_iterations
513 : Intrinsic::set_loop_iterations);
514 Value *LoopSetup = Builder.CreateIntrinsic(ID, Ty, LoopCountInit);
515
516 // Use the return value of the intrinsic to control the entry of the loop.
517 if (UseLoopGuard) {
518 assert((isa<BranchInst>(BeginBB->getTerminator()) &&
519 cast<BranchInst>(BeginBB->getTerminator())->isConditional()) &&
520 "Expected conditional branch");
521
522 Value *SetCount =
523 UsePhi ? Builder.CreateExtractValue(LoopSetup, 1) : LoopSetup;
524 auto *LoopGuard = cast<BranchInst>(BeginBB->getTerminator());
525 LoopGuard->setCondition(SetCount);
526 if (LoopGuard->getSuccessor(0) != L->getLoopPreheader())
527 LoopGuard->swapSuccessors();
528 }
529 LLVM_DEBUG(dbgs() << "HWLoops: Inserted loop counter: " << *LoopSetup
530 << "\n");
531 if (UsePhi && UseLoopGuard)
532 LoopSetup = Builder.CreateExtractValue(LoopSetup, 0);
533 return !UsePhi ? LoopCountInit : LoopSetup;
534}
535
536void HardwareLoop::InsertLoopDec() {
537 IRBuilder<> CondBuilder(ExitBranch);
538 if (ExitBranch->getParent()->getParent()->getAttributes().hasFnAttr(
539 Attribute::StrictFP))
540 CondBuilder.setIsFPConstrained(true);
541
542 Value *Ops[] = { LoopDecrement };
543 Value *NewCond = CondBuilder.CreateIntrinsic(Intrinsic::loop_decrement,
544 LoopDecrement->getType(), Ops);
545 Value *OldCond = ExitBranch->getCondition();
546 ExitBranch->setCondition(NewCond);
547
548 // The false branch must exit the loop.
549 if (!L->contains(ExitBranch->getSuccessor(0)))
550 ExitBranch->swapSuccessors();
551
552 // The old condition may be dead now, and may have even created a dead PHI
553 // (the original induction variable).
555
556 LLVM_DEBUG(dbgs() << "HWLoops: Inserted loop dec: " << *NewCond << "\n");
557}
558
559Instruction* HardwareLoop::InsertLoopRegDec(Value *EltsRem) {
560 IRBuilder<> CondBuilder(ExitBranch);
561 if (ExitBranch->getParent()->getParent()->getAttributes().hasFnAttr(
562 Attribute::StrictFP))
563 CondBuilder.setIsFPConstrained(true);
564
565 Value *Ops[] = { EltsRem, LoopDecrement };
566 Value *Call = CondBuilder.CreateIntrinsic(Intrinsic::loop_decrement_reg,
567 {EltsRem->getType()}, Ops);
568
569 LLVM_DEBUG(dbgs() << "HWLoops: Inserted loop dec: " << *Call << "\n");
570 return cast<Instruction>(Call);
571}
572
573PHINode* HardwareLoop::InsertPHICounter(Value *NumElts, Value *EltsRem) {
574 BasicBlock *Preheader = L->getLoopPreheader();
575 BasicBlock *Header = L->getHeader();
576 BasicBlock *Latch = ExitBranch->getParent();
577 IRBuilder<> Builder(Header, Header->getFirstNonPHIIt());
578 PHINode *Index = Builder.CreatePHI(NumElts->getType(), 2);
579 Index->addIncoming(NumElts, Preheader);
580 Index->addIncoming(EltsRem, Latch);
581 LLVM_DEBUG(dbgs() << "HWLoops: PHI Counter: " << *Index << "\n");
582 return Index;
583}
584
585void HardwareLoop::UpdateBranch(Value *EltsRem) {
586 IRBuilder<> CondBuilder(ExitBranch);
587 Value *NewCond =
588 CondBuilder.CreateICmpNE(EltsRem, ConstantInt::get(EltsRem->getType(), 0));
589 Value *OldCond = ExitBranch->getCondition();
590 ExitBranch->setCondition(NewCond);
591
592 // The false branch must exit the loop.
593 if (!L->contains(ExitBranch->getSuccessor(0)))
594 ExitBranch->swapSuccessors();
595
596 // The old condition may be dead now, and may have even created a dead PHI
597 // (the original induction variable).
599}
600
601INITIALIZE_PASS_BEGIN(HardwareLoopsLegacy, DEBUG_TYPE, HW_LOOPS_NAME, false, false)
607
608FunctionPass *llvm::createHardwareLoopsLegacyPass() { return new HardwareLoopsLegacy(); }
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
This file contains the declarations for the subclasses of Constant, which represent the different fla...
#define LLVM_DEBUG(...)
Definition: Debug.h:106
#define DEBUG_TYPE
static cl::opt< bool > ForceNestedLoop("force-nested-hardware-loop", cl::Hidden, cl::init(false), cl::desc("Force allowance of nested hardware loops"))
#define HW_LOOPS_NAME
static cl::opt< unsigned > CounterBitWidth("hardware-loop-counter-bitwidth", cl::Hidden, cl::init(32), cl::desc("Set the loop counter bitwidth"))
static OptimizationRemarkAnalysis createHWLoopAnalysis(StringRef RemarkName, Loop *L, Instruction *I)
static cl::opt< bool > ForceGuardLoopEntry("force-hardware-loop-guard", cl::Hidden, cl::init(false), cl::desc("Force generation of loop guard intrinsic"))
static void debugHWLoopFailure(const StringRef DebugMsg, Instruction *I)
static cl::opt< unsigned > LoopDecrement("hardware-loop-decrement", cl::Hidden, cl::init(1), cl::desc("Set the loop decrement value"))
static cl::opt< bool > ForceHardwareLoops("force-hardware-loops", cl::Hidden, cl::init(false), cl::desc("Force hardware loops intrinsics to be inserted"))
#define DEBUG_TYPE
static bool CanGenerateTest(Loop *L, Value *Count)
static cl::opt< bool > ForceHardwareLoopPHI("force-hardware-loop-phi", cl::Hidden, cl::init(false), cl::desc("Force hardware loop counter to be updated through a phi"))
Defines an IR pass for the creation of hardware loops.
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:57
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:166
This pass exposes codegen information to IR-level passes.
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:410
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
A function analysis which provides an AssumptionCache.
An immutable pass that tracks lazily created AssumptionCache objects.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:459
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:219
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:239
Conditional or Unconditional Branch instruction.
Analysis pass which computes BranchProbabilityInfo.
Legacy analysis pass which computes BranchProbabilityInfo.
@ ICMP_NE
not equal
Definition: InstrTypes.h:695
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:763
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
A debug info location.
Definition: DebugLoc.h:33
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:279
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:317
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:310
virtual bool runOnFunction(Function &F)=0
runOnFunction - Virtual method overriden by subclasses to do the per-function processing of the pass.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
This instruction compares its operands according to the predicate given to the constructor.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2697
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:311
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
Analysis pass that exposes the LoopInfo for a function.
Definition: LoopInfo.h:566
The legacy pass manager's analysis pass to compute loop information.
Definition: LoopInfo.h:593
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:39
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
Diagnostic information for optimization analysis remarks.
OptimizationRemarkEmitter legacy analysis pass.
The optimization diagnostic interface.
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
Definition: Pass.cpp:98
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:111
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:117
void preserve()
Mark an analysis as preserved.
Definition: Analysis.h:131
This class uses information about analyze scalars to rewrite expressions in canonical form.
This class represents an analyzed expression in the program.
Analysis pass that exposes the ScalarEvolution for a function.
The main scalar evolution driver.
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
Analysis pass providing the TargetTransformInfo.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
Wrapper pass for TargetTransformInfo.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *LibInfo, HardwareLoopInfo &HWLoopInfo) const
Query the target whether it would be profitable to convert the given loop into a hardware loop.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
void setOperand(unsigned i, Value *Val)
Definition: User.h:233
Value * getOperand(unsigned i) const
Definition: User.h:228
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
BasicBlock * InsertPreheaderForLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
InsertPreheaderForLoop - Once we discover that a loop doesn't have a preheader, this method is called...
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition: Local.cpp:546
char & LCSSAID
Definition: LCSSA.cpp:542
bool DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Examine each PHI in the given block and delete it if it is dead.
void initializeHardwareLoopsLegacyPass(PassRegistry &)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
FunctionPass * createHardwareLoopsLegacyPass()
Create Hardware Loop pass.
Attributes of a target dependent hardware loop.
bool isHardwareLoopCandidate(ScalarEvolution &SE, LoopInfo &LI, DominatorTree &DT, bool ForceNestedLoop=false, bool ForceHardwareLoopPHI=false)
std::optional< bool > Force
Definition: HardwareLoops.h:24
HardwareLoopOptions & setForceNested(bool Force)
Definition: HardwareLoops.h:45
std::optional< bool > ForceGuard
Definition: HardwareLoops.h:27
std::optional< unsigned > Decrement
Definition: HardwareLoops.h:22
HardwareLoopOptions & setDecrement(unsigned Count)
Definition: HardwareLoops.h:29
HardwareLoopOptions & setForceGuard(bool Force)
Definition: HardwareLoops.h:49
HardwareLoopOptions & setForce(bool Force)
Definition: HardwareLoops.h:37
HardwareLoopOptions & setCounterBitwidth(unsigned Width)
Definition: HardwareLoops.h:33
std::optional< unsigned > Bitwidth
Definition: HardwareLoops.h:23
HardwareLoopOptions & setForcePhi(bool Force)
Definition: HardwareLoops.h:41
std::optional< bool > ForcePhi
Definition: HardwareLoops.h:25
std::optional< bool > ForceNested
Definition: HardwareLoops.h:26