45#define DEBUG_TYPE "hardware-loops"
47#define HW_LOOPS_NAME "Hardware Loop Insertion"
53 cl::desc(
"Force hardware loops intrinsics to be inserted"));
58 cl::desc(
"Force hardware loop counter to be updated through a phi"));
62 cl::desc(
"Force allowance of nested hardware loops"));
66 cl::desc(
"Set the loop decrement value"));
70 cl::desc(
"Set the loop counter bitwidth"));
75 cl::desc(
"Force generation of loop guard intrinsic"));
77STATISTIC(NumHWLoops,
"Number of loops converted to hardware loops");
82 dbgs() <<
"HWLoops: " << DebugMsg;
97 CodeRegion =
I->getParent();
100 if (
I->getDebugLoc())
101 DL =
I->getDebugLoc();
105 R <<
"hardware-loop not created: ";
123 HardwareLoopsLegacy() : FunctionPass(ID) {}
127 void getAnalysisUsage(AnalysisUsage &AU)
const override {
136 AU.
addRequired<OptimizationRemarkEmitterWrapperPass>();
141 class HardwareLoopsImpl {
143 HardwareLoopsImpl(ScalarEvolution &SE, LoopInfo &LI,
bool PreserveLCSSA,
144 DominatorTree &DT,
const TargetTransformInfo &TTI,
145 TargetLibraryInfo *TLI, AssumptionCache &AC,
146 OptimizationRemarkEmitter *ORE, HardwareLoopOptions &Opts)
147 : SE(SE), LI(LI), PreserveLCSSA(PreserveLCSSA), DT(DT), TTI(TTI),
148 TLI(TLI), AC(AC), ORE(ORE), Opts(Opts) {}
150 bool run(Function &
F);
154 bool TryConvertLoop(Loop *L, LLVMContext &Ctx);
158 bool TryConvertLoop(HardwareLoopInfo &HWLoopInfo);
164 const TargetTransformInfo &TTI;
165 TargetLibraryInfo *TLI =
nullptr;
167 OptimizationRemarkEmitter *ORE;
168 HardwareLoopOptions &Opts;
169 bool MadeChange =
false;
174 Value *InitLoopCount();
177 Value *InsertIterationSetup(
Value *LoopCountInit);
180 void InsertLoopDec();
188 PHINode *InsertPHICounter(
Value *NumElts,
Value *EltsRem);
192 void UpdateBranch(
Value *EltsRem);
195 HardwareLoop(HardwareLoopInfo &Info, ScalarEvolution &SE,
196 OptimizationRemarkEmitter *ORE, HardwareLoopOptions &Opts)
197 : SE(SE), ORE(ORE), Opts(Opts), L(
Info.L),
198 M(L->getHeader()->getModule()), ExitCount(
Info.ExitCount),
199 CountType(
Info.CountType), ExitBranch(
Info.ExitBranch),
200 LoopDecrement(
Info.LoopDecrement), UsePHICounter(
Info.CounterInReg),
201 UseLoopGuard(
Info.PerformEntryTest) {}
207 OptimizationRemarkEmitter *ORE =
nullptr;
208 HardwareLoopOptions &Opts;
211 const SCEV *ExitCount =
nullptr;
212 Type *CountType =
nullptr;
213 CondBrInst *ExitBranch =
nullptr;
214 Value *LoopDecrement =
nullptr;
215 bool UsePHICounter =
false;
216 bool UseLoopGuard =
false;
221char HardwareLoopsLegacy::ID = 0;
223bool HardwareLoopsLegacy::runOnFunction(
Function &
F) {
229 auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
230 auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
231 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
232 auto &
TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
F);
233 auto *ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
234 auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
235 auto *TLI = TLIP ? &TLIP->getTLI(
F) :
nullptr;
236 auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
F);
237 bool PreserveLCSSA = mustPreserveAnalysisID(
LCSSAID);
239 HardwareLoopOptions Opts;
253 HardwareLoopsImpl Impl(SE, LI, PreserveLCSSA, DT,
TTI, TLI, AC, ORE, Opts);
267 HardwareLoopsImpl Impl(SE, LI,
true, DT,
TTI, TLI, AC, ORE, Opts);
280bool HardwareLoopsImpl::run(
Function &
F) {
283 if (L->isOutermost())
284 TryConvertLoop(L, Ctx);
292 bool AnyChanged =
false;
294 AnyChanged |= TryConvertLoop(SL, Ctx);
296 reportHWLoopFailure(
"nested hardware-loops not supported",
"HWLoopNested",
301 LLVM_DEBUG(
dbgs() <<
"HWLoops: Loop " <<
L->getHeader()->getName() <<
"\n");
303 HardwareLoopInfo HWLoopInfo(L);
304 if (!HWLoopInfo.canAnalyze(LI)) {
305 reportHWLoopFailure(
"cannot analyze loop, irreducible control flow",
306 "HWLoopCannotAnalyze", ORE, L);
312 reportHWLoopFailure(
"it's not profitable to create a hardware-loop",
313 "HWLoopNotProfitable", ORE, L);
323 HWLoopInfo.LoopDecrement =
324 ConstantInt::get(HWLoopInfo.CountType, Opts.
Decrement.value());
326 MadeChange |= TryConvertLoop(HWLoopInfo);
327 return MadeChange && (!HWLoopInfo.IsNestingLegal && !Opts.
ForceNested);
330bool HardwareLoopsImpl::TryConvertLoop(HardwareLoopInfo &HWLoopInfo) {
332 Loop *
L = HWLoopInfo.
L;
333 LLVM_DEBUG(
dbgs() <<
"HWLoops: Try to convert profitable loop: " << *L);
340 reportHWLoopFailure(
"loop is not a candidate",
"HWLoopNoCandidate", ORE, L);
346 "Hardware Loop must have set exit info.");
356 HardwareLoop HWLoop(HWLoopInfo, SE, ORE, Opts);
362void HardwareLoop::Create() {
365 Value *LoopCountInit = InitLoopCount();
366 if (!LoopCountInit) {
367 reportHWLoopFailure(
"could not safely create a loop count expression",
368 "HWLoopNotSafe", ORE, L);
372 Value *
Setup = InsertIterationSetup(LoopCountInit);
374 if (UsePHICounter || Opts.
ForcePhi) {
375 Instruction *LoopDec = InsertLoopRegDec(LoopCountInit);
376 Value *EltsRem = InsertPHICounter(Setup, LoopDec);
378 UpdateBranch(LoopDec);
384 for (
auto *
I :
L->blocks())
389 BasicBlock *Preheader = L->getLoopPreheader();
402 if (!ICmp->isEquality())
412 Value *CountBefZext =
415 if (!IsCompareZero(ICmp,
Count, 0) && !IsCompareZero(ICmp,
Count, 1) &&
416 !IsCompareZero(ICmp, CountBefZext, 0) &&
417 !IsCompareZero(ICmp, CountBefZext, 1))
421 if (BI->getSuccessor(SuccIdx) != Preheader)
427Value *HardwareLoop::InitLoopCount() {
428 LLVM_DEBUG(
dbgs() <<
"HWLoops: Initialising loop counter value:\n");
432 SCEVExpander SCEVE(SE,
"loopcnt");
434 ExitCount->
getType() != CountType)
449 UseLoopGuard =
false;
457 if (!SCEVE.isSafeToExpandAt(ExitCount, Predecessor->
getTerminator()))
458 UseLoopGuard =
false;
463 if (!SCEVE.isSafeToExpandAt(ExitCount, BB->
getTerminator())) {
465 << *ExitCount <<
"\n");
469 Value *
Count = SCEVE.expandCodeFor(ExitCount, CountType,
480 BeginBB = UseLoopGuard ? BB :
L->getLoopPreheader();
482 <<
" - Expanded Count in " << BB->
getName() <<
"\n"
483 <<
" - Will insert set counter intrinsic into: "
484 << BeginBB->
getName() <<
"\n");
488Value* HardwareLoop::InsertIterationSetup(
Value *LoopCountInit) {
491 Builder.setIsFPConstrained(
true);
493 bool UsePhi = UsePHICounter || Opts.
ForcePhi;
495 ? (UsePhi ? Intrinsic::test_start_loop_iterations
496 : Intrinsic::test_set_loop_iterations)
497 : (UsePhi ?
Intrinsic::start_loop_iterations
499 Value *LoopSetup = Builder.CreateIntrinsic(
ID, Ty, LoopCountInit);
504 UsePhi ? Builder.CreateExtractValue(LoopSetup, 1) : LoopSetup;
506 LoopGuard->setCondition(SetCount);
507 if (LoopGuard->getSuccessor(0) !=
L->getLoopPreheader())
508 LoopGuard->swapSuccessors();
510 LLVM_DEBUG(
dbgs() <<
"HWLoops: Inserted loop counter: " << *LoopSetup
512 if (UsePhi && UseLoopGuard)
513 LoopSetup = Builder.CreateExtractValue(LoopSetup, 0);
514 return !UsePhi ? LoopCountInit : LoopSetup;
517void HardwareLoop::InsertLoopDec() {
519 if (ExitBranch->
getParent()->getParent()->getAttributes().hasFnAttr(
520 Attribute::StrictFP))
521 CondBuilder.setIsFPConstrained(
true);
524 Value *NewCond = CondBuilder.CreateIntrinsic(Intrinsic::loop_decrement,
537 LLVM_DEBUG(
dbgs() <<
"HWLoops: Inserted loop dec: " << *NewCond <<
"\n");
542 if (ExitBranch->
getParent()->getParent()->getAttributes().hasFnAttr(
543 Attribute::StrictFP))
544 CondBuilder.setIsFPConstrained(
true);
547 Value *
Call = CondBuilder.CreateIntrinsic(Intrinsic::loop_decrement_reg,
554PHINode* HardwareLoop::InsertPHICounter(
Value *NumElts,
Value *EltsRem) {
558 IRBuilder<> Builder(Header, Header->getFirstNonPHIIt());
559 PHINode *
Index = Builder.CreatePHI(NumElts->
getType(), 2);
560 Index->addIncoming(NumElts, Preheader);
561 Index->addIncoming(EltsRem, Latch);
566void HardwareLoop::UpdateBranch(
Value *EltsRem) {
569 CondBuilder.CreateICmpNE(EltsRem, ConstantInt::get(EltsRem->
getType(), 0));
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static bool runOnFunction(Function &F, bool PostInlining)
static cl::opt< bool > ForceNestedLoop("force-nested-hardware-loop", cl::Hidden, cl::init(false), cl::desc("Force allowance of nested hardware loops"))
static cl::opt< unsigned > CounterBitWidth("hardware-loop-counter-bitwidth", cl::Hidden, cl::init(32), cl::desc("Set the loop counter bitwidth"))
static OptimizationRemarkAnalysis createHWLoopAnalysis(StringRef RemarkName, Loop *L, Instruction *I)
static cl::opt< bool > ForceGuardLoopEntry("force-hardware-loop-guard", cl::Hidden, cl::init(false), cl::desc("Force generation of loop guard intrinsic"))
static void debugHWLoopFailure(const StringRef DebugMsg, Instruction *I)
static cl::opt< unsigned > LoopDecrement("hardware-loop-decrement", cl::Hidden, cl::init(1), cl::desc("Set the loop decrement value"))
static cl::opt< bool > ForceHardwareLoops("force-hardware-loops", cl::Hidden, cl::init(false), cl::desc("Force hardware loops intrinsics to be inserted"))
static bool CanGenerateTest(Loop *L, Value *Count)
static cl::opt< bool > ForceHardwareLoopPHI("force-hardware-loop-phi", cl::Hidden, cl::init(false), cl::desc("Force hardware loop counter to be updated through a phi"))
Defines an IR pass for the creation of hardware loops.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
Machine Check Debug Module
MachineInstr unsigned OpIdx
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
A function analysis which provides an AssumptionCache.
LLVM Basic Block Representation.
const Function * getParent() const
Return the enclosing method, or null if none.
LLVM_ABI const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Analysis pass which computes BranchProbabilityInfo.
Predicate getPredicate() const
Return the predicate for this instruction.
LLVM_ABI void swapSuccessors()
Swap the successors of this branch instruction.
void setCondition(Value *V)
Value * getCondition() const
BasicBlock * getSuccessor(unsigned i) const
Analysis pass which computes a DominatorTree.
FunctionPass class - This class is used to implement most global optimizations.
AttributeList getAttributes() const
Return the attribute list for this Function.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
This instruction compares its operands according to the predicate given to the constructor.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
This is an important class for using LLVM in a threaded context.
Analysis pass that exposes the LoopInfo for a function.
Represents a single loop in the control flow graph.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
PreservedAnalyses & preserve()
Mark an analysis as preserved.
LLVM_ABI Type * getType() const
Return the LLVM type of this SCEV expression.
Analysis pass that exposes the ScalarEvolution for a function.
LLVM_ABI bool isLoopEntryGuardedByCond(const Loop *L, CmpPredicate Pred, const SCEV *LHS, const SCEV *RHS)
Test whether entry to the loop is protected by a conditional between LHS and RHS.
const SCEV * getZero(Type *Ty)
Return a SCEV for the constant 0 of a specific type.
const SCEV * getOne(Type *Ty)
Return a SCEV for the constant 1 of a specific type.
LLVM_ABI const SCEV * getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
LLVM_ABI const SCEV * getAddExpr(SmallVectorImpl< SCEVUse > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
StringRef - Represent a constant reference to a string, i.e.
Analysis pass providing the TargetTransformInfo.
Analysis pass providing the TargetLibraryInfo.
bool isPointerTy() const
True if this is an instance of PointerType.
void setOperand(unsigned i, Value *Val)
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
int getNumOccurrences() const
const ParentTy * getParent() const
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ BasicBlock
Various leaf nodes.
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
friend class Instruction
Iterator for Instructions in a `BasicBlock.
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
LLVM_ABI BasicBlock * InsertPreheaderForLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
InsertPreheaderForLoop - Once we discover that a loop doesn't have a preheader, this method is called...
LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI bool DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Examine each PHI in the given block and delete it if it is dead.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
FunctionAddr VTableAddr Count
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI FunctionPass * createHardwareLoopsLegacyPass()
Create Hardware Loop pass.
LLVM_ABI bool isHardwareLoopCandidate(ScalarEvolution &SE, LoopInfo &LI, DominatorTree &DT, bool ForceNestedLoop=false, bool ForceHardwareLoopPHI=false)
std::optional< bool > Force
HardwareLoopOptions & setForceNested(bool Force)
std::optional< bool > ForceGuard
std::optional< unsigned > Decrement
HardwareLoopOptions & setDecrement(unsigned Count)
HardwareLoopOptions & setForceGuard(bool Force)
HardwareLoopOptions & setForce(bool Force)
HardwareLoopOptions & setCounterBitwidth(unsigned Width)
std::optional< unsigned > Bitwidth
HardwareLoopOptions & setForcePhi(bool Force)
std::optional< bool > ForcePhi
std::optional< bool > ForceNested
bool getForceNested() const