LLVM  13.0.0git
HardwareLoops.cpp
Go to the documentation of this file.
1 //===-- HardwareLoops.cpp - Target Independent Hardware Loops --*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 /// Insert hardware loop intrinsics into loops which are deemed profitable by
10 /// the target, by querying TargetTransformInfo. A hardware loop comprises
11 /// two intrinsics: one, outside the loop, to set the loop iteration count and
12 /// another, in the exit block, to decrement the counter. The decremented value
13 /// can either be carried through the loop via a phi or handled in some opaque
14 /// way by the target.
15 ///
16 //===----------------------------------------------------------------------===//
17 
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Value.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/PassRegistry.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
46 
47 #define DEBUG_TYPE "hardware-loops"
48 
49 #define HW_LOOPS_NAME "Hardware Loop Insertion"
50 
51 using namespace llvm;
52 
53 static cl::opt<bool>
54 ForceHardwareLoops("force-hardware-loops", cl::Hidden, cl::init(false),
55  cl::desc("Force hardware loops intrinsics to be inserted"));
56 
57 static cl::opt<bool>
59  "force-hardware-loop-phi", cl::Hidden, cl::init(false),
60  cl::desc("Force hardware loop counter to be updated through a phi"));
61 
62 static cl::opt<bool>
63 ForceNestedLoop("force-nested-hardware-loop", cl::Hidden, cl::init(false),
64  cl::desc("Force allowance of nested hardware loops"));
65 
66 static cl::opt<unsigned>
67 LoopDecrement("hardware-loop-decrement", cl::Hidden, cl::init(1),
68  cl::desc("Set the loop decrement value"));
69 
70 static cl::opt<unsigned>
71 CounterBitWidth("hardware-loop-counter-bitwidth", cl::Hidden, cl::init(32),
72  cl::desc("Set the loop counter bitwidth"));
73 
74 static cl::opt<bool>
76  "force-hardware-loop-guard", cl::Hidden, cl::init(false),
77  cl::desc("Force generation of loop guard intrinsic"));
78 
79 STATISTIC(NumHWLoops, "Number of loops converted to hardware loops");
80 
81 #ifndef NDEBUG
82 static void debugHWLoopFailure(const StringRef DebugMsg,
83  Instruction *I) {
84  dbgs() << "HWLoops: " << DebugMsg;
85  if (I)
86  dbgs() << ' ' << *I;
87  else
88  dbgs() << '.';
89  dbgs() << '\n';
90 }
91 #endif
92 
95  Value *CodeRegion = L->getHeader();
96  DebugLoc DL = L->getStartLoc();
97 
98  if (I) {
99  CodeRegion = I->getParent();
100  // If there is no debug location attached to the instruction, revert back to
101  // using the loop's.
102  if (I->getDebugLoc())
103  DL = I->getDebugLoc();
104  }
105 
106  OptimizationRemarkAnalysis R(DEBUG_TYPE, RemarkName, DL, CodeRegion);
107  R << "hardware-loop not created: ";
108  return R;
109 }
110 
111 namespace {
112 
113  void reportHWLoopFailure(const StringRef Msg, const StringRef ORETag,
114  OptimizationRemarkEmitter *ORE, Loop *TheLoop, Instruction *I = nullptr) {
116  ORE->emit(createHWLoopAnalysis(ORETag, TheLoop, I) << Msg);
117  }
118 
119  using TTI = TargetTransformInfo;
120 
121  class HardwareLoops : public FunctionPass {
122  public:
123  static char ID;
124 
125  HardwareLoops() : FunctionPass(ID) {
127  }
128 
129  bool runOnFunction(Function &F) override;
130 
131  void getAnalysisUsage(AnalysisUsage &AU) const override {
140  }
141 
142  // Try to convert the given Loop into a hardware loop.
143  bool TryConvertLoop(Loop *L);
144 
145  // Given that the target believes the loop to be profitable, try to
146  // convert it.
147  bool TryConvertLoop(HardwareLoopInfo &HWLoopInfo);
148 
149  private:
150  ScalarEvolution *SE = nullptr;
151  LoopInfo *LI = nullptr;
152  const DataLayout *DL = nullptr;
153  OptimizationRemarkEmitter *ORE = nullptr;
154  const TargetTransformInfo *TTI = nullptr;
155  DominatorTree *DT = nullptr;
156  bool PreserveLCSSA = false;
157  AssumptionCache *AC = nullptr;
158  TargetLibraryInfo *LibInfo = nullptr;
159  Module *M = nullptr;
160  bool MadeChange = false;
161  };
162 
163  class HardwareLoop {
164  // Expand the trip count scev into a value that we can use.
165  Value *InitLoopCount();
166 
167  // Insert the set_loop_iteration intrinsic.
168  Value *InsertIterationSetup(Value *LoopCountInit);
169 
170  // Insert the loop_decrement intrinsic.
171  void InsertLoopDec();
172 
173  // Insert the loop_decrement_reg intrinsic.
174  Instruction *InsertLoopRegDec(Value *EltsRem);
175 
176  // If the target requires the counter value to be updated in the loop,
177  // insert a phi to hold the value. The intended purpose is for use by
178  // loop_decrement_reg.
179  PHINode *InsertPHICounter(Value *NumElts, Value *EltsRem);
180 
181  // Create a new cmp, that checks the returned value of loop_decrement*,
182  // and update the exit branch to use it.
183  void UpdateBranch(Value *EltsRem);
184 
185  public:
186  HardwareLoop(HardwareLoopInfo &Info, ScalarEvolution &SE,
187  const DataLayout &DL,
189  SE(SE), DL(DL), ORE(ORE), L(Info.L), M(L->getHeader()->getModule()),
190  TripCount(Info.TripCount),
191  CountType(Info.CountType),
192  ExitBranch(Info.ExitBranch),
194  UsePHICounter(Info.CounterInReg),
195  UseLoopGuard(Info.PerformEntryTest) { }
196 
197  void Create();
198 
199  private:
200  ScalarEvolution &SE;
201  const DataLayout &DL;
202  OptimizationRemarkEmitter *ORE = nullptr;
203  Loop *L = nullptr;
204  Module *M = nullptr;
205  const SCEV *TripCount = nullptr;
206  Type *CountType = nullptr;
207  BranchInst *ExitBranch = nullptr;
208  Value *LoopDecrement = nullptr;
209  bool UsePHICounter = false;
210  bool UseLoopGuard = false;
211  BasicBlock *BeginBB = nullptr;
212  };
213 }
214 
215 char HardwareLoops::ID = 0;
216 
218  if (skipFunction(F))
219  return false;
220 
221  LLVM_DEBUG(dbgs() << "HWLoops: Running on " << F.getName() << "\n");
222 
223  LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
224  SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
225  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
226  TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
227  DL = &F.getParent()->getDataLayout();
228  ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
229  auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
230  LibInfo = TLIP ? &TLIP->getTLI(F) : nullptr;
231  PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
232  AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
233  M = F.getParent();
234 
235  for (Loop *L : *LI)
236  if (L->isOutermost())
237  TryConvertLoop(L);
238 
239  return MadeChange;
240 }
241 
242 // Return true if the search should stop, which will be when an inner loop is
243 // converted and the parent loop doesn't support containing a hardware loop.
244 bool HardwareLoops::TryConvertLoop(Loop *L) {
245  // Process nested loops first.
246  bool AnyChanged = false;
247  for (Loop *SL : *L)
248  AnyChanged |= TryConvertLoop(SL);
249  if (AnyChanged) {
250  reportHWLoopFailure("nested hardware-loops not supported", "HWLoopNested",
251  ORE, L);
252  return true; // Stop search.
253  }
254 
255  LLVM_DEBUG(dbgs() << "HWLoops: Loop " << L->getHeader()->getName() << "\n");
256 
257  HardwareLoopInfo HWLoopInfo(L);
258  if (!HWLoopInfo.canAnalyze(*LI)) {
259  reportHWLoopFailure("cannot analyze loop, irreducible control flow",
260  "HWLoopCannotAnalyze", ORE, L);
261  return false;
262  }
263 
264  if (!ForceHardwareLoops &&
265  !TTI->isHardwareLoopProfitable(L, *SE, *AC, LibInfo, HWLoopInfo)) {
266  reportHWLoopFailure("it's not profitable to create a hardware-loop",
267  "HWLoopNotProfitable", ORE, L);
268  return false;
269  }
270 
271  // Allow overriding of the counter width and loop decrement value.
272  if (CounterBitWidth.getNumOccurrences())
273  HWLoopInfo.CountType =
274  IntegerType::get(M->getContext(), CounterBitWidth);
275 
276  if (LoopDecrement.getNumOccurrences())
277  HWLoopInfo.LoopDecrement =
278  ConstantInt::get(HWLoopInfo.CountType, LoopDecrement);
279 
280  MadeChange |= TryConvertLoop(HWLoopInfo);
281  return MadeChange && (!HWLoopInfo.IsNestingLegal && !ForceNestedLoop);
282 }
283 
284 bool HardwareLoops::TryConvertLoop(HardwareLoopInfo &HWLoopInfo) {
285 
286  Loop *L = HWLoopInfo.L;
287  LLVM_DEBUG(dbgs() << "HWLoops: Try to convert profitable loop: " << *L);
288 
289  if (!HWLoopInfo.isHardwareLoopCandidate(*SE, *LI, *DT, ForceNestedLoop,
291  // TODO: there can be many reasons a loop is not considered a
292  // candidate, so we should let isHardwareLoopCandidate fill in the
293  // reason and then report a better message here.
294  reportHWLoopFailure("loop is not a candidate", "HWLoopNoCandidate", ORE, L);
295  return false;
296  }
297 
298  assert(
299  (HWLoopInfo.ExitBlock && HWLoopInfo.ExitBranch && HWLoopInfo.TripCount) &&
300  "Hardware Loop must have set exit info.");
301 
302  BasicBlock *Preheader = L->getLoopPreheader();
303 
304  // If we don't have a preheader, then insert one.
305  if (!Preheader)
306  Preheader = InsertPreheaderForLoop(L, DT, LI, nullptr, PreserveLCSSA);
307  if (!Preheader)
308  return false;
309 
310  HardwareLoop HWLoop(HWLoopInfo, *SE, *DL, ORE);
311  HWLoop.Create();
312  ++NumHWLoops;
313  return true;
314 }
315 
316 void HardwareLoop::Create() {
317  LLVM_DEBUG(dbgs() << "HWLoops: Converting loop..\n");
318 
319  Value *LoopCountInit = InitLoopCount();
320  if (!LoopCountInit) {
321  reportHWLoopFailure("could not safely create a loop count expression",
322  "HWLoopNotSafe", ORE, L);
323  return;
324  }
325 
326  Value *Setup = InsertIterationSetup(LoopCountInit);
327 
328  if (UsePHICounter || ForceHardwareLoopPHI) {
329  Instruction *LoopDec = InsertLoopRegDec(LoopCountInit);
330  Value *EltsRem = InsertPHICounter(Setup, LoopDec);
331  LoopDec->setOperand(0, EltsRem);
332  UpdateBranch(LoopDec);
333  } else
334  InsertLoopDec();
335 
336  // Run through the basic blocks of the loop and see if any of them have dead
337  // PHIs that can be removed.
338  for (auto I : L->blocks())
339  DeleteDeadPHIs(I);
340 }
341 
342 static bool CanGenerateTest(Loop *L, Value *Count) {
343  BasicBlock *Preheader = L->getLoopPreheader();
344  if (!Preheader->getSinglePredecessor())
345  return false;
346 
347  BasicBlock *Pred = Preheader->getSinglePredecessor();
348  if (!isa<BranchInst>(Pred->getTerminator()))
349  return false;
350 
351  auto *BI = cast<BranchInst>(Pred->getTerminator());
352  if (BI->isUnconditional() || !isa<ICmpInst>(BI->getCondition()))
353  return false;
354 
355  // Check that the icmp is checking for equality of Count and zero and that
356  // a non-zero value results in entering the loop.
357  auto ICmp = cast<ICmpInst>(BI->getCondition());
358  LLVM_DEBUG(dbgs() << " - Found condition: " << *ICmp << "\n");
359  if (!ICmp->isEquality())
360  return false;
361 
362  auto IsCompareZero = [](ICmpInst *ICmp, Value *Count, unsigned OpIdx) {
363  if (auto *Const = dyn_cast<ConstantInt>(ICmp->getOperand(OpIdx)))
364  return Const->isZero() && ICmp->getOperand(OpIdx ^ 1) == Count;
365  return false;
366  };
367 
368  if (!IsCompareZero(ICmp, Count, 0) && !IsCompareZero(ICmp, Count, 1))
369  return false;
370 
371  unsigned SuccIdx = ICmp->getPredicate() == ICmpInst::ICMP_NE ? 0 : 1;
372  if (BI->getSuccessor(SuccIdx) != Preheader)
373  return false;
374 
375  return true;
376 }
377 
378 Value *HardwareLoop::InitLoopCount() {
379  LLVM_DEBUG(dbgs() << "HWLoops: Initialising loop counter value:\n");
380  // Can we replace a conditional branch with an intrinsic that sets the
381  // loop counter and tests that is not zero?
382 
383  SCEVExpander SCEVE(SE, DL, "loopcnt");
384 
385  // If we're trying to use the 'test and set' form of the intrinsic, we need
386  // to replace a conditional branch that is controlling entry to the loop. It
387  // is likely (guaranteed?) that the preheader has an unconditional branch to
388  // the loop header, so also check if it has a single predecessor.
389  if (SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, TripCount,
390  SE.getZero(TripCount->getType()))) {
391  LLVM_DEBUG(dbgs() << " - Attempting to use test.set counter.\n");
392  UseLoopGuard |= ForceGuardLoopEntry;
393  } else
394  UseLoopGuard = false;
395 
397  if (UseLoopGuard && BB->getSinglePredecessor() &&
398  cast<BranchInst>(BB->getTerminator())->isUnconditional()) {
399  BasicBlock *Predecessor = BB->getSinglePredecessor();
400  // If it's not safe to create a while loop then don't force it and create a
401  // do-while loop instead
402  if (!isSafeToExpandAt(TripCount, Predecessor->getTerminator(), SE))
403  UseLoopGuard = false;
404  else
405  BB = Predecessor;
406  }
407 
408  if (!isSafeToExpandAt(TripCount, BB->getTerminator(), SE)) {
409  LLVM_DEBUG(dbgs() << "- Bailing, unsafe to expand TripCount "
410  << *TripCount << "\n");
411  return nullptr;
412  }
413 
414  Value *Count = SCEVE.expandCodeFor(TripCount, CountType,
415  BB->getTerminator());
416 
417  // FIXME: We've expanded Count where we hope to insert the counter setting
418  // intrinsic. But, in the case of the 'test and set' form, we may fallback to
419  // the just 'set' form and in which case the insertion block is most likely
420  // different. It means there will be instruction(s) in a block that possibly
421  // aren't needed. The isLoopEntryGuardedByCond is trying to avoid this issue,
422  // but it's doesn't appear to work in all cases.
423 
424  UseLoopGuard = UseLoopGuard && CanGenerateTest(L, Count);
425  BeginBB = UseLoopGuard ? BB : L->getLoopPreheader();
426  LLVM_DEBUG(dbgs() << " - Loop Count: " << *Count << "\n"
427  << " - Expanded Count in " << BB->getName() << "\n"
428  << " - Will insert set counter intrinsic into: "
429  << BeginBB->getName() << "\n");
430  return Count;
431 }
432 
433 Value* HardwareLoop::InsertIterationSetup(Value *LoopCountInit) {
434  IRBuilder<> Builder(BeginBB->getTerminator());
435  Type *Ty = LoopCountInit->getType();
436  bool UsePhi = UsePHICounter || ForceHardwareLoopPHI;
437  Intrinsic::ID ID = UseLoopGuard
438  ? (UsePhi ? Intrinsic::test_start_loop_iterations
439  : Intrinsic::test_set_loop_iterations)
440  : (UsePhi ? Intrinsic::start_loop_iterations
441  : Intrinsic::set_loop_iterations);
442  Function *LoopIter = Intrinsic::getDeclaration(M, ID, Ty);
443  Value *LoopSetup = Builder.CreateCall(LoopIter, LoopCountInit);
444 
445  // Use the return value of the intrinsic to control the entry of the loop.
446  if (UseLoopGuard) {
447  assert((isa<BranchInst>(BeginBB->getTerminator()) &&
448  cast<BranchInst>(BeginBB->getTerminator())->isConditional()) &&
449  "Expected conditional branch");
450 
451  Value *SetCount =
452  UsePhi ? Builder.CreateExtractValue(LoopSetup, 1) : LoopSetup;
453  auto *LoopGuard = cast<BranchInst>(BeginBB->getTerminator());
454  LoopGuard->setCondition(SetCount);
455  if (LoopGuard->getSuccessor(0) != L->getLoopPreheader())
456  LoopGuard->swapSuccessors();
457  }
458  LLVM_DEBUG(dbgs() << "HWLoops: Inserted loop counter: " << *LoopSetup
459  << "\n");
460  if (UsePhi && UseLoopGuard)
461  LoopSetup = Builder.CreateExtractValue(LoopSetup, 0);
462  return !UsePhi ? LoopCountInit : LoopSetup;
463 }
464 
465 void HardwareLoop::InsertLoopDec() {
466  IRBuilder<> CondBuilder(ExitBranch);
467 
468  Function *DecFunc =
469  Intrinsic::getDeclaration(M, Intrinsic::loop_decrement,
470  LoopDecrement->getType());
471  Value *Ops[] = { LoopDecrement };
472  Value *NewCond = CondBuilder.CreateCall(DecFunc, Ops);
473  Value *OldCond = ExitBranch->getCondition();
474  ExitBranch->setCondition(NewCond);
475 
476  // The false branch must exit the loop.
477  if (!L->contains(ExitBranch->getSuccessor(0)))
478  ExitBranch->swapSuccessors();
479 
480  // The old condition may be dead now, and may have even created a dead PHI
481  // (the original induction variable).
483 
484  LLVM_DEBUG(dbgs() << "HWLoops: Inserted loop dec: " << *NewCond << "\n");
485 }
486 
487 Instruction* HardwareLoop::InsertLoopRegDec(Value *EltsRem) {
488  IRBuilder<> CondBuilder(ExitBranch);
489 
490  Function *DecFunc =
491  Intrinsic::getDeclaration(M, Intrinsic::loop_decrement_reg,
492  { EltsRem->getType() });
493  Value *Ops[] = { EltsRem, LoopDecrement };
494  Value *Call = CondBuilder.CreateCall(DecFunc, Ops);
495 
496  LLVM_DEBUG(dbgs() << "HWLoops: Inserted loop dec: " << *Call << "\n");
497  return cast<Instruction>(Call);
498 }
499 
500 PHINode* HardwareLoop::InsertPHICounter(Value *NumElts, Value *EltsRem) {
501  BasicBlock *Preheader = L->getLoopPreheader();
502  BasicBlock *Header = L->getHeader();
503  BasicBlock *Latch = ExitBranch->getParent();
505  PHINode *Index = Builder.CreatePHI(NumElts->getType(), 2);
506  Index->addIncoming(NumElts, Preheader);
507  Index->addIncoming(EltsRem, Latch);
508  LLVM_DEBUG(dbgs() << "HWLoops: PHI Counter: " << *Index << "\n");
509  return Index;
510 }
511 
512 void HardwareLoop::UpdateBranch(Value *EltsRem) {
513  IRBuilder<> CondBuilder(ExitBranch);
514  Value *NewCond =
515  CondBuilder.CreateICmpNE(EltsRem, ConstantInt::get(EltsRem->getType(), 0));
516  Value *OldCond = ExitBranch->getCondition();
517  ExitBranch->setCondition(NewCond);
518 
519  // The false branch must exit the loop.
520  if (!L->contains(ExitBranch->getSuccessor(0)))
521  ExitBranch->swapSuccessors();
522 
523  // The old condition may be dead now, and may have even created a dead PHI
524  // (the original induction variable).
526 }
527 
528 INITIALIZE_PASS_BEGIN(HardwareLoops, DEBUG_TYPE, HW_LOOPS_NAME, false, false)
534 
535 FunctionPass *llvm::createHardwareLoopsPass() { return new HardwareLoops(); }
llvm::TargetTransformInfo::isHardwareLoopProfitable
bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *LibInfo, HardwareLoopInfo &HWLoopInfo) const
Query the target whether it would be profitable to convert the given loop into a hardware loop.
Definition: TargetTransformInfo.cpp:279
llvm::RecursivelyDeleteTriviallyDeadInstructions
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition: Local.cpp:496
AssumptionCache.h
CounterBitWidth
static cl::opt< unsigned > CounterBitWidth("hardware-loop-counter-bitwidth", cl::Hidden, cl::init(32), cl::desc("Set the loop counter bitwidth"))
llvm
Definition: AllocatorList.h:23
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:112
llvm::Intrinsic::getDeclaration
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=None)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1329
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:107
IntrinsicInst.h
ScalarEvolutionExpander.h
Scalar.h
llvm::Function
Definition: Function.h:61
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:530
Pass.h
llvm::LoopBase::contains
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:122
llvm::SCEVExpander
This class uses information about analyze scalars to rewrite expressions in canonical form.
Definition: ScalarEvolutionExpander.h:63
Statistic.h
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:167
llvm::IRBuilder<>
llvm::Loop::getStartLoc
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
Definition: LoopInfo.cpp:633
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:443
llvm::CmpInst::ICMP_NE
@ ICMP_NE
not equal
Definition: InstrTypes.h:744
Local.h
OptimizationRemarkEmitter.h
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:151
llvm::HardwareLoopInfo::ExitBranch
BranchInst * ExitBranch
Definition: TargetTransformInfo.h:99
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:143
llvm::HardwareLoopInfo::isHardwareLoopCandidate
bool isHardwareLoopCandidate(ScalarEvolution &SE, LoopInfo &LI, DominatorTree &DT, bool ForceNestedLoop=false, bool ForceHardwareLoopPHI=false)
Definition: TargetTransformInfo.cpp:100
ScalarEvolution.h
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:46
llvm::LoopInfoWrapperPass
The legacy pass manager's analysis pass to compute loop information.
Definition: LoopInfo.h:1258
llvm::HardwareLoopInfo::L
Loop * L
Definition: TargetTransformInfo.h:97
ForceNestedLoop
static cl::opt< bool > ForceNestedLoop("force-nested-hardware-loop", cl::Hidden, cl::init(false), cl::desc("Force allowance of nested hardware loops"))
LoopDecrement
static cl::opt< unsigned > LoopDecrement("hardware-loop-decrement", cl::Hidden, cl::init(1), cl::desc("Set the loop decrement value"))
llvm::BasicBlock::getSinglePredecessor
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:264
PassRegistry.h
createHWLoopAnalysis
static OptimizationRemarkAnalysis createHWLoopAnalysis(StringRef RemarkName, Loop *L, Instruction *I)
Definition: HardwareLoops.cpp:94
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:122
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
CommandLine.h
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
Constants.h
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::LoopBase::blocks
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:178
TargetLibraryInfo.h
false
Definition: StackSlotColoring.cpp:142
llvm::Instruction
Definition: Instruction.h:45
llvm::DominatorTreeWrapperPass
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:281
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::ConstantInt::get
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:898
LoopUtils.h
llvm::ScalarEvolutionWrapperPass
Definition: ScalarEvolution.h:2135
Info
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
llvm::BasicBlock::getFirstNonPHI
const Instruction * getFirstNonPHI() const
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
Definition: BasicBlock.cpp:212
llvm::MCID::Call
@ Call
Definition: MCInstrDesc.h:153
Utils.h
ForceHardwareLoops
static cl::opt< bool > ForceHardwareLoops("force-hardware-loops", cl::Hidden, cl::init(false), cl::desc("Force hardware loops intrinsics to be inserted"))
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
LoopInfo.h
Passes.h
llvm::InsertPreheaderForLoop
BasicBlock * InsertPreheaderForLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
InsertPreheaderForLoop - Once we discover that a loop doesn't have a preheader, this method is called...
Definition: LoopSimplify.cpp:123
debugHWLoopFailure
static void debugHWLoopFailure(const StringRef DebugMsg, Instruction *I)
Definition: HardwareLoops.cpp:82
BasicBlock.h
llvm::cl::opt< bool >
llvm::SCEV
This class represents an analyzed expression in the program.
Definition: ScalarEvolution.h:78
llvm::ICmpInst
This instruction compares its operands according to the predicate given to the constructor.
Definition: Instructions.h:1206
Index
uint32_t Index
Definition: ELFObjHandler.cpp:84
llvm::TargetTransformInfoWrapperPass
Wrapper pass for TargetTransformInfo.
Definition: TargetTransformInfo.h:2376
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
llvm::initializeHardwareLoopsPass
void initializeHardwareLoopsPass(PassRegistry &)
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
llvm::LoopBase::getLoopPreheader
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:167
TargetPassConfig.h
ForceGuardLoopEntry
static cl::opt< bool > ForceGuardLoopEntry("force-hardware-loop-guard", cl::Hidden, cl::init(false), cl::desc("Force generation of loop guard intrinsic"))
IRBuilder.h
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
CanGenerateTest
static bool CanGenerateTest(Loop *L, Value *Count)
Definition: HardwareLoops.cpp:342
HW_LOOPS_NAME
#define HW_LOOPS_NAME
Definition: HardwareLoops.cpp:49
llvm::OptimizationRemarkEmitter::emit
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Definition: OptimizationRemarkEmitter.cpp:77
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:67
Builder
assume Assume Builder
Definition: AssumeBundleBuilder.cpp:651
llvm::User::setOperand
void setOperand(unsigned i, Value *Val)
Definition: User.h:174
llvm::AssumptionCacheTracker
An immutable pass that tracks lazily created AssumptionCache objects.
Definition: AssumptionCache.h:200
llvm::LoopInfo
Definition: LoopInfo.h:1080
llvm::OptimizationRemarkEmitter
The optimization diagnostic interface.
Definition: OptimizationRemarkEmitter.h:33
DataLayout.h
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
llvm::createHardwareLoopsPass
FunctionPass * createHardwareLoopsPass()
Create Hardware Loop pass.
Definition: HardwareLoops.cpp:535
llvm::AssumptionCache
A cache of @llvm.assume calls within a function.
Definition: AssumptionCache.h:41
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:256
llvm::AnalysisUsage::addPreserved
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Definition: PassAnalysisSupport.h:98
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::OptimizationRemarkAnalysis
Diagnostic information for optimization analysis remarks.
Definition: DiagnosticInfo.h:775
llvm::BasicBlock::getTerminator
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:148
runOnFunction
static bool runOnFunction(Function &F, bool PostInlining)
Definition: EntryExitInstrumenter.cpp:69
llvm::LCSSAID
char & LCSSAID
Definition: LCSSA.cpp:485
llvm::OptimizationRemarkEmitterWrapperPass
OptimizationRemarkEmitter legacy analysis pass.
Definition: OptimizationRemarkEmitter.h:146
llvm::HardwareLoopInfo::ExitBlock
BasicBlock * ExitBlock
Definition: TargetTransformInfo.h:98
llvm::LoopBase::getHeader
BlockT * getHeader() const
Definition: LoopInfo.h:104
llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition: TargetLibraryInfo.h:219
llvm::HardwareLoopInfo::TripCount
const SCEV * TripCount
Definition: TargetTransformInfo.h:100
llvm::DeleteDeadPHIs
bool DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Examine each PHI in the given block and delete it if it is dead.
Definition: BasicBlockUtils.cpp:157
llvm::HardwareLoopInfo
Attributes of a target dependent hardware loop.
Definition: TargetTransformInfo.h:94
DEBUG_TYPE
#define DEBUG_TYPE
Definition: HardwareLoops.cpp:47
Instructions.h
llvm::isSafeToExpandAt
bool isSafeToExpandAt(const SCEV *S, const Instruction *InsertionPoint, ScalarEvolution &SE)
Return true if the given expression is safe to expand in the sense that all materialized values are d...
Definition: ScalarEvolutionExpander.cpp:2705
INITIALIZE_PASS_BEGIN
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:51
Dominators.h
llvm::CmpInst::getPredicate
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:799
TargetTransformInfo.h
llvm::PHINode
Definition: Instructions.h:2600
ForceHardwareLoopPHI
static cl::opt< bool > ForceHardwareLoopPHI("force-hardware-loop-phi", cl::Hidden, cl::init(false), cl::desc("Force hardware loop counter to be updated through a phi"))
llvm::IntegerType::get
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:269
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:298
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::DebugLoc
A debug info location.
Definition: DebugLoc.h:33
llvm::User::getOperand
Value * getOperand(unsigned i) const
Definition: User.h:169
llvm::cl::desc
Definition: CommandLine.h:414
llvm::BranchInst
Conditional or Unconditional Branch instruction.
Definition: Instructions.h:3035
BasicBlockUtils.h
Value.h
InitializePasses.h
llvm::Value
LLVM Value Representation.
Definition: Value.h:75
Debug.h
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38