LLVM  14.0.0git
MVETailPredication.cpp
Go to the documentation of this file.
1 //===- MVETailPredication.cpp - MVE Tail Predication ------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// Armv8.1m introduced MVE, M-Profile Vector Extension, and low-overhead
11 /// branches to help accelerate DSP applications. These two extensions,
12 /// combined with a new form of predication called tail-predication, can be used
13 /// to provide implicit vector predication within a low-overhead loop.
14 /// This is implicit because the predicate of active/inactive lanes is
15 /// calculated by hardware, and thus does not need to be explicitly passed
16 /// to vector instructions. The instructions responsible for this are the
17 /// DLSTP and WLSTP instructions, which set up a tail-predicated loop and the
18 /// total number of data elements processed by the loop. The loop-end
19 /// LETP instruction is responsible for decrementing and setting the remaining
20 /// elements to be processed and generating the mask of active lanes.
21 ///
22 /// The HardwareLoops pass inserts intrinsics identifying loops that the
23 /// backend will attempt to convert into a low-overhead loop. The vectorizer is
24 /// responsible for generating a vectorized loop in which the lanes are
25 /// predicated upon a get.active.lane.mask intrinsic. This pass looks at these
26 /// get.active.lane.mask intrinsics and attempts to convert them to VCTP
27 /// instructions. This will be picked up by the ARM Low-overhead loop pass later
28 /// in the backend, which performs the final transformation to a DLSTP or WLSTP
29 /// tail-predicated loop.
30 //
31 //===----------------------------------------------------------------------===//
32 
33 #include "ARM.h"
34 #include "ARMSubtarget.h"
35 #include "ARMTargetTransformInfo.h"
36 #include "llvm/Analysis/LoopInfo.h"
37 #include "llvm/Analysis/LoopPass.h"
43 #include "llvm/IR/IRBuilder.h"
44 #include "llvm/IR/Instructions.h"
45 #include "llvm/IR/IntrinsicsARM.h"
46 #include "llvm/IR/PatternMatch.h"
47 #include "llvm/InitializePasses.h"
48 #include "llvm/Support/Debug.h"
53 
54 using namespace llvm;
55 
56 #define DEBUG_TYPE "mve-tail-predication"
57 #define DESC "Transform predicated vector loops to use MVE tail predication"
58 
60  "tail-predication", cl::desc("MVE tail-predication pass options"),
63  "Don't tail-predicate loops"),
65  "enabled-no-reductions",
66  "Enable tail-predication, but not for reduction loops"),
68  "enabled",
69  "Enable tail-predication, including reduction loops"),
71  "force-enabled-no-reductions",
72  "Enable tail-predication, but not for reduction loops, "
73  "and force this which might be unsafe"),
75  "force-enabled",
76  "Enable tail-predication, including reduction loops, "
77  "and force this which might be unsafe")));
78 
79 
80 namespace {
81 
82 class MVETailPredication : public LoopPass {
     // Loop pass (derives from LoopPass) that converts get.active.lane.mask
     // intrinsics in a loop into VCTP intrinsics; see the file header comment.
     //
     // NOTE(review): listing line 83 is absent from this view. runOnLoop clears
     // a member named MaskedInsts, which is presumably declared there.

     // State cached by runOnLoop for the loop currently being processed.
84  Loop *L = nullptr;
85  ScalarEvolution *SE = nullptr;
86  TargetTransformInfo *TTI = nullptr;
87  const ARMSubtarget *ST = nullptr;
88 
89 public:
90  static char ID;
91 
92  MVETailPredication() : LoopPass(ID) { }
93 
     // NOTE(review): listing lines 95-99 are absent from this view; presumably
     // they declare the analyses that runOnLoop obtains through getAnalysis
     // (TargetPassConfig, TargetTransformInfoWrapperPass,
     // ScalarEvolutionWrapperPass) -- confirm against the original file.
94  void getAnalysisUsage(AnalysisUsage &AU) const override {
100  AU.setPreservesCFG();
101  }
102 
     /// Per-loop entry point; returns the result of TryConvertActiveLaneMask, or
     /// false when one of its preconditions is not met.
103  bool runOnLoop(Loop *L, LPPassManager&) override;
104 
105 private:
106  /// Perform the relevant checks on the loop and convert active lane masks if
107  /// possible.
108  bool TryConvertActiveLaneMask(Value *TripCount);
109 
110  /// Perform several checks on the arguments of @llvm.get.active.lane.mask
111  /// intrinsic. E.g., check that the loop induction variable and the element
112  /// count are of the form we expect, and also perform overflow checks for
113  /// the new expressions that are created.
114  bool IsSafeActiveMask(IntrinsicInst *ActiveLaneMask, Value *TripCount);
115 
116  /// Insert the intrinsic to represent the effect of tail predication.
117  void InsertVCTPIntrinsic(IntrinsicInst *ActiveLaneMask, Value *TripCount);
118 
119  /// Rematerialize the iteration count in exit blocks, which enables
120  /// ARMLowOverheadLoops to better optimise away loop update statements inside
121  /// hardware-loops.
     /// NOTE(review): no definition of this method appears in the visible part
     /// of the file.
122  void RematerializeIterCount();
123 };
124 
125 } // end namespace
126 
127 bool MVETailPredication::runOnLoop(Loop *L, LPPassManager&) {
     // Per-loop entry point: caches the subtarget and analyses for L, locates
     // the hardware-loop setup intrinsic, and hands that intrinsic's first
     // argument to TryConvertActiveLaneMask. Returns that call's result, or
     // false when any of the preconditions below is not met.

     // Bail out when the loop is to be skipped or the tail-predication option
     // evaluates to zero (presumably TailPredication::Disabled -- confirm
     // against the enum definition).
128  if (skipLoop(L) || !EnableTailPredication)
129  return false;
130 
131  MaskedInsts.clear();
132  Function &F = *L->getHeader()->getParent();
133  auto &TPC = getAnalysis<TargetPassConfig>();
134  auto &TM = TPC.getTM<TargetMachine>();
135  ST = &TM.getSubtarget<ARMSubtarget>(F);
136  TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
137  SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
     // Remember the loop in the member so the helper methods can reach it.
138  this->L = L;
139 
140  // The MVE and LOB extensions are combined to enable tail-predication, but
141  // there's nothing preventing us from generating VCTP instructions for v8.1m.
142  if (!ST->hasMVEIntegerOps() || !ST->hasV8_1MMainlineOps()) {
143  LLVM_DEBUG(dbgs() << "ARM TP: Not a v8.1m.main+mve target.\n");
144  return false;
145  }
146 
     // A preheader is required: it is the first place searched for the setup
     // intrinsic below.
147  BasicBlock *Preheader = L->getLoopPreheader();
148  if (!Preheader)
149  return false;
150 
     // Returns the first start_loop_iterations or test_start_loop_iterations
     // intrinsic call found in BB, or nullptr if BB contains neither.
151  auto FindLoopIterations = [](BasicBlock *BB) -> IntrinsicInst* {
152  for (auto &I : *BB) {
153  auto *Call = dyn_cast<IntrinsicInst>(&I);
154  if (!Call)
155  continue;
156 
157  Intrinsic::ID ID = Call->getIntrinsicID();
158  if (ID == Intrinsic::start_loop_iterations ||
159  ID == Intrinsic::test_start_loop_iterations)
160  return cast<IntrinsicInst>(&I);
161  }
162  return nullptr;
163  };
164 
165  // Look for the hardware loop intrinsic that sets the iteration count.
166  IntrinsicInst *Setup = FindLoopIterations(Preheader);
167 
168  // The setup intrinsic could instead live in the pre-preheader, i.e. the
     // single predecessor of the preheader; give up if there is no such block
     // or it does not contain the intrinsic either.
169  if (!Setup) {
170  if (!Preheader->getSinglePredecessor())
171  return false;
172  Setup = FindLoopIterations(Preheader->getSinglePredecessor());
173  if (!Setup)
174  return false;
175  }
176 
177  LLVM_DEBUG(dbgs() << "ARM TP: Running on Loop: " << *L << *Setup << "\n");
178 
     // Argument 0 of the setup intrinsic is passed on as the trip count.
179  bool Changed = TryConvertActiveLaneMask(Setup->getArgOperand(0));
180 
181  return Changed;
182 }
183 
184 // The active lane intrinsic has this form:
185 //
186 // @llvm.get.active.lane.mask(IV, TC)
187 //
188 // Here we perform checks that this intrinsic behaves as expected,
189 // which means:
190 //
191 // 1) Check that the TripCount (TC) belongs to this loop (originally).
192 // 2) The element count (TC) needs to be sufficiently large that the decrement
193 // of element counter doesn't overflow, which means that we need to prove:
194 // ceil(ElementCount / VectorWidth) >= TripCount
195 // by rounding ElementCount up:
196 // ((ElementCount + (VectorWidth - 1)) / VectorWidth)
197 // and evaluate whether the following expression isKnownNonNegative:
198 // (((ElementCount + (VectorWidth - 1)) / VectorWidth) - TripCount)
199 // 3) The IV must be an induction phi with an increment equal to the
200 // vector width.
201 bool MVETailPredication::IsSafeActiveMask(IntrinsicInst *ActiveLaneMask,
202  Value *TripCount) {
     // Returns true only if every check below passes for this
     // get.active.lane.mask call; each failing check returns false (most of
     // them after emitting a debug message).
     //
     // NOTE(review): the initializer of ForceTailPredication (listing lines
     // 204-205) is missing from this view; presumably it tests
     // EnableTailPredication against the force-enabled modes -- confirm against
     // the original file.
203  bool ForceTailPredication =
206 
     // Operand 1 of the intrinsic is the element count.
207  Value *ElemCount = ActiveLaneMask->getOperand(1);
     // NOTE(review): makeLoopInvariant reports through Changed whether it
     // modified anything, but Changed is not read again in this function.
208  bool Changed = false;
209  if (!L->makeLoopInvariant(ElemCount, Changed))
210  return false;
211 
212  auto *EC= SE->getSCEV(ElemCount);
213  auto *TC = SE->getSCEV(TripCount);
     // The lane count of the mask type is the vector width; only 2, 4, 8 and 16
     // lanes are accepted.
214  int VectorWidth =
215  cast<FixedVectorType>(ActiveLaneMask->getType())->getNumElements();
216  if (VectorWidth != 2 && VectorWidth != 4 && VectorWidth != 8 &&
217  VectorWidth != 16)
218  return false;
219  ConstantInt *ConstElemCount = nullptr;
220 
221  // 1) Smoke tests that the original scalar loop TripCount (TC) belongs to
222  // this loop. The scalar tripcount corresponds to the number of elements
223  // processed by the loop, so we will refer to that from this point on.
224  if (!SE->isLoopInvariant(EC, L)) {
225  LLVM_DEBUG(dbgs() << "ARM TP: element count must be loop invariant.\n");
226  return false;
227  }
228 
     // Constant element count: the trip count must be a constant too, and the
     // two values must agree. Note that the ConstantInt TC declared in this
     // branch shadows the SCEV TC declared above.
229  if ((ConstElemCount = dyn_cast<ConstantInt>(ElemCount))) {
230  ConstantInt *TC = dyn_cast<ConstantInt>(TripCount);
231  if (!TC) {
232  LLVM_DEBUG(dbgs() << "ARM TP: Constant tripcount expected in "
233  "set.loop.iterations\n");
234  return false;
235  }
236 
237  // Calculate 2 tripcount values and check that they are consistent with
238  // each other. The TripCount for a predicated vector loop body is
239  // ceil(ElementCount/Width), or floor((ElementCount+Width-1)/Width) as we
240  // work it out here.
241  uint64_t TC1 = TC->getZExtValue();
242  uint64_t TC2 =
243  (ConstElemCount->getZExtValue() + VectorWidth - 1) / VectorWidth;
244 
245  // If the tripcount values are inconsistent, we can't insert the VCTP and
246  // trigger tail-predication; keep the intrinsic as a get.active.lane.mask
247  // and legalize this.
248  if (TC1 != TC2) {
249  LLVM_DEBUG(dbgs() << "ARM TP: inconsistent constant tripcount values: "
250  << TC1 << " from set.loop.iterations, and "
251  << TC2 << " from get.active.lane.mask\n");
252  return false;
253  }
254  } else if (!ForceTailPredication) {
255  // 2) We need to prove that the sub expression that we create in the
256  // tail-predicated loop body, which calculates the remaining elements to be
257  // processed, is non-negative, i.e. it doesn't overflow:
258  //
259  // ((ElementCount + VectorWidth - 1) / VectorWidth) - TripCount >= 0
260  //
261  // This is true if:
262  //
263  // TripCount == (ElementCount + VectorWidth - 1) / VectorWidth
264  //
265  // which is what we will be using here.
266  //
267  auto *VW = SE->getSCEV(ConstantInt::get(TripCount->getType(), VectorWidth));
268  // ElementCount + (VW-1):
269  auto *ECPlusVWMinus1 = SE->getAddExpr(EC,
270  SE->getSCEV(ConstantInt::get(TripCount->getType(), VectorWidth - 1)));
271 
272  // Ceil = (ElementCount + (VW-1)) / VW
273  auto *Ceil = SE->getUDivExpr(ECPlusVWMinus1, VW);
274 
275  // Prevent unused variable warnings with TC
276  (void)TC;
277  LLVM_DEBUG(
278  dbgs() << "ARM TP: Analysing overflow behaviour for:\n";
279  dbgs() << "ARM TP: - TripCount = "; TC->dump();
280  dbgs() << "ARM TP: - ElemCount = "; EC->dump();
281  dbgs() << "ARM TP: - VecWidth = " << VectorWidth << "\n";
282  dbgs() << "ARM TP: - (ElemCount+VW-1) / VW = "; Ceil->dump();
283  );
284 
285  // As an example, almost all the tripcount expressions (produced by the
286  // vectoriser) look like this:
287  //
288  // TC = ((-4 + (4 * ((3 + %N) /u 4))<nuw>) /u 4)
289  //
290  // and "(ElementCount + (VW-1)) / VW":
291  //
292  // Ceil = ((3 + %N) /u 4)
293  //
294  // Check for equality of TC and Ceil by calculating SCEV expression
295  // TC - Ceil and test it for zero.
296  //
     // Concretely, the expression built here is
     //   BackedgeTakenCount(L) - ((Ceil * VW - VW) /u VW)
     // i.e. the SCEV of TripCount (TC) is only used for the debug output above.
297  const SCEV *Sub =
298  SE->getMinusSCEV(SE->getBackedgeTakenCount(L),
299  SE->getUDivExpr(SE->getAddExpr(SE->getMulExpr(Ceil, VW),
300  SE->getNegativeSCEV(VW)),
301  VW));
302 
303  // Use context sensitive facts about the path to the loop to refine. This
304  // comes up as the backedge taken count can incorporate context sensitive
305  // reasoning, and our RHS just above doesn't.
306  Sub = SE->applyLoopGuards(Sub, L);
307 
308  if (!Sub->isZero()) {
309  LLVM_DEBUG(dbgs() << "ARM TP: possible overflow in sub expression.\n");
310  return false;
311  }
312  }
313 
314  // 3) Find out if IV is an induction phi. Note that we can't use Loop
315  // helpers here to get the induction variable, because the hardware loop is
316  // no longer in loopsimplify form, and also the hwloop intrinsic uses a
317  // different counter. Using SCEV, we check that the induction is of the
318  // form i = i + 4, where the increment must be equal to the VectorWidth.
319  auto *IV = ActiveLaneMask->getOperand(0);
320  auto *IVExpr = SE->getSCEV(IV);
321  auto *AddExpr = dyn_cast<SCEVAddRecExpr>(IVExpr);
322 
323  if (!AddExpr) {
324  LLVM_DEBUG(dbgs() << "ARM TP: induction not an add expr: "; IVExpr->dump());
325  return false;
326  }
327  // Check that this AddRec is associated with this loop.
328  if (AddExpr->getLoop() != L) {
329  LLVM_DEBUG(dbgs() << "ARM TP: phi not part of this loop\n");
330  return false;
331  }
     // Operand 0 of the AddRec is its start value, which must be the constant 0.
332  auto *Base = dyn_cast<SCEVConstant>(AddExpr->getOperand(0));
333  if (!Base || !Base->isZero()) {
334  LLVM_DEBUG(dbgs() << "ARM TP: induction base is not 0\n");
335  return false;
336  }
     // Operand 1 of the AddRec is its step, which must be a constant equal to
     // the vector width.
337  auto *Step = dyn_cast<SCEVConstant>(AddExpr->getOperand(1));
338  if (!Step) {
339  LLVM_DEBUG(dbgs() << "ARM TP: induction step is not a constant: ";
340  AddExpr->getOperand(1)->dump());
341  return false;
342  }
343  auto StepValue = Step->getValue()->getSExtValue();
344  if (VectorWidth == StepValue)
345  return true;
346 
347  LLVM_DEBUG(dbgs() << "ARM TP: Step value " << StepValue
348  << " doesn't match vector width " << VectorWidth << "\n");
349 
350  return false;
351 }
352 
353 void MVETailPredication::InsertVCTPIntrinsic(IntrinsicInst *ActiveLaneMask,
354  Value *TripCount) {
     // Replaces all uses of ActiveLaneMask with a call to the VCTP intrinsic
     // matching its lane count, fed by a new i32 phi in the loop header.
     // TripCount is not referenced in the visible body.
     //
     // NOTE(review): listing line 355 is absent from this view; the `Builder`
     // used below is presumably declared there.
356  Module *M = L->getHeader()->getModule();
357  Type *Ty = IntegerType::get(M->getContext(), 32);
358  unsigned VectorWidth =
359  cast<FixedVectorType>(ActiveLaneMask->getType())->getNumElements();
360 
361  // Insert a phi that tracks the number of elements still to be processed: it
     // starts at the element count (operand 1 of the lane mask) on entry from
     // the preheader and is decremented by the vector width below.
362  Builder.SetInsertPoint(L->getHeader()->getFirstNonPHI());
363  PHINode *Processed = Builder.CreatePHI(Ty, 2);
364  Processed->addIncoming(ActiveLaneMask->getOperand(1), L->getLoopPreheader());
365 
366  // Replace @llvm.get.active.lane.mask() with the ARM specific VCTP intrinsic,
367  // and thus represent the effect of tail predication.
368  Builder.SetInsertPoint(ActiveLaneMask);
369  ConstantInt *Factor = ConstantInt::get(cast<IntegerType>(Ty), VectorWidth);
370 
     // Pick the VCTP variant from the lane count: 2 -> vctp64, 4 -> vctp32,
     // 8 -> vctp16, 16 -> vctp8. Any other lane count is treated as unreachable.
371  Intrinsic::ID VCTPID;
372  switch (VectorWidth) {
373  default:
374  llvm_unreachable("unexpected number of lanes");
375  case 2: VCTPID = Intrinsic::arm_mve_vctp64; break;
376  case 4: VCTPID = Intrinsic::arm_mve_vctp32; break;
377  case 8: VCTPID = Intrinsic::arm_mve_vctp16; break;
378  case 16: VCTPID = Intrinsic::arm_mve_vctp8; break;
379  }
380  Function *VCTP = Intrinsic::getDeclaration(M, VCTPID);
381  Value *VCTPCall = Builder.CreateCall(VCTP, Processed);
382  ActiveLaneMask->replaceAllUsesWith(VCTPCall);
383 
384  // Add the incoming value from the latch to the new phi: the current count
     // minus the vector width.
385  // TODO: This subtraction likely already exists in the loop.
386  Value *Remaining = Builder.CreateSub(Processed, Factor);
387  Processed->addIncoming(Remaining, L->getLoopLatch());
388  LLVM_DEBUG(dbgs() << "ARM TP: Insert processed elements phi: "
389  << *Processed << "\n"
390  << "ARM TP: Inserted VCTP: " << *VCTPCall << "\n");
391 }
392 
393 bool MVETailPredication::TryConvertActiveLaneMask(Value *TripCount) {
     // Collects every get.active.lane.mask call in the loop, checks each with
     // IsSafeActiveMask and, if safe, rewrites it via InsertVCTPIntrinsic.
     // Returns false when the loop has no such call or when a call fails the
     // safety check; returns true after all calls have been rewritten.

     // Gather the calls first so the rewriting below does not happen while the
     // blocks are being iterated.
394  SmallVector<IntrinsicInst *, 4> ActiveLaneMasks;
395  for (auto *BB : L->getBlocks())
396  for (auto &I : *BB)
397  if (auto *Int = dyn_cast<IntrinsicInst>(&I))
398  if (Int->getIntrinsicID() == Intrinsic::get_active_lane_mask)
399  ActiveLaneMasks.push_back(Int);
400 
401  if (ActiveLaneMasks.empty())
402  return false;
403 
404  LLVM_DEBUG(dbgs() << "ARM TP: Found predicated vector loop.\n");
405 
     // NOTE(review): the early return below reports false even if an earlier
     // iteration of this loop has already called InsertVCTPIntrinsic.
406  for (auto *ActiveLaneMask : ActiveLaneMasks) {
407  LLVM_DEBUG(dbgs() << "ARM TP: Found active lane mask: "
408  << *ActiveLaneMask << "\n");
409 
410  if (!IsSafeActiveMask(ActiveLaneMask, TripCount)) {
411  LLVM_DEBUG(dbgs() << "ARM TP: Not safe to insert VCTP.\n");
412  return false;
413  }
414  LLVM_DEBUG(dbgs() << "ARM TP: Safe to insert VCTP.\n");
415  InsertVCTPIntrinsic(ActiveLaneMask, TripCount);
416  }
417 
418  // Remove dead instructions and now dead phis.
     // NOTE(review): listing line 420 (presumably the body of the loop over
     // ActiveLaneMasks) is absent from this view -- confirm against the original
     // file.
419  for (auto *II : ActiveLaneMasks)
421  for (auto I : L->blocks())
422  DeleteDeadPHIs(I);
423  return true;
424 }
425 
427  return new MVETailPredication();
428 }
429 
     // Definition of the static pass ID declared in the class.
430 char MVETailPredication::ID = 0;
431 
     // Pass initialization macros, using DEBUG_TYPE as the pass argument and
     // DESC as its description (presumably this is what registers the pass with
     // the legacy pass registry -- confirm against PassSupport.h).
432 INITIALIZE_PASS_BEGIN(MVETailPredication, DEBUG_TYPE, DESC, false, false)
433 INITIALIZE_PASS_END(MVETailPredication, DEBUG_TYPE, DESC, false, false)
ARMSubtarget.h
llvm::RecursivelyDeleteTriviallyDeadInstructions
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition: Local.cpp:523
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AllocatorList.h:23
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::TailPredication::Disabled
@ Disabled
Definition: ARMTargetTransformInfo.h:43
llvm::Intrinsic::getDeclaration
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=None)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1399
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:107
ScalarEvolutionExpander.h
llvm::ARMSubtarget
Definition: ARMSubtarget.h:47
llvm::Function
Definition: Function.h:62
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:530
llvm::Value::dump
void dump() const
Support for debugging, callable in GDB: V->dump()
Definition: AsmWriter.cpp:4813
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:169
llvm::IRBuilder<>
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:460
Local.h
ScalarEvolution.h
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
llvm::LoopInfoWrapperPass
The legacy pass manager's analysis pass to compute loop information.
Definition: LoopInfo.h:1268
llvm::dump
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
Definition: SparseBitVector.h:876
llvm::BasicBlock::getSinglePredecessor
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:268
llvm::TailPredication::ForceEnabled
@ ForceEnabled
Definition: ARMTargetTransformInfo.h:47
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
llvm::TailPredication::Enabled
@ Enabled
Definition: ARMTargetTransformInfo.h:45
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::LoopBase::blocks
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:178
TargetLibraryInfo.h
false
Definition: StackSlotColoring.cpp:142
llvm::LoopBase::getBlocks
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
Definition: LoopInfo.h:171
llvm::ConstantInt::get
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:925
LoopUtils.h
llvm::ScalarEvolutionWrapperPass
Definition: ScalarEvolution.h:2123
llvm::LPPassManager
Definition: LoopPass.h:75
llvm::BasicBlock::getModule
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition: BasicBlock.cpp:148
PatternMatch.h
llvm::BasicBlock::getFirstNonPHI
const Instruction * getFirstNonPHI() const
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
Definition: BasicBlock.cpp:216
llvm::MCID::Call
@ Call
Definition: MCInstrDesc.h:153
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
LoopInfo.h
llvm::TargetPassConfig
Target-Independent Code Generator Pass Configuration Options.
Definition: TargetPassConfig.h:84
llvm::cl::opt
Definition: CommandLine.h:1432
llvm::SCEV
This class represents an analyzed expression in the program.
Definition: ScalarEvolution.h:77
llvm::cl::values
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition: CommandLine.h:697
uint64_t
llvm::LoopPass
Definition: LoopPass.h:27
DEBUG_TYPE
#define DEBUG_TYPE
Definition: MVETailPredication.cpp:56
llvm::TargetTransformInfoWrapperPass
Wrapper pass for TargetTransformInfo.
Definition: TargetTransformInfo.h:2451
llvm::tgtok::Int
@ Int
Definition: TGLexer.h:51
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
llvm::PHINode::addIncoming
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Definition: Instructions.h:2798
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:441
llvm::LoopBase::getLoopPreheader
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:167
TargetPassConfig.h
llvm::LoopBase::getLoopLatch
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:216
IRBuilder.h
llvm::TargetMachine
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:80
llvm::codeview::CompileSym2Flags::EC
@ EC
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:67
ARM.h
Builder
assume Assume Builder
Definition: AssumeBundleBuilder.cpp:650
llvm::AnalysisUsage::setPreservesCFG
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:253
llvm::Loop::makeLoopInvariant
bool makeLoopInvariant(Value *V, bool &Changed, Instruction *InsertPt=nullptr, MemorySSAUpdater *MSSAU=nullptr) const
If the given value is an instruction inside of the loop and it can be hoisted, do so to make it trivi...
Definition: LoopInfo.cpp:74
LoopPass.h
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:134
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
llvm::AnalysisUsage::addPreserved
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Definition: PassAnalysisSupport.h:98
llvm::Value::replaceAllUsesWith
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:532
clEnumValN
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition: CommandLine.h:672
llvm::BasicBlock::getTerminator
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:152
EnableTailPredication
cl::opt< TailPredication::Mode > EnableTailPredication("tail-predication", cl::desc("MVE tail-predication pass options"), cl::init(TailPredication::Enabled), cl::values(clEnumValN(TailPredication::Disabled, "disabled", "Don't tail-predicate loops"), clEnumValN(TailPredication::EnabledNoReductions, "enabled-no-reductions", "Enable tail-predication, but not for reduction loops"), clEnumValN(TailPredication::Enabled, "enabled", "Enable tail-predication, including reduction loops"), clEnumValN(TailPredication::ForceEnabledNoReductions, "force-enabled-no-reductions", "Enable tail-predication, but not for reduction loops, " "and force this which might be unsafe"), clEnumValN(TailPredication::ForceEnabled, "force-enabled", "Enable tail-predication, including reduction loops, " "and force this which might be unsafe")))
llvm::ConstantInt::getZExtValue
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:142
llvm::LoopBase::getHeader
BlockT * getHeader() const
Definition: LoopInfo.h:104
DESC
#define DESC
Definition: MVETailPredication.cpp:57
llvm::createMVETailPredicationPass
Pass * createMVETailPredicationPass()
Definition: MVETailPredication.cpp:426
llvm::DeleteDeadPHIs
bool DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Examine each PHI in the given block and delete it if it is dead.
Definition: BasicBlockUtils.cpp:164
llvm::IntrinsicInst
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:45
ScalarEvolutionExpressions.h
llvm::Pass
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:91
Instructions.h
INITIALIZE_PASS_BEGIN
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:51
llvm::TailPredication::EnabledNoReductions
@ EnabledNoReductions
Definition: ARMTargetTransformInfo.h:44
llvm::orc::SimpleRemoteEPCOpcode::Setup
@ Setup
TargetTransformInfo.h
ARMTargetTransformInfo.h
llvm::PHINode
Definition: Instructions.h:2648
TM
const char LLVMTargetMachineRef TM
Definition: PassBuilderBindings.cpp:47
llvm::IntegerType::get
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:313
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::User::getOperand
Value * getOperand(unsigned i) const
Definition: User.h:169
llvm::cl::desc
Definition: CommandLine.h:412
BasicBlockUtils.h
InitializePasses.h
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
Debug.h
llvm::sampleprof::Base
@ Base
Definition: Discriminator.h:58
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38
llvm::TailPredication::ForceEnabledNoReductions
@ ForceEnabledNoReductions
Definition: ARMTargetTransformInfo.h:46