LLVM 7.0.0svn
AMDGPUCodeGenPrepare.cpp
//===-- AMDGPUCodeGenPrepare.cpp ------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This pass does misc. AMDGPU optimizations on IR before instruction
/// selection.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/DivergenceAnalysis.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include <cassert>
#include <iterator>

#define DEBUG_TYPE "amdgpu-codegenprepare"

using namespace llvm;

namespace {

static cl::opt<bool> WidenLoads(
  "amdgpu-codegenprepare-widen-constant-loads",
  cl::desc("Widen sub-dword constant address space loads in AMDGPUCodeGenPrepare"),
  cl::ReallyHidden,
  cl::init(true));

class AMDGPUCodeGenPrepare : public FunctionPass,
                             public InstVisitor<AMDGPUCodeGenPrepare, bool> {
  const SISubtarget *ST = nullptr;
  DivergenceAnalysis *DA = nullptr;
  Module *Mod = nullptr;
  bool HasUnsafeFPMath = false;
  AMDGPUAS AMDGPUASI;

  /// \returns \p T's base element bit width.
  unsigned getBaseElementBitWidth(const Type *T) const;

  /// \returns Equivalent 32 bit integer type for given type \p T. For example,
  /// if \p T is i7, then i32 is returned; if \p T is <3 x i12>, then <3 x i32>
  /// is returned.
  Type *getI32Ty(IRBuilder<> &B, const Type *T) const;

  /// \returns True if binary operation \p I is a signed binary operation, false
  /// otherwise.
  bool isSigned(const BinaryOperator &I) const;

  /// \returns True if the condition of 'select' operation \p I comes from a
  /// signed 'icmp' operation, false otherwise.
  bool isSigned(const SelectInst &I) const;

  /// \returns True if type \p T needs to be promoted to 32 bit integer type,
  /// false otherwise.
  bool needsPromotionToI32(const Type *T) const;

  /// Promotes uniform binary operation \p I to equivalent 32 bit binary
  /// operation.
  ///
  /// \details \p I's base element bit width must be greater than 1 and less
  /// than or equal to 16. Promotion is done by sign or zero extending operands
  /// to 32 bits, replacing \p I with the equivalent 32 bit binary operation,
  /// and truncating the result of the 32 bit binary operation back to \p I's
  /// original type. Division operations are not promoted.
  ///
  /// \returns True if \p I is promoted to equivalent 32 bit binary operation,
  /// false otherwise.
  bool promoteUniformOpToI32(BinaryOperator &I) const;
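
  // Illustrative sketch (added; not part of the upstream file): for a uniform
  // i16 add, the promotion described above produces IR along these lines
  // (value names are hypothetical):
  //   %r = add i16 %a, %b
  // becomes
  //   %ea  = zext i16 %a to i32        ; sext instead for signed opcodes
  //   %eb  = zext i16 %b to i32
  //   %r32 = add nuw nsw i32 %ea, %eb  ; flags per promotedOpIsNSW/NUW below
  //   %r   = trunc i32 %r32 to i16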

  /// Promotes uniform 'icmp' operation \p I to 32 bit 'icmp' operation.
  ///
  /// \details \p I's base element bit width must be greater than 1 and less
  /// than or equal to 16. Promotion is done by sign or zero extending operands
  /// to 32 bits, and replacing \p I with a 32 bit 'icmp' operation.
  ///
  /// \returns True.
  bool promoteUniformOpToI32(ICmpInst &I) const;

  /// Promotes uniform 'select' operation \p I to 32 bit 'select'
  /// operation.
  ///
  /// \details \p I's base element bit width must be greater than 1 and less
  /// than or equal to 16. Promotion is done by sign or zero extending operands
  /// to 32 bits, replacing \p I with a 32 bit 'select' operation, and
  /// truncating the result of the 32 bit 'select' operation back to \p I's
  /// original type.
  ///
  /// \returns True.
  bool promoteUniformOpToI32(SelectInst &I) const;

  /// Promotes uniform 'bitreverse' intrinsic \p I to 32 bit 'bitreverse'
  /// intrinsic.
  ///
  /// \details \p I's base element bit width must be greater than 1 and less
  /// than or equal to 16. Promotion is done by zero extending the operand to
  /// 32 bits, replacing \p I with the 32 bit 'bitreverse' intrinsic, shifting
  /// the result of the 32 bit 'bitreverse' intrinsic to the right with zero
  /// fill (the shift amount is 32 minus \p I's base element bit width), and
  /// truncating the result of the shift operation back to \p I's original
  /// type.
  ///
  /// \returns True.
  bool promoteUniformBitreverseToI32(IntrinsicInst &I) const;
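
  // Illustrative sketch (added; not part of the upstream file): for a uniform
  // i8 bitreverse, the promotion above corresponds to IR like this
  // (value names are hypothetical):
  //   %r = call i8 @llvm.bitreverse.i8(i8 %x)
  // becomes
  //   %e   = zext i8 %x to i32
  //   %r32 = call i32 @llvm.bitreverse.i32(i32 %e)
  //   %s   = lshr i32 %r32, 24        ; 32 - 8
  //   %r   = trunc i32 %s to i8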

  /// Widen a scalar load.
  ///
  /// \details Widen a uniform, small-type load from constant memory to a full
  /// 32 bits and then truncate the loaded value, so that a scalar load can be
  /// selected instead of a vector load.
  ///
  /// \returns True if load \p I qualifies for this widening.
  bool canWidenScalarExtLoad(LoadInst &I) const;

public:
  static char ID;

  AMDGPUCodeGenPrepare() : FunctionPass(ID) {}

  bool visitFDiv(BinaryOperator &I);

  bool visitInstruction(Instruction &I) { return false; }
  bool visitBinaryOperator(BinaryOperator &I);
  bool visitLoadInst(LoadInst &I);
  bool visitICmpInst(ICmpInst &I);
  bool visitSelectInst(SelectInst &I);

  bool visitIntrinsicInst(IntrinsicInst &I);
  bool visitBitreverseIntrinsicInst(IntrinsicInst &I);

  bool doInitialization(Module &M) override;
  bool runOnFunction(Function &F) override;

  StringRef getPassName() const override { return "AMDGPU IR optimizations"; }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<DivergenceAnalysis>();
    AU.setPreservesAll();
  }
};

} // end anonymous namespace

unsigned AMDGPUCodeGenPrepare::getBaseElementBitWidth(const Type *T) const {
  assert(needsPromotionToI32(T) && "T does not need promotion to i32");

  if (T->isIntegerTy())
    return T->getIntegerBitWidth();
  return cast<VectorType>(T)->getElementType()->getIntegerBitWidth();
}

Type *AMDGPUCodeGenPrepare::getI32Ty(IRBuilder<> &B, const Type *T) const {
  assert(needsPromotionToI32(T) && "T does not need promotion to i32");

  if (T->isIntegerTy())
    return B.getInt32Ty();
  return VectorType::get(B.getInt32Ty(), cast<VectorType>(T)->getNumElements());
}

bool AMDGPUCodeGenPrepare::isSigned(const BinaryOperator &I) const {
  return I.getOpcode() == Instruction::AShr ||
         I.getOpcode() == Instruction::SDiv || I.getOpcode() == Instruction::SRem;
}

bool AMDGPUCodeGenPrepare::isSigned(const SelectInst &I) const {
  return isa<ICmpInst>(I.getOperand(0)) ?
         cast<ICmpInst>(I.getOperand(0))->isSigned() : false;
}

bool AMDGPUCodeGenPrepare::needsPromotionToI32(const Type *T) const {
  const IntegerType *IntTy = dyn_cast<IntegerType>(T);
  if (IntTy && IntTy->getBitWidth() > 1 && IntTy->getBitWidth() <= 16)
    return true;

  if (const VectorType *VT = dyn_cast<VectorType>(T)) {
    // TODO: The set of packed operations is more limited, so may want to
    // promote some anyway.
    if (ST->hasVOP3PInsts())
      return false;

    return needsPromotionToI32(VT->getElementType());
  }

  return false;
}

// Return true if the op promoted to i32 should have nsw set.
static bool promotedOpIsNSW(const Instruction &I) {
  switch (I.getOpcode()) {
  case Instruction::Shl:
  case Instruction::Add:
  case Instruction::Sub:
    return true;
  case Instruction::Mul:
    return I.hasNoUnsignedWrap();
  default:
    return false;
  }
}

// Return true if the op promoted to i32 should have nuw set.
static bool promotedOpIsNUW(const Instruction &I) {
  switch (I.getOpcode()) {
  case Instruction::Shl:
  case Instruction::Add:
  case Instruction::Mul:
    return true;
  case Instruction::Sub:
    return I.hasNoUnsignedWrap();
  default:
    return false;
  }
}
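
// Added reasoning sketch (not part of the upstream file): these opcodes are
// not in the isSigned() list, so their promoted operands are zero-extended
// from at most 16 bits. An add is therefore bounded by 0xFFFF + 0xFFFF and a
// shl by 0xFFFF << 15, which fit in i32 as both signed and unsigned values.
// The inherited cases are the exceptions: a mul of zero-extended values (up
// to 0xFFFF * 0xFFFF) can exceed INT32_MAX, and a sub of zero-extended
// values can drop below zero, unless the original operation already carried
// nuw.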

bool AMDGPUCodeGenPrepare::canWidenScalarExtLoad(LoadInst &I) const {
  Type *Ty = I.getType();
  const DataLayout &DL = Mod->getDataLayout();
  int TySize = DL.getTypeSizeInBits(Ty);
  unsigned Align = I.getAlignment() ?
                   I.getAlignment() : DL.getABITypeAlignment(Ty);

  return I.isSimple() && TySize < 32 && Align >= 4 && DA->isUniform(&I);
}

bool AMDGPUCodeGenPrepare::promoteUniformOpToI32(BinaryOperator &I) const {
  assert(needsPromotionToI32(I.getType()) &&
         "I does not need promotion to i32");

  if (I.getOpcode() == Instruction::SDiv ||
      I.getOpcode() == Instruction::UDiv)
    return false;

  IRBuilder<> Builder(&I);
  Builder.SetCurrentDebugLocation(I.getDebugLoc());

  Type *I32Ty = getI32Ty(Builder, I.getType());
  Value *ExtOp0 = nullptr;
  Value *ExtOp1 = nullptr;
  Value *ExtRes = nullptr;
  Value *TruncRes = nullptr;

  if (isSigned(I)) {
    ExtOp0 = Builder.CreateSExt(I.getOperand(0), I32Ty);
    ExtOp1 = Builder.CreateSExt(I.getOperand(1), I32Ty);
  } else {
    ExtOp0 = Builder.CreateZExt(I.getOperand(0), I32Ty);
    ExtOp1 = Builder.CreateZExt(I.getOperand(1), I32Ty);
  }

  ExtRes = Builder.CreateBinOp(I.getOpcode(), ExtOp0, ExtOp1);
  if (Instruction *Inst = dyn_cast<Instruction>(ExtRes)) {
    if (promotedOpIsNSW(cast<Instruction>(I)))
      Inst->setHasNoSignedWrap();

    if (promotedOpIsNUW(cast<Instruction>(I)))
      Inst->setHasNoUnsignedWrap();

    if (const auto *ExactOp = dyn_cast<PossiblyExactOperator>(&I))
      Inst->setIsExact(ExactOp->isExact());
  }

  TruncRes = Builder.CreateTrunc(ExtRes, I.getType());

  I.replaceAllUsesWith(TruncRes);
  I.eraseFromParent();

  return true;
}

bool AMDGPUCodeGenPrepare::promoteUniformOpToI32(ICmpInst &I) const {
  assert(needsPromotionToI32(I.getOperand(0)->getType()) &&
         "I does not need promotion to i32");

  IRBuilder<> Builder(&I);
  Builder.SetCurrentDebugLocation(I.getDebugLoc());

  Type *I32Ty = getI32Ty(Builder, I.getOperand(0)->getType());
  Value *ExtOp0 = nullptr;
  Value *ExtOp1 = nullptr;
  Value *NewICmp = nullptr;

  if (I.isSigned()) {
    ExtOp0 = Builder.CreateSExt(I.getOperand(0), I32Ty);
    ExtOp1 = Builder.CreateSExt(I.getOperand(1), I32Ty);
  } else {
    ExtOp0 = Builder.CreateZExt(I.getOperand(0), I32Ty);
    ExtOp1 = Builder.CreateZExt(I.getOperand(1), I32Ty);
  }
  NewICmp = Builder.CreateICmp(I.getPredicate(), ExtOp0, ExtOp1);

  I.replaceAllUsesWith(NewICmp);
  I.eraseFromParent();

  return true;
}

bool AMDGPUCodeGenPrepare::promoteUniformOpToI32(SelectInst &I) const {
  assert(needsPromotionToI32(I.getType()) &&
         "I does not need promotion to i32");

  IRBuilder<> Builder(&I);
  Builder.SetCurrentDebugLocation(I.getDebugLoc());

  Type *I32Ty = getI32Ty(Builder, I.getType());
  Value *ExtOp1 = nullptr;
  Value *ExtOp2 = nullptr;
  Value *ExtRes = nullptr;
  Value *TruncRes = nullptr;

  if (isSigned(I)) {
    ExtOp1 = Builder.CreateSExt(I.getOperand(1), I32Ty);
    ExtOp2 = Builder.CreateSExt(I.getOperand(2), I32Ty);
  } else {
    ExtOp1 = Builder.CreateZExt(I.getOperand(1), I32Ty);
    ExtOp2 = Builder.CreateZExt(I.getOperand(2), I32Ty);
  }
  ExtRes = Builder.CreateSelect(I.getOperand(0), ExtOp1, ExtOp2);
  TruncRes = Builder.CreateTrunc(ExtRes, I.getType());

  I.replaceAllUsesWith(TruncRes);
  I.eraseFromParent();

  return true;
}

bool AMDGPUCodeGenPrepare::promoteUniformBitreverseToI32(
    IntrinsicInst &I) const {
  assert(I.getIntrinsicID() == Intrinsic::bitreverse &&
         "I must be bitreverse intrinsic");
  assert(needsPromotionToI32(I.getType()) &&
         "I does not need promotion to i32");

  IRBuilder<> Builder(&I);
  Builder.SetCurrentDebugLocation(I.getDebugLoc());

  Type *I32Ty = getI32Ty(Builder, I.getType());
  Function *I32 =
      Intrinsic::getDeclaration(Mod, Intrinsic::bitreverse, { I32Ty });
  Value *ExtOp = Builder.CreateZExt(I.getOperand(0), I32Ty);
  Value *ExtRes = Builder.CreateCall(I32, { ExtOp });
  Value *LShrOp =
      Builder.CreateLShr(ExtRes, 32 - getBaseElementBitWidth(I.getType()));
  Value *TruncRes =
      Builder.CreateTrunc(LShrOp, I.getType());

  I.replaceAllUsesWith(TruncRes);
  I.eraseFromParent();

  return true;
}

static bool shouldKeepFDivF32(Value *Num, bool UnsafeDiv, bool HasDenormals) {
  const ConstantFP *CNum = dyn_cast<ConstantFP>(Num);
  if (!CNum)
    return HasDenormals;

  if (UnsafeDiv)
    return true;

  bool IsOne = CNum->isExactlyValue(+1.0) || CNum->isExactlyValue(-1.0);

  // Reciprocal f32 is handled separately without denormals.
  return HasDenormals ^ IsOne;
}

// Insert an intrinsic for fast fdiv for safe math situations where we can
// reduce precision. Leave fdiv for situations where the generic node is
// expected to be optimized.
bool AMDGPUCodeGenPrepare::visitFDiv(BinaryOperator &FDiv) {
  Type *Ty = FDiv.getType();

  if (!Ty->getScalarType()->isFloatTy())
    return false;

  MDNode *FPMath = FDiv.getMetadata(LLVMContext::MD_fpmath);
  if (!FPMath)
    return false;

  const FPMathOperator *FPOp = cast<const FPMathOperator>(&FDiv);
  float ULP = FPOp->getFPAccuracy();
  if (ULP < 2.5f)
    return false;

  FastMathFlags FMF = FPOp->getFastMathFlags();
  bool UnsafeDiv = HasUnsafeFPMath || FMF.isFast() ||
                   FMF.allowReciprocal();

  // With UnsafeDiv, the node will be optimized to just rcp and mul.
  if (UnsafeDiv)
    return false;

  IRBuilder<> Builder(FDiv.getParent(), std::next(FDiv.getIterator()), FPMath);
  Builder.setFastMathFlags(FMF);
  Builder.SetCurrentDebugLocation(FDiv.getDebugLoc());

  Function *Decl = Intrinsic::getDeclaration(Mod, Intrinsic::amdgcn_fdiv_fast);

  Value *Num = FDiv.getOperand(0);
  Value *Den = FDiv.getOperand(1);

  Value *NewFDiv = nullptr;

  bool HasDenormals = ST->hasFP32Denormals();
  if (VectorType *VT = dyn_cast<VectorType>(Ty)) {
    NewFDiv = UndefValue::get(VT);

    // FIXME: Doesn't do the right thing for cases where the vector is partially
    // constant. This works when the scalarizer pass is run first.
    for (unsigned I = 0, E = VT->getNumElements(); I != E; ++I) {
      Value *NumEltI = Builder.CreateExtractElement(Num, I);
      Value *DenEltI = Builder.CreateExtractElement(Den, I);
      Value *NewElt;

      if (shouldKeepFDivF32(NumEltI, UnsafeDiv, HasDenormals)) {
        NewElt = Builder.CreateFDiv(NumEltI, DenEltI);
      } else {
        NewElt = Builder.CreateCall(Decl, { NumEltI, DenEltI });
      }

      NewFDiv = Builder.CreateInsertElement(NewFDiv, NewElt, I);
    }
  } else {
    if (!shouldKeepFDivF32(Num, UnsafeDiv, HasDenormals))
      NewFDiv = Builder.CreateCall(Decl, { Num, Den });
  }

  if (NewFDiv) {
    FDiv.replaceAllUsesWith(NewFDiv);
    NewFDiv->takeName(&FDiv);
    FDiv.eraseFromParent();
  }

  return !!NewFDiv;
}
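
// Added illustration (not part of the upstream file): a sufficiently
// low-precision fdiv is the trigger here. For example (hypothetical names):
//   %d = fdiv float %x, %y, !fpmath !0
//   !0 = !{float 2.5}
// is rewritten to
//   %d = call float @llvm.amdgcn.fdiv.fast(float %x, float %y)
// while an fdiv without !fpmath, or with a required accuracy tighter than
// 2.5 ULP, is left for the generic lowering.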

static bool hasUnsafeFPMath(const Function &F) {
  Attribute Attr = F.getFnAttribute("unsafe-fp-math");
  return Attr.getValueAsString() == "true";
}

bool AMDGPUCodeGenPrepare::visitBinaryOperator(BinaryOperator &I) {
  bool Changed = false;

  if (ST->has16BitInsts() && needsPromotionToI32(I.getType()) &&
      DA->isUniform(&I))
    Changed |= promoteUniformOpToI32(I);

  return Changed;
}

bool AMDGPUCodeGenPrepare::visitLoadInst(LoadInst &I) {
  if (!WidenLoads)
    return false;

  if ((I.getPointerAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS ||
       I.getPointerAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS_32BIT) &&
      canWidenScalarExtLoad(I)) {
    IRBuilder<> Builder(&I);
    Builder.SetCurrentDebugLocation(I.getDebugLoc());

    Type *I32Ty = Builder.getInt32Ty();
    Type *PT = PointerType::get(I32Ty, I.getPointerAddressSpace());
    Value *BitCast = Builder.CreateBitCast(I.getPointerOperand(), PT);
    LoadInst *WidenLoad = Builder.CreateLoad(BitCast);
    WidenLoad->copyMetadata(I);

    // If we have range metadata, we need to convert the type, and not make
    // assumptions about the high bits.
    if (auto *Range = WidenLoad->getMetadata(LLVMContext::MD_range)) {
      ConstantInt *Lower =
        mdconst::extract<ConstantInt>(Range->getOperand(0));

      if (Lower->getValue().isNullValue()) {
        WidenLoad->setMetadata(LLVMContext::MD_range, nullptr);
      } else {
        Metadata *LowAndHigh[] = {
          ConstantAsMetadata::get(ConstantInt::get(I32Ty, Lower->getValue().zext(32))),
          // Don't make assumptions about the high bits.
          ConstantAsMetadata::get(ConstantInt::get(I32Ty, 0))
        };

        WidenLoad->setMetadata(LLVMContext::MD_range,
                               MDNode::get(Mod->getContext(), LowAndHigh));
      }
    }

    int TySize = Mod->getDataLayout().getTypeSizeInBits(I.getType());
    Type *IntNTy = Builder.getIntNTy(TySize);
    Value *ValTrunc = Builder.CreateTrunc(WidenLoad, IntNTy);
    Value *ValOrig = Builder.CreateBitCast(ValTrunc, I.getType());
    I.replaceAllUsesWith(ValOrig);
    I.eraseFromParent();
    return true;
  }

  return false;
}
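
// Added illustration (not part of the upstream file): a uniform sub-dword
// load from the constant address space, e.g. (hypothetical names, with
// addrspace(4) standing in for the constant address space)
//   %v = load i16, i16 addrspace(4)* %p, align 4
// becomes roughly
//   %c = bitcast i16 addrspace(4)* %p to i32 addrspace(4)*
//   %w = load i32, i32 addrspace(4)* %c
//   %v = trunc i32 %w to i16
// so that instruction selection can use a scalar load rather than a vector
// load for the sub-dword access.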

bool AMDGPUCodeGenPrepare::visitICmpInst(ICmpInst &I) {
  bool Changed = false;

  if (ST->has16BitInsts() && needsPromotionToI32(I.getOperand(0)->getType()) &&
      DA->isUniform(&I))
    Changed |= promoteUniformOpToI32(I);

  return Changed;
}

bool AMDGPUCodeGenPrepare::visitSelectInst(SelectInst &I) {
  bool Changed = false;

  if (ST->has16BitInsts() && needsPromotionToI32(I.getType()) &&
      DA->isUniform(&I))
    Changed |= promoteUniformOpToI32(I);

  return Changed;
}

bool AMDGPUCodeGenPrepare::visitIntrinsicInst(IntrinsicInst &I) {
  switch (I.getIntrinsicID()) {
  case Intrinsic::bitreverse:
    return visitBitreverseIntrinsicInst(I);
  default:
    return false;
  }
}

bool AMDGPUCodeGenPrepare::visitBitreverseIntrinsicInst(IntrinsicInst &I) {
  bool Changed = false;

  if (ST->has16BitInsts() && needsPromotionToI32(I.getType()) &&
      DA->isUniform(&I))
    Changed |= promoteUniformBitreverseToI32(I);

  return Changed;
}

bool AMDGPUCodeGenPrepare::doInitialization(Module &M) {
  Mod = &M;
  return false;
}

bool AMDGPUCodeGenPrepare::runOnFunction(Function &F) {
  if (skipFunction(F))
    return false;

  auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
  if (!TPC)
    return false;

  const TargetMachine &TM = TPC->getTM<TargetMachine>();
  ST = &TM.getSubtarget<SISubtarget>(F);
  DA = &getAnalysis<DivergenceAnalysis>();
  HasUnsafeFPMath = hasUnsafeFPMath(F);

  bool MadeChange = false;

  for (BasicBlock &BB : F) {
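    // Added note: the next iterator is captured before visiting each
    // instruction because visit() may erase the current instruction from
    // its block.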
    BasicBlock::iterator Next;
    for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; I = Next) {
      Next = std::next(I);
      MadeChange |= visit(*I);
    }
  }

  return MadeChange;
}

INITIALIZE_PASS_BEGIN(AMDGPUCodeGenPrepare, DEBUG_TYPE,
                      "AMDGPU IR optimizations", false, false)
INITIALIZE_PASS_DEPENDENCY(DivergenceAnalysis)
INITIALIZE_PASS_END(AMDGPUCodeGenPrepare, DEBUG_TYPE, "AMDGPU IR optimizations",
                    false, false)

char AMDGPUCodeGenPrepare::ID = 0;

FunctionPass *llvm::createAMDGPUCodeGenPreparePass() {
  return new AMDGPUCodeGenPrepare();
}