28 #define DEBUG_TYPE "amdgpu-codegenprepare"
49 unsigned getBaseElementBitWidth(
const Type *
T)
const;
66 bool needsPromotionToI32(
const Type *T)
const;
88 bool promoteUniformOpToI32(
ICmpInst &I)
const;
99 bool promoteUniformOpToI32(
SelectInst &I)
const;
118 TM(static_cast<const GCNTargetMachine *>(
TM)),
122 HasUnsafeFPMath(
false) { }
126 bool visitInstruction(
Instruction &I) {
return false; }
134 bool doInitialization(
Module &M)
override;
135 bool runOnFunction(
Function &
F)
override;
137 StringRef getPassName()
const override {
return "AMDGPU IR optimizations"; }
147 Value *AMDGPUCodeGenPrepare::copyFlags(
153 if (isa<OverflowingBinaryOperator>(BinOp)) {
156 }
else if (isa<PossiblyExactOperator>(BinOp))
162 unsigned AMDGPUCodeGenPrepare::getBaseElementBitWidth(
const Type *
T)
const {
163 assert(needsPromotionToI32(T) &&
"T does not need promotion to i32");
167 return cast<VectorType>(
T)->getElementType()->getIntegerBitWidth();
171 assert(needsPromotionToI32(T) &&
"T does not need promotion to i32");
178 bool AMDGPUCodeGenPrepare::isSigned(
const BinaryOperator &I)
const {
179 return I.
getOpcode() == Instruction::AShr ||
183 bool AMDGPUCodeGenPrepare::isSigned(
const SelectInst &I)
const {
188 bool AMDGPUCodeGenPrepare::needsPromotionToI32(
const Type *T)
const {
194 return needsPromotionToI32(cast<VectorType>(T)->getElementType());
197 bool AMDGPUCodeGenPrepare::promoteUniformOpToI32(
BinaryOperator &I)
const {
199 "I does not need promotion to i32");
201 if (I.
getOpcode() == Instruction::SDiv ||
209 Value *ExtOp0 =
nullptr;
210 Value *ExtOp1 =
nullptr;
211 Value *ExtRes =
nullptr;
212 Value *TruncRes =
nullptr;
215 ExtOp0 = Builder.CreateSExt(I.
getOperand(0), I32Ty);
216 ExtOp1 = Builder.CreateSExt(I.
getOperand(1), I32Ty);
218 ExtOp0 = Builder.CreateZExt(I.
getOperand(0), I32Ty);
219 ExtOp1 = Builder.CreateZExt(I.
getOperand(1), I32Ty);
221 ExtRes = copyFlags(I, Builder.CreateBinOp(I.
getOpcode(), ExtOp0, ExtOp1));
222 TruncRes = Builder.CreateTrunc(ExtRes, I.
getType());
230 bool AMDGPUCodeGenPrepare::promoteUniformOpToI32(
ICmpInst &I)
const {
232 "I does not need promotion to i32");
238 Value *ExtOp0 =
nullptr;
239 Value *ExtOp1 =
nullptr;
240 Value *NewICmp =
nullptr;
243 ExtOp0 = Builder.CreateSExt(I.
getOperand(0), I32Ty);
244 ExtOp1 = Builder.CreateSExt(I.
getOperand(1), I32Ty);
246 ExtOp0 = Builder.CreateZExt(I.
getOperand(0), I32Ty);
247 ExtOp1 = Builder.CreateZExt(I.
getOperand(1), I32Ty);
249 NewICmp = Builder.CreateICmp(I.
getPredicate(), ExtOp0, ExtOp1);
257 bool AMDGPUCodeGenPrepare::promoteUniformOpToI32(
SelectInst &I)
const {
259 "I does not need promotion to i32");
265 Value *ExtOp1 =
nullptr;
266 Value *ExtOp2 =
nullptr;
267 Value *ExtRes =
nullptr;
268 Value *TruncRes =
nullptr;
271 ExtOp1 = Builder.CreateSExt(I.
getOperand(1), I32Ty);
272 ExtOp2 = Builder.CreateSExt(I.
getOperand(2), I32Ty);
274 ExtOp1 = Builder.CreateZExt(I.
getOperand(1), I32Ty);
275 ExtOp2 = Builder.CreateZExt(I.
getOperand(2), I32Ty);
277 ExtRes = Builder.CreateSelect(I.
getOperand(0), ExtOp1, ExtOp2);
278 TruncRes = Builder.CreateTrunc(ExtRes, I.
getType());
286 bool AMDGPUCodeGenPrepare::promoteUniformBitreverseToI32(
289 "I must be bitreverse intrinsic");
291 "I does not need promotion to i32");
300 Value *ExtRes = Builder.CreateCall(I32, { ExtOp });
302 Builder.CreateLShr(ExtRes, 32 - getBaseElementBitWidth(I.
getType()));
304 Builder.CreateTrunc(LShrOp, I.
getType());
342 if (
ST->hasFP32Denormals() && !UnsafeDiv)
346 Builder.setFastMathFlags(FMF);
347 Builder.SetCurrentDebugLocation(FDiv.
getDebugLoc());
356 Value *NewFDiv =
nullptr;
358 if (
VectorType *VT = dyn_cast<VectorType>(Ty)) {
363 for (
unsigned I = 0,
E = VT->getNumElements(); I !=
E; ++
I) {
364 Value *NumEltI = Builder.CreateExtractElement(Num, I);
365 Value *DenEltI = Builder.CreateExtractElement(Den, I);
369 NewElt = Builder.CreateFDiv(NumEltI, DenEltI);
371 NewElt = Builder.CreateCall(Decl, { NumEltI, DenEltI });
374 NewFDiv = Builder.CreateInsertElement(NewFDiv, NewElt, I);
378 NewFDiv = Builder.CreateCall(Decl, { Num, Den });
395 bool AMDGPUCodeGenPrepare::visitBinaryOperator(
BinaryOperator &I) {
396 bool Changed =
false;
398 if (
ST->has16BitInsts() && needsPromotionToI32(I.
getType()) &&
400 Changed |= promoteUniformOpToI32(I);
405 bool AMDGPUCodeGenPrepare::visitICmpInst(
ICmpInst &I) {
406 bool Changed =
false;
410 Changed |= promoteUniformOpToI32(I);
415 bool AMDGPUCodeGenPrepare::visitSelectInst(
SelectInst &I) {
416 bool Changed =
false;
418 if (
ST->has16BitInsts() && needsPromotionToI32(I.
getType()) &&
420 Changed |= promoteUniformOpToI32(I);
425 bool AMDGPUCodeGenPrepare::visitIntrinsicInst(
IntrinsicInst &I) {
427 case Intrinsic::bitreverse:
428 return visitBitreverseIntrinsicInst(I);
434 bool AMDGPUCodeGenPrepare::visitBitreverseIntrinsicInst(
IntrinsicInst &I) {
435 bool Changed =
false;
437 if (
ST->has16BitInsts() && needsPromotionToI32(I.
getType()) &&
439 Changed |= promoteUniformBitreverseToI32(I);
444 bool AMDGPUCodeGenPrepare::doInitialization(
Module &M) {
449 bool AMDGPUCodeGenPrepare::runOnFunction(
Function &
F) {
450 if (!
TM || skipFunction(F))
454 DA = &getAnalysis<DivergenceAnalysis>();
457 bool MadeChange =
false;
463 MadeChange |= visit(*I);
471 "AMDGPU IR optimizations",
false,
false)
476 char AMDGPUCodeGenPrepare::
ID = 0;
479 return new AMDGPUCodeGenPrepare(TM);
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Base class for instruction visitors.
AMDGPU specific subclass of TargetSubtarget.
A Module instance is used to store all the information related to an LLVM module. ...
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
FastMathFlags getFastMathFlags() const
Convenience function for getting all the fast-math flags.
bool isSigned() const
Determine if this instruction is using a signed comparison.
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
INITIALIZE_TM_PASS_BEGIN(AMDGPUCodeGenPrepare, DEBUG_TYPE,"AMDGPU IR optimizations", false, false) INITIALIZE_TM_PASS_END(AMDGPUCodeGenPrepare
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
This class represents the LLVM 'select' instruction.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
void setIsExact(bool b=true)
Set or clear the exact flag on this instruction, which must be an operator which supports this flag...
static bool shouldKeepFDivF32(Value *Num, bool UnsafeDiv)
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=None)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
static GCRegistry::Add< OcamlGC > B("ocaml","ocaml 3.10-compatible GC")
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
bool allowReciprocal() const
void takeName(Value *V)
Transfer the name from V to this value.
Type * getScalarType() const LLVM_READONLY
If this is a vector type, return the element type, otherwise return 'this'.
static GCRegistry::Add< CoreCLRGC > E("coreclr","CoreCLR-compatible GC")
bool unsafeAlgebra() const
LLVM Basic Block Representation.
bool isExact() const
Determine whether the exact flag is set.
The instances of the Type class are immutable: once they are created, they are never changed...
bool isVectorTy() const
True if this is an instance of VectorType.
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
bool hasNoSignedWrap() const
Determine whether the no signed wrap flag is set.
ConstantFP - Floating Point Values [float, double].
#define INITIALIZE_TM_PASS_END(passName, arg, name, cfg, analysis)
Target machine pass initializer for passes with dependencies.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Represent the analysis usage information of a pass.
Function * getDeclaration(Module *M, unsigned ID, Type **Tys=nullptr, unsigned NumTys=0) const override
Create or insert an LLVM Function declaration for an intrinsic, and return it.
This instruction compares its operands according to the predicate given to the constructor.
FunctionPass class - This class is used to implement most global optimizations.
Value * getOperand(unsigned i) const
self_iterator getIterator()
unsigned getIntegerBitWidth() const
Predicate getPredicate() const
Return the predicate for this instruction.
The AMDGPU TargetMachine interface definition for hardware codegen targets.
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
float getFPAccuracy() const
Get the maximum error permitted by this operation in ULPs.
BinaryOps getOpcode() const
void setHasNoSignedWrap(bool b=true)
Set or clear the nsw flag on this instruction, which must be an operator which supports this flag...
Iterator for intrusive lists based on ilist_node.
bool hasNoUnsignedWrap() const
Determine whether the no unsigned wrap flag is set.
FunctionPass * createAMDGPUCodeGenPreparePass(const GCNTargetMachine *TM=nullptr)
Utility class for floating point operations which can have information about relaxed accuracy require...
Type * getType() const
All values are typed, get the type of this value.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
static bool hasUnsafeFPMath(const Function &F)
Class to represent vector types.
Interface for the AMDGPU Implementation of the Intrinsic Info class.
bool isIntegerTy() const
True if this is an instance of IntegerType.
void setPreservesAll()
Set by analyses that do not transform their input at all.
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
bool isExactlyValue(const APFloat &V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
void setHasNoUnsignedWrap(bool b=true)
Set or clear the nuw flag on this instruction, which must be an operator which supports this flag...
StringRef getValueAsString() const
Return the attribute's value as a string.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
LLVM Value Representation.
static VectorType * get(Type *ElementType, unsigned NumElements)
This static method is the primary way to construct an VectorType.
Primary interface to the complete machine description for the target machine.
Convenience struct for specifying and reasoning about fast-math flags.
StringRef - Represent a constant reference to a string, i.e.
Statically lint checks LLVM IR
const BasicBlock * getParent() const
A wrapper class for inspecting calls to intrinsic functions.