17 #include "llvm/IR/IntrinsicsNVPTX.h" 21 #define DEBUG_TYPE "NVPTXtti" 26 default:
return false;
27 case Intrinsic::nvvm_read_ptx_sreg_tid_x:
28 case Intrinsic::nvvm_read_ptx_sreg_tid_y:
29 case Intrinsic::nvvm_read_ptx_sreg_tid_z:
35 return II->
getIntrinsicID() == Intrinsic::nvvm_read_ptx_sreg_laneid;
41 default:
return false;
42 case Intrinsic::nvvm_atomic_load_inc_32:
43 case Intrinsic::nvvm_atomic_load_dec_32:
45 case Intrinsic::nvvm_atomic_add_gen_f_cta:
46 case Intrinsic::nvvm_atomic_add_gen_f_sys:
47 case Intrinsic::nvvm_atomic_add_gen_i_cta:
48 case Intrinsic::nvvm_atomic_add_gen_i_sys:
49 case Intrinsic::nvvm_atomic_and_gen_i_cta:
50 case Intrinsic::nvvm_atomic_and_gen_i_sys:
51 case Intrinsic::nvvm_atomic_cas_gen_i_cta:
52 case Intrinsic::nvvm_atomic_cas_gen_i_sys:
53 case Intrinsic::nvvm_atomic_dec_gen_i_cta:
54 case Intrinsic::nvvm_atomic_dec_gen_i_sys:
55 case Intrinsic::nvvm_atomic_inc_gen_i_cta:
56 case Intrinsic::nvvm_atomic_inc_gen_i_sys:
57 case Intrinsic::nvvm_atomic_max_gen_i_cta:
58 case Intrinsic::nvvm_atomic_max_gen_i_sys:
59 case Intrinsic::nvvm_atomic_min_gen_i_cta:
60 case Intrinsic::nvvm_atomic_min_gen_i_sys:
61 case Intrinsic::nvvm_atomic_or_gen_i_cta:
62 case Intrinsic::nvvm_atomic_or_gen_i_sys:
63 case Intrinsic::nvvm_atomic_exch_gen_i_cta:
64 case Intrinsic::nvvm_atomic_exch_gen_i_sys:
65 case Intrinsic::nvvm_atomic_xor_gen_i_cta:
66 case Intrinsic::nvvm_atomic_xor_gen_i_sys:
80 if (
const LoadInst *LI = dyn_cast<LoadInst>(
I)) {
81 unsigned AS = LI->getPointerAddressSpace();
107 if (isa<CallInst>(
I))
126 enum FtzRequirementTy {
140 struct SimplifyAction {
147 FtzRequirementTy FtzRequirement = FTZ_Any;
149 SimplifyAction() =
default;
152 : IID(IID), FtzRequirement(FtzReq) {}
159 : BinaryOp(BinaryOp), FtzRequirement(FtzReq) {}
161 SimplifyAction(SpecialCase Special, FtzRequirementTy FtzReq)
162 : Special(Special), FtzRequirement(FtzReq) {}
167 const SimplifyAction Action = [II]() -> SimplifyAction {
170 case Intrinsic::nvvm_ceil_d:
171 return {Intrinsic::ceil, FTZ_Any};
172 case Intrinsic::nvvm_ceil_f:
173 return {Intrinsic::ceil, FTZ_MustBeOff};
174 case Intrinsic::nvvm_ceil_ftz_f:
175 return {Intrinsic::ceil, FTZ_MustBeOn};
176 case Intrinsic::nvvm_fabs_d:
177 return {Intrinsic::fabs, FTZ_Any};
178 case Intrinsic::nvvm_fabs_f:
179 return {Intrinsic::fabs, FTZ_MustBeOff};
180 case Intrinsic::nvvm_fabs_ftz_f:
181 return {Intrinsic::fabs, FTZ_MustBeOn};
182 case Intrinsic::nvvm_floor_d:
183 return {Intrinsic::floor, FTZ_Any};
184 case Intrinsic::nvvm_floor_f:
185 return {Intrinsic::floor, FTZ_MustBeOff};
186 case Intrinsic::nvvm_floor_ftz_f:
187 return {Intrinsic::floor, FTZ_MustBeOn};
188 case Intrinsic::nvvm_fma_rn_d:
189 return {Intrinsic::fma, FTZ_Any};
190 case Intrinsic::nvvm_fma_rn_f:
191 return {Intrinsic::fma, FTZ_MustBeOff};
192 case Intrinsic::nvvm_fma_rn_ftz_f:
193 return {Intrinsic::fma, FTZ_MustBeOn};
194 case Intrinsic::nvvm_fmax_d:
196 case Intrinsic::nvvm_fmax_f:
198 case Intrinsic::nvvm_fmax_ftz_f:
200 case Intrinsic::nvvm_fmin_d:
202 case Intrinsic::nvvm_fmin_f:
204 case Intrinsic::nvvm_fmin_ftz_f:
206 case Intrinsic::nvvm_round_d:
208 case Intrinsic::nvvm_round_f:
210 case Intrinsic::nvvm_round_ftz_f:
212 case Intrinsic::nvvm_sqrt_rn_d:
213 return {Intrinsic::sqrt, FTZ_Any};
214 case Intrinsic::nvvm_sqrt_f:
219 return {Intrinsic::sqrt, FTZ_Any};
220 case Intrinsic::nvvm_sqrt_rn_f:
221 return {Intrinsic::sqrt, FTZ_MustBeOff};
222 case Intrinsic::nvvm_sqrt_rn_ftz_f:
223 return {Intrinsic::sqrt, FTZ_MustBeOn};
224 case Intrinsic::nvvm_trunc_d:
225 return {Intrinsic::trunc, FTZ_Any};
226 case Intrinsic::nvvm_trunc_f:
227 return {Intrinsic::trunc, FTZ_MustBeOff};
228 case Intrinsic::nvvm_trunc_ftz_f:
229 return {Intrinsic::trunc, FTZ_MustBeOn};
236 case Intrinsic::nvvm_d2i_rz:
237 case Intrinsic::nvvm_f2i_rz:
238 case Intrinsic::nvvm_d2ll_rz:
239 case Intrinsic::nvvm_f2ll_rz:
240 return {Instruction::FPToSI};
241 case Intrinsic::nvvm_d2ui_rz:
242 case Intrinsic::nvvm_f2ui_rz:
243 case Intrinsic::nvvm_d2ull_rz:
244 case Intrinsic::nvvm_f2ull_rz:
245 return {Instruction::FPToUI};
246 case Intrinsic::nvvm_i2d_rz:
247 case Intrinsic::nvvm_i2f_rz:
248 case Intrinsic::nvvm_ll2d_rz:
249 case Intrinsic::nvvm_ll2f_rz:
250 return {Instruction::SIToFP};
251 case Intrinsic::nvvm_ui2d_rz:
252 case Intrinsic::nvvm_ui2f_rz:
253 case Intrinsic::nvvm_ull2d_rz:
254 case Intrinsic::nvvm_ull2f_rz:
255 return {Instruction::UIToFP};
258 case Intrinsic::nvvm_add_rn_d:
260 case Intrinsic::nvvm_add_rn_f:
262 case Intrinsic::nvvm_add_rn_ftz_f:
264 case Intrinsic::nvvm_mul_rn_d:
266 case Intrinsic::nvvm_mul_rn_f:
268 case Intrinsic::nvvm_mul_rn_ftz_f:
270 case Intrinsic::nvvm_div_rn_d:
271 return {Instruction::FDiv, FTZ_Any};
272 case Intrinsic::nvvm_div_rn_f:
273 return {Instruction::FDiv, FTZ_MustBeOff};
274 case Intrinsic::nvvm_div_rn_ftz_f:
275 return {Instruction::FDiv, FTZ_MustBeOn};
282 case Intrinsic::nvvm_rcp_rn_d:
283 return {SPC_Reciprocal, FTZ_Any};
284 case Intrinsic::nvvm_rcp_rn_f:
285 return {SPC_Reciprocal, FTZ_MustBeOff};
286 case Intrinsic::nvvm_rcp_rn_ftz_f:
287 return {SPC_Reciprocal, FTZ_MustBeOn};
318 if (Action.FtzRequirement != FTZ_Any) {
325 if (FtzEnabled != (Action.FtzRequirement == FTZ_MustBeOn))
353 switch (*Action.Special) {
360 llvm_unreachable(
"All SpecialCase enumerators should be handled in switch.");
386 Opd1PropInfo, Opd2PropInfo);
400 Opd1PropInfo, Opd2PropInfo);
Optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const
This class represents an incoming formal argument to a Function.
This class represents lattice values for constants.
Cost tables and simple lookup functions.
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP)
The main scalar evolution driver.
int getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, TTI::OperandValueKind Opd1Info=TTI::OK_AnyValue, TTI::OperandValueKind Opd2Info=TTI::OK_AnyValue, TTI::OperandValueProperties Opd1PropInfo=TTI::OP_None, TTI::OperandValueProperties Opd2PropInfo=TTI::OP_None, ArrayRef< const Value * > Args=ArrayRef< const Value * >(), const Instruction *CxtI=nullptr)
static uint64_t round(uint64_t Acc, uint64_t Input)
An instruction for reading from memory.
Value * getArgOperand(unsigned i) const
The core instruction combiner logic.
bool isKernelFunction(const Function &F)
Type * getType() const
All values are typed, get the type of this value.
unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, TTI::OperandValueKind Opd1Info=TTI::OK_AnyValue, TTI::OperandValueKind Opd2Info=TTI::OK_AnyValue, TTI::OperandValueProperties Opd1PropInfo=TTI::OP_None, TTI::OperandValueProperties Opd2PropInfo=TTI::OP_None, ArrayRef< const Value * > Args=ArrayRef< const Value * >(), const Instruction *CxtI=nullptr)
Simple integer binary arithmetic operators.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=None)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
iterator_range< User::op_iterator > arg_operands()
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP)
The instances of the Type class are immutable: once they are created, they are never changed.
DenormalMode parseDenormalFPAttribute(StringRef Str)
Returns the denormal mode to use for inputs and outputs.
IEEE-754 denormal numbers preserved.
bool isSourceOfDivergence(const Value *V)
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP)
This file provides a helper that implements much of the TTI interface in terms of the target-independ...
const Function * getFunction() const
Return the function this instruction belongs to.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
LLVM_READONLY APFloat maxnum(const APFloat &A, const APFloat &B)
Implements IEEE maxNum semantics.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Represent subnormal handling kind for floating point instruction inputs and outputs.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
static Constant * get(Type *Ty, double V)
This returns a ConstantFP, or a vector containing a splat of a ConstantFP, for the specified value in...
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
static BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), Instruction *InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
int InstructionOpcodeToISD(unsigned Opcode) const
Get the ISD node that corresponds to the Instruction class opcode.
static CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", Instruction *InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
Bitwise operators - logical and, logical or, logical xor.
StringRef getValueAsString() const
Return the attribute's value as a string.
Represents a single loop in the control flow graph.
StringRef getName() const
Return a constant reference to the value's name.
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP)
LLVM Value Representation.
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
StringRef - Represent a constant reference to a string, i.e.
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
std::pair< int, MVT > getTypeLegalizationCost(const DataLayout &DL, Type *Ty) const
Estimate the cost of type-legalization and the legalized type.
LLVM_READONLY APFloat minnum(const APFloat &A, const APFloat &B)
Implements IEEE minNum semantics.
A wrapper class for inspecting calls to intrinsic functions.
This file describes how to lower LLVM code to machine code.