17#include "llvm/IR/IntrinsicsNVPTX.h"
22#define DEBUG_TYPE "NVPTXtti"
26 switch (
II->getIntrinsicID()) {
27 default:
return false;
28 case Intrinsic::nvvm_read_ptx_sreg_tid_x:
29 case Intrinsic::nvvm_read_ptx_sreg_tid_y:
30 case Intrinsic::nvvm_read_ptx_sreg_tid_z:
36 return II->getIntrinsicID() == Intrinsic::nvvm_read_ptx_sreg_laneid;
41 switch (
II->getIntrinsicID()) {
42 default:
return false;
43 case Intrinsic::nvvm_atomic_load_inc_32:
44 case Intrinsic::nvvm_atomic_load_dec_32:
46 case Intrinsic::nvvm_atomic_add_gen_f_cta:
47 case Intrinsic::nvvm_atomic_add_gen_f_sys:
48 case Intrinsic::nvvm_atomic_add_gen_i_cta:
49 case Intrinsic::nvvm_atomic_add_gen_i_sys:
50 case Intrinsic::nvvm_atomic_and_gen_i_cta:
51 case Intrinsic::nvvm_atomic_and_gen_i_sys:
52 case Intrinsic::nvvm_atomic_cas_gen_i_cta:
53 case Intrinsic::nvvm_atomic_cas_gen_i_sys:
54 case Intrinsic::nvvm_atomic_dec_gen_i_cta:
55 case Intrinsic::nvvm_atomic_dec_gen_i_sys:
56 case Intrinsic::nvvm_atomic_inc_gen_i_cta:
57 case Intrinsic::nvvm_atomic_inc_gen_i_sys:
58 case Intrinsic::nvvm_atomic_max_gen_i_cta:
59 case Intrinsic::nvvm_atomic_max_gen_i_sys:
60 case Intrinsic::nvvm_atomic_min_gen_i_cta:
61 case Intrinsic::nvvm_atomic_min_gen_i_sys:
62 case Intrinsic::nvvm_atomic_or_gen_i_cta:
63 case Intrinsic::nvvm_atomic_or_gen_i_sys:
64 case Intrinsic::nvvm_atomic_exch_gen_i_cta:
65 case Intrinsic::nvvm_atomic_exch_gen_i_sys:
66 case Intrinsic::nvvm_atomic_xor_gen_i_cta:
67 case Intrinsic::nvvm_atomic_xor_gen_i_sys:
75 if (
const Argument *Arg = dyn_cast<Argument>(V))
81 if (
const LoadInst *LI = dyn_cast<LoadInst>(
I)) {
82 unsigned AS = LI->getPointerAddressSpace();
108 if (isa<CallInst>(
I))
127 enum FtzRequirementTy {
141 struct SimplifyAction {
143 std::optional<Intrinsic::ID> IID;
144 std::optional<Instruction::CastOps> CastOp;
145 std::optional<Instruction::BinaryOps> BinaryOp;
146 std::optional<SpecialCase> Special;
148 FtzRequirementTy FtzRequirement = FTZ_Any;
151 bool IsHalfTy =
false;
153 SimplifyAction() =
default;
156 bool IsHalfTy =
false)
157 : IID(IID), FtzRequirement(FtzReq), IsHalfTy(IsHalfTy) {}
164 : BinaryOp(BinaryOp), FtzRequirement(FtzReq) {}
166 SimplifyAction(SpecialCase Special, FtzRequirementTy FtzReq)
167 : Special(Special), FtzRequirement(FtzReq) {}
172 const SimplifyAction Action = [
II]() -> SimplifyAction {
173 switch (
II->getIntrinsicID()) {
175 case Intrinsic::nvvm_ceil_d:
176 return {Intrinsic::ceil, FTZ_Any};
177 case Intrinsic::nvvm_ceil_f:
178 return {Intrinsic::ceil, FTZ_MustBeOff};
179 case Intrinsic::nvvm_ceil_ftz_f:
180 return {Intrinsic::ceil, FTZ_MustBeOn};
181 case Intrinsic::nvvm_fabs_d:
182 return {Intrinsic::fabs, FTZ_Any};
183 case Intrinsic::nvvm_floor_d:
184 return {Intrinsic::floor, FTZ_Any};
185 case Intrinsic::nvvm_floor_f:
186 return {Intrinsic::floor, FTZ_MustBeOff};
187 case Intrinsic::nvvm_floor_ftz_f:
188 return {Intrinsic::floor, FTZ_MustBeOn};
189 case Intrinsic::nvvm_fma_rn_d:
190 return {Intrinsic::fma, FTZ_Any};
191 case Intrinsic::nvvm_fma_rn_f:
192 return {Intrinsic::fma, FTZ_MustBeOff};
193 case Intrinsic::nvvm_fma_rn_ftz_f:
194 return {Intrinsic::fma, FTZ_MustBeOn};
195 case Intrinsic::nvvm_fma_rn_f16:
196 return {Intrinsic::fma, FTZ_MustBeOff,
true};
197 case Intrinsic::nvvm_fma_rn_ftz_f16:
198 return {Intrinsic::fma, FTZ_MustBeOn,
true};
199 case Intrinsic::nvvm_fma_rn_f16x2:
200 return {Intrinsic::fma, FTZ_MustBeOff,
true};
201 case Intrinsic::nvvm_fma_rn_ftz_f16x2:
202 return {Intrinsic::fma, FTZ_MustBeOn,
true};
203 case Intrinsic::nvvm_fma_rn_bf16:
204 return {Intrinsic::fma, FTZ_MustBeOff,
true};
205 case Intrinsic::nvvm_fma_rn_ftz_bf16:
206 return {Intrinsic::fma, FTZ_MustBeOn,
true};
207 case Intrinsic::nvvm_fma_rn_bf16x2:
208 return {Intrinsic::fma, FTZ_MustBeOff,
true};
209 case Intrinsic::nvvm_fma_rn_ftz_bf16x2:
210 return {Intrinsic::fma, FTZ_MustBeOn,
true};
211 case Intrinsic::nvvm_fmax_d:
212 return {Intrinsic::maxnum, FTZ_Any};
213 case Intrinsic::nvvm_fmax_f:
214 return {Intrinsic::maxnum, FTZ_MustBeOff};
215 case Intrinsic::nvvm_fmax_ftz_f:
216 return {Intrinsic::maxnum, FTZ_MustBeOn};
217 case Intrinsic::nvvm_fmax_nan_f:
218 return {Intrinsic::maximum, FTZ_MustBeOff};
219 case Intrinsic::nvvm_fmax_ftz_nan_f:
220 return {Intrinsic::maximum, FTZ_MustBeOn};
221 case Intrinsic::nvvm_fmax_f16:
222 return {Intrinsic::maxnum, FTZ_MustBeOff,
true};
223 case Intrinsic::nvvm_fmax_ftz_f16:
224 return {Intrinsic::maxnum, FTZ_MustBeOn,
true};
225 case Intrinsic::nvvm_fmax_f16x2:
226 return {Intrinsic::maxnum, FTZ_MustBeOff,
true};
227 case Intrinsic::nvvm_fmax_ftz_f16x2:
228 return {Intrinsic::maxnum, FTZ_MustBeOn,
true};
229 case Intrinsic::nvvm_fmax_nan_f16:
230 return {Intrinsic::maximum, FTZ_MustBeOff,
true};
231 case Intrinsic::nvvm_fmax_ftz_nan_f16:
232 return {Intrinsic::maximum, FTZ_MustBeOn,
true};
233 case Intrinsic::nvvm_fmax_nan_f16x2:
234 return {Intrinsic::maximum, FTZ_MustBeOff,
true};
235 case Intrinsic::nvvm_fmax_ftz_nan_f16x2:
236 return {Intrinsic::maximum, FTZ_MustBeOn,
true};
237 case Intrinsic::nvvm_fmin_d:
238 return {Intrinsic::minnum, FTZ_Any};
239 case Intrinsic::nvvm_fmin_f:
240 return {Intrinsic::minnum, FTZ_MustBeOff};
241 case Intrinsic::nvvm_fmin_ftz_f:
242 return {Intrinsic::minnum, FTZ_MustBeOn};
243 case Intrinsic::nvvm_fmin_nan_f:
244 return {Intrinsic::minimum, FTZ_MustBeOff};
245 case Intrinsic::nvvm_fmin_ftz_nan_f:
246 return {Intrinsic::minimum, FTZ_MustBeOn};
247 case Intrinsic::nvvm_fmin_f16:
248 return {Intrinsic::minnum, FTZ_MustBeOff,
true};
249 case Intrinsic::nvvm_fmin_ftz_f16:
250 return {Intrinsic::minnum, FTZ_MustBeOn,
true};
251 case Intrinsic::nvvm_fmin_f16x2:
252 return {Intrinsic::minnum, FTZ_MustBeOff,
true};
253 case Intrinsic::nvvm_fmin_ftz_f16x2:
254 return {Intrinsic::minnum, FTZ_MustBeOn,
true};
255 case Intrinsic::nvvm_fmin_nan_f16:
256 return {Intrinsic::minimum, FTZ_MustBeOff,
true};
257 case Intrinsic::nvvm_fmin_ftz_nan_f16:
258 return {Intrinsic::minimum, FTZ_MustBeOn,
true};
259 case Intrinsic::nvvm_fmin_nan_f16x2:
260 return {Intrinsic::minimum, FTZ_MustBeOff,
true};
261 case Intrinsic::nvvm_fmin_ftz_nan_f16x2:
262 return {Intrinsic::minimum, FTZ_MustBeOn,
true};
263 case Intrinsic::nvvm_sqrt_rn_d:
264 return {Intrinsic::sqrt, FTZ_Any};
265 case Intrinsic::nvvm_sqrt_f:
270 return {Intrinsic::sqrt, FTZ_Any};
271 case Intrinsic::nvvm_trunc_d:
272 return {Intrinsic::trunc, FTZ_Any};
273 case Intrinsic::nvvm_trunc_f:
274 return {Intrinsic::trunc, FTZ_MustBeOff};
275 case Intrinsic::nvvm_trunc_ftz_f:
276 return {Intrinsic::trunc, FTZ_MustBeOn};
283 case Intrinsic::nvvm_d2i_rz:
284 case Intrinsic::nvvm_f2i_rz:
285 case Intrinsic::nvvm_d2ll_rz:
286 case Intrinsic::nvvm_f2ll_rz:
287 return {Instruction::FPToSI};
288 case Intrinsic::nvvm_d2ui_rz:
289 case Intrinsic::nvvm_f2ui_rz:
290 case Intrinsic::nvvm_d2ull_rz:
291 case Intrinsic::nvvm_f2ull_rz:
292 return {Instruction::FPToUI};
293 case Intrinsic::nvvm_i2d_rz:
294 case Intrinsic::nvvm_i2f_rz:
295 case Intrinsic::nvvm_ll2d_rz:
296 case Intrinsic::nvvm_ll2f_rz:
297 return {Instruction::SIToFP};
298 case Intrinsic::nvvm_ui2d_rz:
299 case Intrinsic::nvvm_ui2f_rz:
300 case Intrinsic::nvvm_ull2d_rz:
301 case Intrinsic::nvvm_ull2f_rz:
302 return {Instruction::UIToFP};
305 case Intrinsic::nvvm_div_rn_d:
306 return {Instruction::FDiv, FTZ_Any};
313 case Intrinsic::nvvm_rcp_rn_d:
314 return {SPC_Reciprocal, FTZ_Any};
345 if (Action.FtzRequirement != FTZ_Any) {
351 if (FtzEnabled != (Action.FtzRequirement == FTZ_MustBeOn))
360 Type *Tys[] = {
II->getArgOperand(0)->getType()};
368 II->getArgOperand(1),
II->getName());
379 switch (*Action.Special) {
383 Instruction::FDiv, ConstantFP::get(
II->getArgOperand(0)->getType(), 1),
384 II->getArgOperand(0),
II->getName());
386 llvm_unreachable(
"All SpecialCase enumerators should be handled in switch.");
389std::optional<Instruction *>
419 if (LT.second.SimpleTy == MVT::i64)
This file provides a helper that implements much of the TTI interface in terms of the target-independent code generator and TargetLowering interfaces.
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
Cost tables and simple lookup functions.
uint64_t IntrinsicInst * II
This file describes how to lower LLVM code to machine code.
This class represents an incoming formal argument to a Function.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory), i.e. a start pointer and a length.
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE)
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP)
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args=std::nullopt, const Instruction *CxtI=nullptr)
std::pair< InstructionCost, MVT > getTypeLegalizationCost(Type *Ty) const
Estimate the cost of type-legalization and the legalized type.
static BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), InsertPosition InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
static CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's constructor.
The core instruction combiner logic.
A wrapper class for inspecting calls to intrinsic functions.
An instruction for reading from memory.
Represents a single loop in the control flow graph.
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE)
std::optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP)
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args=std::nullopt, const Instruction *CxtI=nullptr)
bool isSourceOfDivergence(const Value *V)
The main scalar evolution driver.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
int InstructionOpcodeToISD(unsigned Opcode) const
Get the ISD node that corresponds to the Instruction class opcode.
The instances of the Type class are immutable: once they are created, they are never changed.
LLVM Value Representation.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ ADD
Simple integer binary arithmetic operators.
@ AND
Bitwise operators - logical and, logical or, logical xor.
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
This is an optimization pass for GlobalISel generic memory operations.
bool isKernelFunction(const Function &F)
static const fltSemantics & IEEEsingle() LLVM_READNONE
static const fltSemantics & IEEEhalf() LLVM_READNONE
Represent subnormal handling kind for floating point instruction inputs and outputs.
@ PreserveSign
The sign of a flushed-to-zero number is preserved in the sign of 0.