19#include "llvm/IR/IntrinsicsNVPTX.h"
27#define DEBUG_TYPE "NVPTXtti"
31 switch (
II->getIntrinsicID()) {
32 default:
return false;
33 case Intrinsic::nvvm_read_ptx_sreg_tid_x:
34 case Intrinsic::nvvm_read_ptx_sreg_tid_y:
35 case Intrinsic::nvvm_read_ptx_sreg_tid_z:
41 return II->getIntrinsicID() == Intrinsic::nvvm_read_ptx_sreg_laneid;
46 switch (
II->getIntrinsicID()) {
47 default:
return false;
48 case Intrinsic::nvvm_atomic_load_inc_32:
49 case Intrinsic::nvvm_atomic_load_dec_32:
51 case Intrinsic::nvvm_atomic_add_gen_f_cta:
52 case Intrinsic::nvvm_atomic_add_gen_f_sys:
53 case Intrinsic::nvvm_atomic_add_gen_i_cta:
54 case Intrinsic::nvvm_atomic_add_gen_i_sys:
55 case Intrinsic::nvvm_atomic_and_gen_i_cta:
56 case Intrinsic::nvvm_atomic_and_gen_i_sys:
57 case Intrinsic::nvvm_atomic_cas_gen_i_cta:
58 case Intrinsic::nvvm_atomic_cas_gen_i_sys:
59 case Intrinsic::nvvm_atomic_dec_gen_i_cta:
60 case Intrinsic::nvvm_atomic_dec_gen_i_sys:
61 case Intrinsic::nvvm_atomic_inc_gen_i_cta:
62 case Intrinsic::nvvm_atomic_inc_gen_i_sys:
63 case Intrinsic::nvvm_atomic_max_gen_i_cta:
64 case Intrinsic::nvvm_atomic_max_gen_i_sys:
65 case Intrinsic::nvvm_atomic_min_gen_i_cta:
66 case Intrinsic::nvvm_atomic_min_gen_i_sys:
67 case Intrinsic::nvvm_atomic_or_gen_i_cta:
68 case Intrinsic::nvvm_atomic_or_gen_i_sys:
69 case Intrinsic::nvvm_atomic_exch_gen_i_cta:
70 case Intrinsic::nvvm_atomic_exch_gen_i_sys:
71 case Intrinsic::nvvm_atomic_xor_gen_i_cta:
72 case Intrinsic::nvvm_atomic_xor_gen_i_sys:
80 if (
const Argument *Arg = dyn_cast<Argument>(V))
86 if (
const LoadInst *LI = dyn_cast<LoadInst>(
I)) {
87 unsigned AS = LI->getPointerAddressSpace();
113 if (isa<CallInst>(
I))
133 enum FtzRequirementTy {
143 SCP_FunnelShiftClamp,
148 struct SimplifyAction {
150 std::optional<Intrinsic::ID> IID;
151 std::optional<Instruction::CastOps> CastOp;
152 std::optional<Instruction::BinaryOps> BinaryOp;
153 std::optional<SpecialCase> Special;
155 FtzRequirementTy FtzRequirement = FTZ_Any;
158 bool IsHalfTy =
false;
160 SimplifyAction() =
default;
163 bool IsHalfTy =
false)
164 : IID(IID), FtzRequirement(FtzReq), IsHalfTy(IsHalfTy) {}
171 : BinaryOp(BinaryOp), FtzRequirement(FtzReq) {}
173 SimplifyAction(SpecialCase Special, FtzRequirementTy FtzReq)
174 : Special(Special), FtzRequirement(FtzReq) {}
179 const SimplifyAction Action = [
II]() -> SimplifyAction {
180 switch (
II->getIntrinsicID()) {
182 case Intrinsic::nvvm_ceil_d:
183 return {Intrinsic::ceil, FTZ_Any};
184 case Intrinsic::nvvm_ceil_f:
185 return {Intrinsic::ceil, FTZ_MustBeOff};
186 case Intrinsic::nvvm_ceil_ftz_f:
187 return {Intrinsic::ceil, FTZ_MustBeOn};
188 case Intrinsic::nvvm_fabs_d:
189 return {Intrinsic::fabs, FTZ_Any};
190 case Intrinsic::nvvm_floor_d:
191 return {Intrinsic::floor, FTZ_Any};
192 case Intrinsic::nvvm_floor_f:
193 return {Intrinsic::floor, FTZ_MustBeOff};
194 case Intrinsic::nvvm_floor_ftz_f:
195 return {Intrinsic::floor, FTZ_MustBeOn};
196 case Intrinsic::nvvm_fma_rn_d:
197 return {Intrinsic::fma, FTZ_Any};
198 case Intrinsic::nvvm_fma_rn_f:
199 return {Intrinsic::fma, FTZ_MustBeOff};
200 case Intrinsic::nvvm_fma_rn_ftz_f:
201 return {Intrinsic::fma, FTZ_MustBeOn};
202 case Intrinsic::nvvm_fma_rn_f16:
203 return {Intrinsic::fma, FTZ_MustBeOff,
true};
204 case Intrinsic::nvvm_fma_rn_ftz_f16:
205 return {Intrinsic::fma, FTZ_MustBeOn,
true};
206 case Intrinsic::nvvm_fma_rn_f16x2:
207 return {Intrinsic::fma, FTZ_MustBeOff,
true};
208 case Intrinsic::nvvm_fma_rn_ftz_f16x2:
209 return {Intrinsic::fma, FTZ_MustBeOn,
true};
210 case Intrinsic::nvvm_fma_rn_bf16:
211 return {Intrinsic::fma, FTZ_MustBeOff,
true};
212 case Intrinsic::nvvm_fma_rn_ftz_bf16:
213 return {Intrinsic::fma, FTZ_MustBeOn,
true};
214 case Intrinsic::nvvm_fma_rn_bf16x2:
215 return {Intrinsic::fma, FTZ_MustBeOff,
true};
216 case Intrinsic::nvvm_fma_rn_ftz_bf16x2:
217 return {Intrinsic::fma, FTZ_MustBeOn,
true};
218 case Intrinsic::nvvm_fmax_d:
219 return {Intrinsic::maxnum, FTZ_Any};
220 case Intrinsic::nvvm_fmax_f:
221 return {Intrinsic::maxnum, FTZ_MustBeOff};
222 case Intrinsic::nvvm_fmax_ftz_f:
223 return {Intrinsic::maxnum, FTZ_MustBeOn};
224 case Intrinsic::nvvm_fmax_nan_f:
225 return {Intrinsic::maximum, FTZ_MustBeOff};
226 case Intrinsic::nvvm_fmax_ftz_nan_f:
227 return {Intrinsic::maximum, FTZ_MustBeOn};
228 case Intrinsic::nvvm_fmax_f16:
229 return {Intrinsic::maxnum, FTZ_MustBeOff,
true};
230 case Intrinsic::nvvm_fmax_ftz_f16:
231 return {Intrinsic::maxnum, FTZ_MustBeOn,
true};
232 case Intrinsic::nvvm_fmax_f16x2:
233 return {Intrinsic::maxnum, FTZ_MustBeOff,
true};
234 case Intrinsic::nvvm_fmax_ftz_f16x2:
235 return {Intrinsic::maxnum, FTZ_MustBeOn,
true};
236 case Intrinsic::nvvm_fmax_nan_f16:
237 return {Intrinsic::maximum, FTZ_MustBeOff,
true};
238 case Intrinsic::nvvm_fmax_ftz_nan_f16:
239 return {Intrinsic::maximum, FTZ_MustBeOn,
true};
240 case Intrinsic::nvvm_fmax_nan_f16x2:
241 return {Intrinsic::maximum, FTZ_MustBeOff,
true};
242 case Intrinsic::nvvm_fmax_ftz_nan_f16x2:
243 return {Intrinsic::maximum, FTZ_MustBeOn,
true};
244 case Intrinsic::nvvm_fmin_d:
245 return {Intrinsic::minnum, FTZ_Any};
246 case Intrinsic::nvvm_fmin_f:
247 return {Intrinsic::minnum, FTZ_MustBeOff};
248 case Intrinsic::nvvm_fmin_ftz_f:
249 return {Intrinsic::minnum, FTZ_MustBeOn};
250 case Intrinsic::nvvm_fmin_nan_f:
251 return {Intrinsic::minimum, FTZ_MustBeOff};
252 case Intrinsic::nvvm_fmin_ftz_nan_f:
253 return {Intrinsic::minimum, FTZ_MustBeOn};
254 case Intrinsic::nvvm_fmin_f16:
255 return {Intrinsic::minnum, FTZ_MustBeOff,
true};
256 case Intrinsic::nvvm_fmin_ftz_f16:
257 return {Intrinsic::minnum, FTZ_MustBeOn,
true};
258 case Intrinsic::nvvm_fmin_f16x2:
259 return {Intrinsic::minnum, FTZ_MustBeOff,
true};
260 case Intrinsic::nvvm_fmin_ftz_f16x2:
261 return {Intrinsic::minnum, FTZ_MustBeOn,
true};
262 case Intrinsic::nvvm_fmin_nan_f16:
263 return {Intrinsic::minimum, FTZ_MustBeOff,
true};
264 case Intrinsic::nvvm_fmin_ftz_nan_f16:
265 return {Intrinsic::minimum, FTZ_MustBeOn,
true};
266 case Intrinsic::nvvm_fmin_nan_f16x2:
267 return {Intrinsic::minimum, FTZ_MustBeOff,
true};
268 case Intrinsic::nvvm_fmin_ftz_nan_f16x2:
269 return {Intrinsic::minimum, FTZ_MustBeOn,
true};
270 case Intrinsic::nvvm_sqrt_rn_d:
271 return {Intrinsic::sqrt, FTZ_Any};
272 case Intrinsic::nvvm_sqrt_f:
277 return {Intrinsic::sqrt, FTZ_Any};
278 case Intrinsic::nvvm_trunc_d:
279 return {Intrinsic::trunc, FTZ_Any};
280 case Intrinsic::nvvm_trunc_f:
281 return {Intrinsic::trunc, FTZ_MustBeOff};
282 case Intrinsic::nvvm_trunc_ftz_f:
283 return {Intrinsic::trunc, FTZ_MustBeOn};
290 case Intrinsic::nvvm_d2i_rz:
291 case Intrinsic::nvvm_f2i_rz:
292 case Intrinsic::nvvm_d2ll_rz:
293 case Intrinsic::nvvm_f2ll_rz:
294 return {Instruction::FPToSI};
295 case Intrinsic::nvvm_d2ui_rz:
296 case Intrinsic::nvvm_f2ui_rz:
297 case Intrinsic::nvvm_d2ull_rz:
298 case Intrinsic::nvvm_f2ull_rz:
299 return {Instruction::FPToUI};
301 case Intrinsic::nvvm_i2d_rn:
302 case Intrinsic::nvvm_i2f_rn:
303 case Intrinsic::nvvm_ll2d_rn:
304 case Intrinsic::nvvm_ll2f_rn:
305 return {Instruction::SIToFP};
306 case Intrinsic::nvvm_ui2d_rn:
307 case Intrinsic::nvvm_ui2f_rn:
308 case Intrinsic::nvvm_ull2d_rn:
309 case Intrinsic::nvvm_ull2f_rn:
310 return {Instruction::UIToFP};
313 case Intrinsic::nvvm_div_rn_d:
314 return {Instruction::FDiv, FTZ_Any};
321 case Intrinsic::nvvm_rcp_rn_d:
322 return {SPC_Reciprocal, FTZ_Any};
324 case Intrinsic::nvvm_fshl_clamp:
325 case Intrinsic::nvvm_fshr_clamp:
326 return {SCP_FunnelShiftClamp, FTZ_Any};
357 if (Action.FtzRequirement != FTZ_Any) {
363 if (FtzEnabled != (Action.FtzRequirement == FTZ_MustBeOn))
372 Type *Tys[] = {
II->getArgOperand(0)->getType()};
381 II->getArgOperand(1),
II->getName());
392 switch (*Action.Special) {
396 Instruction::FDiv, ConstantFP::get(
II->getArgOperand(0)->getType(), 1),
397 II->getArgOperand(0),
II->getName());
399 case SCP_FunnelShiftClamp: {
402 if (
const auto *ShiftConst = dyn_cast<ConstantInt>(
II->getArgOperand(2))) {
403 const bool IsLeft =
II->getIntrinsicID() == Intrinsic::nvvm_fshl_clamp;
404 if (ShiftConst->getZExtValue() >=
II->getType()->getIntegerBitWidth())
407 const unsigned FshIID = IsLeft ? Intrinsic::fshl : Intrinsic::fshr;
409 II->getModule(), FshIID,
II->getType()),
415 llvm_unreachable(
"All SpecialCase enumerators should be handled in switch.");
424 case Intrinsic::nvvm_isspacep_global:
426 case Intrinsic::nvvm_isspacep_local:
428 case Intrinsic::nvvm_isspacep_shared:
430 case Intrinsic::nvvm_isspacep_shared_cluster:
434 : std::optional{
false};
435 case Intrinsic::nvvm_isspacep_const:
448static std::optional<Instruction *>
451 switch (
auto IID =
II.getIntrinsicID()) {
452 case Intrinsic::nvvm_isspacep_global:
453 case Intrinsic::nvvm_isspacep_local:
454 case Intrinsic::nvvm_isspacep_shared:
455 case Intrinsic::nvvm_isspacep_shared_cluster:
456 case Intrinsic::nvvm_isspacep_const: {
457 Value *Op0 =
II.getArgOperand(0);
462 if (
auto *ASCO = dyn_cast<AddrSpaceCastOperator>(Op0))
463 AS = ASCO->getOperand(0)->getType()->getPointerAddressSpace();
467 ConstantInt::get(
II.getType(), *Answer));
475std::optional<Instruction *>
507 if (LT.second.SimpleTy == MVT::i64)
536 case Intrinsic::nvvm_isspacep_const:
537 case Intrinsic::nvvm_isspacep_global:
538 case Intrinsic::nvvm_isspacep_local:
539 case Intrinsic::nvvm_isspacep_shared:
540 case Intrinsic::nvvm_isspacep_shared_cluster: {
553 case Intrinsic::nvvm_isspacep_const:
554 case Intrinsic::nvvm_isspacep_global:
555 case Intrinsic::nvvm_isspacep_local:
556 case Intrinsic::nvvm_isspacep_shared:
557 case Intrinsic::nvvm_isspacep_shared_cluster: {
560 return ConstantInt::get(
II->getType(), *R);
This file provides a helper that implements much of the TTI interface in terms of the target-independent code generator and TargetLowering interfaces.
This file contains the declarations for the subclasses of Constant, which represent the different flavors of constant values that live in LLVM.
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
This file provides the interface for the instcombine pass implementation.
uint64_t IntrinsicInst * II
This file describes how to lower LLVM code to machine code.
This class represents an incoming formal argument to a Function.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory), i.e. a start pointer and a length.
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE)
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP)
std::pair< InstructionCost, MVT > getTypeLegalizationCost(Type *Ty) const
Estimate the cost of type-legalization and the legalized type.
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr)
static BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), InsertPosition InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
static CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's constructor.
The core instruction combiner logic.
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
A wrapper class for inspecting calls to intrinsic functions.
An instruction for reading from memory.
Represents a single loop in the control flow graph.
bool collectFlatAddressOperands(SmallVectorImpl< int > &OpIndexes, Intrinsic::ID IID) const
Value * rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV, Value *NewV) const
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE)
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr)
std::optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP)
bool isSourceOfDivergence(const Value *V)
The main scalar evolution driver.
This class consists of common code factored out of the SmallVector class to reduce code duplication based on the SmallVector 'N' template parameter.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
int InstructionOpcodeToISD(unsigned Opcode) const
Get the ISD node that corresponds to the Instruction class opcode.
The instances of the Type class are immutable: once they are created, they are never changed.
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ ADD
Simple integer binary arithmetic operators.
@ AND
Bitwise operators - logical and, logical or, logical xor.
Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
This is an optimization pass for GlobalISel generic memory operations.
bool isKernelFunction(const Function &F)
static const fltSemantics & IEEEsingle() LLVM_READNONE
static const fltSemantics & IEEEhalf() LLVM_READNONE
Represent subnormal handling kind for floating point instruction inputs and outputs.
@ PreserveSign
The sign of a flushed-to-zero number is preserved in the sign of 0.