17 #include "llvm/IR/IntrinsicsNVPTX.h"
22 #define DEBUG_TYPE "NVPTXtti"
27 default:
return false;
28 case Intrinsic::nvvm_read_ptx_sreg_tid_x:
29 case Intrinsic::nvvm_read_ptx_sreg_tid_y:
30 case Intrinsic::nvvm_read_ptx_sreg_tid_z:
36 return II->
getIntrinsicID() == Intrinsic::nvvm_read_ptx_sreg_laneid;
42 default:
return false;
43 case Intrinsic::nvvm_atomic_load_inc_32:
44 case Intrinsic::nvvm_atomic_load_dec_32:
46 case Intrinsic::nvvm_atomic_add_gen_f_cta:
47 case Intrinsic::nvvm_atomic_add_gen_f_sys:
48 case Intrinsic::nvvm_atomic_add_gen_i_cta:
49 case Intrinsic::nvvm_atomic_add_gen_i_sys:
50 case Intrinsic::nvvm_atomic_and_gen_i_cta:
51 case Intrinsic::nvvm_atomic_and_gen_i_sys:
52 case Intrinsic::nvvm_atomic_cas_gen_i_cta:
53 case Intrinsic::nvvm_atomic_cas_gen_i_sys:
54 case Intrinsic::nvvm_atomic_dec_gen_i_cta:
55 case Intrinsic::nvvm_atomic_dec_gen_i_sys:
56 case Intrinsic::nvvm_atomic_inc_gen_i_cta:
57 case Intrinsic::nvvm_atomic_inc_gen_i_sys:
58 case Intrinsic::nvvm_atomic_max_gen_i_cta:
59 case Intrinsic::nvvm_atomic_max_gen_i_sys:
60 case Intrinsic::nvvm_atomic_min_gen_i_cta:
61 case Intrinsic::nvvm_atomic_min_gen_i_sys:
62 case Intrinsic::nvvm_atomic_or_gen_i_cta:
63 case Intrinsic::nvvm_atomic_or_gen_i_sys:
64 case Intrinsic::nvvm_atomic_exch_gen_i_cta:
65 case Intrinsic::nvvm_atomic_exch_gen_i_sys:
66 case Intrinsic::nvvm_atomic_xor_gen_i_cta:
67 case Intrinsic::nvvm_atomic_xor_gen_i_sys:
81 if (
const LoadInst *LI = dyn_cast<LoadInst>(
I)) {
82 unsigned AS = LI->getPointerAddressSpace();
108 if (isa<CallInst>(
I))
127 enum FtzRequirementTy {
141 struct SimplifyAction {
143 std::optional<Intrinsic::ID> IID;
144 std::optional<Instruction::CastOps> CastOp;
145 std::optional<Instruction::BinaryOps> BinaryOp;
146 std::optional<SpecialCase> Special;
148 FtzRequirementTy FtzRequirement = FTZ_Any;
151 bool IsHalfTy =
false;
153 SimplifyAction() =
default;
156 bool IsHalfTy =
false)
157 : IID(IID), FtzRequirement(FtzReq), IsHalfTy(IsHalfTy) {}
164 : BinaryOp(BinaryOp), FtzRequirement(FtzReq) {}
166 SimplifyAction(SpecialCase Special, FtzRequirementTy FtzReq)
167 : Special(Special), FtzRequirement(FtzReq) {}
172 const SimplifyAction Action = [II]() -> SimplifyAction {
175 case Intrinsic::nvvm_ceil_d:
177 case Intrinsic::nvvm_ceil_f:
179 case Intrinsic::nvvm_ceil_ftz_f:
181 case Intrinsic::nvvm_fabs_d:
182 return {Intrinsic::fabs, FTZ_Any};
183 case Intrinsic::nvvm_fabs_f:
184 return {Intrinsic::fabs, FTZ_MustBeOff};
185 case Intrinsic::nvvm_fabs_ftz_f:
186 return {Intrinsic::fabs, FTZ_MustBeOn};
187 case Intrinsic::nvvm_floor_d:
189 case Intrinsic::nvvm_floor_f:
191 case Intrinsic::nvvm_floor_ftz_f:
193 case Intrinsic::nvvm_fma_rn_d:
194 return {Intrinsic::fma, FTZ_Any};
195 case Intrinsic::nvvm_fma_rn_f:
196 return {Intrinsic::fma, FTZ_MustBeOff};
197 case Intrinsic::nvvm_fma_rn_ftz_f:
198 return {Intrinsic::fma, FTZ_MustBeOn};
199 case Intrinsic::nvvm_fma_rn_f16:
200 return {Intrinsic::fma, FTZ_MustBeOff,
true};
201 case Intrinsic::nvvm_fma_rn_ftz_f16:
202 return {Intrinsic::fma, FTZ_MustBeOn,
true};
203 case Intrinsic::nvvm_fma_rn_f16x2:
204 return {Intrinsic::fma, FTZ_MustBeOff,
true};
205 case Intrinsic::nvvm_fma_rn_ftz_f16x2:
206 return {Intrinsic::fma, FTZ_MustBeOn,
true};
207 case Intrinsic::nvvm_fmax_d:
209 case Intrinsic::nvvm_fmax_f:
211 case Intrinsic::nvvm_fmax_ftz_f:
213 case Intrinsic::nvvm_fmax_nan_f:
215 case Intrinsic::nvvm_fmax_ftz_nan_f:
217 case Intrinsic::nvvm_fmax_f16:
219 case Intrinsic::nvvm_fmax_ftz_f16:
221 case Intrinsic::nvvm_fmax_f16x2:
223 case Intrinsic::nvvm_fmax_ftz_f16x2:
225 case Intrinsic::nvvm_fmax_nan_f16:
227 case Intrinsic::nvvm_fmax_ftz_nan_f16:
229 case Intrinsic::nvvm_fmax_nan_f16x2:
231 case Intrinsic::nvvm_fmax_ftz_nan_f16x2:
233 case Intrinsic::nvvm_fmin_d:
235 case Intrinsic::nvvm_fmin_f:
237 case Intrinsic::nvvm_fmin_ftz_f:
239 case Intrinsic::nvvm_fmin_nan_f:
241 case Intrinsic::nvvm_fmin_ftz_nan_f:
243 case Intrinsic::nvvm_fmin_f16:
245 case Intrinsic::nvvm_fmin_ftz_f16:
247 case Intrinsic::nvvm_fmin_f16x2:
249 case Intrinsic::nvvm_fmin_ftz_f16x2:
251 case Intrinsic::nvvm_fmin_nan_f16:
253 case Intrinsic::nvvm_fmin_ftz_nan_f16:
255 case Intrinsic::nvvm_fmin_nan_f16x2:
257 case Intrinsic::nvvm_fmin_ftz_nan_f16x2:
259 case Intrinsic::nvvm_round_d:
261 case Intrinsic::nvvm_round_f:
263 case Intrinsic::nvvm_round_ftz_f:
265 case Intrinsic::nvvm_sqrt_rn_d:
266 return {Intrinsic::sqrt, FTZ_Any};
267 case Intrinsic::nvvm_sqrt_f:
272 return {Intrinsic::sqrt, FTZ_Any};
273 case Intrinsic::nvvm_sqrt_rn_f:
274 return {Intrinsic::sqrt, FTZ_MustBeOff};
275 case Intrinsic::nvvm_sqrt_rn_ftz_f:
276 return {Intrinsic::sqrt, FTZ_MustBeOn};
277 case Intrinsic::nvvm_trunc_d:
279 case Intrinsic::nvvm_trunc_f:
281 case Intrinsic::nvvm_trunc_ftz_f:
289 case Intrinsic::nvvm_d2i_rz:
290 case Intrinsic::nvvm_f2i_rz:
291 case Intrinsic::nvvm_d2ll_rz:
292 case Intrinsic::nvvm_f2ll_rz:
293 return {Instruction::FPToSI};
294 case Intrinsic::nvvm_d2ui_rz:
295 case Intrinsic::nvvm_f2ui_rz:
296 case Intrinsic::nvvm_d2ull_rz:
297 case Intrinsic::nvvm_f2ull_rz:
298 return {Instruction::FPToUI};
299 case Intrinsic::nvvm_i2d_rz:
300 case Intrinsic::nvvm_i2f_rz:
301 case Intrinsic::nvvm_ll2d_rz:
302 case Intrinsic::nvvm_ll2f_rz:
303 return {Instruction::SIToFP};
304 case Intrinsic::nvvm_ui2d_rz:
305 case Intrinsic::nvvm_ui2f_rz:
306 case Intrinsic::nvvm_ull2d_rz:
307 case Intrinsic::nvvm_ull2f_rz:
308 return {Instruction::UIToFP};
311 case Intrinsic::nvvm_add_rn_d:
312 return {Instruction::FAdd, FTZ_Any};
313 case Intrinsic::nvvm_add_rn_f:
314 return {Instruction::FAdd, FTZ_MustBeOff};
315 case Intrinsic::nvvm_add_rn_ftz_f:
316 return {Instruction::FAdd, FTZ_MustBeOn};
317 case Intrinsic::nvvm_mul_rn_d:
318 return {Instruction::FMul, FTZ_Any};
319 case Intrinsic::nvvm_mul_rn_f:
320 return {Instruction::FMul, FTZ_MustBeOff};
321 case Intrinsic::nvvm_mul_rn_ftz_f:
322 return {Instruction::FMul, FTZ_MustBeOn};
323 case Intrinsic::nvvm_div_rn_d:
324 return {Instruction::FDiv, FTZ_Any};
325 case Intrinsic::nvvm_div_rn_f:
326 return {Instruction::FDiv, FTZ_MustBeOff};
327 case Intrinsic::nvvm_div_rn_ftz_f:
328 return {Instruction::FDiv, FTZ_MustBeOn};
335 case Intrinsic::nvvm_rcp_rn_d:
336 return {SPC_Reciprocal, FTZ_Any};
337 case Intrinsic::nvvm_rcp_rn_f:
338 return {SPC_Reciprocal, FTZ_MustBeOff};
339 case Intrinsic::nvvm_rcp_rn_ftz_f:
340 return {SPC_Reciprocal, FTZ_MustBeOn};
371 if (Action.FtzRequirement != FTZ_Any) {
377 if (FtzEnabled != (Action.FtzRequirement == FTZ_MustBeOn))
405 switch (*Action.Special) {
412 llvm_unreachable(
"All SpecialCase enumerators should be handled in switch.");
415 std::optional<Instruction *>