LLVM 22.0.0git
NVVMIntrinsicUtils.h
Go to the documentation of this file.
1//===--- NVVMIntrinsicUtils.h -----------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This file contains the definitions of the enumerations and flags
11/// associated with NVVM Intrinsics, along with some helper functions.
12//
13//===----------------------------------------------------------------------===//
14
15#ifndef LLVM_IR_NVVMINTRINSICUTILS_H
16#define LLVM_IR_NVVMINTRINSICUTILS_H
17
18#include <stdint.h>
19
20#include "llvm/ADT/APFloat.h"
21#include "llvm/ADT/APInt.h"
22#include "llvm/IR/Constants.h"
23#include "llvm/IR/Intrinsics.h"
24#include "llvm/IR/IntrinsicsNVPTX.h"
26
27namespace llvm {
28namespace nvvm {
29
// Reduction Ops supported with TMA Copy from Shared
// to Global Memory for the "cp.reduce.async.bulk.tensor.*"
// family of PTX instructions.
// The enumerators carry explicit values 0..7 and the underlying type is
// uint8_t.
enum class TMAReductionOp : uint8_t {
  ADD = 0,
  MIN = 1,
  MAX = 2,
  INC = 3,
  DEC = 4,
  AND = 5,
  OR = 6,
  XOR = 7,
};
43
// Enum to represent the cta_group::1 and
// cta_group::2 variants in TMA/TCGEN05 family of
// PTX instructions.
enum class CTAGroupKind : uint8_t {
  CG_NONE = 0, // default with no cta_group modifier
  CG_1 = 1,    // cta_group::1 modifier
  CG_2 = 2,    // cta_group::2 modifier
};
52
// Tcgen05 MMA kinds. printTcgen05MMAKind casts an immediate argument's
// zero-extended value to this enum, so the numeric values are significant.
enum class Tcgen05MMAKind : uint8_t { F16 = 0, TF32 = 1, F8F6F4 = 2, I8 = 3 };
54
// Collector usage ops. printTcgen05CollectorUsageOp casts an immediate
// argument's zero-extended value to this enum.
// NOTE(review): the enum header and the enumerators for values 0 and 1 were
// missing from this listing. The underlying type and the names DISCARD and
// LASTUSE are reconstructed from the sibling enums and from the strings
// printed by printTcgen05CollectorUsageOp -- confirm against upstream.
enum class Tcgen05CollectorUsageOp : uint8_t {
  DISCARD = 0,
  LASTUSE = 1,
  FILL = 2,
  USE = 3,
};
61
63 switch (IntrinsicID) {
64 case Intrinsic::nvvm_f2i_rm_ftz:
65 case Intrinsic::nvvm_f2i_rn_ftz:
66 case Intrinsic::nvvm_f2i_rp_ftz:
67 case Intrinsic::nvvm_f2i_rz_ftz:
68
69 case Intrinsic::nvvm_f2ui_rm_ftz:
70 case Intrinsic::nvvm_f2ui_rn_ftz:
71 case Intrinsic::nvvm_f2ui_rp_ftz:
72 case Intrinsic::nvvm_f2ui_rz_ftz:
73
74 case Intrinsic::nvvm_f2ll_rm_ftz:
75 case Intrinsic::nvvm_f2ll_rn_ftz:
76 case Intrinsic::nvvm_f2ll_rp_ftz:
77 case Intrinsic::nvvm_f2ll_rz_ftz:
78
79 case Intrinsic::nvvm_f2ull_rm_ftz:
80 case Intrinsic::nvvm_f2ull_rn_ftz:
81 case Intrinsic::nvvm_f2ull_rp_ftz:
82 case Intrinsic::nvvm_f2ull_rz_ftz:
83 return true;
84
85 case Intrinsic::nvvm_f2i_rm:
86 case Intrinsic::nvvm_f2i_rn:
87 case Intrinsic::nvvm_f2i_rp:
88 case Intrinsic::nvvm_f2i_rz:
89
90 case Intrinsic::nvvm_f2ui_rm:
91 case Intrinsic::nvvm_f2ui_rn:
92 case Intrinsic::nvvm_f2ui_rp:
93 case Intrinsic::nvvm_f2ui_rz:
94
95 case Intrinsic::nvvm_d2i_rm:
96 case Intrinsic::nvvm_d2i_rn:
97 case Intrinsic::nvvm_d2i_rp:
98 case Intrinsic::nvvm_d2i_rz:
99
100 case Intrinsic::nvvm_d2ui_rm:
101 case Intrinsic::nvvm_d2ui_rn:
102 case Intrinsic::nvvm_d2ui_rp:
103 case Intrinsic::nvvm_d2ui_rz:
104
105 case Intrinsic::nvvm_f2ll_rm:
106 case Intrinsic::nvvm_f2ll_rn:
107 case Intrinsic::nvvm_f2ll_rp:
108 case Intrinsic::nvvm_f2ll_rz:
109
110 case Intrinsic::nvvm_f2ull_rm:
111 case Intrinsic::nvvm_f2ull_rn:
112 case Intrinsic::nvvm_f2ull_rp:
113 case Intrinsic::nvvm_f2ull_rz:
114
115 case Intrinsic::nvvm_d2ll_rm:
116 case Intrinsic::nvvm_d2ll_rn:
117 case Intrinsic::nvvm_d2ll_rp:
118 case Intrinsic::nvvm_d2ll_rz:
119
120 case Intrinsic::nvvm_d2ull_rm:
121 case Intrinsic::nvvm_d2ull_rn:
122 case Intrinsic::nvvm_d2ull_rp:
123 case Intrinsic::nvvm_d2ull_rz:
124 return false;
125 }
126 llvm_unreachable("Checking FTZ flag for invalid f2i/d2i intrinsic");
127}
128
130 switch (IntrinsicID) {
131 // f2i
132 case Intrinsic::nvvm_f2i_rm:
133 case Intrinsic::nvvm_f2i_rm_ftz:
134 case Intrinsic::nvvm_f2i_rn:
135 case Intrinsic::nvvm_f2i_rn_ftz:
136 case Intrinsic::nvvm_f2i_rp:
137 case Intrinsic::nvvm_f2i_rp_ftz:
138 case Intrinsic::nvvm_f2i_rz:
139 case Intrinsic::nvvm_f2i_rz_ftz:
140 // d2i
141 case Intrinsic::nvvm_d2i_rm:
142 case Intrinsic::nvvm_d2i_rn:
143 case Intrinsic::nvvm_d2i_rp:
144 case Intrinsic::nvvm_d2i_rz:
145 // f2ll
146 case Intrinsic::nvvm_f2ll_rm:
147 case Intrinsic::nvvm_f2ll_rm_ftz:
148 case Intrinsic::nvvm_f2ll_rn:
149 case Intrinsic::nvvm_f2ll_rn_ftz:
150 case Intrinsic::nvvm_f2ll_rp:
151 case Intrinsic::nvvm_f2ll_rp_ftz:
152 case Intrinsic::nvvm_f2ll_rz:
153 case Intrinsic::nvvm_f2ll_rz_ftz:
154 // d2ll
155 case Intrinsic::nvvm_d2ll_rm:
156 case Intrinsic::nvvm_d2ll_rn:
157 case Intrinsic::nvvm_d2ll_rp:
158 case Intrinsic::nvvm_d2ll_rz:
159 return true;
160
161 // f2ui
162 case Intrinsic::nvvm_f2ui_rm:
163 case Intrinsic::nvvm_f2ui_rm_ftz:
164 case Intrinsic::nvvm_f2ui_rn:
165 case Intrinsic::nvvm_f2ui_rn_ftz:
166 case Intrinsic::nvvm_f2ui_rp:
167 case Intrinsic::nvvm_f2ui_rp_ftz:
168 case Intrinsic::nvvm_f2ui_rz:
169 case Intrinsic::nvvm_f2ui_rz_ftz:
170 // d2ui
171 case Intrinsic::nvvm_d2ui_rm:
172 case Intrinsic::nvvm_d2ui_rn:
173 case Intrinsic::nvvm_d2ui_rp:
174 case Intrinsic::nvvm_d2ui_rz:
175 // f2ull
176 case Intrinsic::nvvm_f2ull_rm:
177 case Intrinsic::nvvm_f2ull_rm_ftz:
178 case Intrinsic::nvvm_f2ull_rn:
179 case Intrinsic::nvvm_f2ull_rn_ftz:
180 case Intrinsic::nvvm_f2ull_rp:
181 case Intrinsic::nvvm_f2ull_rp_ftz:
182 case Intrinsic::nvvm_f2ull_rz:
183 case Intrinsic::nvvm_f2ull_rz_ftz:
184 // d2ull
185 case Intrinsic::nvvm_d2ull_rm:
186 case Intrinsic::nvvm_d2ull_rn:
187 case Intrinsic::nvvm_d2ull_rp:
188 case Intrinsic::nvvm_d2ull_rz:
189 return false;
190 }
192 "Checking invalid f2i/d2i intrinsic for signed int conversion");
193}
194
196 switch (IntrinsicID) {
197 // f2i
198 case Intrinsic::nvvm_f2i_rm:
199 case Intrinsic::nvvm_f2i_rn:
200 case Intrinsic::nvvm_f2i_rp:
201 case Intrinsic::nvvm_f2i_rz:
202 case Intrinsic::nvvm_f2i_rm_ftz:
203 case Intrinsic::nvvm_f2i_rn_ftz:
204 case Intrinsic::nvvm_f2i_rp_ftz:
205 case Intrinsic::nvvm_f2i_rz_ftz:
206 // f2ui
207 case Intrinsic::nvvm_f2ui_rm:
208 case Intrinsic::nvvm_f2ui_rn:
209 case Intrinsic::nvvm_f2ui_rp:
210 case Intrinsic::nvvm_f2ui_rz:
211 case Intrinsic::nvvm_f2ui_rm_ftz:
212 case Intrinsic::nvvm_f2ui_rn_ftz:
213 case Intrinsic::nvvm_f2ui_rp_ftz:
214 case Intrinsic::nvvm_f2ui_rz_ftz:
215 return true;
216 // d2i
217 case Intrinsic::nvvm_d2i_rm:
218 case Intrinsic::nvvm_d2i_rn:
219 case Intrinsic::nvvm_d2i_rp:
220 case Intrinsic::nvvm_d2i_rz:
221 // d2ui
222 case Intrinsic::nvvm_d2ui_rm:
223 case Intrinsic::nvvm_d2ui_rn:
224 case Intrinsic::nvvm_d2ui_rp:
225 case Intrinsic::nvvm_d2ui_rz:
226 // f2ll
227 case Intrinsic::nvvm_f2ll_rm:
228 case Intrinsic::nvvm_f2ll_rn:
229 case Intrinsic::nvvm_f2ll_rp:
230 case Intrinsic::nvvm_f2ll_rz:
231 case Intrinsic::nvvm_f2ll_rm_ftz:
232 case Intrinsic::nvvm_f2ll_rn_ftz:
233 case Intrinsic::nvvm_f2ll_rp_ftz:
234 case Intrinsic::nvvm_f2ll_rz_ftz:
235 // f2ull
236 case Intrinsic::nvvm_f2ull_rm:
237 case Intrinsic::nvvm_f2ull_rn:
238 case Intrinsic::nvvm_f2ull_rp:
239 case Intrinsic::nvvm_f2ull_rz:
240 case Intrinsic::nvvm_f2ull_rm_ftz:
241 case Intrinsic::nvvm_f2ull_rn_ftz:
242 case Intrinsic::nvvm_f2ull_rp_ftz:
243 case Intrinsic::nvvm_f2ull_rz_ftz:
244 // d2ll
245 case Intrinsic::nvvm_d2ll_rm:
246 case Intrinsic::nvvm_d2ll_rn:
247 case Intrinsic::nvvm_d2ll_rp:
248 case Intrinsic::nvvm_d2ll_rz:
249 // d2ull
250 case Intrinsic::nvvm_d2ull_rm:
251 case Intrinsic::nvvm_d2ull_rn:
252 case Intrinsic::nvvm_d2ull_rp:
253 case Intrinsic::nvvm_d2ull_rz:
254 return false;
255 }
256 llvm_unreachable("Checking NaN result for invalid f2i/d2i intrinsic");
257}
258
261 switch (IntrinsicID) {
262 // RM:
263 case Intrinsic::nvvm_f2i_rm:
264 case Intrinsic::nvvm_f2ui_rm:
265 case Intrinsic::nvvm_f2i_rm_ftz:
266 case Intrinsic::nvvm_f2ui_rm_ftz:
267 case Intrinsic::nvvm_d2i_rm:
268 case Intrinsic::nvvm_d2ui_rm:
269
270 case Intrinsic::nvvm_f2ll_rm:
271 case Intrinsic::nvvm_f2ull_rm:
272 case Intrinsic::nvvm_f2ll_rm_ftz:
273 case Intrinsic::nvvm_f2ull_rm_ftz:
274 case Intrinsic::nvvm_d2ll_rm:
275 case Intrinsic::nvvm_d2ull_rm:
277
278 // RN:
279 case Intrinsic::nvvm_f2i_rn:
280 case Intrinsic::nvvm_f2ui_rn:
281 case Intrinsic::nvvm_f2i_rn_ftz:
282 case Intrinsic::nvvm_f2ui_rn_ftz:
283 case Intrinsic::nvvm_d2i_rn:
284 case Intrinsic::nvvm_d2ui_rn:
285
286 case Intrinsic::nvvm_f2ll_rn:
287 case Intrinsic::nvvm_f2ull_rn:
288 case Intrinsic::nvvm_f2ll_rn_ftz:
289 case Intrinsic::nvvm_f2ull_rn_ftz:
290 case Intrinsic::nvvm_d2ll_rn:
291 case Intrinsic::nvvm_d2ull_rn:
293
294 // RP:
295 case Intrinsic::nvvm_f2i_rp:
296 case Intrinsic::nvvm_f2ui_rp:
297 case Intrinsic::nvvm_f2i_rp_ftz:
298 case Intrinsic::nvvm_f2ui_rp_ftz:
299 case Intrinsic::nvvm_d2i_rp:
300 case Intrinsic::nvvm_d2ui_rp:
301
302 case Intrinsic::nvvm_f2ll_rp:
303 case Intrinsic::nvvm_f2ull_rp:
304 case Intrinsic::nvvm_f2ll_rp_ftz:
305 case Intrinsic::nvvm_f2ull_rp_ftz:
306 case Intrinsic::nvvm_d2ll_rp:
307 case Intrinsic::nvvm_d2ull_rp:
309
310 // RZ:
311 case Intrinsic::nvvm_f2i_rz:
312 case Intrinsic::nvvm_f2ui_rz:
313 case Intrinsic::nvvm_f2i_rz_ftz:
314 case Intrinsic::nvvm_f2ui_rz_ftz:
315 case Intrinsic::nvvm_d2i_rz:
316 case Intrinsic::nvvm_d2ui_rz:
317
318 case Intrinsic::nvvm_f2ll_rz:
319 case Intrinsic::nvvm_f2ull_rz:
320 case Intrinsic::nvvm_f2ll_rz_ftz:
321 case Intrinsic::nvvm_f2ull_rz_ftz:
322 case Intrinsic::nvvm_d2ll_rz:
323 case Intrinsic::nvvm_d2ull_rz:
325 }
326 llvm_unreachable("Checking rounding mode for invalid f2i/d2i intrinsic");
327}
328
329inline bool FMinFMaxShouldFTZ(Intrinsic::ID IntrinsicID) {
330 switch (IntrinsicID) {
331 case Intrinsic::nvvm_fmax_ftz_f:
332 case Intrinsic::nvvm_fmax_ftz_nan_f:
333 case Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f:
334 case Intrinsic::nvvm_fmax_ftz_xorsign_abs_f:
335
336 case Intrinsic::nvvm_fmin_ftz_f:
337 case Intrinsic::nvvm_fmin_ftz_nan_f:
338 case Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f:
339 case Intrinsic::nvvm_fmin_ftz_xorsign_abs_f:
340 return true;
341
342 case Intrinsic::nvvm_fmax_d:
343 case Intrinsic::nvvm_fmax_f:
344 case Intrinsic::nvvm_fmax_nan_f:
345 case Intrinsic::nvvm_fmax_nan_xorsign_abs_f:
346 case Intrinsic::nvvm_fmax_xorsign_abs_f:
347
348 case Intrinsic::nvvm_fmin_d:
349 case Intrinsic::nvvm_fmin_f:
350 case Intrinsic::nvvm_fmin_nan_f:
351 case Intrinsic::nvvm_fmin_nan_xorsign_abs_f:
352 case Intrinsic::nvvm_fmin_xorsign_abs_f:
353 return false;
354 }
355 llvm_unreachable("Checking FTZ flag for invalid fmin/fmax intrinsic");
356}
357
358inline bool FMinFMaxPropagatesNaNs(Intrinsic::ID IntrinsicID) {
359 switch (IntrinsicID) {
360 case Intrinsic::nvvm_fmax_ftz_nan_f:
361 case Intrinsic::nvvm_fmax_nan_f:
362 case Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f:
363 case Intrinsic::nvvm_fmax_nan_xorsign_abs_f:
364
365 case Intrinsic::nvvm_fmin_ftz_nan_f:
366 case Intrinsic::nvvm_fmin_nan_f:
367 case Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f:
368 case Intrinsic::nvvm_fmin_nan_xorsign_abs_f:
369 return true;
370
371 case Intrinsic::nvvm_fmax_d:
372 case Intrinsic::nvvm_fmax_f:
373 case Intrinsic::nvvm_fmax_ftz_f:
374 case Intrinsic::nvvm_fmax_ftz_xorsign_abs_f:
375 case Intrinsic::nvvm_fmax_xorsign_abs_f:
376
377 case Intrinsic::nvvm_fmin_d:
378 case Intrinsic::nvvm_fmin_f:
379 case Intrinsic::nvvm_fmin_ftz_f:
380 case Intrinsic::nvvm_fmin_ftz_xorsign_abs_f:
381 case Intrinsic::nvvm_fmin_xorsign_abs_f:
382 return false;
383 }
384 llvm_unreachable("Checking NaN flag for invalid fmin/fmax intrinsic");
385}
386
387inline bool FMinFMaxIsXorSignAbs(Intrinsic::ID IntrinsicID) {
388 switch (IntrinsicID) {
389 case Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f:
390 case Intrinsic::nvvm_fmax_ftz_xorsign_abs_f:
391 case Intrinsic::nvvm_fmax_nan_xorsign_abs_f:
392 case Intrinsic::nvvm_fmax_xorsign_abs_f:
393
394 case Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f:
395 case Intrinsic::nvvm_fmin_ftz_xorsign_abs_f:
396 case Intrinsic::nvvm_fmin_nan_xorsign_abs_f:
397 case Intrinsic::nvvm_fmin_xorsign_abs_f:
398 return true;
399
400 case Intrinsic::nvvm_fmax_d:
401 case Intrinsic::nvvm_fmax_f:
402 case Intrinsic::nvvm_fmax_ftz_f:
403 case Intrinsic::nvvm_fmax_ftz_nan_f:
404 case Intrinsic::nvvm_fmax_nan_f:
405
406 case Intrinsic::nvvm_fmin_d:
407 case Intrinsic::nvvm_fmin_f:
408 case Intrinsic::nvvm_fmin_ftz_f:
409 case Intrinsic::nvvm_fmin_ftz_nan_f:
410 case Intrinsic::nvvm_fmin_nan_f:
411 return false;
412 }
413 llvm_unreachable("Checking XorSignAbs flag for invalid fmin/fmax intrinsic");
414}
415
417 switch (IntrinsicID) {
418 case Intrinsic::nvvm_ceil_ftz_f:
419 case Intrinsic::nvvm_fabs_ftz:
420 case Intrinsic::nvvm_floor_ftz_f:
421 case Intrinsic::nvvm_round_ftz_f:
422 case Intrinsic::nvvm_saturate_ftz_f:
423 case Intrinsic::nvvm_sqrt_rn_ftz_f:
424 return true;
425 case Intrinsic::nvvm_ceil_f:
426 case Intrinsic::nvvm_ceil_d:
427 case Intrinsic::nvvm_fabs:
428 case Intrinsic::nvvm_floor_f:
429 case Intrinsic::nvvm_floor_d:
430 case Intrinsic::nvvm_round_f:
431 case Intrinsic::nvvm_round_d:
432 case Intrinsic::nvvm_saturate_d:
433 case Intrinsic::nvvm_saturate_f:
434 case Intrinsic::nvvm_sqrt_f:
435 case Intrinsic::nvvm_sqrt_rn_d:
436 case Intrinsic::nvvm_sqrt_rn_f:
437 return false;
438 }
439 llvm_unreachable("Checking FTZ flag for invalid unary intrinsic");
440}
441
442inline bool RCPShouldFTZ(Intrinsic::ID IntrinsicID) {
443 switch (IntrinsicID) {
444 case Intrinsic::nvvm_rcp_rm_ftz_f:
445 case Intrinsic::nvvm_rcp_rn_ftz_f:
446 case Intrinsic::nvvm_rcp_rp_ftz_f:
447 case Intrinsic::nvvm_rcp_rz_ftz_f:
448 return true;
449 case Intrinsic::nvvm_rcp_rm_d:
450 case Intrinsic::nvvm_rcp_rm_f:
451 case Intrinsic::nvvm_rcp_rn_d:
452 case Intrinsic::nvvm_rcp_rn_f:
453 case Intrinsic::nvvm_rcp_rp_d:
454 case Intrinsic::nvvm_rcp_rp_f:
455 case Intrinsic::nvvm_rcp_rz_d:
456 case Intrinsic::nvvm_rcp_rz_f:
457 return false;
458 }
459 llvm_unreachable("Checking FTZ flag for invalid rcp intrinsic");
460}
461
463 switch (IntrinsicID) {
464 case Intrinsic::nvvm_rcp_rm_f:
465 case Intrinsic::nvvm_rcp_rm_d:
466 case Intrinsic::nvvm_rcp_rm_ftz_f:
468
469 case Intrinsic::nvvm_rcp_rn_f:
470 case Intrinsic::nvvm_rcp_rn_d:
471 case Intrinsic::nvvm_rcp_rn_ftz_f:
473
474 case Intrinsic::nvvm_rcp_rp_f:
475 case Intrinsic::nvvm_rcp_rp_d:
476 case Intrinsic::nvvm_rcp_rp_ftz_f:
478
479 case Intrinsic::nvvm_rcp_rz_f:
480 case Intrinsic::nvvm_rcp_rz_d:
481 case Intrinsic::nvvm_rcp_rz_ftz_f:
483 }
484 llvm_unreachable("Checking rounding mode for invalid rcp intrinsic");
485}
486
487inline DenormalMode GetNVVMDenormMode(bool ShouldFTZ) {
488 if (ShouldFTZ)
490 return DenormalMode::getIEEE();
491}
492
493inline bool FAddShouldFTZ(Intrinsic::ID IntrinsicID) {
494 switch (IntrinsicID) {
495 case Intrinsic::nvvm_add_rm_ftz_f:
496 case Intrinsic::nvvm_add_rn_ftz_f:
497 case Intrinsic::nvvm_add_rp_ftz_f:
498 case Intrinsic::nvvm_add_rz_ftz_f:
499 return true;
500
501 case Intrinsic::nvvm_add_rm_f:
502 case Intrinsic::nvvm_add_rn_f:
503 case Intrinsic::nvvm_add_rp_f:
504 case Intrinsic::nvvm_add_rz_f:
505 case Intrinsic::nvvm_add_rm_d:
506 case Intrinsic::nvvm_add_rn_d:
507 case Intrinsic::nvvm_add_rp_d:
508 case Intrinsic::nvvm_add_rz_d:
509 return false;
510 }
511 llvm_unreachable("Checking FTZ flag for invalid NVVM add intrinsic");
512}
513
515 switch (IntrinsicID) {
516 case Intrinsic::nvvm_add_rm_f:
517 case Intrinsic::nvvm_add_rm_d:
518 case Intrinsic::nvvm_add_rm_ftz_f:
520 case Intrinsic::nvvm_add_rn_f:
521 case Intrinsic::nvvm_add_rn_d:
522 case Intrinsic::nvvm_add_rn_ftz_f:
524 case Intrinsic::nvvm_add_rp_f:
525 case Intrinsic::nvvm_add_rp_d:
526 case Intrinsic::nvvm_add_rp_ftz_f:
528 case Intrinsic::nvvm_add_rz_f:
529 case Intrinsic::nvvm_add_rz_d:
530 case Intrinsic::nvvm_add_rz_ftz_f:
532 }
533 llvm_unreachable("Invalid FP instrinsic rounding mode for NVVM add");
534}
535
536inline bool FMulShouldFTZ(Intrinsic::ID IntrinsicID) {
537 switch (IntrinsicID) {
538 case Intrinsic::nvvm_mul_rm_ftz_f:
539 case Intrinsic::nvvm_mul_rn_ftz_f:
540 case Intrinsic::nvvm_mul_rp_ftz_f:
541 case Intrinsic::nvvm_mul_rz_ftz_f:
542 return true;
543
544 case Intrinsic::nvvm_mul_rm_f:
545 case Intrinsic::nvvm_mul_rn_f:
546 case Intrinsic::nvvm_mul_rp_f:
547 case Intrinsic::nvvm_mul_rz_f:
548 case Intrinsic::nvvm_mul_rm_d:
549 case Intrinsic::nvvm_mul_rn_d:
550 case Intrinsic::nvvm_mul_rp_d:
551 case Intrinsic::nvvm_mul_rz_d:
552 return false;
553 }
554 llvm_unreachable("Checking FTZ flag for invalid NVVM mul intrinsic");
555}
556
558 switch (IntrinsicID) {
559 case Intrinsic::nvvm_mul_rm_f:
560 case Intrinsic::nvvm_mul_rm_d:
561 case Intrinsic::nvvm_mul_rm_ftz_f:
563 case Intrinsic::nvvm_mul_rn_f:
564 case Intrinsic::nvvm_mul_rn_d:
565 case Intrinsic::nvvm_mul_rn_ftz_f:
567 case Intrinsic::nvvm_mul_rp_f:
568 case Intrinsic::nvvm_mul_rp_d:
569 case Intrinsic::nvvm_mul_rp_ftz_f:
571 case Intrinsic::nvvm_mul_rz_f:
572 case Intrinsic::nvvm_mul_rz_d:
573 case Intrinsic::nvvm_mul_rz_ftz_f:
575 }
576 llvm_unreachable("Invalid FP instrinsic rounding mode for NVVM mul");
577}
578
579inline bool FDivShouldFTZ(Intrinsic::ID IntrinsicID) {
580 switch (IntrinsicID) {
581 case Intrinsic::nvvm_div_rm_ftz_f:
582 case Intrinsic::nvvm_div_rn_ftz_f:
583 case Intrinsic::nvvm_div_rp_ftz_f:
584 case Intrinsic::nvvm_div_rz_ftz_f:
585 return true;
586
587 case Intrinsic::nvvm_div_rm_f:
588 case Intrinsic::nvvm_div_rn_f:
589 case Intrinsic::nvvm_div_rp_f:
590 case Intrinsic::nvvm_div_rz_f:
591 case Intrinsic::nvvm_div_rm_d:
592 case Intrinsic::nvvm_div_rn_d:
593 case Intrinsic::nvvm_div_rp_d:
594 case Intrinsic::nvvm_div_rz_d:
595 return false;
596 }
597 llvm_unreachable("Checking FTZ flag for invalid NVVM div intrinsic");
598}
599
601 switch (IntrinsicID) {
602 case Intrinsic::nvvm_div_rm_f:
603 case Intrinsic::nvvm_div_rm_d:
604 case Intrinsic::nvvm_div_rm_ftz_f:
606 case Intrinsic::nvvm_div_rn_f:
607 case Intrinsic::nvvm_div_rn_d:
608 case Intrinsic::nvvm_div_rn_ftz_f:
610 case Intrinsic::nvvm_div_rp_f:
611 case Intrinsic::nvvm_div_rp_d:
612 case Intrinsic::nvvm_div_rp_ftz_f:
614 case Intrinsic::nvvm_div_rz_f:
615 case Intrinsic::nvvm_div_rz_d:
616 case Intrinsic::nvvm_div_rz_ftz_f:
618 }
619 llvm_unreachable("Invalid FP instrinsic rounding mode for NVVM div");
620}
621
622inline bool FMAShouldFTZ(Intrinsic::ID IntrinsicID) {
623 switch (IntrinsicID) {
624 case Intrinsic::nvvm_fma_rm_ftz_f:
625 case Intrinsic::nvvm_fma_rn_ftz_f:
626 case Intrinsic::nvvm_fma_rp_ftz_f:
627 case Intrinsic::nvvm_fma_rz_ftz_f:
628 return true;
629
630 case Intrinsic::nvvm_fma_rm_f:
631 case Intrinsic::nvvm_fma_rn_f:
632 case Intrinsic::nvvm_fma_rp_f:
633 case Intrinsic::nvvm_fma_rz_f:
634 case Intrinsic::nvvm_fma_rm_d:
635 case Intrinsic::nvvm_fma_rn_d:
636 case Intrinsic::nvvm_fma_rp_d:
637 case Intrinsic::nvvm_fma_rz_d:
638 return false;
639 }
640 llvm_unreachable("Checking FTZ flag for invalid NVVM fma intrinsic");
641}
642
644 switch (IntrinsicID) {
645 case Intrinsic::nvvm_fma_rm_f:
646 case Intrinsic::nvvm_fma_rm_d:
647 case Intrinsic::nvvm_fma_rm_ftz_f:
649 case Intrinsic::nvvm_fma_rn_f:
650 case Intrinsic::nvvm_fma_rn_d:
651 case Intrinsic::nvvm_fma_rn_ftz_f:
653 case Intrinsic::nvvm_fma_rp_f:
654 case Intrinsic::nvvm_fma_rp_d:
655 case Intrinsic::nvvm_fma_rp_ftz_f:
657 case Intrinsic::nvvm_fma_rz_f:
658 case Intrinsic::nvvm_fma_rz_d:
659 case Intrinsic::nvvm_fma_rz_ftz_f:
661 }
662 llvm_unreachable("Invalid FP instrinsic rounding mode for NVVM fma");
663}
664
665inline void printTcgen05MMAKind(raw_ostream &OS, const Constant *ImmArgVal) {
666 if (const auto *CI = dyn_cast<ConstantInt>(ImmArgVal)) {
667 uint64_t Val = CI->getZExtValue();
668 switch (static_cast<Tcgen05MMAKind>(Val)) {
670 OS << "f16";
671 return;
673 OS << "tf32";
674 return;
676 OS << "f8f6f4";
677 return;
679 OS << "i8";
680 return;
681 }
682 }
684 "printTcgen05MMAKind called with invalid value for immediate argument");
685}
686
688 const Constant *ImmArgVal) {
689 if (const auto *CI = dyn_cast<ConstantInt>(ImmArgVal)) {
690 uint64_t Val = CI->getZExtValue();
691 switch (static_cast<Tcgen05CollectorUsageOp>(Val)) {
693 OS << "discard";
694 return;
696 OS << "lastuse";
697 return;
699 OS << "fill";
700 return;
702 OS << "use";
703 return;
704 }
705 }
706 llvm_unreachable("printTcgen05CollectorUsageOp called with invalid value for "
707 "immediate argument");
708}
709
710} // namespace nvvm
711} // namespace llvm
712#endif // LLVM_IR_NVVMINTRINSICUTILS_H
This file declares a class to represent arbitrary precision floating point values and provide a variety of arithmetic operations on them.
This file implements a class to represent arbitrary precision integral constant values and operations on them.
This file contains the declarations for the subclasses of Constant, which represent the different flavors of constant values that live in LLVM.
static constexpr roundingMode rmTowardZero
Definition APFloat.h:348
llvm::RoundingMode roundingMode
IEEE-754R 4.3: Rounding-direction attributes.
Definition APFloat.h:342
static constexpr roundingMode rmTowardNegative
Definition APFloat.h:347
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:344
static constexpr roundingMode rmTowardPositive
Definition APFloat.h:346
This is an important base class in LLVM.
Definition Constant.h:43
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
APFloat::roundingMode GetFMARoundingMode(Intrinsic::ID IntrinsicID)
DenormalMode GetNVVMDenormMode(bool ShouldFTZ)
bool FPToIntegerIntrinsicNaNZero(Intrinsic::ID IntrinsicID)
APFloat::roundingMode GetFDivRoundingMode(Intrinsic::ID IntrinsicID)
bool FPToIntegerIntrinsicResultIsSigned(Intrinsic::ID IntrinsicID)
APFloat::roundingMode GetFPToIntegerRoundingMode(Intrinsic::ID IntrinsicID)
bool RCPShouldFTZ(Intrinsic::ID IntrinsicID)
bool FPToIntegerIntrinsicShouldFTZ(Intrinsic::ID IntrinsicID)
bool FDivShouldFTZ(Intrinsic::ID IntrinsicID)
bool FAddShouldFTZ(Intrinsic::ID IntrinsicID)
bool FMinFMaxIsXorSignAbs(Intrinsic::ID IntrinsicID)
APFloat::roundingMode GetFMulRoundingMode(Intrinsic::ID IntrinsicID)
void printTcgen05MMAKind(raw_ostream &OS, const Constant *ImmArgVal)
void printTcgen05CollectorUsageOp(raw_ostream &OS, const Constant *ImmArgVal)
bool UnaryMathIntrinsicShouldFTZ(Intrinsic::ID IntrinsicID)
bool FMinFMaxShouldFTZ(Intrinsic::ID IntrinsicID)
APFloat::roundingMode GetFAddRoundingMode(Intrinsic::ID IntrinsicID)
bool FMAShouldFTZ(Intrinsic::ID IntrinsicID)
bool FMulShouldFTZ(Intrinsic::ID IntrinsicID)
APFloat::roundingMode GetRCPRoundingMode(Intrinsic::ID IntrinsicID)
bool FMinFMaxPropagatesNaNs(Intrinsic::ID IntrinsicID)
This is an optimization pass for GlobalISel generic memory operations.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
Represent subnormal handling kind for floating point instruction inputs and outputs.
static constexpr DenormalMode getPreserveSign()
static constexpr DenormalMode getIEEE()