LLVM 23.0.0git
NVVMIntrinsicUtils.h
Go to the documentation of this file.
1//===--- NVVMIntrinsicUtils.h -----------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This file contains the definitions of the enumerations and flags
11/// associated with NVVM Intrinsics, along with some helper functions.
12//
13//===----------------------------------------------------------------------===//
14
15#ifndef LLVM_IR_NVVMINTRINSICUTILS_H
16#define LLVM_IR_NVVMINTRINSICUTILS_H
17
18#include <stdint.h>
19
20#include "llvm/ADT/APFloat.h"
21#include "llvm/ADT/APInt.h"
22#include "llvm/IR/Constants.h"
23#include "llvm/IR/Intrinsics.h"
24#include "llvm/IR/IntrinsicsNVPTX.h"
26
27namespace llvm {
28namespace nvvm {
29
// Reduction Ops supported with TMA Copy from Shared
// to Global Memory for the "cp.reduce.async.bulk.tensor.*"
// family of PTX instructions.
// Stored in a uint8_t; every enumerator's numeric value is spelled out
// explicitly (0 through 7).
enum class TMAReductionOp : uint8_t {
  ADD = 0,
  MIN = 1,
  MAX = 2,
  INC = 3,
  DEC = 4,
  AND = 5,
  OR = 6,
  XOR = 7,
};
43
// Enum to represent the cta_group::1 and
// cta_group::2 variants in TMA/TCGEN05 family of
// PTX instructions.
// CG_NONE (0) stands for the form without any cta_group modifier.
enum class CTAGroupKind : uint8_t {
  CG_NONE = 0, // default with no cta_group modifier
  CG_1 = 1,    // cta_group::1 modifier
  CG_2 = 2,    // cta_group::2 modifier
};
52
// Enumerates the Tcgen05 MMA kinds (F16, TF32, F8F6F4, I8), stored in a
// uint8_t with explicitly assigned values 0 through 3.
enum class Tcgen05MMAKind : uint8_t { F16 = 0, TF32 = 1, F8F6F4 = 2, I8 = 3 };
54
// NOTE(review): the opening `enum class ... {` line and the enumerators for
// values 0 and 1 are absent from this listing. Presumably this is the enum
// printed by printTcgen05CollectorUsageOp -- confirm against the real header
// before relying on it.
58 FILL = 2,
59 USE = 3,
60};
61
// NOTE(review): the opening `enum class ... {` line is absent from this
// listing, as are the enumerators for values 8 and 12 and everything after
// B4x16 = 13. Presumably this is the element-type enum printed by
// printTensormapElemType -- confirm against the real header.
63 U8 = 0,
64 U16 = 1,
65 U32 = 2,
66 S32 = 3,
67 U64 = 4,
68 S64 = 5,
69 F16 = 6,
70 F32 = 7,
72 F64 = 9,
73 BF16 = 10,
74 TF32 = 11,
76 B4x16 = 13,
79};
80
86
94
101
106
107LLVM_ABI void printTcgen05MMAKind(raw_ostream &OS, const Constant *ImmArgVal);
108
110 const Constant *ImmArgVal);
111
113 const Constant *ImmArgVal);
115 const Constant *ImmArgVal);
117 const Constant *ImmArgVal);
119 const Constant *ImmArgVal);
121 const Constant *ImmArgVal);
122
124 switch (IntrinsicID) {
125 case Intrinsic::nvvm_f2i_rm_ftz:
126 case Intrinsic::nvvm_f2i_rn_ftz:
127 case Intrinsic::nvvm_f2i_rp_ftz:
128 case Intrinsic::nvvm_f2i_rz_ftz:
129
130 case Intrinsic::nvvm_f2ui_rm_ftz:
131 case Intrinsic::nvvm_f2ui_rn_ftz:
132 case Intrinsic::nvvm_f2ui_rp_ftz:
133 case Intrinsic::nvvm_f2ui_rz_ftz:
134
135 case Intrinsic::nvvm_f2ll_rm_ftz:
136 case Intrinsic::nvvm_f2ll_rn_ftz:
137 case Intrinsic::nvvm_f2ll_rp_ftz:
138 case Intrinsic::nvvm_f2ll_rz_ftz:
139
140 case Intrinsic::nvvm_f2ull_rm_ftz:
141 case Intrinsic::nvvm_f2ull_rn_ftz:
142 case Intrinsic::nvvm_f2ull_rp_ftz:
143 case Intrinsic::nvvm_f2ull_rz_ftz:
144 return true;
145
146 case Intrinsic::nvvm_f2i_rm:
147 case Intrinsic::nvvm_f2i_rn:
148 case Intrinsic::nvvm_f2i_rp:
149 case Intrinsic::nvvm_f2i_rz:
150
151 case Intrinsic::nvvm_f2ui_rm:
152 case Intrinsic::nvvm_f2ui_rn:
153 case Intrinsic::nvvm_f2ui_rp:
154 case Intrinsic::nvvm_f2ui_rz:
155
156 case Intrinsic::nvvm_d2i_rm:
157 case Intrinsic::nvvm_d2i_rn:
158 case Intrinsic::nvvm_d2i_rp:
159 case Intrinsic::nvvm_d2i_rz:
160
161 case Intrinsic::nvvm_d2ui_rm:
162 case Intrinsic::nvvm_d2ui_rn:
163 case Intrinsic::nvvm_d2ui_rp:
164 case Intrinsic::nvvm_d2ui_rz:
165
166 case Intrinsic::nvvm_f2ll_rm:
167 case Intrinsic::nvvm_f2ll_rn:
168 case Intrinsic::nvvm_f2ll_rp:
169 case Intrinsic::nvvm_f2ll_rz:
170
171 case Intrinsic::nvvm_f2ull_rm:
172 case Intrinsic::nvvm_f2ull_rn:
173 case Intrinsic::nvvm_f2ull_rp:
174 case Intrinsic::nvvm_f2ull_rz:
175
176 case Intrinsic::nvvm_d2ll_rm:
177 case Intrinsic::nvvm_d2ll_rn:
178 case Intrinsic::nvvm_d2ll_rp:
179 case Intrinsic::nvvm_d2ll_rz:
180
181 case Intrinsic::nvvm_d2ull_rm:
182 case Intrinsic::nvvm_d2ull_rn:
183 case Intrinsic::nvvm_d2ull_rp:
184 case Intrinsic::nvvm_d2ull_rz:
185 return false;
186 }
187 llvm_unreachable("Checking FTZ flag for invalid f2i/d2i intrinsic");
188}
189
191 switch (IntrinsicID) {
192 // f2i
193 case Intrinsic::nvvm_f2i_rm:
194 case Intrinsic::nvvm_f2i_rm_ftz:
195 case Intrinsic::nvvm_f2i_rn:
196 case Intrinsic::nvvm_f2i_rn_ftz:
197 case Intrinsic::nvvm_f2i_rp:
198 case Intrinsic::nvvm_f2i_rp_ftz:
199 case Intrinsic::nvvm_f2i_rz:
200 case Intrinsic::nvvm_f2i_rz_ftz:
201 // d2i
202 case Intrinsic::nvvm_d2i_rm:
203 case Intrinsic::nvvm_d2i_rn:
204 case Intrinsic::nvvm_d2i_rp:
205 case Intrinsic::nvvm_d2i_rz:
206 // f2ll
207 case Intrinsic::nvvm_f2ll_rm:
208 case Intrinsic::nvvm_f2ll_rm_ftz:
209 case Intrinsic::nvvm_f2ll_rn:
210 case Intrinsic::nvvm_f2ll_rn_ftz:
211 case Intrinsic::nvvm_f2ll_rp:
212 case Intrinsic::nvvm_f2ll_rp_ftz:
213 case Intrinsic::nvvm_f2ll_rz:
214 case Intrinsic::nvvm_f2ll_rz_ftz:
215 // d2ll
216 case Intrinsic::nvvm_d2ll_rm:
217 case Intrinsic::nvvm_d2ll_rn:
218 case Intrinsic::nvvm_d2ll_rp:
219 case Intrinsic::nvvm_d2ll_rz:
220 return true;
221
222 // f2ui
223 case Intrinsic::nvvm_f2ui_rm:
224 case Intrinsic::nvvm_f2ui_rm_ftz:
225 case Intrinsic::nvvm_f2ui_rn:
226 case Intrinsic::nvvm_f2ui_rn_ftz:
227 case Intrinsic::nvvm_f2ui_rp:
228 case Intrinsic::nvvm_f2ui_rp_ftz:
229 case Intrinsic::nvvm_f2ui_rz:
230 case Intrinsic::nvvm_f2ui_rz_ftz:
231 // d2ui
232 case Intrinsic::nvvm_d2ui_rm:
233 case Intrinsic::nvvm_d2ui_rn:
234 case Intrinsic::nvvm_d2ui_rp:
235 case Intrinsic::nvvm_d2ui_rz:
236 // f2ull
237 case Intrinsic::nvvm_f2ull_rm:
238 case Intrinsic::nvvm_f2ull_rm_ftz:
239 case Intrinsic::nvvm_f2ull_rn:
240 case Intrinsic::nvvm_f2ull_rn_ftz:
241 case Intrinsic::nvvm_f2ull_rp:
242 case Intrinsic::nvvm_f2ull_rp_ftz:
243 case Intrinsic::nvvm_f2ull_rz:
244 case Intrinsic::nvvm_f2ull_rz_ftz:
245 // d2ull
246 case Intrinsic::nvvm_d2ull_rm:
247 case Intrinsic::nvvm_d2ull_rn:
248 case Intrinsic::nvvm_d2ull_rp:
249 case Intrinsic::nvvm_d2ull_rz:
250 return false;
251 }
253 "Checking invalid f2i/d2i intrinsic for signed int conversion");
254}
255
257 switch (IntrinsicID) {
258 // f2i
259 case Intrinsic::nvvm_f2i_rm:
260 case Intrinsic::nvvm_f2i_rn:
261 case Intrinsic::nvvm_f2i_rp:
262 case Intrinsic::nvvm_f2i_rz:
263 case Intrinsic::nvvm_f2i_rm_ftz:
264 case Intrinsic::nvvm_f2i_rn_ftz:
265 case Intrinsic::nvvm_f2i_rp_ftz:
266 case Intrinsic::nvvm_f2i_rz_ftz:
267 // f2ui
268 case Intrinsic::nvvm_f2ui_rm:
269 case Intrinsic::nvvm_f2ui_rn:
270 case Intrinsic::nvvm_f2ui_rp:
271 case Intrinsic::nvvm_f2ui_rz:
272 case Intrinsic::nvvm_f2ui_rm_ftz:
273 case Intrinsic::nvvm_f2ui_rn_ftz:
274 case Intrinsic::nvvm_f2ui_rp_ftz:
275 case Intrinsic::nvvm_f2ui_rz_ftz:
276 return true;
277 // d2i
278 case Intrinsic::nvvm_d2i_rm:
279 case Intrinsic::nvvm_d2i_rn:
280 case Intrinsic::nvvm_d2i_rp:
281 case Intrinsic::nvvm_d2i_rz:
282 // d2ui
283 case Intrinsic::nvvm_d2ui_rm:
284 case Intrinsic::nvvm_d2ui_rn:
285 case Intrinsic::nvvm_d2ui_rp:
286 case Intrinsic::nvvm_d2ui_rz:
287 // f2ll
288 case Intrinsic::nvvm_f2ll_rm:
289 case Intrinsic::nvvm_f2ll_rn:
290 case Intrinsic::nvvm_f2ll_rp:
291 case Intrinsic::nvvm_f2ll_rz:
292 case Intrinsic::nvvm_f2ll_rm_ftz:
293 case Intrinsic::nvvm_f2ll_rn_ftz:
294 case Intrinsic::nvvm_f2ll_rp_ftz:
295 case Intrinsic::nvvm_f2ll_rz_ftz:
296 // f2ull
297 case Intrinsic::nvvm_f2ull_rm:
298 case Intrinsic::nvvm_f2ull_rn:
299 case Intrinsic::nvvm_f2ull_rp:
300 case Intrinsic::nvvm_f2ull_rz:
301 case Intrinsic::nvvm_f2ull_rm_ftz:
302 case Intrinsic::nvvm_f2ull_rn_ftz:
303 case Intrinsic::nvvm_f2ull_rp_ftz:
304 case Intrinsic::nvvm_f2ull_rz_ftz:
305 // d2ll
306 case Intrinsic::nvvm_d2ll_rm:
307 case Intrinsic::nvvm_d2ll_rn:
308 case Intrinsic::nvvm_d2ll_rp:
309 case Intrinsic::nvvm_d2ll_rz:
310 // d2ull
311 case Intrinsic::nvvm_d2ull_rm:
312 case Intrinsic::nvvm_d2ull_rn:
313 case Intrinsic::nvvm_d2ull_rp:
314 case Intrinsic::nvvm_d2ull_rz:
315 return false;
316 }
317 llvm_unreachable("Checking NaN result for invalid f2i/d2i intrinsic");
318}
319
322 switch (IntrinsicID) {
323 // RM:
324 case Intrinsic::nvvm_f2i_rm:
325 case Intrinsic::nvvm_f2ui_rm:
326 case Intrinsic::nvvm_f2i_rm_ftz:
327 case Intrinsic::nvvm_f2ui_rm_ftz:
328 case Intrinsic::nvvm_d2i_rm:
329 case Intrinsic::nvvm_d2ui_rm:
330
331 case Intrinsic::nvvm_f2ll_rm:
332 case Intrinsic::nvvm_f2ull_rm:
333 case Intrinsic::nvvm_f2ll_rm_ftz:
334 case Intrinsic::nvvm_f2ull_rm_ftz:
335 case Intrinsic::nvvm_d2ll_rm:
336 case Intrinsic::nvvm_d2ull_rm:
338
339 // RN:
340 case Intrinsic::nvvm_f2i_rn:
341 case Intrinsic::nvvm_f2ui_rn:
342 case Intrinsic::nvvm_f2i_rn_ftz:
343 case Intrinsic::nvvm_f2ui_rn_ftz:
344 case Intrinsic::nvvm_d2i_rn:
345 case Intrinsic::nvvm_d2ui_rn:
346
347 case Intrinsic::nvvm_f2ll_rn:
348 case Intrinsic::nvvm_f2ull_rn:
349 case Intrinsic::nvvm_f2ll_rn_ftz:
350 case Intrinsic::nvvm_f2ull_rn_ftz:
351 case Intrinsic::nvvm_d2ll_rn:
352 case Intrinsic::nvvm_d2ull_rn:
354
355 // RP:
356 case Intrinsic::nvvm_f2i_rp:
357 case Intrinsic::nvvm_f2ui_rp:
358 case Intrinsic::nvvm_f2i_rp_ftz:
359 case Intrinsic::nvvm_f2ui_rp_ftz:
360 case Intrinsic::nvvm_d2i_rp:
361 case Intrinsic::nvvm_d2ui_rp:
362
363 case Intrinsic::nvvm_f2ll_rp:
364 case Intrinsic::nvvm_f2ull_rp:
365 case Intrinsic::nvvm_f2ll_rp_ftz:
366 case Intrinsic::nvvm_f2ull_rp_ftz:
367 case Intrinsic::nvvm_d2ll_rp:
368 case Intrinsic::nvvm_d2ull_rp:
370
371 // RZ:
372 case Intrinsic::nvvm_f2i_rz:
373 case Intrinsic::nvvm_f2ui_rz:
374 case Intrinsic::nvvm_f2i_rz_ftz:
375 case Intrinsic::nvvm_f2ui_rz_ftz:
376 case Intrinsic::nvvm_d2i_rz:
377 case Intrinsic::nvvm_d2ui_rz:
378
379 case Intrinsic::nvvm_f2ll_rz:
380 case Intrinsic::nvvm_f2ull_rz:
381 case Intrinsic::nvvm_f2ll_rz_ftz:
382 case Intrinsic::nvvm_f2ull_rz_ftz:
383 case Intrinsic::nvvm_d2ll_rz:
384 case Intrinsic::nvvm_d2ull_rz:
386 }
387 llvm_unreachable("Checking rounding mode for invalid f2i/d2i intrinsic");
388}
389
390inline bool FMinFMaxShouldFTZ(Intrinsic::ID IntrinsicID) {
391 switch (IntrinsicID) {
392 case Intrinsic::nvvm_fmax_ftz_f:
393 case Intrinsic::nvvm_fmax_ftz_nan_f:
394 case Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f:
395 case Intrinsic::nvvm_fmax_ftz_xorsign_abs_f:
396
397 case Intrinsic::nvvm_fmin_ftz_f:
398 case Intrinsic::nvvm_fmin_ftz_nan_f:
399 case Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f:
400 case Intrinsic::nvvm_fmin_ftz_xorsign_abs_f:
401 return true;
402
403 case Intrinsic::nvvm_fmax_d:
404 case Intrinsic::nvvm_fmax_f:
405 case Intrinsic::nvvm_fmax_nan_f:
406 case Intrinsic::nvvm_fmax_nan_xorsign_abs_f:
407 case Intrinsic::nvvm_fmax_xorsign_abs_f:
408
409 case Intrinsic::nvvm_fmin_d:
410 case Intrinsic::nvvm_fmin_f:
411 case Intrinsic::nvvm_fmin_nan_f:
412 case Intrinsic::nvvm_fmin_nan_xorsign_abs_f:
413 case Intrinsic::nvvm_fmin_xorsign_abs_f:
414 return false;
415 }
416 llvm_unreachable("Checking FTZ flag for invalid fmin/fmax intrinsic");
417}
418
419inline bool FMinFMaxPropagatesNaNs(Intrinsic::ID IntrinsicID) {
420 switch (IntrinsicID) {
421 case Intrinsic::nvvm_fmax_ftz_nan_f:
422 case Intrinsic::nvvm_fmax_nan_f:
423 case Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f:
424 case Intrinsic::nvvm_fmax_nan_xorsign_abs_f:
425
426 case Intrinsic::nvvm_fmin_ftz_nan_f:
427 case Intrinsic::nvvm_fmin_nan_f:
428 case Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f:
429 case Intrinsic::nvvm_fmin_nan_xorsign_abs_f:
430 return true;
431
432 case Intrinsic::nvvm_fmax_d:
433 case Intrinsic::nvvm_fmax_f:
434 case Intrinsic::nvvm_fmax_ftz_f:
435 case Intrinsic::nvvm_fmax_ftz_xorsign_abs_f:
436 case Intrinsic::nvvm_fmax_xorsign_abs_f:
437
438 case Intrinsic::nvvm_fmin_d:
439 case Intrinsic::nvvm_fmin_f:
440 case Intrinsic::nvvm_fmin_ftz_f:
441 case Intrinsic::nvvm_fmin_ftz_xorsign_abs_f:
442 case Intrinsic::nvvm_fmin_xorsign_abs_f:
443 return false;
444 }
445 llvm_unreachable("Checking NaN flag for invalid fmin/fmax intrinsic");
446}
447
448inline bool FMinFMaxIsXorSignAbs(Intrinsic::ID IntrinsicID) {
449 switch (IntrinsicID) {
450 case Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f:
451 case Intrinsic::nvvm_fmax_ftz_xorsign_abs_f:
452 case Intrinsic::nvvm_fmax_nan_xorsign_abs_f:
453 case Intrinsic::nvvm_fmax_xorsign_abs_f:
454
455 case Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f:
456 case Intrinsic::nvvm_fmin_ftz_xorsign_abs_f:
457 case Intrinsic::nvvm_fmin_nan_xorsign_abs_f:
458 case Intrinsic::nvvm_fmin_xorsign_abs_f:
459 return true;
460
461 case Intrinsic::nvvm_fmax_d:
462 case Intrinsic::nvvm_fmax_f:
463 case Intrinsic::nvvm_fmax_ftz_f:
464 case Intrinsic::nvvm_fmax_ftz_nan_f:
465 case Intrinsic::nvvm_fmax_nan_f:
466
467 case Intrinsic::nvvm_fmin_d:
468 case Intrinsic::nvvm_fmin_f:
469 case Intrinsic::nvvm_fmin_ftz_f:
470 case Intrinsic::nvvm_fmin_ftz_nan_f:
471 case Intrinsic::nvvm_fmin_nan_f:
472 return false;
473 }
474 llvm_unreachable("Checking XorSignAbs flag for invalid fmin/fmax intrinsic");
475}
476
478 switch (IntrinsicID) {
479 case Intrinsic::nvvm_ceil_ftz_f:
480 case Intrinsic::nvvm_fabs_ftz:
481 case Intrinsic::nvvm_floor_ftz_f:
482 case Intrinsic::nvvm_round_ftz_f:
483 case Intrinsic::nvvm_saturate_ftz_f:
484 case Intrinsic::nvvm_sqrt_rn_ftz_f:
485 return true;
486 case Intrinsic::nvvm_ceil_f:
487 case Intrinsic::nvvm_ceil_d:
488 case Intrinsic::nvvm_fabs:
489 case Intrinsic::nvvm_floor_f:
490 case Intrinsic::nvvm_floor_d:
491 case Intrinsic::nvvm_round_f:
492 case Intrinsic::nvvm_round_d:
493 case Intrinsic::nvvm_saturate_d:
494 case Intrinsic::nvvm_saturate_f:
495 case Intrinsic::nvvm_sqrt_f:
496 case Intrinsic::nvvm_sqrt_rn_d:
497 case Intrinsic::nvvm_sqrt_rn_f:
498 return false;
499 }
500 llvm_unreachable("Checking FTZ flag for invalid unary intrinsic");
501}
502
503inline bool RCPShouldFTZ(Intrinsic::ID IntrinsicID) {
504 switch (IntrinsicID) {
505 case Intrinsic::nvvm_rcp_rm_ftz_f:
506 case Intrinsic::nvvm_rcp_rn_ftz_f:
507 case Intrinsic::nvvm_rcp_rp_ftz_f:
508 case Intrinsic::nvvm_rcp_rz_ftz_f:
509 return true;
510 case Intrinsic::nvvm_rcp_rm_d:
511 case Intrinsic::nvvm_rcp_rm_f:
512 case Intrinsic::nvvm_rcp_rn_d:
513 case Intrinsic::nvvm_rcp_rn_f:
514 case Intrinsic::nvvm_rcp_rp_d:
515 case Intrinsic::nvvm_rcp_rp_f:
516 case Intrinsic::nvvm_rcp_rz_d:
517 case Intrinsic::nvvm_rcp_rz_f:
518 return false;
519 }
520 llvm_unreachable("Checking FTZ flag for invalid rcp intrinsic");
521}
522
524 switch (IntrinsicID) {
525 case Intrinsic::nvvm_rcp_rm_f:
526 case Intrinsic::nvvm_rcp_rm_d:
527 case Intrinsic::nvvm_rcp_rm_ftz_f:
529
530 case Intrinsic::nvvm_rcp_rn_f:
531 case Intrinsic::nvvm_rcp_rn_d:
532 case Intrinsic::nvvm_rcp_rn_ftz_f:
534
535 case Intrinsic::nvvm_rcp_rp_f:
536 case Intrinsic::nvvm_rcp_rp_d:
537 case Intrinsic::nvvm_rcp_rp_ftz_f:
539
540 case Intrinsic::nvvm_rcp_rz_f:
541 case Intrinsic::nvvm_rcp_rz_d:
542 case Intrinsic::nvvm_rcp_rz_ftz_f:
544 }
545 llvm_unreachable("Checking rounding mode for invalid rcp intrinsic");
546}
547
548inline DenormalMode GetNVVMDenormMode(bool ShouldFTZ) {
549 if (ShouldFTZ)
551 return DenormalMode::getIEEE();
552}
553
554inline bool FAddShouldFTZ(Intrinsic::ID IntrinsicID) {
555 switch (IntrinsicID) {
556 case Intrinsic::nvvm_add_rm_ftz_f:
557 case Intrinsic::nvvm_add_rn_ftz_f:
558 case Intrinsic::nvvm_add_rp_ftz_f:
559 case Intrinsic::nvvm_add_rz_ftz_f:
560 return true;
561
562 case Intrinsic::nvvm_add_rm_f:
563 case Intrinsic::nvvm_add_rn_f:
564 case Intrinsic::nvvm_add_rp_f:
565 case Intrinsic::nvvm_add_rz_f:
566 case Intrinsic::nvvm_add_rm_d:
567 case Intrinsic::nvvm_add_rn_d:
568 case Intrinsic::nvvm_add_rp_d:
569 case Intrinsic::nvvm_add_rz_d:
570 return false;
571 }
572 llvm_unreachable("Checking FTZ flag for invalid NVVM add intrinsic");
573}
574
576 switch (IntrinsicID) {
577 case Intrinsic::nvvm_add_rm_f:
578 case Intrinsic::nvvm_add_rm_d:
579 case Intrinsic::nvvm_add_rm_ftz_f:
581 case Intrinsic::nvvm_add_rn_f:
582 case Intrinsic::nvvm_add_rn_d:
583 case Intrinsic::nvvm_add_rn_ftz_f:
585 case Intrinsic::nvvm_add_rp_f:
586 case Intrinsic::nvvm_add_rp_d:
587 case Intrinsic::nvvm_add_rp_ftz_f:
589 case Intrinsic::nvvm_add_rz_f:
590 case Intrinsic::nvvm_add_rz_d:
591 case Intrinsic::nvvm_add_rz_ftz_f:
593 }
594 llvm_unreachable("Invalid FP instrinsic rounding mode for NVVM add");
595}
596
597inline bool FMulShouldFTZ(Intrinsic::ID IntrinsicID) {
598 switch (IntrinsicID) {
599 case Intrinsic::nvvm_mul_rm_ftz_f:
600 case Intrinsic::nvvm_mul_rn_ftz_f:
601 case Intrinsic::nvvm_mul_rp_ftz_f:
602 case Intrinsic::nvvm_mul_rz_ftz_f:
603 return true;
604
605 case Intrinsic::nvvm_mul_rm_f:
606 case Intrinsic::nvvm_mul_rn_f:
607 case Intrinsic::nvvm_mul_rp_f:
608 case Intrinsic::nvvm_mul_rz_f:
609 case Intrinsic::nvvm_mul_rm_d:
610 case Intrinsic::nvvm_mul_rn_d:
611 case Intrinsic::nvvm_mul_rp_d:
612 case Intrinsic::nvvm_mul_rz_d:
613 return false;
614 }
615 llvm_unreachable("Checking FTZ flag for invalid NVVM mul intrinsic");
616}
617
619 switch (IntrinsicID) {
620 case Intrinsic::nvvm_mul_rm_f:
621 case Intrinsic::nvvm_mul_rm_d:
622 case Intrinsic::nvvm_mul_rm_ftz_f:
624 case Intrinsic::nvvm_mul_rn_f:
625 case Intrinsic::nvvm_mul_rn_d:
626 case Intrinsic::nvvm_mul_rn_ftz_f:
628 case Intrinsic::nvvm_mul_rp_f:
629 case Intrinsic::nvvm_mul_rp_d:
630 case Intrinsic::nvvm_mul_rp_ftz_f:
632 case Intrinsic::nvvm_mul_rz_f:
633 case Intrinsic::nvvm_mul_rz_d:
634 case Intrinsic::nvvm_mul_rz_ftz_f:
636 }
637 llvm_unreachable("Invalid FP instrinsic rounding mode for NVVM mul");
638}
639
640inline bool FDivShouldFTZ(Intrinsic::ID IntrinsicID) {
641 switch (IntrinsicID) {
642 case Intrinsic::nvvm_div_rm_ftz_f:
643 case Intrinsic::nvvm_div_rn_ftz_f:
644 case Intrinsic::nvvm_div_rp_ftz_f:
645 case Intrinsic::nvvm_div_rz_ftz_f:
646 return true;
647
648 case Intrinsic::nvvm_div_rm_f:
649 case Intrinsic::nvvm_div_rn_f:
650 case Intrinsic::nvvm_div_rp_f:
651 case Intrinsic::nvvm_div_rz_f:
652 case Intrinsic::nvvm_div_rm_d:
653 case Intrinsic::nvvm_div_rn_d:
654 case Intrinsic::nvvm_div_rp_d:
655 case Intrinsic::nvvm_div_rz_d:
656 return false;
657 }
658 llvm_unreachable("Checking FTZ flag for invalid NVVM div intrinsic");
659}
660
662 switch (IntrinsicID) {
663 case Intrinsic::nvvm_div_rm_f:
664 case Intrinsic::nvvm_div_rm_d:
665 case Intrinsic::nvvm_div_rm_ftz_f:
667 case Intrinsic::nvvm_div_rn_f:
668 case Intrinsic::nvvm_div_rn_d:
669 case Intrinsic::nvvm_div_rn_ftz_f:
671 case Intrinsic::nvvm_div_rp_f:
672 case Intrinsic::nvvm_div_rp_d:
673 case Intrinsic::nvvm_div_rp_ftz_f:
675 case Intrinsic::nvvm_div_rz_f:
676 case Intrinsic::nvvm_div_rz_d:
677 case Intrinsic::nvvm_div_rz_ftz_f:
679 }
680 llvm_unreachable("Invalid FP instrinsic rounding mode for NVVM div");
681}
682
683inline bool FMAShouldFTZ(Intrinsic::ID IntrinsicID) {
684 switch (IntrinsicID) {
685 case Intrinsic::nvvm_fma_rm_ftz_f:
686 case Intrinsic::nvvm_fma_rn_ftz_f:
687 case Intrinsic::nvvm_fma_rp_ftz_f:
688 case Intrinsic::nvvm_fma_rz_ftz_f:
689 return true;
690
691 case Intrinsic::nvvm_fma_rm_f:
692 case Intrinsic::nvvm_fma_rn_f:
693 case Intrinsic::nvvm_fma_rp_f:
694 case Intrinsic::nvvm_fma_rz_f:
695 case Intrinsic::nvvm_fma_rm_d:
696 case Intrinsic::nvvm_fma_rn_d:
697 case Intrinsic::nvvm_fma_rp_d:
698 case Intrinsic::nvvm_fma_rz_d:
699 return false;
700 }
701 llvm_unreachable("Checking FTZ flag for invalid NVVM fma intrinsic");
702}
703
705 switch (IntrinsicID) {
706 case Intrinsic::nvvm_fma_rm_f:
707 case Intrinsic::nvvm_fma_rm_d:
708 case Intrinsic::nvvm_fma_rm_ftz_f:
710 case Intrinsic::nvvm_fma_rn_f:
711 case Intrinsic::nvvm_fma_rn_d:
712 case Intrinsic::nvvm_fma_rn_ftz_f:
714 case Intrinsic::nvvm_fma_rp_f:
715 case Intrinsic::nvvm_fma_rp_d:
716 case Intrinsic::nvvm_fma_rp_ftz_f:
718 case Intrinsic::nvvm_fma_rz_f:
719 case Intrinsic::nvvm_fma_rz_d:
720 case Intrinsic::nvvm_fma_rz_ftz_f:
722 }
723 llvm_unreachable("Invalid FP instrinsic rounding mode for NVVM fma");
724}
725
726} // namespace nvvm
727} // namespace llvm
728#endif // LLVM_IR_NVVMINTRINSICUTILS_H
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
#define LLVM_ABI
Definition Compiler.h:213
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static constexpr roundingMode rmTowardZero
Definition APFloat.h:348
llvm::RoundingMode roundingMode
IEEE-754R 4.3: Rounding-direction attributes.
Definition APFloat.h:342
static constexpr roundingMode rmTowardNegative
Definition APFloat.h:347
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:344
static constexpr roundingMode rmTowardPositive
Definition APFloat.h:346
This is an important base class in LLVM.
Definition Constant.h:43
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
APFloat::roundingMode GetFMARoundingMode(Intrinsic::ID IntrinsicID)
LLVM_ABI void printTensormapSwizzleMode(raw_ostream &OS, const Constant *ImmArgVal)
DenormalMode GetNVVMDenormMode(bool ShouldFTZ)
bool FPToIntegerIntrinsicNaNZero(Intrinsic::ID IntrinsicID)
LLVM_ABI void printTensormapInterleaveLayout(raw_ostream &OS, const Constant *ImmArgVal)
APFloat::roundingMode GetFDivRoundingMode(Intrinsic::ID IntrinsicID)
LLVM_ABI void printTensormapSwizzleAtomicity(raw_ostream &OS, const Constant *ImmArgVal)
bool FPToIntegerIntrinsicResultIsSigned(Intrinsic::ID IntrinsicID)
APFloat::roundingMode GetFPToIntegerRoundingMode(Intrinsic::ID IntrinsicID)
bool RCPShouldFTZ(Intrinsic::ID IntrinsicID)
bool FPToIntegerIntrinsicShouldFTZ(Intrinsic::ID IntrinsicID)
bool FDivShouldFTZ(Intrinsic::ID IntrinsicID)
bool FAddShouldFTZ(Intrinsic::ID IntrinsicID)
bool FMinFMaxIsXorSignAbs(Intrinsic::ID IntrinsicID)
APFloat::roundingMode GetFMulRoundingMode(Intrinsic::ID IntrinsicID)
LLVM_ABI void printTcgen05MMAKind(raw_ostream &OS, const Constant *ImmArgVal)
LLVM_ABI void printTcgen05CollectorUsageOp(raw_ostream &OS, const Constant *ImmArgVal)
bool UnaryMathIntrinsicShouldFTZ(Intrinsic::ID IntrinsicID)
bool FMinFMaxShouldFTZ(Intrinsic::ID IntrinsicID)
APFloat::roundingMode GetFAddRoundingMode(Intrinsic::ID IntrinsicID)
bool FMAShouldFTZ(Intrinsic::ID IntrinsicID)
bool FMulShouldFTZ(Intrinsic::ID IntrinsicID)
LLVM_ABI void printTensormapFillMode(raw_ostream &OS, const Constant *ImmArgVal)
APFloat::roundingMode GetRCPRoundingMode(Intrinsic::ID IntrinsicID)
bool FMinFMaxPropagatesNaNs(Intrinsic::ID IntrinsicID)
LLVM_ABI void printTensormapElemType(raw_ostream &OS, const Constant *ImmArgVal)
This is an optimization pass for GlobalISel generic memory operations.
Represent subnormal handling kind for floating point instruction inputs and outputs.
static constexpr DenormalMode getPreserveSign()
static constexpr DenormalMode getIEEE()