LLVM 20.0.0git
NVVMIntrinsicUtils.h
Go to the documentation of this file.
//===--- NVVMIntrinsicUtils.h -----------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file contains the definitions of the enumerations and flags
/// associated with NVVM Intrinsics, along with some helper functions.
//
//===----------------------------------------------------------------------===//
14
15#ifndef LLVM_IR_NVVMINTRINSICUTILS_H
16#define LLVM_IR_NVVMINTRINSICUTILS_H
17
18#include <stdint.h>
19
20#include "llvm/ADT/APFloat.h"
21#include "llvm/IR/Intrinsics.h"
22#include "llvm/IR/IntrinsicsNVPTX.h"
23
24namespace llvm {
25namespace nvvm {
26
/// Reduction operations supported with TMA copy from shared to global
/// memory for the "cp.reduce.async.bulk.tensor.*" family of PTX
/// instructions.
///
/// The enumerator values are spelled out explicitly and the underlying type
/// is fixed to uint8_t.
enum class TMAReductionOp : uint8_t {
  ADD = 0,
  MIN = 1,
  MAX = 2,
  INC = 3,
  DEC = 4,
  AND = 5,
  OR = 6,
  XOR = 7,
};
40
42 switch (IntrinsicID) {
43 case Intrinsic::nvvm_f2i_rm_ftz:
44 case Intrinsic::nvvm_f2i_rn_ftz:
45 case Intrinsic::nvvm_f2i_rp_ftz:
46 case Intrinsic::nvvm_f2i_rz_ftz:
47
48 case Intrinsic::nvvm_f2ui_rm_ftz:
49 case Intrinsic::nvvm_f2ui_rn_ftz:
50 case Intrinsic::nvvm_f2ui_rp_ftz:
51 case Intrinsic::nvvm_f2ui_rz_ftz:
52
53 case Intrinsic::nvvm_f2ll_rm_ftz:
54 case Intrinsic::nvvm_f2ll_rn_ftz:
55 case Intrinsic::nvvm_f2ll_rp_ftz:
56 case Intrinsic::nvvm_f2ll_rz_ftz:
57
58 case Intrinsic::nvvm_f2ull_rm_ftz:
59 case Intrinsic::nvvm_f2ull_rn_ftz:
60 case Intrinsic::nvvm_f2ull_rp_ftz:
61 case Intrinsic::nvvm_f2ull_rz_ftz:
62 return true;
63
64 case Intrinsic::nvvm_f2i_rm:
65 case Intrinsic::nvvm_f2i_rn:
66 case Intrinsic::nvvm_f2i_rp:
67 case Intrinsic::nvvm_f2i_rz:
68
69 case Intrinsic::nvvm_f2ui_rm:
70 case Intrinsic::nvvm_f2ui_rn:
71 case Intrinsic::nvvm_f2ui_rp:
72 case Intrinsic::nvvm_f2ui_rz:
73
74 case Intrinsic::nvvm_d2i_rm:
75 case Intrinsic::nvvm_d2i_rn:
76 case Intrinsic::nvvm_d2i_rp:
77 case Intrinsic::nvvm_d2i_rz:
78
79 case Intrinsic::nvvm_d2ui_rm:
80 case Intrinsic::nvvm_d2ui_rn:
81 case Intrinsic::nvvm_d2ui_rp:
82 case Intrinsic::nvvm_d2ui_rz:
83
84 case Intrinsic::nvvm_f2ll_rm:
85 case Intrinsic::nvvm_f2ll_rn:
86 case Intrinsic::nvvm_f2ll_rp:
87 case Intrinsic::nvvm_f2ll_rz:
88
89 case Intrinsic::nvvm_f2ull_rm:
90 case Intrinsic::nvvm_f2ull_rn:
91 case Intrinsic::nvvm_f2ull_rp:
92 case Intrinsic::nvvm_f2ull_rz:
93
94 case Intrinsic::nvvm_d2ll_rm:
95 case Intrinsic::nvvm_d2ll_rn:
96 case Intrinsic::nvvm_d2ll_rp:
97 case Intrinsic::nvvm_d2ll_rz:
98
99 case Intrinsic::nvvm_d2ull_rm:
100 case Intrinsic::nvvm_d2ull_rn:
101 case Intrinsic::nvvm_d2ull_rp:
102 case Intrinsic::nvvm_d2ull_rz:
103 return false;
104 }
105 llvm_unreachable("Checking FTZ flag for invalid f2i/d2i intrinsic");
106 return false;
107}
108
110 switch (IntrinsicID) {
111 // f2i
112 case Intrinsic::nvvm_f2i_rm:
113 case Intrinsic::nvvm_f2i_rm_ftz:
114 case Intrinsic::nvvm_f2i_rn:
115 case Intrinsic::nvvm_f2i_rn_ftz:
116 case Intrinsic::nvvm_f2i_rp:
117 case Intrinsic::nvvm_f2i_rp_ftz:
118 case Intrinsic::nvvm_f2i_rz:
119 case Intrinsic::nvvm_f2i_rz_ftz:
120 // d2i
121 case Intrinsic::nvvm_d2i_rm:
122 case Intrinsic::nvvm_d2i_rn:
123 case Intrinsic::nvvm_d2i_rp:
124 case Intrinsic::nvvm_d2i_rz:
125 // f2ll
126 case Intrinsic::nvvm_f2ll_rm:
127 case Intrinsic::nvvm_f2ll_rm_ftz:
128 case Intrinsic::nvvm_f2ll_rn:
129 case Intrinsic::nvvm_f2ll_rn_ftz:
130 case Intrinsic::nvvm_f2ll_rp:
131 case Intrinsic::nvvm_f2ll_rp_ftz:
132 case Intrinsic::nvvm_f2ll_rz:
133 case Intrinsic::nvvm_f2ll_rz_ftz:
134 // d2ll
135 case Intrinsic::nvvm_d2ll_rm:
136 case Intrinsic::nvvm_d2ll_rn:
137 case Intrinsic::nvvm_d2ll_rp:
138 case Intrinsic::nvvm_d2ll_rz:
139 return true;
140
141 // f2ui
142 case Intrinsic::nvvm_f2ui_rm:
143 case Intrinsic::nvvm_f2ui_rm_ftz:
144 case Intrinsic::nvvm_f2ui_rn:
145 case Intrinsic::nvvm_f2ui_rn_ftz:
146 case Intrinsic::nvvm_f2ui_rp:
147 case Intrinsic::nvvm_f2ui_rp_ftz:
148 case Intrinsic::nvvm_f2ui_rz:
149 case Intrinsic::nvvm_f2ui_rz_ftz:
150 // d2ui
151 case Intrinsic::nvvm_d2ui_rm:
152 case Intrinsic::nvvm_d2ui_rn:
153 case Intrinsic::nvvm_d2ui_rp:
154 case Intrinsic::nvvm_d2ui_rz:
155 // f2ull
156 case Intrinsic::nvvm_f2ull_rm:
157 case Intrinsic::nvvm_f2ull_rm_ftz:
158 case Intrinsic::nvvm_f2ull_rn:
159 case Intrinsic::nvvm_f2ull_rn_ftz:
160 case Intrinsic::nvvm_f2ull_rp:
161 case Intrinsic::nvvm_f2ull_rp_ftz:
162 case Intrinsic::nvvm_f2ull_rz:
163 case Intrinsic::nvvm_f2ull_rz_ftz:
164 // d2ull
165 case Intrinsic::nvvm_d2ull_rm:
166 case Intrinsic::nvvm_d2ull_rn:
167 case Intrinsic::nvvm_d2ull_rp:
168 case Intrinsic::nvvm_d2ull_rz:
169 return false;
170 }
172 "Checking invalid f2i/d2i intrinsic for signed int conversion");
173 return false;
174}
175
178 switch (IntrinsicID) {
179 // RM:
180 case Intrinsic::nvvm_f2i_rm:
181 case Intrinsic::nvvm_f2ui_rm:
182 case Intrinsic::nvvm_f2i_rm_ftz:
183 case Intrinsic::nvvm_f2ui_rm_ftz:
184 case Intrinsic::nvvm_d2i_rm:
185 case Intrinsic::nvvm_d2ui_rm:
186
187 case Intrinsic::nvvm_f2ll_rm:
188 case Intrinsic::nvvm_f2ull_rm:
189 case Intrinsic::nvvm_f2ll_rm_ftz:
190 case Intrinsic::nvvm_f2ull_rm_ftz:
191 case Intrinsic::nvvm_d2ll_rm:
192 case Intrinsic::nvvm_d2ull_rm:
194
195 // RN:
196 case Intrinsic::nvvm_f2i_rn:
197 case Intrinsic::nvvm_f2ui_rn:
198 case Intrinsic::nvvm_f2i_rn_ftz:
199 case Intrinsic::nvvm_f2ui_rn_ftz:
200 case Intrinsic::nvvm_d2i_rn:
201 case Intrinsic::nvvm_d2ui_rn:
202
203 case Intrinsic::nvvm_f2ll_rn:
204 case Intrinsic::nvvm_f2ull_rn:
205 case Intrinsic::nvvm_f2ll_rn_ftz:
206 case Intrinsic::nvvm_f2ull_rn_ftz:
207 case Intrinsic::nvvm_d2ll_rn:
208 case Intrinsic::nvvm_d2ull_rn:
210
211 // RP:
212 case Intrinsic::nvvm_f2i_rp:
213 case Intrinsic::nvvm_f2ui_rp:
214 case Intrinsic::nvvm_f2i_rp_ftz:
215 case Intrinsic::nvvm_f2ui_rp_ftz:
216 case Intrinsic::nvvm_d2i_rp:
217 case Intrinsic::nvvm_d2ui_rp:
218
219 case Intrinsic::nvvm_f2ll_rp:
220 case Intrinsic::nvvm_f2ull_rp:
221 case Intrinsic::nvvm_f2ll_rp_ftz:
222 case Intrinsic::nvvm_f2ull_rp_ftz:
223 case Intrinsic::nvvm_d2ll_rp:
224 case Intrinsic::nvvm_d2ull_rp:
226
227 // RZ:
228 case Intrinsic::nvvm_f2i_rz:
229 case Intrinsic::nvvm_f2ui_rz:
230 case Intrinsic::nvvm_f2i_rz_ftz:
231 case Intrinsic::nvvm_f2ui_rz_ftz:
232 case Intrinsic::nvvm_d2i_rz:
233 case Intrinsic::nvvm_d2ui_rz:
234
235 case Intrinsic::nvvm_f2ll_rz:
236 case Intrinsic::nvvm_f2ull_rz:
237 case Intrinsic::nvvm_f2ll_rz_ftz:
238 case Intrinsic::nvvm_f2ull_rz_ftz:
239 case Intrinsic::nvvm_d2ll_rz:
240 case Intrinsic::nvvm_d2ull_rz:
242 }
243 llvm_unreachable("Checking rounding mode for invalid f2i/d2i intrinsic");
244 return APFloat::roundingMode::Invalid;
245}
246
247inline bool FMinFMaxShouldFTZ(Intrinsic::ID IntrinsicID) {
248 switch (IntrinsicID) {
249 case Intrinsic::nvvm_fmax_ftz_f:
250 case Intrinsic::nvvm_fmax_ftz_nan_f:
251 case Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f:
252 case Intrinsic::nvvm_fmax_ftz_xorsign_abs_f:
253
254 case Intrinsic::nvvm_fmin_ftz_f:
255 case Intrinsic::nvvm_fmin_ftz_nan_f:
256 case Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f:
257 case Intrinsic::nvvm_fmin_ftz_xorsign_abs_f:
258 return true;
259
260 case Intrinsic::nvvm_fmax_d:
261 case Intrinsic::nvvm_fmax_f:
262 case Intrinsic::nvvm_fmax_nan_f:
263 case Intrinsic::nvvm_fmax_nan_xorsign_abs_f:
264 case Intrinsic::nvvm_fmax_xorsign_abs_f:
265
266 case Intrinsic::nvvm_fmin_d:
267 case Intrinsic::nvvm_fmin_f:
268 case Intrinsic::nvvm_fmin_nan_f:
269 case Intrinsic::nvvm_fmin_nan_xorsign_abs_f:
270 case Intrinsic::nvvm_fmin_xorsign_abs_f:
271 return false;
272 }
273 llvm_unreachable("Checking FTZ flag for invalid fmin/fmax intrinsic");
274 return false;
275}
276
277inline bool FMinFMaxPropagatesNaNs(Intrinsic::ID IntrinsicID) {
278 switch (IntrinsicID) {
279 case Intrinsic::nvvm_fmax_ftz_nan_f:
280 case Intrinsic::nvvm_fmax_nan_f:
281 case Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f:
282 case Intrinsic::nvvm_fmax_nan_xorsign_abs_f:
283
284 case Intrinsic::nvvm_fmin_ftz_nan_f:
285 case Intrinsic::nvvm_fmin_nan_f:
286 case Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f:
287 case Intrinsic::nvvm_fmin_nan_xorsign_abs_f:
288 return true;
289
290 case Intrinsic::nvvm_fmax_d:
291 case Intrinsic::nvvm_fmax_f:
292 case Intrinsic::nvvm_fmax_ftz_f:
293 case Intrinsic::nvvm_fmax_ftz_xorsign_abs_f:
294 case Intrinsic::nvvm_fmax_xorsign_abs_f:
295
296 case Intrinsic::nvvm_fmin_d:
297 case Intrinsic::nvvm_fmin_f:
298 case Intrinsic::nvvm_fmin_ftz_f:
299 case Intrinsic::nvvm_fmin_ftz_xorsign_abs_f:
300 case Intrinsic::nvvm_fmin_xorsign_abs_f:
301 return false;
302 }
303 llvm_unreachable("Checking NaN flag for invalid fmin/fmax intrinsic");
304 return false;
305}
306
307inline bool FMinFMaxIsXorSignAbs(Intrinsic::ID IntrinsicID) {
308 switch (IntrinsicID) {
309 case Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f:
310 case Intrinsic::nvvm_fmax_ftz_xorsign_abs_f:
311 case Intrinsic::nvvm_fmax_nan_xorsign_abs_f:
312 case Intrinsic::nvvm_fmax_xorsign_abs_f:
313
314 case Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f:
315 case Intrinsic::nvvm_fmin_ftz_xorsign_abs_f:
316 case Intrinsic::nvvm_fmin_nan_xorsign_abs_f:
317 case Intrinsic::nvvm_fmin_xorsign_abs_f:
318 return true;
319
320 case Intrinsic::nvvm_fmax_d:
321 case Intrinsic::nvvm_fmax_f:
322 case Intrinsic::nvvm_fmax_ftz_f:
323 case Intrinsic::nvvm_fmax_ftz_nan_f:
324 case Intrinsic::nvvm_fmax_nan_f:
325
326 case Intrinsic::nvvm_fmin_d:
327 case Intrinsic::nvvm_fmin_f:
328 case Intrinsic::nvvm_fmin_ftz_f:
329 case Intrinsic::nvvm_fmin_ftz_nan_f:
330 case Intrinsic::nvvm_fmin_nan_f:
331 return false;
332 }
333 llvm_unreachable("Checking XorSignAbs flag for invalid fmin/fmax intrinsic");
334 return false;
335}
336
337} // namespace nvvm
338} // namespace llvm
339#endif // LLVM_IR_NVVMINTRINSICUTILS_H
This file declares a class to represent arbitrary precision floating point values and provide a varie...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
bool FPToIntegerIntrinsicResultIsSigned(Intrinsic::ID IntrinsicID)
APFloat::roundingMode GetFPToIntegerRoundingMode(Intrinsic::ID IntrinsicID)
bool FPToIntegerIntrinsicShouldFTZ(Intrinsic::ID IntrinsicID)
bool FMinFMaxIsXorSignAbs(Intrinsic::ID IntrinsicID)
bool FMinFMaxShouldFTZ(Intrinsic::ID IntrinsicID)
bool FMinFMaxPropagatesNaNs(Intrinsic::ID IntrinsicID)
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
RoundingMode
Rounding mode.
static constexpr roundingMode rmTowardNegative
Definition: APFloat.h:305
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:302
static constexpr roundingMode rmTowardZero
Definition: APFloat.h:306
static constexpr roundingMode rmTowardPositive
Definition: APFloat.h:304