clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name NVPTXISelLowering.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/build-llvm/lib/Target/NVPTX -resource-dir /usr/lib/llvm-14/lib/clang/14.0.0 -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/build-llvm/lib/Target/NVPTX -I /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/llvm/lib/Target/NVPTX -I /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/build-llvm/include -I /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/llvm/include -D NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-14/lib/clang/14.0.0/include -internal-isystem 
/usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/build-llvm/lib/Target/NVPTX -fdebug-prefix-map=/build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e=. -ferror-limit 19 -fvisibility hidden -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2021-09-04-040900-46481-1 -x c++ /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
1 | |
2 | |
3 | |
4 | |
5 | |
6 | |
7 | |
8 | |
9 | |
10 | |
11 | |
12 | |
13 | |
14 | #include "NVPTXISelLowering.h" |
15 | #include "MCTargetDesc/NVPTXBaseInfo.h" |
16 | #include "NVPTX.h" |
17 | #include "NVPTXSubtarget.h" |
18 | #include "NVPTXTargetMachine.h" |
19 | #include "NVPTXTargetObjectFile.h" |
20 | #include "NVPTXUtilities.h" |
21 | #include "llvm/ADT/APInt.h" |
22 | #include "llvm/ADT/STLExtras.h" |
23 | #include "llvm/ADT/SmallVector.h" |
24 | #include "llvm/ADT/StringRef.h" |
25 | #include "llvm/CodeGen/Analysis.h" |
26 | #include "llvm/CodeGen/MachineFunction.h" |
27 | #include "llvm/CodeGen/MachineMemOperand.h" |
28 | #include "llvm/CodeGen/SelectionDAG.h" |
29 | #include "llvm/CodeGen/SelectionDAGNodes.h" |
30 | #include "llvm/CodeGen/TargetCallingConv.h" |
31 | #include "llvm/CodeGen/TargetLowering.h" |
32 | #include "llvm/CodeGen/ValueTypes.h" |
33 | #include "llvm/IR/Argument.h" |
34 | #include "llvm/IR/Attributes.h" |
35 | #include "llvm/IR/Constants.h" |
36 | #include "llvm/IR/DataLayout.h" |
37 | #include "llvm/IR/DerivedTypes.h" |
38 | #include "llvm/IR/Function.h" |
39 | #include "llvm/IR/GlobalValue.h" |
40 | #include "llvm/IR/Instruction.h" |
41 | #include "llvm/IR/Instructions.h" |
42 | #include "llvm/IR/IntrinsicsNVPTX.h" |
43 | #include "llvm/IR/Module.h" |
44 | #include "llvm/IR/Type.h" |
45 | #include "llvm/IR/Value.h" |
46 | #include "llvm/Support/Casting.h" |
47 | #include "llvm/Support/CodeGen.h" |
48 | #include "llvm/Support/CommandLine.h" |
49 | #include "llvm/Support/ErrorHandling.h" |
50 | #include "llvm/Support/MachineValueType.h" |
51 | #include "llvm/Support/MathExtras.h" |
52 | #include "llvm/Support/raw_ostream.h" |
53 | #include "llvm/Target/TargetMachine.h" |
54 | #include "llvm/Target/TargetOptions.h" |
55 | #include <algorithm> |
56 | #include <cassert> |
57 | #include <cstdint> |
58 | #include <iterator> |
59 | #include <sstream> |
60 | #include <string> |
61 | #include <utility> |
62 | #include <vector> |
63 | |
64 | #define DEBUG_TYPE "nvptx-lower" |
65 | |
66 | using namespace llvm; |
67 | |
68 | static std::atomic<unsigned> GlobalUniqueCallSite; |
69 | |
70 | static cl::opt<bool> sched4reg( |
71 | "nvptx-sched4reg", |
72 | cl::desc("NVPTX Specific: schedule for register pressue"), cl::init(false)); |
73 | |
74 | static cl::opt<unsigned> |
75 | FMAContractLevelOpt("nvptx-fma-level", cl::ZeroOrMore, cl::Hidden, |
76 | cl::desc("NVPTX Specific: FMA contraction (0: don't do it" |
77 | " 1: do it 2: do it aggressively"), |
78 | cl::init(2)); |
79 | |
80 | static cl::opt<int> UsePrecDivF32( |
81 | "nvptx-prec-divf32", cl::ZeroOrMore, cl::Hidden, |
82 | cl::desc("NVPTX Specifies: 0 use div.approx, 1 use div.full, 2 use" |
83 | " IEEE Compliant F32 div.rnd if available."), |
84 | cl::init(2)); |
85 | |
86 | static cl::opt<bool> UsePrecSqrtF32( |
87 | "nvptx-prec-sqrtf32", cl::Hidden, |
88 | cl::desc("NVPTX Specific: 0 use sqrt.approx, 1 use sqrt.rn."), |
89 | cl::init(true)); |
90 | |
91 | int NVPTXTargetLowering::getDivF32Level() const { |
92 | if (UsePrecDivF32.getNumOccurrences() > 0) { |
93 | |
94 | return UsePrecDivF32; |
95 | } else { |
96 | |
97 | if (getTargetMachine().Options.UnsafeFPMath) |
98 | return 0; |
99 | else |
100 | return 2; |
101 | } |
102 | } |
103 | |
104 | bool NVPTXTargetLowering::usePrecSqrtF32() const { |
105 | if (UsePrecSqrtF32.getNumOccurrences() > 0) { |
106 | |
107 | return UsePrecSqrtF32; |
108 | } else { |
109 | |
110 | return !getTargetMachine().Options.UnsafeFPMath; |
111 | } |
112 | } |
113 | |
114 | bool NVPTXTargetLowering::useF32FTZ(const MachineFunction &MF) const { |
115 | return MF.getDenormalMode(APFloat::IEEEsingle()).Output == |
116 | DenormalMode::PreserveSign; |
117 | } |
118 | |
119 | static bool IsPTXVectorType(MVT VT) { |
120 | switch (VT.SimpleTy) { |
121 | default: |
122 | return false; |
123 | case MVT::v2i1: |
124 | case MVT::v4i1: |
125 | case MVT::v2i8: |
126 | case MVT::v4i8: |
127 | case MVT::v2i16: |
128 | case MVT::v4i16: |
129 | case MVT::v2i32: |
130 | case MVT::v4i32: |
131 | case MVT::v2i64: |
132 | case MVT::v2f16: |
133 | case MVT::v4f16: |
134 | case MVT::v8f16: |
135 | case MVT::v2f32: |
136 | case MVT::v4f32: |
137 | case MVT::v2f64: |
138 | return true; |
139 | } |
140 | } |
141 | |
142 | |
143 | |
144 | |
145 | |
146 | |
147 | |
148 | static void ComputePTXValueVTs(const TargetLowering &TLI, const DataLayout &DL, |
149 | Type *Ty, SmallVectorImpl<EVT> &ValueVTs, |
150 | SmallVectorImpl<uint64_t> *Offsets = nullptr, |
151 | uint64_t StartingOffset = 0) { |
152 | SmallVector<EVT, 16> TempVTs; |
153 | SmallVector<uint64_t, 16> TempOffsets; |
154 | |
155 | |
156 | if (Ty->isIntegerTy(128)) { |
157 | ValueVTs.push_back(EVT(MVT::i64)); |
158 | ValueVTs.push_back(EVT(MVT::i64)); |
159 | |
160 | if (Offsets) { |
161 | Offsets->push_back(StartingOffset + 0); |
162 | Offsets->push_back(StartingOffset + 8); |
163 | } |
164 | |
165 | return; |
166 | } |
167 | |
168 | |
169 | if (StructType *STy = dyn_cast<StructType>(Ty)) { |
170 | auto const *SL = DL.getStructLayout(STy); |
171 | auto ElementNum = 0; |
172 | for(auto *EI : STy->elements()) { |
173 | ComputePTXValueVTs(TLI, DL, EI, ValueVTs, Offsets, |
174 | StartingOffset + SL->getElementOffset(ElementNum)); |
175 | ++ElementNum; |
176 | } |
177 | return; |
178 | } |
179 | |
180 | ComputeValueVTs(TLI, DL, Ty, TempVTs, &TempOffsets, StartingOffset); |
181 | for (unsigned i = 0, e = TempVTs.size(); i != e; ++i) { |
182 | EVT VT = TempVTs[i]; |
183 | uint64_t Off = TempOffsets[i]; |
184 | |
185 | |
186 | if (VT.isVector()) { |
187 | unsigned NumElts = VT.getVectorNumElements(); |
188 | EVT EltVT = VT.getVectorElementType(); |
189 | |
190 | |
191 | |
192 | if (EltVT == MVT::f16 && NumElts % 2 == 0) { |
193 | EltVT = MVT::v2f16; |
194 | NumElts /= 2; |
195 | } |
196 | for (unsigned j = 0; j != NumElts; ++j) { |
197 | ValueVTs.push_back(EltVT); |
198 | if (Offsets) |
199 | Offsets->push_back(Off + j * EltVT.getStoreSize()); |
200 | } |
201 | } else { |
202 | ValueVTs.push_back(VT); |
203 | if (Offsets) |
204 | Offsets->push_back(Off); |
205 | } |
206 | } |
207 | } |
208 | |
209 | |
210 | |
211 | |
212 | |
213 | |
214 | |
215 | |
216 | |
217 | |
218 | |
219 | static unsigned CanMergeParamLoadStoresStartingAt( |
220 | unsigned Idx, uint32_t AccessSize, const SmallVectorImpl<EVT> &ValueVTs, |
221 | const SmallVectorImpl<uint64_t> &Offsets, Align ParamAlignment) { |
222 | |
223 | |
224 | if (ParamAlignment < AccessSize) |
225 | return 1; |
226 | |
227 | if (Offsets[Idx] & (AccessSize - 1)) |
228 | return 1; |
229 | |
230 | EVT EltVT = ValueVTs[Idx]; |
231 | unsigned EltSize = EltVT.getStoreSize(); |
232 | |
233 | |
234 | if (EltSize >= AccessSize) |
235 | return 1; |
236 | |
237 | unsigned NumElts = AccessSize / EltSize; |
238 | |
239 | if (AccessSize != EltSize * NumElts) |
240 | return 1; |
241 | |
242 | |
243 | if (Idx + NumElts > ValueVTs.size()) |
244 | return 1; |
245 | |
246 | |
247 | if (NumElts != 4 && NumElts != 2) |
248 | return 1; |
249 | |
250 | for (unsigned j = Idx + 1; j < Idx + NumElts; ++j) { |
251 | |
252 | if (ValueVTs[j] != EltVT) |
253 | return 1; |
254 | |
255 | |
256 | if (Offsets[j] - Offsets[j - 1] != EltSize) |
257 | return 1; |
258 | } |
259 | |
260 | return NumElts; |
261 | } |
262 | |
263 | |
264 | |
// Bit flags describing a value's position inside a group of values that are
// accessed together. A stand-alone value is both the first and the last
// member of its (one-element) group.
enum ParamVectorizationFlags {
  PVF_INNER = 0, // Neither the first nor the last value of a group.
  PVF_FIRST = 1, // First value of a group.
  PVF_LAST = 2,  // Last value of a group.
  // Stand-alone value: first and last at the same time.
  PVF_SCALAR = PVF_FIRST | PVF_LAST
};
272 | |
273 | |
274 | |
275 | |
276 | |
277 | |
278 | |
279 | |
280 | |
281 | static SmallVector<ParamVectorizationFlags, 16> |
282 | VectorizePTXValueVTs(const SmallVectorImpl<EVT> &ValueVTs, |
283 | const SmallVectorImpl<uint64_t> &Offsets, |
284 | Align ParamAlignment) { |
285 | |
286 | |
287 | SmallVector<ParamVectorizationFlags, 16> VectorInfo; |
288 | VectorInfo.assign(ValueVTs.size(), PVF_SCALAR); |
289 | |
290 | |
291 | for (int I = 0, E = ValueVTs.size(); I != E; ++I) { |
292 | |
293 | assert(VectorInfo[I] == PVF_SCALAR && "Unexpected vector info state."); |
294 | for (unsigned AccessSize : {16, 8, 4, 2}) { |
295 | unsigned NumElts = CanMergeParamLoadStoresStartingAt( |
296 | I, AccessSize, ValueVTs, Offsets, ParamAlignment); |
297 | |
298 | switch (NumElts) { |
299 | default: |
300 | llvm_unreachable("Unexpected return value"); |
301 | case 1: |
302 | |
303 | continue; |
304 | case 2: |
305 | assert(I + 1 < E && "Not enough elements."); |
306 | VectorInfo[I] = PVF_FIRST; |
307 | VectorInfo[I + 1] = PVF_LAST; |
308 | I += 1; |
309 | break; |
310 | case 4: |
311 | assert(I + 3 < E && "Not enough elements."); |
312 | VectorInfo[I] = PVF_FIRST; |
313 | VectorInfo[I + 1] = PVF_INNER; |
314 | VectorInfo[I + 2] = PVF_INNER; |
315 | VectorInfo[I + 3] = PVF_LAST; |
316 | I += 3; |
317 | break; |
318 | } |
319 | |
320 | |
321 | break; |
322 | } |
323 | } |
324 | return VectorInfo; |
325 | } |
326 | |
327 | |
328 | NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM, |
329 | const NVPTXSubtarget &STI) |
330 | : TargetLowering(TM), nvTM(&TM), STI(STI) { |
331 | |
332 | |
333 | |
334 | MaxStoresPerMemset = (unsigned) 0xFFFFFFFF; |
335 | MaxStoresPerMemcpy = (unsigned) 0xFFFFFFFF; |
336 | MaxStoresPerMemmove = (unsigned) 0xFFFFFFFF; |
337 | |
338 | setBooleanContents(ZeroOrNegativeOneBooleanContent); |
339 | setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); |
340 | |
341 | |
342 | |
343 | setJumpIsExpensive(true); |
344 | |
345 | |
346 | |
347 | addBypassSlowDiv(64, 32); |
348 | |
349 | |
350 | if (sched4reg) |
351 | setSchedulingPreference(Sched::RegPressure); |
352 | else |
353 | setSchedulingPreference(Sched::Source); |
354 | |
355 | auto setFP16OperationAction = [&](unsigned Op, MVT VT, LegalizeAction Action, |
356 | LegalizeAction NoF16Action) { |
357 | setOperationAction(Op, VT, STI.allowFP16Math() ? Action : NoF16Action); |
358 | }; |
359 | |
360 | addRegisterClass(MVT::i1, &NVPTX::Int1RegsRegClass); |
361 | addRegisterClass(MVT::i16, &NVPTX::Int16RegsRegClass); |
362 | addRegisterClass(MVT::i32, &NVPTX::Int32RegsRegClass); |
363 | addRegisterClass(MVT::i64, &NVPTX::Int64RegsRegClass); |
364 | addRegisterClass(MVT::f32, &NVPTX::Float32RegsRegClass); |
365 | addRegisterClass(MVT::f64, &NVPTX::Float64RegsRegClass); |
366 | addRegisterClass(MVT::f16, &NVPTX::Float16RegsRegClass); |
367 | addRegisterClass(MVT::v2f16, &NVPTX::Float16x2RegsRegClass); |
368 | |
369 | |
370 | setOperationAction(ISD::SINT_TO_FP, MVT::f16, Legal); |
371 | setOperationAction(ISD::FP_TO_SINT, MVT::f16, Legal); |
372 | setOperationAction(ISD::BUILD_VECTOR, MVT::v2f16, Custom); |
373 | setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f16, Custom); |
374 | setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f16, Expand); |
375 | setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f16, Expand); |
376 | |
377 | setFP16OperationAction(ISD::SETCC, MVT::f16, Legal, Promote); |
378 | setFP16OperationAction(ISD::SETCC, MVT::v2f16, Legal, Expand); |
379 | |
380 | |
381 | for (MVT VT : {MVT::f16, MVT::v2f16, MVT::f32, MVT::f64, MVT::i1, MVT::i8, |
382 | MVT::i16, MVT::i32, MVT::i64}) { |
383 | setOperationAction(ISD::SELECT_CC, VT, Expand); |
384 | setOperationAction(ISD::BR_CC, VT, Expand); |
385 | } |
386 | |
387 | |
388 | |
389 | setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i64, Legal); |
390 | setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal); |
391 | setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Legal); |
392 | setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal); |
393 | setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); |
394 | |
395 | setOperationAction(ISD::SHL_PARTS, MVT::i32 , Custom); |
396 | setOperationAction(ISD::SRA_PARTS, MVT::i32 , Custom); |
397 | setOperationAction(ISD::SRL_PARTS, MVT::i32 , Custom); |
398 | setOperationAction(ISD::SHL_PARTS, MVT::i64 , Custom); |
399 | setOperationAction(ISD::SRA_PARTS, MVT::i64 , Custom); |
400 | setOperationAction(ISD::SRL_PARTS, MVT::i64 , Custom); |
401 | |
402 | setOperationAction(ISD::BITREVERSE, MVT::i32, Legal); |
403 | setOperationAction(ISD::BITREVERSE, MVT::i64, Legal); |
404 | |
405 | |
406 | |
407 | |
408 | setOperationAction(ISD::ROTL, MVT::i64, Legal); |
409 | setOperationAction(ISD::ROTR, MVT::i64, Legal); |
410 | setOperationAction(ISD::ROTL, MVT::i32, Legal); |
411 | setOperationAction(ISD::ROTR, MVT::i32, Legal); |
412 | |
413 | setOperationAction(ISD::ROTL, MVT::i16, Expand); |
414 | setOperationAction(ISD::ROTR, MVT::i16, Expand); |
415 | setOperationAction(ISD::ROTL, MVT::i8, Expand); |
416 | setOperationAction(ISD::ROTR, MVT::i8, Expand); |
417 | setOperationAction(ISD::BSWAP, MVT::i16, Expand); |
418 | setOperationAction(ISD::BSWAP, MVT::i32, Expand); |
419 | setOperationAction(ISD::BSWAP, MVT::i64, Expand); |
420 | |
421 | |
422 | |
423 | setOperationAction(ISD::BR_JT, MVT::Other, Expand); |
424 | setOperationAction(ISD::BRIND, MVT::Other, Expand); |
425 | |
426 | setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); |
427 | setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); |
428 | |
429 | |
430 | |
431 | setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); |
432 | |
433 | |
434 | setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand); |
435 | setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand); |
436 | setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand); |
437 | setLoadExtAction(ISD::EXTLOAD, MVT::v2f32, MVT::v2f16, Expand); |
438 | setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f16, Expand); |
439 | setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f32, Expand); |
440 | setLoadExtAction(ISD::EXTLOAD, MVT::v4f32, MVT::v4f16, Expand); |
441 | setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f16, Expand); |
442 | setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f32, Expand); |
443 | |
444 | |
445 | setTruncStoreAction(MVT::f32, MVT::f16, Expand); |
446 | setTruncStoreAction(MVT::f64, MVT::f16, Expand); |
447 | setTruncStoreAction(MVT::f64, MVT::f32, Expand); |
448 | |
449 | |
450 | setOperationAction(ISD::LOAD, MVT::i1, Custom); |
451 | setOperationAction(ISD::STORE, MVT::i1, Custom); |
452 | |
453 | for (MVT VT : MVT::integer_valuetypes()) { |
454 | setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); |
455 | setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote); |
456 | setTruncStoreAction(VT, MVT::i1, Expand); |
457 | } |
458 | |
459 | |
460 | setOperationAction(ISD::ConstantFP, MVT::f64, Legal); |
461 | setOperationAction(ISD::ConstantFP, MVT::f32, Legal); |
462 | setOperationAction(ISD::ConstantFP, MVT::f16, Legal); |
463 | |
464 | |
465 | setOperationAction(ISD::TRAP, MVT::Other, Legal); |
466 | |
467 | |
468 | for (MVT VT : MVT::fixedlen_vector_valuetypes()) { |
469 | if (IsPTXVectorType(VT)) { |
470 | setOperationAction(ISD::LOAD, VT, Custom); |
471 | setOperationAction(ISD::STORE, VT, Custom); |
472 | setOperationAction(ISD::INTRINSIC_W_CHAIN, VT, Custom); |
473 | } |
474 | } |
475 | |
476 | |
477 | setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom); |
478 | |
479 | for (const auto& Ty : {MVT::i16, MVT::i32, MVT::i64}) { |
480 | setOperationAction(ISD::ABS, Ty, Legal); |
481 | setOperationAction(ISD::SMIN, Ty, Legal); |
482 | setOperationAction(ISD::SMAX, Ty, Legal); |
483 | setOperationAction(ISD::UMIN, Ty, Legal); |
484 | setOperationAction(ISD::UMAX, Ty, Legal); |
485 | |
486 | setOperationAction(ISD::CTPOP, Ty, Legal); |
487 | setOperationAction(ISD::CTLZ, Ty, Legal); |
488 | } |
489 | |
490 | setOperationAction(ISD::CTTZ, MVT::i16, Expand); |
491 | setOperationAction(ISD::CTTZ, MVT::i32, Expand); |
492 | setOperationAction(ISD::CTTZ, MVT::i64, Expand); |
493 | |
494 | |
495 | setOperationAction(ISD::SELECT, MVT::i1, Custom); |
496 | |
497 | |
498 | setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand); |
499 | setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand); |
500 | |
501 | |
502 | setTargetDAGCombine(ISD::ADD); |
503 | setTargetDAGCombine(ISD::AND); |
504 | setTargetDAGCombine(ISD::FADD); |
505 | setTargetDAGCombine(ISD::MUL); |
506 | setTargetDAGCombine(ISD::SHL); |
507 | setTargetDAGCombine(ISD::SREM); |
508 | setTargetDAGCombine(ISD::UREM); |
509 | |
510 | |
511 | |
512 | if (STI.allowFP16Math()) |
513 | setTargetDAGCombine(ISD::SETCC); |
514 | |
515 | |
516 | |
517 | |
518 | |
519 | |
520 | |
521 | for (const auto &Op : {ISD::FADD, ISD::FMUL, ISD::FSUB, ISD::FMA}) { |
522 | setFP16OperationAction(Op, MVT::f16, Legal, Promote); |
523 | setFP16OperationAction(Op, MVT::v2f16, Legal, Expand); |
524 | } |
525 | |
526 | |
527 | setOperationAction(ISD::FNEG, MVT::f16, Expand); |
528 | setOperationAction(ISD::FNEG, MVT::v2f16, Expand); |
529 | |
530 | |
531 | |
532 | |
533 | for (const auto &Op : {ISD::FCEIL, ISD::FFLOOR, ISD::FNEARBYINT, ISD::FRINT, |
534 | ISD::FTRUNC}) { |
535 | setOperationAction(Op, MVT::f16, Legal); |
536 | setOperationAction(Op, MVT::f32, Legal); |
537 | setOperationAction(Op, MVT::f64, Legal); |
538 | setOperationAction(Op, MVT::v2f16, Expand); |
539 | } |
540 | |
541 | setOperationAction(ISD::FROUND, MVT::f16, Promote); |
542 | setOperationAction(ISD::FROUND, MVT::v2f16, Expand); |
543 | setOperationAction(ISD::FROUND, MVT::f32, Custom); |
544 | setOperationAction(ISD::FROUND, MVT::f64, Custom); |
545 | |
546 | |
547 | |
548 | setOperationAction(ISD::FCOPYSIGN, MVT::f16, Expand); |
549 | setOperationAction(ISD::FCOPYSIGN, MVT::v2f16, Expand); |
550 | setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); |
551 | setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); |
552 | |
553 | |
554 | |
555 | |
556 | for (const auto &Op : {ISD::FDIV, ISD::FREM, ISD::FSQRT, ISD::FSIN, ISD::FCOS, |
557 | ISD::FABS, ISD::FMINNUM, ISD::FMAXNUM}) { |
558 | setOperationAction(Op, MVT::f16, Promote); |
559 | setOperationAction(Op, MVT::f32, Legal); |
560 | setOperationAction(Op, MVT::f64, Legal); |
561 | setOperationAction(Op, MVT::v2f16, Expand); |
562 | } |
563 | setOperationAction(ISD::FMINNUM, MVT::f16, Promote); |
564 | setOperationAction(ISD::FMAXNUM, MVT::f16, Promote); |
565 | setOperationAction(ISD::FMINIMUM, MVT::f16, Promote); |
566 | setOperationAction(ISD::FMAXIMUM, MVT::f16, Promote); |
567 | |
568 | |
569 | |
570 | |
571 | |
572 | |
573 | computeRegisterProperties(STI.getRegisterInfo()); |
574 | } |
575 | |
576 | const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const { |
577 | switch ((NVPTXISD::NodeType)Opcode) { |
578 | case NVPTXISD::FIRST_NUMBER: |
579 | break; |
580 | case NVPTXISD::CALL: |
581 | return "NVPTXISD::CALL"; |
582 | case NVPTXISD::RET_FLAG: |
583 | return "NVPTXISD::RET_FLAG"; |
584 | case NVPTXISD::LOAD_PARAM: |
585 | return "NVPTXISD::LOAD_PARAM"; |
586 | case NVPTXISD::Wrapper: |
587 | return "NVPTXISD::Wrapper"; |
588 | case NVPTXISD::DeclareParam: |
589 | return "NVPTXISD::DeclareParam"; |
590 | case NVPTXISD::DeclareScalarParam: |
591 | return "NVPTXISD::DeclareScalarParam"; |
592 | case NVPTXISD::DeclareRet: |
593 | return "NVPTXISD::DeclareRet"; |
594 | case NVPTXISD::DeclareScalarRet: |
595 | return "NVPTXISD::DeclareScalarRet"; |
596 | case NVPTXISD::DeclareRetParam: |
597 | return "NVPTXISD::DeclareRetParam"; |
598 | case NVPTXISD::PrintCall: |
599 | return "NVPTXISD::PrintCall"; |
600 | case NVPTXISD::PrintConvergentCall: |
601 | return "NVPTXISD::PrintConvergentCall"; |
602 | case NVPTXISD::PrintCallUni: |
603 | return "NVPTXISD::PrintCallUni"; |
604 | case NVPTXISD::PrintConvergentCallUni: |
605 | return "NVPTXISD::PrintConvergentCallUni"; |
606 | case NVPTXISD::LoadParam: |
607 | return "NVPTXISD::LoadParam"; |
608 | case NVPTXISD::LoadParamV2: |
609 | return "NVPTXISD::LoadParamV2"; |
610 | case NVPTXISD::LoadParamV4: |
611 | return "NVPTXISD::LoadParamV4"; |
612 | case NVPTXISD::StoreParam: |
613 | return "NVPTXISD::StoreParam"; |
614 | case NVPTXISD::StoreParamV2: |
615 | return "NVPTXISD::StoreParamV2"; |
616 | case NVPTXISD::StoreParamV4: |
617 | return "NVPTXISD::StoreParamV4"; |
618 | case NVPTXISD::StoreParamS32: |
619 | return "NVPTXISD::StoreParamS32"; |
620 | case NVPTXISD::StoreParamU32: |
621 | return "NVPTXISD::StoreParamU32"; |
622 | case NVPTXISD::CallArgBegin: |
623 | return "NVPTXISD::CallArgBegin"; |
624 | case NVPTXISD::CallArg: |
625 | return "NVPTXISD::CallArg"; |
626 | case NVPTXISD::LastCallArg: |
627 | return "NVPTXISD::LastCallArg"; |
628 | case NVPTXISD::CallArgEnd: |
629 | return "NVPTXISD::CallArgEnd"; |
630 | case NVPTXISD::CallVoid: |
631 | return "NVPTXISD::CallVoid"; |
632 | case NVPTXISD::CallVal: |
633 | return "NVPTXISD::CallVal"; |
634 | case NVPTXISD::CallSymbol: |
635 | return "NVPTXISD::CallSymbol"; |
636 | case NVPTXISD::Prototype: |
637 | return "NVPTXISD::Prototype"; |
638 | case NVPTXISD::MoveParam: |
639 | return "NVPTXISD::MoveParam"; |
640 | case NVPTXISD::StoreRetval: |
641 | return "NVPTXISD::StoreRetval"; |
642 | case NVPTXISD::StoreRetvalV2: |
643 | return "NVPTXISD::StoreRetvalV2"; |
644 | case NVPTXISD::StoreRetvalV4: |
645 | return "NVPTXISD::StoreRetvalV4"; |
646 | case NVPTXISD::PseudoUseParam: |
647 | return "NVPTXISD::PseudoUseParam"; |
648 | case NVPTXISD::RETURN: |
649 | return "NVPTXISD::RETURN"; |
650 | case NVPTXISD::CallSeqBegin: |
651 | return "NVPTXISD::CallSeqBegin"; |
652 | case NVPTXISD::CallSeqEnd: |
653 | return "NVPTXISD::CallSeqEnd"; |
654 | case NVPTXISD::CallPrototype: |
655 | return "NVPTXISD::CallPrototype"; |
656 | case NVPTXISD::ProxyReg: |
657 | return "NVPTXISD::ProxyReg"; |
658 | case NVPTXISD::LoadV2: |
659 | return "NVPTXISD::LoadV2"; |
660 | case NVPTXISD::LoadV4: |
661 | return "NVPTXISD::LoadV4"; |
662 | case NVPTXISD::LDGV2: |
663 | return "NVPTXISD::LDGV2"; |
664 | case NVPTXISD::LDGV4: |
665 | return "NVPTXISD::LDGV4"; |
666 | case NVPTXISD::LDUV2: |
667 | return "NVPTXISD::LDUV2"; |
668 | case NVPTXISD::LDUV4: |
669 | return "NVPTXISD::LDUV4"; |
670 | case NVPTXISD::StoreV2: |
671 | return "NVPTXISD::StoreV2"; |
672 | case NVPTXISD::StoreV4: |
673 | return "NVPTXISD::StoreV4"; |
674 | case NVPTXISD::FUN_SHFL_CLAMP: |
675 | return "NVPTXISD::FUN_SHFL_CLAMP"; |
676 | case NVPTXISD::FUN_SHFR_CLAMP: |
677 | return "NVPTXISD::FUN_SHFR_CLAMP"; |
678 | case NVPTXISD::IMAD: |
679 | return "NVPTXISD::IMAD"; |
680 | case NVPTXISD::SETP_F16X2: |
681 | return "NVPTXISD::SETP_F16X2"; |
682 | case NVPTXISD::Dummy: |
683 | return "NVPTXISD::Dummy"; |
684 | case NVPTXISD::MUL_WIDE_SIGNED: |
685 | return "NVPTXISD::MUL_WIDE_SIGNED"; |
686 | case NVPTXISD::MUL_WIDE_UNSIGNED: |
687 | return "NVPTXISD::MUL_WIDE_UNSIGNED"; |
688 | case NVPTXISD::Tex1DFloatS32: return "NVPTXISD::Tex1DFloatS32"; |
689 | case NVPTXISD::Tex1DFloatFloat: return "NVPTXISD::Tex1DFloatFloat"; |
690 | case NVPTXISD::Tex1DFloatFloatLevel: |
691 | return "NVPTXISD::Tex1DFloatFloatLevel"; |
692 | case NVPTXISD::Tex1DFloatFloatGrad: |
693 | return "NVPTXISD::Tex1DFloatFloatGrad"; |
694 | case NVPTXISD::Tex1DS32S32: return "NVPTXISD::Tex1DS32S32"; |
695 | case NVPTXISD::Tex1DS32Float: return "NVPTXISD::Tex1DS32Float"; |
696 | case NVPTXISD::Tex1DS32FloatLevel: |
697 | return "NVPTXISD::Tex1DS32FloatLevel"; |
698 | case NVPTXISD::Tex1DS32FloatGrad: |
699 | return "NVPTXISD::Tex1DS32FloatGrad"; |
700 | case NVPTXISD::Tex1DU32S32: return "NVPTXISD::Tex1DU32S32"; |
701 | case NVPTXISD::Tex1DU32Float: return "NVPTXISD::Tex1DU32Float"; |
702 | case NVPTXISD::Tex1DU32FloatLevel: |
703 | return "NVPTXISD::Tex1DU32FloatLevel"; |
704 | case NVPTXISD::Tex1DU32FloatGrad: |
705 | return "NVPTXISD::Tex1DU32FloatGrad"; |
706 | case NVPTXISD::Tex1DArrayFloatS32: return "NVPTXISD::Tex1DArrayFloatS32"; |
707 | case NVPTXISD::Tex1DArrayFloatFloat: return "NVPTXISD::Tex1DArrayFloatFloat"; |
708 | case NVPTXISD::Tex1DArrayFloatFloatLevel: |
709 | return "NVPTXISD::Tex1DArrayFloatFloatLevel"; |
710 | case NVPTXISD::Tex1DArrayFloatFloatGrad: |
711 | return "NVPTXISD::Tex1DArrayFloatFloatGrad"; |
712 | case NVPTXISD::Tex1DArrayS32S32: return "NVPTXISD::Tex1DArrayS32S32"; |
713 | case NVPTXISD::Tex1DArrayS32Float: return "NVPTXISD::Tex1DArrayS32Float"; |
714 | case NVPTXISD::Tex1DArrayS32FloatLevel: |
715 | return "NVPTXISD::Tex1DArrayS32FloatLevel"; |
716 | case NVPTXISD::Tex1DArrayS32FloatGrad: |
717 | return "NVPTXISD::Tex1DArrayS32FloatGrad"; |
718 | case NVPTXISD::Tex1DArrayU32S32: return "NVPTXISD::Tex1DArrayU32S32"; |
719 | case NVPTXISD::Tex1DArrayU32Float: return "NVPTXISD::Tex1DArrayU32Float"; |
720 | case NVPTXISD::Tex1DArrayU32FloatLevel: |
721 | return "NVPTXISD::Tex1DArrayU32FloatLevel"; |
722 | case NVPTXISD::Tex1DArrayU32FloatGrad: |
723 | return "NVPTXISD::Tex1DArrayU32FloatGrad"; |
724 | case NVPTXISD::Tex2DFloatS32: return "NVPTXISD::Tex2DFloatS32"; |
725 | case NVPTXISD::Tex2DFloatFloat: return "NVPTXISD::Tex2DFloatFloat"; |
726 | case NVPTXISD::Tex2DFloatFloatLevel: |
727 | return "NVPTXISD::Tex2DFloatFloatLevel"; |
728 | case NVPTXISD::Tex2DFloatFloatGrad: |
729 | return "NVPTXISD::Tex2DFloatFloatGrad"; |
730 | case NVPTXISD::Tex2DS32S32: return "NVPTXISD::Tex2DS32S32"; |
731 | case NVPTXISD::Tex2DS32Float: return "NVPTXISD::Tex2DS32Float"; |
732 | case NVPTXISD::Tex2DS32FloatLevel: |
733 | return "NVPTXISD::Tex2DS32FloatLevel"; |
734 | case NVPTXISD::Tex2DS32FloatGrad: |
735 | return "NVPTXISD::Tex2DS32FloatGrad"; |
736 | case NVPTXISD::Tex2DU32S32: return "NVPTXISD::Tex2DU32S32"; |
737 | case NVPTXISD::Tex2DU32Float: return "NVPTXISD::Tex2DU32Float"; |
738 | case NVPTXISD::Tex2DU32FloatLevel: |
739 | return "NVPTXISD::Tex2DU32FloatLevel"; |
740 | case NVPTXISD::Tex2DU32FloatGrad: |
741 | return "NVPTXISD::Tex2DU32FloatGrad"; |
742 | case NVPTXISD::Tex2DArrayFloatS32: return "NVPTXISD::Tex2DArrayFloatS32"; |
743 | case NVPTXISD::Tex2DArrayFloatFloat: return "NVPTXISD::Tex2DArrayFloatFloat"; |
744 | case NVPTXISD::Tex2DArrayFloatFloatLevel: |
745 | return "NVPTXISD::Tex2DArrayFloatFloatLevel"; |
746 | case NVPTXISD::Tex2DArrayFloatFloatGrad: |
747 | return "NVPTXISD::Tex2DArrayFloatFloatGrad"; |
748 | case NVPTXISD::Tex2DArrayS32S32: return "NVPTXISD::Tex2DArrayS32S32"; |
749 | case NVPTXISD::Tex2DArrayS32Float: return "NVPTXISD::Tex2DArrayS32Float"; |
750 | case NVPTXISD::Tex2DArrayS32FloatLevel: |
751 | return "NVPTXISD::Tex2DArrayS32FloatLevel"; |
752 | case NVPTXISD::Tex2DArrayS32FloatGrad: |
753 | return "NVPTXISD::Tex2DArrayS32FloatGrad"; |
754 | case NVPTXISD::Tex2DArrayU32S32: return "NVPTXISD::Tex2DArrayU32S32"; |
755 | case NVPTXISD::Tex2DArrayU32Float: return "NVPTXISD::Tex2DArrayU32Float"; |
756 | case NVPTXISD::Tex2DArrayU32FloatLevel: |
757 | return "NVPTXISD::Tex2DArrayU32FloatLevel"; |
758 | case NVPTXISD::Tex2DArrayU32FloatGrad: |
759 | return "NVPTXISD::Tex2DArrayU32FloatGrad"; |
760 | case NVPTXISD::Tex3DFloatS32: return "NVPTXISD::Tex3DFloatS32"; |
761 | case NVPTXISD::Tex3DFloatFloat: return "NVPTXISD::Tex3DFloatFloat"; |
762 | case NVPTXISD::Tex3DFloatFloatLevel: |
763 | return "NVPTXISD::Tex3DFloatFloatLevel"; |
764 | case NVPTXISD::Tex3DFloatFloatGrad: |
765 | return "NVPTXISD::Tex3DFloatFloatGrad"; |
766 | case NVPTXISD::Tex3DS32S32: return "NVPTXISD::Tex3DS32S32"; |
767 | case NVPTXISD::Tex3DS32Float: return "NVPTXISD::Tex3DS32Float"; |
768 | case NVPTXISD::Tex3DS32FloatLevel: |
769 | return "NVPTXISD::Tex3DS32FloatLevel"; |
770 | case NVPTXISD::Tex3DS32FloatGrad: |
771 | return "NVPTXISD::Tex3DS32FloatGrad"; |
772 | case NVPTXISD::Tex3DU32S32: return "NVPTXISD::Tex3DU32S32"; |
773 | case NVPTXISD::Tex3DU32Float: return "NVPTXISD::Tex3DU32Float"; |
774 | case NVPTXISD::Tex3DU32FloatLevel: |
775 | return "NVPTXISD::Tex3DU32FloatLevel"; |
776 | case NVPTXISD::Tex3DU32FloatGrad: |
777 | return "NVPTXISD::Tex3DU32FloatGrad"; |
778 | case NVPTXISD::TexCubeFloatFloat: return "NVPTXISD::TexCubeFloatFloat"; |
779 | case NVPTXISD::TexCubeFloatFloatLevel: |
780 | return "NVPTXISD::TexCubeFloatFloatLevel"; |
781 | case NVPTXISD::TexCubeS32Float: return "NVPTXISD::TexCubeS32Float"; |
782 | case NVPTXISD::TexCubeS32FloatLevel: |
783 | return "NVPTXISD::TexCubeS32FloatLevel"; |
784 | case NVPTXISD::TexCubeU32Float: return "NVPTXISD::TexCubeU32Float"; |
785 | case NVPTXISD::TexCubeU32FloatLevel: |
786 | return "NVPTXISD::TexCubeU32FloatLevel"; |
787 | case NVPTXISD::TexCubeArrayFloatFloat: |
788 | return "NVPTXISD::TexCubeArrayFloatFloat"; |
789 | case NVPTXISD::TexCubeArrayFloatFloatLevel: |
790 | return "NVPTXISD::TexCubeArrayFloatFloatLevel"; |
791 | case NVPTXISD::TexCubeArrayS32Float: |
792 | return "NVPTXISD::TexCubeArrayS32Float"; |
793 | case NVPTXISD::TexCubeArrayS32FloatLevel: |
794 | return "NVPTXISD::TexCubeArrayS32FloatLevel"; |
795 | case NVPTXISD::TexCubeArrayU32Float: |
796 | return "NVPTXISD::TexCubeArrayU32Float"; |
797 | case NVPTXISD::TexCubeArrayU32FloatLevel: |
798 | return "NVPTXISD::TexCubeArrayU32FloatLevel"; |
799 | case NVPTXISD::Tld4R2DFloatFloat: |
800 | return "NVPTXISD::Tld4R2DFloatFloat"; |
801 | case NVPTXISD::Tld4G2DFloatFloat: |
802 | return "NVPTXISD::Tld4G2DFloatFloat"; |
803 | case NVPTXISD::Tld4B2DFloatFloat: |
804 | return "NVPTXISD::Tld4B2DFloatFloat"; |
805 | case NVPTXISD::Tld4A2DFloatFloat: |
806 | return "NVPTXISD::Tld4A2DFloatFloat"; |
807 | case NVPTXISD::Tld4R2DS64Float: |
808 | return "NVPTXISD::Tld4R2DS64Float"; |
809 | case NVPTXISD::Tld4G2DS64Float: |
810 | return "NVPTXISD::Tld4G2DS64Float"; |
811 | case NVPTXISD::Tld4B2DS64Float: |
812 | return "NVPTXISD::Tld4B2DS64Float"; |
813 | case NVPTXISD::Tld4A2DS64Float: |
814 | return "NVPTXISD::Tld4A2DS64Float"; |
815 | case NVPTXISD::Tld4R2DU64Float: |
816 | return "NVPTXISD::Tld4R2DU64Float"; |
817 | case NVPTXISD::Tld4G2DU64Float: |
818 | return "NVPTXISD::Tld4G2DU64Float"; |
819 | case NVPTXISD::Tld4B2DU64Float: |
820 | return "NVPTXISD::Tld4B2DU64Float"; |
821 | case NVPTXISD::Tld4A2DU64Float: |
822 | return "NVPTXISD::Tld4A2DU64Float"; |
823 | |
824 | case NVPTXISD::TexUnified1DFloatS32: |
825 | return "NVPTXISD::TexUnified1DFloatS32"; |
826 | case NVPTXISD::TexUnified1DFloatFloat: |
827 | return "NVPTXISD::TexUnified1DFloatFloat"; |
828 | case NVPTXISD::TexUnified1DFloatFloatLevel: |
829 | return "NVPTXISD::TexUnified1DFloatFloatLevel"; |
830 | case NVPTXISD::TexUnified1DFloatFloatGrad: |
831 | return "NVPTXISD::TexUnified1DFloatFloatGrad"; |
832 | case NVPTXISD::TexUnified1DS32S32: |
833 | return "NVPTXISD::TexUnified1DS32S32"; |
834 | case NVPTXISD::TexUnified1DS32Float: |
835 | return "NVPTXISD::TexUnified1DS32Float"; |
836 | case NVPTXISD::TexUnified1DS32FloatLevel: |
837 | return "NVPTXISD::TexUnified1DS32FloatLevel"; |
838 | case NVPTXISD::TexUnified1DS32FloatGrad: |
839 | return "NVPTXISD::TexUnified1DS32FloatGrad"; |
840 | case NVPTXISD::TexUnified1DU32S32: |
841 | return "NVPTXISD::TexUnified1DU32S32"; |
842 | case NVPTXISD::TexUnified1DU32Float: |
843 | return "NVPTXISD::TexUnified1DU32Float"; |
844 | case NVPTXISD::TexUnified1DU32FloatLevel: |
845 | return "NVPTXISD::TexUnified1DU32FloatLevel"; |
846 | case NVPTXISD::TexUnified1DU32FloatGrad: |
847 | return "NVPTXISD::TexUnified1DU32FloatGrad"; |
848 | case NVPTXISD::TexUnified1DArrayFloatS32: |
849 | return "NVPTXISD::TexUnified1DArrayFloatS32"; |
850 | case NVPTXISD::TexUnified1DArrayFloatFloat: |
851 | return "NVPTXISD::TexUnified1DArrayFloatFloat"; |
852 | case NVPTXISD::TexUnified1DArrayFloatFloatLevel: |
853 | return "NVPTXISD::TexUnified1DArrayFloatFloatLevel"; |
854 | case NVPTXISD::TexUnified1DArrayFloatFloatGrad: |
855 | return "NVPTXISD::TexUnified1DArrayFloatFloatGrad"; |
856 | case NVPTXISD::TexUnified1DArrayS32S32: |
857 | return "NVPTXISD::TexUnified1DArrayS32S32"; |
858 | case NVPTXISD::TexUnified1DArrayS32Float: |
859 | return "NVPTXISD::TexUnified1DArrayS32Float"; |
860 | case NVPTXISD::TexUnified1DArrayS32FloatLevel: |
861 | return "NVPTXISD::TexUnified1DArrayS32FloatLevel"; |
862 | case NVPTXISD::TexUnified1DArrayS32FloatGrad: |
863 | return "NVPTXISD::TexUnified1DArrayS32FloatGrad"; |
864 | case NVPTXISD::TexUnified1DArrayU32S32: |
865 | return "NVPTXISD::TexUnified1DArrayU32S32"; |
866 | case NVPTXISD::TexUnified1DArrayU32Float: |
867 | return "NVPTXISD::TexUnified1DArrayU32Float"; |
868 | case NVPTXISD::TexUnified1DArrayU32FloatLevel: |
869 | return "NVPTXISD::TexUnified1DArrayU32FloatLevel"; |
870 | case NVPTXISD::TexUnified1DArrayU32FloatGrad: |
871 | return "NVPTXISD::TexUnified1DArrayU32FloatGrad"; |
872 | case NVPTXISD::TexUnified2DFloatS32: |
873 | return "NVPTXISD::TexUnified2DFloatS32"; |
874 | case NVPTXISD::TexUnified2DFloatFloat: |
875 | return "NVPTXISD::TexUnified2DFloatFloat"; |
876 | case NVPTXISD::TexUnified2DFloatFloatLevel: |
877 | return "NVPTXISD::TexUnified2DFloatFloatLevel"; |
878 | case NVPTXISD::TexUnified2DFloatFloatGrad: |
879 | return "NVPTXISD::TexUnified2DFloatFloatGrad"; |
880 | case NVPTXISD::TexUnified2DS32S32: |
881 | return "NVPTXISD::TexUnified2DS32S32"; |
882 | case NVPTXISD::TexUnified2DS32Float: |
883 | return "NVPTXISD::TexUnified2DS32Float"; |
884 | case NVPTXISD::TexUnified2DS32FloatLevel: |
885 | return "NVPTXISD::TexUnified2DS32FloatLevel"; |
886 | case NVPTXISD::TexUnified2DS32FloatGrad: |
887 | return "NVPTXISD::TexUnified2DS32FloatGrad"; |
888 | case NVPTXISD::TexUnified2DU32S32: |
889 | return "NVPTXISD::TexUnified2DU32S32"; |
890 | case NVPTXISD::TexUnified2DU32Float: |
891 | return "NVPTXISD::TexUnified2DU32Float"; |
892 | case NVPTXISD::TexUnified2DU32FloatLevel: |
893 | return "NVPTXISD::TexUnified2DU32FloatLevel"; |
894 | case NVPTXISD::TexUnified2DU32FloatGrad: |
895 | return "NVPTXISD::TexUnified2DU32FloatGrad"; |
896 | case NVPTXISD::TexUnified2DArrayFloatS32: |
897 | return "NVPTXISD::TexUnified2DArrayFloatS32"; |
898 | case NVPTXISD::TexUnified2DArrayFloatFloat: |
899 | return "NVPTXISD::TexUnified2DArrayFloatFloat"; |
900 | case NVPTXISD::TexUnified2DArrayFloatFloatLevel: |
901 | return "NVPTXISD::TexUnified2DArrayFloatFloatLevel"; |
902 | case NVPTXISD::TexUnified2DArrayFloatFloatGrad: |
903 | return "NVPTXISD::TexUnified2DArrayFloatFloatGrad"; |
904 | case NVPTXISD::TexUnified2DArrayS32S32: |
905 | return "NVPTXISD::TexUnified2DArrayS32S32"; |
906 | case NVPTXISD::TexUnified2DArrayS32Float: |
907 | return "NVPTXISD::TexUnified2DArrayS32Float"; |
908 | case NVPTXISD::TexUnified2DArrayS32FloatLevel: |
909 | return "NVPTXISD::TexUnified2DArrayS32FloatLevel"; |
910 | case NVPTXISD::TexUnified2DArrayS32FloatGrad: |
911 | return "NVPTXISD::TexUnified2DArrayS32FloatGrad"; |
912 | case NVPTXISD::TexUnified2DArrayU32S32: |
913 | return "NVPTXISD::TexUnified2DArrayU32S32"; |
914 | case NVPTXISD::TexUnified2DArrayU32Float: |
915 | return "NVPTXISD::TexUnified2DArrayU32Float"; |
916 | case NVPTXISD::TexUnified2DArrayU32FloatLevel: |
917 | return "NVPTXISD::TexUnified2DArrayU32FloatLevel"; |
918 | case NVPTXISD::TexUnified2DArrayU32FloatGrad: |
919 | return "NVPTXISD::TexUnified2DArrayU32FloatGrad"; |
920 | case NVPTXISD::TexUnified3DFloatS32: |
921 | return "NVPTXISD::TexUnified3DFloatS32"; |
922 | case NVPTXISD::TexUnified3DFloatFloat: |
923 | return "NVPTXISD::TexUnified3DFloatFloat"; |
924 | case NVPTXISD::TexUnified3DFloatFloatLevel: |
925 | return "NVPTXISD::TexUnified3DFloatFloatLevel"; |
926 | case NVPTXISD::TexUnified3DFloatFloatGrad: |
927 | return "NVPTXISD::TexUnified3DFloatFloatGrad"; |
928 | case NVPTXISD::TexUnified3DS32S32: |
929 | return "NVPTXISD::TexUnified3DS32S32"; |
930 | case NVPTXISD::TexUnified3DS32Float: |
931 | return "NVPTXISD::TexUnified3DS32Float"; |
932 | case NVPTXISD::TexUnified3DS32FloatLevel: |
933 | return "NVPTXISD::TexUnified3DS32FloatLevel"; |
934 | case NVPTXISD::TexUnified3DS32FloatGrad: |
935 | return "NVPTXISD::TexUnified3DS32FloatGrad"; |
936 | case NVPTXISD::TexUnified3DU32S32: |
937 | return "NVPTXISD::TexUnified3DU32S32"; |
938 | case NVPTXISD::TexUnified3DU32Float: |
939 | return "NVPTXISD::TexUnified3DU32Float"; |
940 | case NVPTXISD::TexUnified3DU32FloatLevel: |
941 | return "NVPTXISD::TexUnified3DU32FloatLevel"; |
942 | case NVPTXISD::TexUnified3DU32FloatGrad: |
943 | return "NVPTXISD::TexUnified3DU32FloatGrad"; |
944 | case NVPTXISD::TexUnifiedCubeFloatFloat: |
945 | return "NVPTXISD::TexUnifiedCubeFloatFloat"; |
946 | case NVPTXISD::TexUnifiedCubeFloatFloatLevel: |
947 | return "NVPTXISD::TexUnifiedCubeFloatFloatLevel"; |
948 | case NVPTXISD::TexUnifiedCubeS32Float: |
949 | return "NVPTXISD::TexUnifiedCubeS32Float"; |
950 | case NVPTXISD::TexUnifiedCubeS32FloatLevel: |
951 | return "NVPTXISD::TexUnifiedCubeS32FloatLevel"; |
952 | case NVPTXISD::TexUnifiedCubeU32Float: |
953 | return "NVPTXISD::TexUnifiedCubeU32Float"; |
954 | case NVPTXISD::TexUnifiedCubeU32FloatLevel: |
955 | return "NVPTXISD::TexUnifiedCubeU32FloatLevel"; |
956 | case NVPTXISD::TexUnifiedCubeArrayFloatFloat: |
957 | return "NVPTXISD::TexUnifiedCubeArrayFloatFloat"; |
958 | case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel: |
959 | return "NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel"; |
960 | case NVPTXISD::TexUnifiedCubeArrayS32Float: |
961 | return "NVPTXISD::TexUnifiedCubeArrayS32Float"; |
962 | case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel: |
963 | return "NVPTXISD::TexUnifiedCubeArrayS32FloatLevel"; |
964 | case NVPTXISD::TexUnifiedCubeArrayU32Float: |
965 | return "NVPTXISD::TexUnifiedCubeArrayU32Float"; |
966 | case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel: |
967 | return "NVPTXISD::TexUnifiedCubeArrayU32FloatLevel"; |
968 | case NVPTXISD::Tld4UnifiedR2DFloatFloat: |
969 | return "NVPTXISD::Tld4UnifiedR2DFloatFloat"; |
970 | case NVPTXISD::Tld4UnifiedG2DFloatFloat: |
971 | return "NVPTXISD::Tld4UnifiedG2DFloatFloat"; |
972 | case NVPTXISD::Tld4UnifiedB2DFloatFloat: |
973 | return "NVPTXISD::Tld4UnifiedB2DFloatFloat"; |
974 | case NVPTXISD::Tld4UnifiedA2DFloatFloat: |
975 | return "NVPTXISD::Tld4UnifiedA2DFloatFloat"; |
976 | case NVPTXISD::Tld4UnifiedR2DS64Float: |
977 | return "NVPTXISD::Tld4UnifiedR2DS64Float"; |
978 | case NVPTXISD::Tld4UnifiedG2DS64Float: |
979 | return "NVPTXISD::Tld4UnifiedG2DS64Float"; |
980 | case NVPTXISD::Tld4UnifiedB2DS64Float: |
981 | return "NVPTXISD::Tld4UnifiedB2DS64Float"; |
982 | case NVPTXISD::Tld4UnifiedA2DS64Float: |
983 | return "NVPTXISD::Tld4UnifiedA2DS64Float"; |
984 | case NVPTXISD::Tld4UnifiedR2DU64Float: |
985 | return "NVPTXISD::Tld4UnifiedR2DU64Float"; |
986 | case NVPTXISD::Tld4UnifiedG2DU64Float: |
987 | return "NVPTXISD::Tld4UnifiedG2DU64Float"; |
988 | case NVPTXISD::Tld4UnifiedB2DU64Float: |
989 | return "NVPTXISD::Tld4UnifiedB2DU64Float"; |
990 | case NVPTXISD::Tld4UnifiedA2DU64Float: |
991 | return "NVPTXISD::Tld4UnifiedA2DU64Float"; |
992 | |
993 | case NVPTXISD::Suld1DI8Clamp: return "NVPTXISD::Suld1DI8Clamp"; |
994 | case NVPTXISD::Suld1DI16Clamp: return "NVPTXISD::Suld1DI16Clamp"; |
995 | case NVPTXISD::Suld1DI32Clamp: return "NVPTXISD::Suld1DI32Clamp"; |
996 | case NVPTXISD::Suld1DI64Clamp: return "NVPTXISD::Suld1DI64Clamp"; |
997 | case NVPTXISD::Suld1DV2I8Clamp: return "NVPTXISD::Suld1DV2I8Clamp"; |
998 | case NVPTXISD::Suld1DV2I16Clamp: return "NVPTXISD::Suld1DV2I16Clamp"; |
999 | case NVPTXISD::Suld1DV2I32Clamp: return "NVPTXISD::Suld1DV2I32Clamp"; |
1000 | case NVPTXISD::Suld1DV2I64Clamp: return "NVPTXISD::Suld1DV2I64Clamp"; |
1001 | case NVPTXISD::Suld1DV4I8Clamp: return "NVPTXISD::Suld1DV4I8Clamp"; |
1002 | case NVPTXISD::Suld1DV4I16Clamp: return "NVPTXISD::Suld1DV4I16Clamp"; |
1003 | case NVPTXISD::Suld1DV4I32Clamp: return "NVPTXISD::Suld1DV4I32Clamp"; |
1004 | |
1005 | case NVPTXISD::Suld1DArrayI8Clamp: return "NVPTXISD::Suld1DArrayI8Clamp"; |
1006 | case NVPTXISD::Suld1DArrayI16Clamp: return "NVPTXISD::Suld1DArrayI16Clamp"; |
1007 | case NVPTXISD::Suld1DArrayI32Clamp: return "NVPTXISD::Suld1DArrayI32Clamp"; |
1008 | case NVPTXISD::Suld1DArrayI64Clamp: return "NVPTXISD::Suld1DArrayI64Clamp"; |
1009 | case NVPTXISD::Suld1DArrayV2I8Clamp: return "NVPTXISD::Suld1DArrayV2I8Clamp"; |
1010 | case NVPTXISD::Suld1DArrayV2I16Clamp:return "NVPTXISD::Suld1DArrayV2I16Clamp"; |
1011 | case NVPTXISD::Suld1DArrayV2I32Clamp:return "NVPTXISD::Suld1DArrayV2I32Clamp"; |
1012 | case NVPTXISD::Suld1DArrayV2I64Clamp:return "NVPTXISD::Suld1DArrayV2I64Clamp"; |
1013 | case NVPTXISD::Suld1DArrayV4I8Clamp: return "NVPTXISD::Suld1DArrayV4I8Clamp"; |
1014 | case NVPTXISD::Suld1DArrayV4I16Clamp:return "NVPTXISD::Suld1DArrayV4I16Clamp"; |
1015 | case NVPTXISD::Suld1DArrayV4I32Clamp:return "NVPTXISD::Suld1DArrayV4I32Clamp"; |
1016 | |
1017 | case NVPTXISD::Suld2DI8Clamp: return "NVPTXISD::Suld2DI8Clamp"; |
1018 | case NVPTXISD::Suld2DI16Clamp: return "NVPTXISD::Suld2DI16Clamp"; |
1019 | case NVPTXISD::Suld2DI32Clamp: return "NVPTXISD::Suld2DI32Clamp"; |
1020 | case NVPTXISD::Suld2DI64Clamp: return "NVPTXISD::Suld2DI64Clamp"; |
1021 | case NVPTXISD::Suld2DV2I8Clamp: return "NVPTXISD::Suld2DV2I8Clamp"; |
1022 | case NVPTXISD::Suld2DV2I16Clamp: return "NVPTXISD::Suld2DV2I16Clamp"; |
1023 | case NVPTXISD::Suld2DV2I32Clamp: return "NVPTXISD::Suld2DV2I32Clamp"; |
1024 | case NVPTXISD::Suld2DV2I64Clamp: return "NVPTXISD::Suld2DV2I64Clamp"; |
1025 | case NVPTXISD::Suld2DV4I8Clamp: return "NVPTXISD::Suld2DV4I8Clamp"; |
1026 | case NVPTXISD::Suld2DV4I16Clamp: return "NVPTXISD::Suld2DV4I16Clamp"; |
1027 | case NVPTXISD::Suld2DV4I32Clamp: return "NVPTXISD::Suld2DV4I32Clamp"; |
1028 | |
1029 | case NVPTXISD::Suld2DArrayI8Clamp: return "NVPTXISD::Suld2DArrayI8Clamp"; |
1030 | case NVPTXISD::Suld2DArrayI16Clamp: return "NVPTXISD::Suld2DArrayI16Clamp"; |
1031 | case NVPTXISD::Suld2DArrayI32Clamp: return "NVPTXISD::Suld2DArrayI32Clamp"; |
1032 | case NVPTXISD::Suld2DArrayI64Clamp: return "NVPTXISD::Suld2DArrayI64Clamp"; |
1033 | case NVPTXISD::Suld2DArrayV2I8Clamp: return "NVPTXISD::Suld2DArrayV2I8Clamp"; |
1034 | case NVPTXISD::Suld2DArrayV2I16Clamp:return "NVPTXISD::Suld2DArrayV2I16Clamp"; |
1035 | case NVPTXISD::Suld2DArrayV2I32Clamp:return "NVPTXISD::Suld2DArrayV2I32Clamp"; |
1036 | case NVPTXISD::Suld2DArrayV2I64Clamp:return "NVPTXISD::Suld2DArrayV2I64Clamp"; |
1037 | case NVPTXISD::Suld2DArrayV4I8Clamp: return "NVPTXISD::Suld2DArrayV4I8Clamp"; |
1038 | case NVPTXISD::Suld2DArrayV4I16Clamp:return "NVPTXISD::Suld2DArrayV4I16Clamp"; |
1039 | case NVPTXISD::Suld2DArrayV4I32Clamp:return "NVPTXISD::Suld2DArrayV4I32Clamp"; |
1040 | |
1041 | case NVPTXISD::Suld3DI8Clamp: return "NVPTXISD::Suld3DI8Clamp"; |
1042 | case NVPTXISD::Suld3DI16Clamp: return "NVPTXISD::Suld3DI16Clamp"; |
1043 | case NVPTXISD::Suld3DI32Clamp: return "NVPTXISD::Suld3DI32Clamp"; |
1044 | case NVPTXISD::Suld3DI64Clamp: return "NVPTXISD::Suld3DI64Clamp"; |
1045 | case NVPTXISD::Suld3DV2I8Clamp: return "NVPTXISD::Suld3DV2I8Clamp"; |
1046 | case NVPTXISD::Suld3DV2I16Clamp: return "NVPTXISD::Suld3DV2I16Clamp"; |
1047 | case NVPTXISD::Suld3DV2I32Clamp: return "NVPTXISD::Suld3DV2I32Clamp"; |
1048 | case NVPTXISD::Suld3DV2I64Clamp: return "NVPTXISD::Suld3DV2I64Clamp"; |
1049 | case NVPTXISD::Suld3DV4I8Clamp: return "NVPTXISD::Suld3DV4I8Clamp"; |
1050 | case NVPTXISD::Suld3DV4I16Clamp: return "NVPTXISD::Suld3DV4I16Clamp"; |
1051 | case NVPTXISD::Suld3DV4I32Clamp: return "NVPTXISD::Suld3DV4I32Clamp"; |
1052 | |
1053 | case NVPTXISD::Suld1DI8Trap: return "NVPTXISD::Suld1DI8Trap"; |
1054 | case NVPTXISD::Suld1DI16Trap: return "NVPTXISD::Suld1DI16Trap"; |
1055 | case NVPTXISD::Suld1DI32Trap: return "NVPTXISD::Suld1DI32Trap"; |
1056 | case NVPTXISD::Suld1DI64Trap: return "NVPTXISD::Suld1DI64Trap"; |
1057 | case NVPTXISD::Suld1DV2I8Trap: return "NVPTXISD::Suld1DV2I8Trap"; |
1058 | case NVPTXISD::Suld1DV2I16Trap: return "NVPTXISD::Suld1DV2I16Trap"; |
1059 | case NVPTXISD::Suld1DV2I32Trap: return "NVPTXISD::Suld1DV2I32Trap"; |
1060 | case NVPTXISD::Suld1DV2I64Trap: return "NVPTXISD::Suld1DV2I64Trap"; |
1061 | case NVPTXISD::Suld1DV4I8Trap: return "NVPTXISD::Suld1DV4I8Trap"; |
1062 | case NVPTXISD::Suld1DV4I16Trap: return "NVPTXISD::Suld1DV4I16Trap"; |
1063 | case NVPTXISD::Suld1DV4I32Trap: return "NVPTXISD::Suld1DV4I32Trap"; |
1064 | |
1065 | case NVPTXISD::Suld1DArrayI8Trap: return "NVPTXISD::Suld1DArrayI8Trap"; |
1066 | case NVPTXISD::Suld1DArrayI16Trap: return "NVPTXISD::Suld1DArrayI16Trap"; |
1067 | case NVPTXISD::Suld1DArrayI32Trap: return "NVPTXISD::Suld1DArrayI32Trap"; |
1068 | case NVPTXISD::Suld1DArrayI64Trap: return "NVPTXISD::Suld1DArrayI64Trap"; |
1069 | case NVPTXISD::Suld1DArrayV2I8Trap: return "NVPTXISD::Suld1DArrayV2I8Trap"; |
1070 | case NVPTXISD::Suld1DArrayV2I16Trap: return "NVPTXISD::Suld1DArrayV2I16Trap"; |
1071 | case NVPTXISD::Suld1DArrayV2I32Trap: return "NVPTXISD::Suld1DArrayV2I32Trap"; |
1072 | case NVPTXISD::Suld1DArrayV2I64Trap: return "NVPTXISD::Suld1DArrayV2I64Trap"; |
1073 | case NVPTXISD::Suld1DArrayV4I8Trap: return "NVPTXISD::Suld1DArrayV4I8Trap"; |
1074 | case NVPTXISD::Suld1DArrayV4I16Trap: return "NVPTXISD::Suld1DArrayV4I16Trap"; |
1075 | case NVPTXISD::Suld1DArrayV4I32Trap: return "NVPTXISD::Suld1DArrayV4I32Trap"; |
1076 | |
1077 | case NVPTXISD::Suld2DI8Trap: return "NVPTXISD::Suld2DI8Trap"; |
1078 | case NVPTXISD::Suld2DI16Trap: return "NVPTXISD::Suld2DI16Trap"; |
1079 | case NVPTXISD::Suld2DI32Trap: return "NVPTXISD::Suld2DI32Trap"; |
1080 | case NVPTXISD::Suld2DI64Trap: return "NVPTXISD::Suld2DI64Trap"; |
1081 | case NVPTXISD::Suld2DV2I8Trap: return "NVPTXISD::Suld2DV2I8Trap"; |
1082 | case NVPTXISD::Suld2DV2I16Trap: return "NVPTXISD::Suld2DV2I16Trap"; |
1083 | case NVPTXISD::Suld2DV2I32Trap: return "NVPTXISD::Suld2DV2I32Trap"; |
1084 | case NVPTXISD::Suld2DV2I64Trap: return "NVPTXISD::Suld2DV2I64Trap"; |
1085 | case NVPTXISD::Suld2DV4I8Trap: return "NVPTXISD::Suld2DV4I8Trap"; |
1086 | case NVPTXISD::Suld2DV4I16Trap: return "NVPTXISD::Suld2DV4I16Trap"; |
1087 | case NVPTXISD::Suld2DV4I32Trap: return "NVPTXISD::Suld2DV4I32Trap"; |
1088 | |
1089 | case NVPTXISD::Suld2DArrayI8Trap: return "NVPTXISD::Suld2DArrayI8Trap"; |
1090 | case NVPTXISD::Suld2DArrayI16Trap: return "NVPTXISD::Suld2DArrayI16Trap"; |
1091 | case NVPTXISD::Suld2DArrayI32Trap: return "NVPTXISD::Suld2DArrayI32Trap"; |
1092 | case NVPTXISD::Suld2DArrayI64Trap: return "NVPTXISD::Suld2DArrayI64Trap"; |
1093 | case NVPTXISD::Suld2DArrayV2I8Trap: return "NVPTXISD::Suld2DArrayV2I8Trap"; |
1094 | case NVPTXISD::Suld2DArrayV2I16Trap: return "NVPTXISD::Suld2DArrayV2I16Trap"; |
1095 | case NVPTXISD::Suld2DArrayV2I32Trap: return "NVPTXISD::Suld2DArrayV2I32Trap"; |
1096 | case NVPTXISD::Suld2DArrayV2I64Trap: return "NVPTXISD::Suld2DArrayV2I64Trap"; |
1097 | case NVPTXISD::Suld2DArrayV4I8Trap: return "NVPTXISD::Suld2DArrayV4I8Trap"; |
1098 | case NVPTXISD::Suld2DArrayV4I16Trap: return "NVPTXISD::Suld2DArrayV4I16Trap"; |
1099 | case NVPTXISD::Suld2DArrayV4I32Trap: return "NVPTXISD::Suld2DArrayV4I32Trap"; |
1100 | |
1101 | case NVPTXISD::Suld3DI8Trap: return "NVPTXISD::Suld3DI8Trap"; |
1102 | case NVPTXISD::Suld3DI16Trap: return "NVPTXISD::Suld3DI16Trap"; |
1103 | case NVPTXISD::Suld3DI32Trap: return "NVPTXISD::Suld3DI32Trap"; |
1104 | case NVPTXISD::Suld3DI64Trap: return "NVPTXISD::Suld3DI64Trap"; |
1105 | case NVPTXISD::Suld3DV2I8Trap: return "NVPTXISD::Suld3DV2I8Trap"; |
1106 | case NVPTXISD::Suld3DV2I16Trap: return "NVPTXISD::Suld3DV2I16Trap"; |
1107 | case NVPTXISD::Suld3DV2I32Trap: return "NVPTXISD::Suld3DV2I32Trap"; |
1108 | case NVPTXISD::Suld3DV2I64Trap: return "NVPTXISD::Suld3DV2I64Trap"; |
1109 | case NVPTXISD::Suld3DV4I8Trap: return "NVPTXISD::Suld3DV4I8Trap"; |
1110 | case NVPTXISD::Suld3DV4I16Trap: return "NVPTXISD::Suld3DV4I16Trap"; |
1111 | case NVPTXISD::Suld3DV4I32Trap: return "NVPTXISD::Suld3DV4I32Trap"; |
1112 | |
1113 | case NVPTXISD::Suld1DI8Zero: return "NVPTXISD::Suld1DI8Zero"; |
1114 | case NVPTXISD::Suld1DI16Zero: return "NVPTXISD::Suld1DI16Zero"; |
1115 | case NVPTXISD::Suld1DI32Zero: return "NVPTXISD::Suld1DI32Zero"; |
1116 | case NVPTXISD::Suld1DI64Zero: return "NVPTXISD::Suld1DI64Zero"; |
1117 | case NVPTXISD::Suld1DV2I8Zero: return "NVPTXISD::Suld1DV2I8Zero"; |
1118 | case NVPTXISD::Suld1DV2I16Zero: return "NVPTXISD::Suld1DV2I16Zero"; |
1119 | case NVPTXISD::Suld1DV2I32Zero: return "NVPTXISD::Suld1DV2I32Zero"; |
1120 | case NVPTXISD::Suld1DV2I64Zero: return "NVPTXISD::Suld1DV2I64Zero"; |
1121 | case NVPTXISD::Suld1DV4I8Zero: return "NVPTXISD::Suld1DV4I8Zero"; |
1122 | case NVPTXISD::Suld1DV4I16Zero: return "NVPTXISD::Suld1DV4I16Zero"; |
1123 | case NVPTXISD::Suld1DV4I32Zero: return "NVPTXISD::Suld1DV4I32Zero"; |
1124 | |
1125 | case NVPTXISD::Suld1DArrayI8Zero: return "NVPTXISD::Suld1DArrayI8Zero"; |
1126 | case NVPTXISD::Suld1DArrayI16Zero: return "NVPTXISD::Suld1DArrayI16Zero"; |
1127 | case NVPTXISD::Suld1DArrayI32Zero: return "NVPTXISD::Suld1DArrayI32Zero"; |
1128 | case NVPTXISD::Suld1DArrayI64Zero: return "NVPTXISD::Suld1DArrayI64Zero"; |
1129 | case NVPTXISD::Suld1DArrayV2I8Zero: return "NVPTXISD::Suld1DArrayV2I8Zero"; |
1130 | case NVPTXISD::Suld1DArrayV2I16Zero: return "NVPTXISD::Suld1DArrayV2I16Zero"; |
1131 | case NVPTXISD::Suld1DArrayV2I32Zero: return "NVPTXISD::Suld1DArrayV2I32Zero"; |
1132 | case NVPTXISD::Suld1DArrayV2I64Zero: return "NVPTXISD::Suld1DArrayV2I64Zero"; |
1133 | case NVPTXISD::Suld1DArrayV4I8Zero: return "NVPTXISD::Suld1DArrayV4I8Zero"; |
1134 | case NVPTXISD::Suld1DArrayV4I16Zero: return "NVPTXISD::Suld1DArrayV4I16Zero"; |
1135 | case NVPTXISD::Suld1DArrayV4I32Zero: return "NVPTXISD::Suld1DArrayV4I32Zero"; |
1136 | |
1137 | case NVPTXISD::Suld2DI8Zero: return "NVPTXISD::Suld2DI8Zero"; |
1138 | case NVPTXISD::Suld2DI16Zero: return "NVPTXISD::Suld2DI16Zero"; |
1139 | case NVPTXISD::Suld2DI32Zero: return "NVPTXISD::Suld2DI32Zero"; |
1140 | case NVPTXISD::Suld2DI64Zero: return "NVPTXISD::Suld2DI64Zero"; |
1141 | case NVPTXISD::Suld2DV2I8Zero: return "NVPTXISD::Suld2DV2I8Zero"; |
1142 | case NVPTXISD::Suld2DV2I16Zero: return "NVPTXISD::Suld2DV2I16Zero"; |
1143 | case NVPTXISD::Suld2DV2I32Zero: return "NVPTXISD::Suld2DV2I32Zero"; |
1144 | case NVPTXISD::Suld2DV2I64Zero: return "NVPTXISD::Suld2DV2I64Zero"; |
1145 | case NVPTXISD::Suld2DV4I8Zero: return "NVPTXISD::Suld2DV4I8Zero"; |
1146 | case NVPTXISD::Suld2DV4I16Zero: return "NVPTXISD::Suld2DV4I16Zero"; |
1147 | case NVPTXISD::Suld2DV4I32Zero: return "NVPTXISD::Suld2DV4I32Zero"; |
1148 | |
1149 | case NVPTXISD::Suld2DArrayI8Zero: return "NVPTXISD::Suld2DArrayI8Zero"; |
1150 | case NVPTXISD::Suld2DArrayI16Zero: return "NVPTXISD::Suld2DArrayI16Zero"; |
1151 | case NVPTXISD::Suld2DArrayI32Zero: return "NVPTXISD::Suld2DArrayI32Zero"; |
1152 | case NVPTXISD::Suld2DArrayI64Zero: return "NVPTXISD::Suld2DArrayI64Zero"; |
1153 | case NVPTXISD::Suld2DArrayV2I8Zero: return "NVPTXISD::Suld2DArrayV2I8Zero"; |
1154 | case NVPTXISD::Suld2DArrayV2I16Zero: return "NVPTXISD::Suld2DArrayV2I16Zero"; |
1155 | case NVPTXISD::Suld2DArrayV2I32Zero: return "NVPTXISD::Suld2DArrayV2I32Zero"; |
1156 | case NVPTXISD::Suld2DArrayV2I64Zero: return "NVPTXISD::Suld2DArrayV2I64Zero"; |
1157 | case NVPTXISD::Suld2DArrayV4I8Zero: return "NVPTXISD::Suld2DArrayV4I8Zero"; |
1158 | case NVPTXISD::Suld2DArrayV4I16Zero: return "NVPTXISD::Suld2DArrayV4I16Zero"; |
1159 | case NVPTXISD::Suld2DArrayV4I32Zero: return "NVPTXISD::Suld2DArrayV4I32Zero"; |
1160 | |
1161 | case NVPTXISD::Suld3DI8Zero: return "NVPTXISD::Suld3DI8Zero"; |
1162 | case NVPTXISD::Suld3DI16Zero: return "NVPTXISD::Suld3DI16Zero"; |
1163 | case NVPTXISD::Suld3DI32Zero: return "NVPTXISD::Suld3DI32Zero"; |
1164 | case NVPTXISD::Suld3DI64Zero: return "NVPTXISD::Suld3DI64Zero"; |
1165 | case NVPTXISD::Suld3DV2I8Zero: return "NVPTXISD::Suld3DV2I8Zero"; |
1166 | case NVPTXISD::Suld3DV2I16Zero: return "NVPTXISD::Suld3DV2I16Zero"; |
1167 | case NVPTXISD::Suld3DV2I32Zero: return "NVPTXISD::Suld3DV2I32Zero"; |
1168 | case NVPTXISD::Suld3DV2I64Zero: return "NVPTXISD::Suld3DV2I64Zero"; |
1169 | case NVPTXISD::Suld3DV4I8Zero: return "NVPTXISD::Suld3DV4I8Zero"; |
1170 | case NVPTXISD::Suld3DV4I16Zero: return "NVPTXISD::Suld3DV4I16Zero"; |
1171 | case NVPTXISD::Suld3DV4I32Zero: return "NVPTXISD::Suld3DV4I32Zero"; |
1172 | } |
1173 | return nullptr; |
1174 | } |
1175 | |
1176 | TargetLoweringBase::LegalizeTypeAction |
1177 | NVPTXTargetLowering::getPreferredVectorAction(MVT VT) const { |
1178 | if (!VT.isScalableVector() && VT.getVectorNumElements() != 1 && |
1179 | VT.getScalarType() == MVT::i1) |
1180 | return TypeSplitVector; |
1181 | if (VT == MVT::v2f16) |
1182 | return TypeLegal; |
1183 | return TargetLoweringBase::getPreferredVectorAction(VT); |
1184 | } |
1185 | |
1186 | SDValue NVPTXTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, |
1187 | int Enabled, int &ExtraSteps, |
1188 | bool &UseOneConst, |
1189 | bool Reciprocal) const { |
1190 | if (!(Enabled == ReciprocalEstimate::Enabled || |
1191 | (Enabled == ReciprocalEstimate::Unspecified && !usePrecSqrtF32()))) |
1192 | return SDValue(); |
1193 | |
1194 | if (ExtraSteps == ReciprocalEstimate::Unspecified) |
1195 | ExtraSteps = 0; |
1196 | |
1197 | SDLoc DL(Operand); |
1198 | EVT VT = Operand.getValueType(); |
1199 | bool Ftz = useF32FTZ(DAG.getMachineFunction()); |
1200 | |
1201 | auto MakeIntrinsicCall = [&](Intrinsic::ID IID) { |
1202 | return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT, |
1203 | DAG.getConstant(IID, DL, MVT::i32), Operand); |
1204 | }; |
1205 | |
1206 | |
1207 | |
1208 | |
1209 | |
1210 | if (Reciprocal || ExtraSteps > 0) { |
1211 | if (VT == MVT::f32) |
1212 | return MakeIntrinsicCall(Ftz ? Intrinsic::nvvm_rsqrt_approx_ftz_f |
1213 | : Intrinsic::nvvm_rsqrt_approx_f); |
1214 | else if (VT == MVT::f64) |
1215 | return MakeIntrinsicCall(Intrinsic::nvvm_rsqrt_approx_d); |
1216 | else |
1217 | return SDValue(); |
1218 | } else { |
1219 | if (VT == MVT::f32) |
1220 | return MakeIntrinsicCall(Ftz ? Intrinsic::nvvm_sqrt_approx_ftz_f |
1221 | : Intrinsic::nvvm_sqrt_approx_f); |
1222 | else { |
1223 | |
1224 | |
1225 | |
1226 | |
1227 | return DAG.getNode( |
1228 | ISD::INTRINSIC_WO_CHAIN, DL, VT, |
1229 | DAG.getConstant(Intrinsic::nvvm_rcp_approx_ftz_d, DL, MVT::i32), |
1230 | MakeIntrinsicCall(Intrinsic::nvvm_rsqrt_approx_d)); |
1231 | } |
1232 | } |
1233 | } |
1234 | |
1235 | SDValue |
1236 | NVPTXTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { |
1237 | SDLoc dl(Op); |
1238 | const GlobalAddressSDNode *GAN = cast<GlobalAddressSDNode>(Op); |
1239 | auto PtrVT = getPointerTy(DAG.getDataLayout(), GAN->getAddressSpace()); |
1240 | Op = DAG.getTargetGlobalAddress(GAN->getGlobal(), dl, PtrVT); |
1241 | return DAG.getNode(NVPTXISD::Wrapper, dl, PtrVT, Op); |
1242 | } |
1243 | |
1244 | std::string NVPTXTargetLowering::getPrototype( |
1245 | const DataLayout &DL, Type *retTy, const ArgListTy &Args, |
1246 | const SmallVectorImpl<ISD::OutputArg> &Outs, MaybeAlign retAlignment, |
1247 | const CallBase &CB, unsigned UniqueCallSite) const { |
1248 | auto PtrVT = getPointerTy(DL); |
1249 | |
1250 | bool isABI = (STI.getSmVersion() >= 20); |
1251 | assert(isABI && "Non-ABI compilation is not supported"); |
1252 | if (!isABI) |
1253 | return ""; |
1254 | |
1255 | std::stringstream O; |
1256 | O << "prototype_" << UniqueCallSite << " : .callprototype "; |
1257 | |
1258 | if (retTy->getTypeID() == Type::VoidTyID) { |
1259 | O << "()"; |
1260 | } else { |
1261 | O << "("; |
1262 | if (retTy->isFloatingPointTy() || (retTy->isIntegerTy() && !retTy->isIntegerTy(128))) { |
1263 | unsigned size = 0; |
1264 | if (auto *ITy = dyn_cast<IntegerType>(retTy)) { |
1265 | size = ITy->getBitWidth(); |
1266 | } else { |
1267 | assert(retTy->isFloatingPointTy() && |
1268 | "Floating point type expected here"); |
1269 | size = retTy->getPrimitiveSizeInBits(); |
1270 | } |
1271 | |
1272 | |
1273 | |
1274 | if (size < 32) |
1275 | size = 32; |
1276 | |
1277 | O << ".param .b" << size << " _"; |
1278 | } else if (isa<PointerType>(retTy)) { |
1279 | O << ".param .b" << PtrVT.getSizeInBits() << " _"; |
1280 | } else if (retTy->isAggregateType() || retTy->isVectorTy() || |
1281 | retTy->isIntegerTy(128)) { |
1282 | O << ".param .align " << (retAlignment ? retAlignment->value() : 0) |
1283 | << " .b8 _[" << DL.getTypeAllocSize(retTy) << "]"; |
1284 | } else { |
1285 | llvm_unreachable("Unknown return type"); |
1286 | } |
1287 | O << ") "; |
1288 | } |
1289 | O << "_ ("; |
1290 | |
1291 | bool first = true; |
1292 | |
1293 | unsigned OIdx = 0; |
1294 | for (unsigned i = 0, e = Args.size(); i != e; ++i, ++OIdx) { |
1295 | Type *Ty = Args[i].Ty; |
1296 | if (!first) { |
1297 | O << ", "; |
1298 | } |
1299 | first = false; |
1300 | |
1301 | if (!Outs[OIdx].Flags.isByVal()) { |
1302 | if (Ty->isAggregateType() || Ty->isVectorTy() || Ty->isIntegerTy(128)) { |
1303 | unsigned align = 0; |
1304 | const CallInst *CallI = cast<CallInst>(&CB); |
1305 | |
1306 | if (!getAlign(*CallI, i + 1, align)) |
1307 | align = DL.getABITypeAlignment(Ty); |
1308 | unsigned sz = DL.getTypeAllocSize(Ty); |
1309 | O << ".param .align " << align << " .b8 "; |
1310 | O << "_"; |
1311 | O << "[" << sz << "]"; |
1312 | |
1313 | SmallVector<EVT, 16> vtparts; |
1314 | ComputeValueVTs(*this, DL, Ty, vtparts); |
1315 | if (unsigned len = vtparts.size()) |
1316 | OIdx += len - 1; |
1317 | continue; |
1318 | } |
1319 | |
1320 | assert((getValueType(DL, Ty) == Outs[OIdx].VT || |
1321 | (getValueType(DL, Ty) == MVT::i8 && Outs[OIdx].VT == MVT::i16)) && |
1322 | "type mismatch between callee prototype and arguments"); |
1323 | |
1324 | unsigned sz = 0; |
1325 | if (isa<IntegerType>(Ty)) { |
1326 | sz = cast<IntegerType>(Ty)->getBitWidth(); |
1327 | if (sz < 32) |
1328 | sz = 32; |
1329 | } else if (isa<PointerType>(Ty)) { |
1330 | sz = PtrVT.getSizeInBits(); |
1331 | } else if (Ty->isHalfTy()) |
1332 | |
1333 | |
1334 | |
1335 | sz = 32; |
1336 | else |
1337 | sz = Ty->getPrimitiveSizeInBits(); |
1338 | O << ".param .b" << sz << " "; |
1339 | O << "_"; |
1340 | continue; |
1341 | } |
1342 | auto *PTy = dyn_cast<PointerType>(Ty); |
1343 | assert(PTy && "Param with byval attribute should be a pointer type"); |
1344 | Type *ETy = PTy->getElementType(); |
1345 | |
1346 | Align align = Outs[OIdx].Flags.getNonZeroByValAlign(); |
1347 | unsigned sz = DL.getTypeAllocSize(ETy); |
1348 | O << ".param .align " << align.value() << " .b8 "; |
1349 | O << "_"; |
1350 | O << "[" << sz << "]"; |
1351 | } |
1352 | O << ");"; |
1353 | return O.str(); |
1354 | } |
1355 | |
1356 | Align NVPTXTargetLowering::getArgumentAlignment(SDValue Callee, |
1357 | const CallBase *CB, Type *Ty, |
1358 | unsigned Idx, |
1359 | const DataLayout &DL) const { |
1360 | if (!CB) { |
1361 | |
1362 | return DL.getABITypeAlign(Ty); |
1363 | } |
1364 | |
1365 | unsigned Alignment = 0; |
1366 | const Function *DirectCallee = CB->getCalledFunction(); |
1367 | |
1368 | if (!DirectCallee) { |
1369 | |
1370 | |
1371 | |
1372 | |
1373 | if (const auto *CI = dyn_cast<CallInst>(CB)) { |
1374 | |
1375 | if (getAlign(*CI, Idx, Alignment)) |
1376 | return Align(Alignment); |
1377 | |
1378 | const Value *CalleeV = CI->getCalledOperand(); |
1379 | |
1380 | while (isa<ConstantExpr>(CalleeV)) { |
1381 | const ConstantExpr *CE = cast<ConstantExpr>(CalleeV); |
1382 | if (!CE->isCast()) |
1383 | break; |
1384 | |
1385 | CalleeV = cast<ConstantExpr>(CalleeV)->getOperand(0); |
1386 | } |
1387 | |
1388 | |
1389 | |
1390 | if (const auto *CalleeF = dyn_cast<Function>(CalleeV)) |
1391 | DirectCallee = CalleeF; |
1392 | } |
1393 | } |
1394 | |
1395 | |
1396 | |
1397 | if (DirectCallee) |
1398 | if (getAlign(*DirectCallee, Idx, Alignment)) |
1399 | return Align(Alignment); |
1400 | |
1401 | |
1402 | |
1403 | return DL.getABITypeAlign(Ty); |
1404 | } |
1405 | |
1406 | SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, |
1407 | SmallVectorImpl<SDValue> &InVals) const { |
1408 | SelectionDAG &DAG = CLI.DAG; |
1409 | SDLoc dl = CLI.DL; |
1410 | SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; |
1411 | SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; |
1412 | SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; |
1413 | SDValue Chain = CLI.Chain; |
1414 | SDValue Callee = CLI.Callee; |
1415 | bool &isTailCall = CLI.IsTailCall; |
1416 | ArgListTy &Args = CLI.getArgs(); |
1417 | Type *RetTy = CLI.RetTy; |
1418 | const CallBase *CB = CLI.CB; |
1419 | const DataLayout &DL = DAG.getDataLayout(); |
1420 | |
1421 | bool isABI = (STI.getSmVersion() >= 20); |
| 1 | Assuming the condition is true | |
|
1422 | assert(isABI && "Non-ABI compilation is not supported"); |
1423 | if (!isABI) |
| |
1424 | return Chain; |
1425 | |
1426 | unsigned UniqueCallSite = GlobalUniqueCallSite.fetch_add(1); |
1427 | SDValue tempChain = Chain; |
1428 | Chain = DAG.getCALLSEQ_START(Chain, UniqueCallSite, 0, dl); |
1429 | SDValue InFlag = Chain.getValue(1); |
1430 | |
1431 | unsigned paramCount = 0; |
1432 | |
1433 | |
1434 | |
1435 | |
1436 | |
1437 | |
1438 | |
1439 | |
1440 | |
1441 | unsigned OIdx = 0; |
1442 | |
1443 | |
1444 | for (unsigned i = 0, e = Args.size(); i != e; ++i, ++OIdx) { |
| 3 | | Assuming 'i' is not equal to 'e' | |
|
| 4 | | Loop condition is true. Entering loop body | |
|
1445 | EVT VT = Outs[OIdx].VT; |
1446 | Type *Ty = Args[i].Ty; |
1447 | |
1448 | if (!Outs[OIdx].Flags.isByVal()) { |
| 5 | | Assuming the condition is false | |
|
| |
1449 | SmallVector<EVT, 16> VTs; |
1450 | SmallVector<uint64_t, 16> Offsets; |
1451 | ComputePTXValueVTs(*this, DL, Ty, VTs, &Offsets); |
1452 | Align ArgAlign = getArgumentAlignment(Callee, CB, Ty, paramCount + 1, DL); |
1453 | unsigned AllocSize = DL.getTypeAllocSize(Ty); |
1454 | SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); |
1455 | bool NeedAlign; |
1456 | if (Ty->isAggregateType() || Ty->isVectorTy() || Ty->isIntegerTy(128)) { |
1457 | |
1458 | SDValue DeclareParamOps[] = { |
1459 | Chain, DAG.getConstant(ArgAlign.value(), dl, MVT::i32), |
1460 | DAG.getConstant(paramCount, dl, MVT::i32), |
1461 | DAG.getConstant(AllocSize, dl, MVT::i32), InFlag}; |
1462 | Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs, |
1463 | DeclareParamOps); |
1464 | NeedAlign = true; |
1465 | } else { |
1466 | |
1467 | if ((VT.isInteger() || VT.isFloatingPoint()) && AllocSize < 4) { |
1468 | |
1469 | |
1470 | |
1471 | AllocSize = 4; |
1472 | } |
1473 | SDValue DeclareScalarParamOps[] = { |
1474 | Chain, DAG.getConstant(paramCount, dl, MVT::i32), |
1475 | DAG.getConstant(AllocSize * 8, dl, MVT::i32), |
1476 | DAG.getConstant(0, dl, MVT::i32), InFlag}; |
1477 | Chain = DAG.getNode(NVPTXISD::DeclareScalarParam, dl, DeclareParamVTs, |
1478 | DeclareScalarParamOps); |
1479 | NeedAlign = false; |
1480 | } |
1481 | InFlag = Chain.getValue(1); |
1482 | |
1483 | |
1484 | |
1485 | |
1486 | |
1487 | bool ExtendIntegerParam = |
1488 | Ty->isIntegerTy() && DL.getTypeAllocSizeInBits(Ty) < 32; |
1489 | |
1490 | auto VectorInfo = VectorizePTXValueVTs(VTs, Offsets, ArgAlign); |
1491 | SmallVector<SDValue, 6> StoreOperands; |
1492 | for (unsigned j = 0, je = VTs.size(); j != je; ++j) { |
1493 | |
1494 | if (VectorInfo[j] & PVF_FIRST) { |
1495 | assert(StoreOperands.empty() && "Unfinished preceding store."); |
1496 | StoreOperands.push_back(Chain); |
1497 | StoreOperands.push_back(DAG.getConstant(paramCount, dl, MVT::i32)); |
1498 | StoreOperands.push_back(DAG.getConstant(Offsets[j], dl, MVT::i32)); |
1499 | } |
1500 | |
1501 | EVT EltVT = VTs[j]; |
1502 | SDValue StVal = OutVals[OIdx]; |
1503 | if (ExtendIntegerParam) { |
1504 | assert(VTs.size() == 1 && "Scalar can't have multiple parts."); |
1505 | |
1506 | StVal = DAG.getNode(Outs[OIdx].Flags.isSExt() ? ISD::SIGN_EXTEND |
1507 | : ISD::ZERO_EXTEND, |
1508 | dl, MVT::i32, StVal); |
1509 | } else if (EltVT.getSizeInBits() < 16) { |
1510 | |
1511 | |
1512 | StVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, StVal); |
1513 | } |
1514 | |
1515 | |
1516 | StoreOperands.push_back(StVal); |
1517 | |
1518 | if (VectorInfo[j] & PVF_LAST) { |
1519 | unsigned NumElts = StoreOperands.size() - 3; |
1520 | NVPTXISD::NodeType Op; |
1521 | switch (NumElts) { |
1522 | case 1: |
1523 | Op = NVPTXISD::StoreParam; |
1524 | break; |
1525 | case 2: |
1526 | Op = NVPTXISD::StoreParamV2; |
1527 | break; |
1528 | case 4: |
1529 | Op = NVPTXISD::StoreParamV4; |
1530 | break; |
1531 | default: |
1532 | llvm_unreachable("Invalid vector info."); |
1533 | } |
1534 | |
1535 | StoreOperands.push_back(InFlag); |
1536 | |
1537 | |
1538 | |
1539 | EVT TheStoreType = ExtendIntegerParam ? MVT::i32 : VTs[j]; |
1540 | MaybeAlign EltAlign; |
1541 | if (NeedAlign) |
1542 | EltAlign = commonAlignment(ArgAlign, Offsets[j]); |
1543 | |
1544 | Chain = DAG.getMemIntrinsicNode( |
1545 | Op, dl, DAG.getVTList(MVT::Other, MVT::Glue), StoreOperands, |
1546 | TheStoreType, MachinePointerInfo(), EltAlign, |
1547 | MachineMemOperand::MOStore); |
1548 | InFlag = Chain.getValue(1); |
1549 | |
1550 | |
1551 | StoreOperands.clear(); |
1552 | } |
1553 | ++OIdx; |
1554 | } |
1555 | assert(StoreOperands.empty() && "Unfinished parameter store."); |
1556 | if (VTs.size() > 0) |
1557 | --OIdx; |
1558 | ++paramCount; |
1559 | continue; |
1560 | } |
1561 | |
1562 | |
1563 | SmallVector<EVT, 16> VTs; |
1564 | SmallVector<uint64_t, 16> Offsets; |
1565 | auto *PTy = dyn_cast<PointerType>(Args[i].Ty); |
| 7 | | Assuming field 'Ty' is not a 'PointerType' | |
|
| 8 | | 'PTy' initialized to a null pointer value | |
|
1566 | assert(PTy && "Type of a byval parameter should be pointer"); |
1567 | ComputePTXValueVTs(*this, DL, PTy->getElementType(), VTs, &Offsets, 0); |
| 9 | | Called C++ object pointer is null |
|
1568 | |
1569 | |
1570 | unsigned sz = Outs[OIdx].Flags.getByValSize(); |
1571 | SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); |
1572 | Align ArgAlign = Outs[OIdx].Flags.getNonZeroByValAlign(); |
1573 | |
1574 | |
1575 | |
1576 | |
1577 | |
1578 | |
1579 | |
1580 | if (ArgAlign < Align(4)) |
1581 | ArgAlign = Align(4); |
1582 | SDValue DeclareParamOps[] = { |
1583 | Chain, DAG.getConstant(ArgAlign.value(), dl, MVT::i32), |
1584 | DAG.getConstant(paramCount, dl, MVT::i32), |
1585 | DAG.getConstant(sz, dl, MVT::i32), InFlag}; |
1586 | Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs, |
1587 | DeclareParamOps); |
1588 | InFlag = Chain.getValue(1); |
1589 | for (unsigned j = 0, je = VTs.size(); j != je; ++j) { |
1590 | EVT elemtype = VTs[j]; |
1591 | int curOffset = Offsets[j]; |
1592 | unsigned PartAlign = GreatestCommonDivisor64(ArgAlign.value(), curOffset); |
1593 | auto PtrVT = getPointerTy(DL); |
1594 | SDValue srcAddr = DAG.getNode(ISD::ADD, dl, PtrVT, OutVals[OIdx], |
1595 | DAG.getConstant(curOffset, dl, PtrVT)); |
1596 | SDValue theVal = DAG.getLoad(elemtype, dl, tempChain, srcAddr, |
1597 | MachinePointerInfo(), PartAlign); |
1598 | if (elemtype.getSizeInBits() < 16) { |
1599 | theVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, theVal); |
1600 | } |
1601 | SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); |
1602 | SDValue CopyParamOps[] = { Chain, |
1603 | DAG.getConstant(paramCount, dl, MVT::i32), |
1604 | DAG.getConstant(curOffset, dl, MVT::i32), |
1605 | theVal, InFlag }; |
1606 | Chain = DAG.getMemIntrinsicNode( |
1607 | NVPTXISD::StoreParam, dl, CopyParamVTs, CopyParamOps, elemtype, |
1608 | MachinePointerInfo(), None, MachineMemOperand::MOStore); |
1609 | |
1610 | InFlag = Chain.getValue(1); |
1611 | } |
1612 | ++paramCount; |
1613 | } |
1614 | |
1615 | GlobalAddressSDNode *Func = dyn_cast<GlobalAddressSDNode>(Callee.getNode()); |
1616 | MaybeAlign retAlignment = None; |
1617 | |
1618 | |
1619 | if (Ins.size() > 0) { |
1620 | SmallVector<EVT, 16> resvtparts; |
1621 | ComputeValueVTs(*this, DL, RetTy, resvtparts); |
1622 | |
1623 | |
1624 | |
1625 | |
1626 | unsigned resultsz = DL.getTypeAllocSizeInBits(RetTy); |
1627 | |
1628 | |
1629 | |
1630 | |
1631 | if (RetTy->isFloatingPointTy() || RetTy->isPointerTy() || |
1632 | (RetTy->isIntegerTy() && !RetTy->isIntegerTy(128))) { |
1633 | |
1634 | if (resultsz < 32) |
1635 | resultsz = 32; |
1636 | SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue); |
1637 | SDValue DeclareRetOps[] = { Chain, DAG.getConstant(1, dl, MVT::i32), |
1638 | DAG.getConstant(resultsz, dl, MVT::i32), |
1639 | DAG.getConstant(0, dl, MVT::i32), InFlag }; |
1640 | Chain = DAG.getNode(NVPTXISD::DeclareRet, dl, DeclareRetVTs, |
1641 | DeclareRetOps); |
1642 | InFlag = Chain.getValue(1); |
1643 | } else { |
1644 | retAlignment = getArgumentAlignment(Callee, CB, RetTy, 0, DL); |
1645 | assert(retAlignment && "retAlignment is guaranteed to be set"); |
1646 | SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue); |
1647 | SDValue DeclareRetOps[] = { |
1648 | Chain, DAG.getConstant(retAlignment->value(), dl, MVT::i32), |
1649 | DAG.getConstant(resultsz / 8, dl, MVT::i32), |
1650 | DAG.getConstant(0, dl, MVT::i32), InFlag}; |
1651 | Chain = DAG.getNode(NVPTXISD::DeclareRetParam, dl, DeclareRetVTs, |
1652 | DeclareRetOps); |
1653 | InFlag = Chain.getValue(1); |
1654 | } |
1655 | } |
1656 | |
1657 | |
1658 | |
1659 | |
1660 | bool isIndirectCall = !Func && CB; |
1661 | |
1662 | if (isa<ExternalSymbolSDNode>(Callee)) { |
1663 | Function* CalleeFunc = nullptr; |
1664 | |
1665 | |
1666 | Callee = DAG.getSymbolFunctionGlobalAddress(Callee, &CalleeFunc); |
1667 | assert(CalleeFunc != nullptr && "Libcall callee must be set."); |
1668 | |
1669 | |
1670 | |
1671 | CalleeFunc->addFnAttr("nvptx-libcall-callee", "true"); |
1672 | } |
1673 | |
1674 | if (isIndirectCall) { |
1675 | |
1676 | |
1677 | |
1678 | |
1679 | |
1680 | |
1681 | |
1682 | SDVTList ProtoVTs = DAG.getVTList(MVT::Other, MVT::Glue); |
1683 | std::string Proto = |
1684 | getPrototype(DL, RetTy, Args, Outs, retAlignment, *CB, UniqueCallSite); |
1685 | const char *ProtoStr = |
1686 | nvTM->getManagedStrPool()->getManagedString(Proto.c_str())->c_str(); |
1687 | SDValue ProtoOps[] = { |
1688 | Chain, DAG.getTargetExternalSymbol(ProtoStr, MVT::i32), InFlag, |
1689 | }; |
1690 | Chain = DAG.getNode(NVPTXISD::CallPrototype, dl, ProtoVTs, ProtoOps); |
1691 | InFlag = Chain.getValue(1); |
1692 | } |
1693 | |
1694 | SDVTList PrintCallVTs = DAG.getVTList(MVT::Other, MVT::Glue); |
1695 | SDValue PrintCallOps[] = { |
1696 | Chain, DAG.getConstant((Ins.size() == 0) ? 0 : 1, dl, MVT::i32), InFlag |
1697 | }; |
1698 | |
1699 | unsigned Opcode = isIndirectCall ? NVPTXISD::PrintCall : NVPTXISD::PrintCallUni; |
1700 | if (CLI.IsConvergent) |
1701 | Opcode = Opcode == NVPTXISD::PrintCallUni ? NVPTXISD::PrintConvergentCallUni |
1702 | : NVPTXISD::PrintConvergentCall; |
1703 | Chain = DAG.getNode(Opcode, dl, PrintCallVTs, PrintCallOps); |
1704 | InFlag = Chain.getValue(1); |
1705 | |
1706 | |
1707 | SDVTList CallVoidVTs = DAG.getVTList(MVT::Other, MVT::Glue); |
1708 | SDValue CallVoidOps[] = { Chain, Callee, InFlag }; |
1709 | Chain = DAG.getNode(NVPTXISD::CallVoid, dl, CallVoidVTs, CallVoidOps); |
1710 | InFlag = Chain.getValue(1); |
1711 | |
1712 | |
1713 | SDVTList CallArgBeginVTs = DAG.getVTList(MVT::Other, MVT::Glue); |
1714 | SDValue CallArgBeginOps[] = { Chain, InFlag }; |
1715 | Chain = DAG.getNode(NVPTXISD::CallArgBegin, dl, CallArgBeginVTs, |
1716 | CallArgBeginOps); |
1717 | InFlag = Chain.getValue(1); |
1718 | |
1719 | for (unsigned i = 0, e = paramCount; i != e; ++i) { |
1720 | unsigned opcode; |
1721 | if (i == (e - 1)) |
1722 | opcode = NVPTXISD::LastCallArg; |
1723 | else |
1724 | opcode = NVPTXISD::CallArg; |
1725 | SDVTList CallArgVTs = DAG.getVTList(MVT::Other, MVT::Glue); |
1726 | SDValue CallArgOps[] = { Chain, DAG.getConstant(1, dl, MVT::i32), |
1727 | DAG.getConstant(i, dl, MVT::i32), InFlag }; |
1728 | Chain = DAG.getNode(opcode, dl, CallArgVTs, CallArgOps); |
1729 | InFlag = Chain.getValue(1); |
1730 | } |
1731 | SDVTList CallArgEndVTs = DAG.getVTList(MVT::Other, MVT::Glue); |
1732 | SDValue CallArgEndOps[] = { Chain, |
1733 | DAG.getConstant(isIndirectCall ? 0 : 1, dl, MVT::i32), |
1734 | InFlag }; |
1735 | Chain = DAG.getNode(NVPTXISD::CallArgEnd, dl, CallArgEndVTs, CallArgEndOps); |
1736 | InFlag = Chain.getValue(1); |
1737 | |
1738 | if (isIndirectCall) { |
1739 | SDVTList PrototypeVTs = DAG.getVTList(MVT::Other, MVT::Glue); |
1740 | SDValue PrototypeOps[] = { |
1741 | Chain, DAG.getConstant(UniqueCallSite, dl, MVT::i32), InFlag}; |
1742 | Chain = DAG.getNode(NVPTXISD::Prototype, dl, PrototypeVTs, PrototypeOps); |
1743 | InFlag = Chain.getValue(1); |
1744 | } |
1745 | |
1746 | SmallVector<SDValue, 16> ProxyRegOps; |
1747 | SmallVector<Optional<MVT>, 16> ProxyRegTruncates; |
1748 | |
1749 | |
1750 | if (Ins.size() > 0) { |
1751 | SmallVector<EVT, 16> VTs; |
1752 | SmallVector<uint64_t, 16> Offsets; |
1753 | ComputePTXValueVTs(*this, DL, RetTy, VTs, &Offsets, 0); |
1754 | assert(VTs.size() == Ins.size() && "Bad value decomposition"); |
1755 | |
1756 | Align RetAlign = getArgumentAlignment(Callee, CB, RetTy, 0, DL); |
1757 | auto VectorInfo = VectorizePTXValueVTs(VTs, Offsets, RetAlign); |
1758 | |
1759 | SmallVector<EVT, 6> LoadVTs; |
1760 | int VecIdx = -1; |
1761 | |
1762 | |
1763 | |
1764 | |
1765 | bool ExtendIntegerRetVal = |
1766 | RetTy->isIntegerTy() && DL.getTypeAllocSizeInBits(RetTy) < 32; |
1767 | |
1768 | for (unsigned i = 0, e = VTs.size(); i != e; ++i) { |
1769 | bool needTruncate = false; |
1770 | EVT TheLoadType = VTs[i]; |
1771 | EVT EltType = Ins[i].VT; |
1772 | Align EltAlign = commonAlignment(RetAlign, Offsets[i]); |
1773 | if (ExtendIntegerRetVal) { |
1774 | TheLoadType = MVT::i32; |
1775 | EltType = MVT::i32; |
1776 | needTruncate = true; |
1777 | } else if (TheLoadType.getSizeInBits() < 16) { |
1778 | if (VTs[i].isInteger()) |
1779 | needTruncate = true; |
1780 | EltType = MVT::i16; |
1781 | } |
1782 | |
1783 | |
1784 | if (VectorInfo[i] & PVF_FIRST) { |
1785 | assert(VecIdx == -1 && LoadVTs.empty() && "Orphaned operand list."); |
1786 | VecIdx = i; |
1787 | } |
1788 | |
1789 | LoadVTs.push_back(EltType); |
1790 | |
1791 | if (VectorInfo[i] & PVF_LAST) { |
1792 | unsigned NumElts = LoadVTs.size(); |
1793 | LoadVTs.push_back(MVT::Other); |
1794 | LoadVTs.push_back(MVT::Glue); |
1795 | NVPTXISD::NodeType Op; |
1796 | switch (NumElts) { |
1797 | case 1: |
1798 | Op = NVPTXISD::LoadParam; |
1799 | break; |
1800 | case 2: |
1801 | Op = NVPTXISD::LoadParamV2; |
1802 | break; |
1803 | case 4: |
1804 | Op = NVPTXISD::LoadParamV4; |
1805 | break; |
1806 | default: |
1807 | llvm_unreachable("Invalid vector info."); |
1808 | } |
1809 | |
1810 | SDValue LoadOperands[] = { |
1811 | Chain, DAG.getConstant(1, dl, MVT::i32), |
1812 | DAG.getConstant(Offsets[VecIdx], dl, MVT::i32), InFlag}; |
1813 | SDValue RetVal = DAG.getMemIntrinsicNode( |
1814 | Op, dl, DAG.getVTList(LoadVTs), LoadOperands, TheLoadType, |
1815 | MachinePointerInfo(), EltAlign, |
1816 | MachineMemOperand::MOLoad); |
1817 | |
1818 | for (unsigned j = 0; j < NumElts; ++j) { |
1819 | ProxyRegOps.push_back(RetVal.getValue(j)); |
1820 | |
1821 | if (needTruncate) |
1822 | ProxyRegTruncates.push_back(Optional<MVT>(Ins[VecIdx + j].VT)); |
1823 | else |
1824 | ProxyRegTruncates.push_back(Optional<MVT>()); |
1825 | } |
1826 | |
1827 | Chain = RetVal.getValue(NumElts); |
1828 | InFlag = RetVal.getValue(NumElts + 1); |
1829 | |
1830 | |
1831 | VecIdx = -1; |
1832 | LoadVTs.clear(); |
1833 | } |
1834 | } |
1835 | } |
1836 | |
1837 | Chain = DAG.getCALLSEQ_END( |
1838 | Chain, DAG.getIntPtrConstant(UniqueCallSite, dl, true), |
1839 | DAG.getIntPtrConstant(UniqueCallSite + 1, dl, true), InFlag, dl); |
1840 | InFlag = Chain.getValue(1); |
1841 | |
1842 | |
1843 | |
1844 | |
1845 | for (unsigned i = 0; i < ProxyRegOps.size(); ++i) { |
1846 | SDValue Ret = DAG.getNode( |
1847 | NVPTXISD::ProxyReg, dl, |
1848 | DAG.getVTList(ProxyRegOps[i].getSimpleValueType(), MVT::Other, MVT::Glue), |
1849 | { Chain, ProxyRegOps[i], InFlag } |
1850 | ); |
1851 | |
1852 | Chain = Ret.getValue(1); |
1853 | InFlag = Ret.getValue(2); |
1854 | |
1855 | if (ProxyRegTruncates[i].hasValue()) { |
1856 | Ret = DAG.getNode(ISD::TRUNCATE, dl, ProxyRegTruncates[i].getValue(), Ret); |
1857 | } |
1858 | |
1859 | InVals.push_back(Ret); |
1860 | } |
1861 | |
1862 | |
1863 | |
1864 | isTailCall = false; |
1865 | return Chain; |
1866 | } |
1867 | |
1868 | |
1869 | |
1870 | |
1871 | SDValue |
1872 | NVPTXTargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const { |
1873 | SDNode *Node = Op.getNode(); |
1874 | SDLoc dl(Node); |
1875 | SmallVector<SDValue, 8> Ops; |
1876 | unsigned NumOperands = Node->getNumOperands(); |
1877 | for (unsigned i = 0; i < NumOperands; ++i) { |
1878 | SDValue SubOp = Node->getOperand(i); |
1879 | EVT VVT = SubOp.getNode()->getValueType(0); |
1880 | EVT EltVT = VVT.getVectorElementType(); |
1881 | unsigned NumSubElem = VVT.getVectorNumElements(); |
1882 | for (unsigned j = 0; j < NumSubElem; ++j) { |
1883 | Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, SubOp, |
1884 | DAG.getIntPtrConstant(j, dl))); |
1885 | } |
1886 | } |
1887 | return DAG.getBuildVector(Node->getValueType(0), dl, Ops); |
1888 | } |
1889 | |
1890 | |
1891 | |
1892 | |
1893 | |
1894 | |
1895 | |
1896 | |
1897 | |
1898 | |
1899 | |
1900 | SDValue NVPTXTargetLowering::LowerBUILD_VECTOR(SDValue Op, |
1901 | SelectionDAG &DAG) const { |
1902 | |
1903 | if (!(Op->getValueType(0) == MVT::v2f16 && |
1904 | isa<ConstantFPSDNode>(Op->getOperand(0)) && |
1905 | isa<ConstantFPSDNode>(Op->getOperand(1)))) |
1906 | return Op; |
1907 | |
1908 | APInt E0 = |
1909 | cast<ConstantFPSDNode>(Op->getOperand(0))->getValueAPF().bitcastToAPInt(); |
1910 | APInt E1 = |
1911 | cast<ConstantFPSDNode>(Op->getOperand(1))->getValueAPF().bitcastToAPInt(); |
1912 | SDValue Const = |
1913 | DAG.getConstant(E1.zext(32).shl(16) | E0.zext(32), SDLoc(Op), MVT::i32); |
1914 | return DAG.getNode(ISD::BITCAST, SDLoc(Op), MVT::v2f16, Const); |
1915 | } |
1916 | |
1917 | SDValue NVPTXTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, |
1918 | SelectionDAG &DAG) const { |
1919 | SDValue Index = Op->getOperand(1); |
1920 | |
1921 | if (isa<ConstantSDNode>(Index.getNode())) |
1922 | return Op; |
1923 | |
1924 | |
1925 | SDValue Vector = Op->getOperand(0); |
1926 | EVT VectorVT = Vector.getValueType(); |
1927 | assert(VectorVT == MVT::v2f16 && "Unexpected vector type."); |
1928 | EVT EltVT = VectorVT.getVectorElementType(); |
1929 | |
1930 | SDLoc dl(Op.getNode()); |
1931 | SDValue E0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Vector, |
1932 | DAG.getIntPtrConstant(0, dl)); |
1933 | SDValue E1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Vector, |
1934 | DAG.getIntPtrConstant(1, dl)); |
1935 | return DAG.getSelectCC(dl, Index, DAG.getIntPtrConstant(0, dl), E0, E1, |
1936 | ISD::CondCode::SETEQ); |
1937 | } |
1938 | |
1939 | |
1940 | |
1941 | |
1942 | |
1943 | |
1944 | SDValue NVPTXTargetLowering::LowerShiftRightParts(SDValue Op, |
1945 | SelectionDAG &DAG) const { |
1946 | assert(Op.getNumOperands() == 3 && "Not a double-shift!"); |
1947 | assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS); |
1948 | |
1949 | EVT VT = Op.getValueType(); |
1950 | unsigned VTBits = VT.getSizeInBits(); |
1951 | SDLoc dl(Op); |
1952 | SDValue ShOpLo = Op.getOperand(0); |
1953 | SDValue ShOpHi = Op.getOperand(1); |
1954 | SDValue ShAmt = Op.getOperand(2); |
1955 | unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL; |
1956 | |
1957 | if (VTBits == 32 && STI.getSmVersion() >= 35) { |
1958 | |
1959 | |
1960 | |
1961 | |
1962 | |
1963 | SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt); |
1964 | SDValue Lo = DAG.getNode(NVPTXISD::FUN_SHFR_CLAMP, dl, VT, ShOpLo, ShOpHi, |
1965 | ShAmt); |
1966 | |
1967 | SDValue Ops[2] = { Lo, Hi }; |
1968 | return DAG.getMergeValues(Ops, dl); |
1969 | } |
1970 | else { |
1971 | |
1972 | |
1973 | |
1974 | |
1975 | |
1976 | |
1977 | |
1978 | |
1979 | SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, |
1980 | DAG.getConstant(VTBits, dl, MVT::i32), |
1981 | ShAmt); |
1982 | SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt); |
1983 | SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt, |
1984 | DAG.getConstant(VTBits, dl, MVT::i32)); |
1985 | SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt); |
1986 | SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); |
1987 | SDValue TrueVal = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt); |
1988 | |
1989 | SDValue Cmp = DAG.getSetCC(dl, MVT::i1, ShAmt, |
1990 | DAG.getConstant(VTBits, dl, MVT::i32), |
1991 | ISD::SETGE); |
1992 | SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt); |
1993 | SDValue Lo = DAG.getNode(ISD::SELECT, dl, VT, Cmp, TrueVal, FalseVal); |
1994 | |
1995 | SDValue Ops[2] = { Lo, Hi }; |
1996 | return DAG.getMergeValues(Ops, dl); |
1997 | } |
1998 | } |
1999 | |
2000 | |
2001 | |
2002 | |
2003 | |
2004 | |
2005 | SDValue NVPTXTargetLowering::LowerShiftLeftParts(SDValue Op, |
2006 | SelectionDAG &DAG) const { |
2007 | assert(Op.getNumOperands() == 3 && "Not a double-shift!"); |
2008 | assert(Op.getOpcode() == ISD::SHL_PARTS); |
2009 | |
2010 | EVT VT = Op.getValueType(); |
2011 | unsigned VTBits = VT.getSizeInBits(); |
2012 | SDLoc dl(Op); |
2013 | SDValue ShOpLo = Op.getOperand(0); |
2014 | SDValue ShOpHi = Op.getOperand(1); |
2015 | SDValue ShAmt = Op.getOperand(2); |
2016 | |
2017 | if (VTBits == 32 && STI.getSmVersion() >= 35) { |
2018 | |
2019 | |
2020 | |
2021 | |
2022 | |
2023 | SDValue Hi = DAG.getNode(NVPTXISD::FUN_SHFL_CLAMP, dl, VT, ShOpLo, ShOpHi, |
2024 | ShAmt); |
2025 | SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt); |
2026 | |
2027 | SDValue Ops[2] = { Lo, Hi }; |
2028 | return DAG.getMergeValues(Ops, dl); |
2029 | } |
2030 | else { |
2031 | |
2032 | |
2033 | |
2034 | |
2035 | |
2036 | |
2037 | |
2038 | |
2039 | SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, |
2040 | DAG.getConstant(VTBits, dl, MVT::i32), |
2041 | ShAmt); |
2042 | SDValue Tmp1 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt); |
2043 | SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt, |
2044 | DAG.getConstant(VTBits, dl, MVT::i32)); |
2045 | SDValue Tmp2 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt); |
2046 | SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); |
2047 | SDValue TrueVal = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt); |
2048 | |
2049 | SDValue Cmp = DAG.getSetCC(dl, MVT::i1, ShAmt, |
2050 | DAG.getConstant(VTBits, dl, MVT::i32), |
2051 | ISD::SETGE); |
2052 | SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt); |
2053 | SDValue Hi = DAG.getNode(ISD::SELECT, dl, VT, Cmp, TrueVal, FalseVal); |
2054 | |
2055 | SDValue Ops[2] = { Lo, Hi }; |
2056 | return DAG.getMergeValues(Ops, dl); |
2057 | } |
2058 | } |
2059 | |
2060 | SDValue NVPTXTargetLowering::LowerFROUND(SDValue Op, SelectionDAG &DAG) const { |
2061 | EVT VT = Op.getValueType(); |
2062 | |
2063 | if (VT == MVT::f32) |
2064 | return LowerFROUND32(Op, DAG); |
2065 | |
2066 | if (VT == MVT::f64) |
2067 | return LowerFROUND64(Op, DAG); |
2068 | |
2069 | llvm_unreachable("unhandled type"); |
2070 | } |
2071 | |
2072 | |
2073 | |
2074 | |
2075 | |
2076 | |
2077 | |
2078 | |
2079 | SDValue NVPTXTargetLowering::LowerFROUND32(SDValue Op, |
2080 | SelectionDAG &DAG) const { |
2081 | SDLoc SL(Op); |
2082 | SDValue A = Op.getOperand(0); |
2083 | EVT VT = Op.getValueType(); |
2084 | |
2085 | SDValue AbsA = DAG.getNode(ISD::FABS, SL, VT, A); |
2086 | |
2087 | |
2088 | SDValue Bitcast = DAG.getNode(ISD::BITCAST, SL, MVT::i32, A); |
2089 | const int SignBitMask = 0x80000000; |
2090 | SDValue Sign = DAG.getNode(ISD::AND, SL, MVT::i32, Bitcast, |
2091 | DAG.getConstant(SignBitMask, SL, MVT::i32)); |
2092 | const int PointFiveInBits = 0x3F000000; |
2093 | SDValue PointFiveWithSignRaw = |
2094 | DAG.getNode(ISD::OR, SL, MVT::i32, Sign, |
2095 | DAG.getConstant(PointFiveInBits, SL, MVT::i32)); |
2096 | SDValue PointFiveWithSign = |
2097 | DAG.getNode(ISD::BITCAST, SL, VT, PointFiveWithSignRaw); |
2098 | SDValue AdjustedA = DAG.getNode(ISD::FADD, SL, VT, A, PointFiveWithSign); |
2099 | SDValue RoundedA = DAG.getNode(ISD::FTRUNC, SL, VT, AdjustedA); |
2100 | |
2101 | |
2102 | EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); |
2103 | SDValue IsLarge = |
2104 | DAG.getSetCC(SL, SetCCVT, AbsA, DAG.getConstantFP(pow(2.0, 23.0), SL, VT), |
2105 | ISD::SETOGT); |
2106 | RoundedA = DAG.getNode(ISD::SELECT, SL, VT, IsLarge, A, RoundedA); |
2107 | |
2108 | |
2109 | SDValue IsSmall =DAG.getSetCC(SL, SetCCVT, AbsA, |
2110 | DAG.getConstantFP(0.5, SL, VT), ISD::SETOLT); |
2111 | SDValue RoundedAForSmallA = DAG.getNode(ISD::FTRUNC, SL, VT, A); |
2112 | return DAG.getNode(ISD::SELECT, SL, VT, IsSmall, RoundedAForSmallA, RoundedA); |
2113 | } |
2114 | |
2115 | |
2116 | |
2117 | |
2118 | |
2119 | |
2120 | SDValue NVPTXTargetLowering::LowerFROUND64(SDValue Op, |
2121 | SelectionDAG &DAG) const { |
2122 | SDLoc SL(Op); |
2123 | SDValue A = Op.getOperand(0); |
2124 | EVT VT = Op.getValueType(); |
2125 | |
2126 | SDValue AbsA = DAG.getNode(ISD::FABS, SL, VT, A); |
2127 | |
2128 | |
2129 | SDValue AdjustedA = DAG.getNode(ISD::FADD, SL, VT, AbsA, |
2130 | DAG.getConstantFP(0.5, SL, VT)); |
2131 | SDValue RoundedA = DAG.getNode(ISD::FTRUNC, SL, VT, AdjustedA); |
2132 | |
2133 | |
2134 | EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); |
2135 | SDValue IsSmall =DAG.getSetCC(SL, SetCCVT, AbsA, |
2136 | DAG.getConstantFP(0.5, SL, VT), ISD::SETOLT); |
2137 | RoundedA = DAG.getNode(ISD::SELECT, SL, VT, IsSmall, |
2138 | DAG.getConstantFP(0, SL, VT), |
2139 | RoundedA); |
2140 | |
2141 | |
2142 | RoundedA = DAG.getNode(ISD::FCOPYSIGN, SL, VT, RoundedA, A); |
2143 | DAG.getNode(ISD::FTRUNC, SL, VT, A); |
2144 | |
2145 | |
2146 | SDValue IsLarge = |
2147 | DAG.getSetCC(SL, SetCCVT, AbsA, DAG.getConstantFP(pow(2.0, 52.0), SL, VT), |
2148 | ISD::SETOGT); |
2149 | return DAG.getNode(ISD::SELECT, SL, VT, IsLarge, A, RoundedA); |
2150 | } |
2151 | |
2152 | |
2153 | |
2154 | SDValue |
2155 | NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { |
2156 | switch (Op.getOpcode()) { |
2157 | case ISD::RETURNADDR: |
2158 | return SDValue(); |
2159 | case ISD::FRAMEADDR: |
2160 | return SDValue(); |
2161 | case ISD::GlobalAddress: |
2162 | return LowerGlobalAddress(Op, DAG); |
2163 | case ISD::INTRINSIC_W_CHAIN: |
2164 | return Op; |
2165 | case ISD::BUILD_VECTOR: |
2166 | return LowerBUILD_VECTOR(Op, DAG); |
2167 | case ISD::EXTRACT_SUBVECTOR: |
2168 | return Op; |
2169 | case ISD::EXTRACT_VECTOR_ELT: |
2170 | return LowerEXTRACT_VECTOR_ELT(Op, DAG); |
2171 | case ISD::CONCAT_VECTORS: |
2172 | return LowerCONCAT_VECTORS(Op, DAG); |
2173 | case ISD::STORE: |
2174 | return LowerSTORE(Op, DAG); |
2175 | case ISD::LOAD: |
2176 | return LowerLOAD(Op, DAG); |
2177 | case ISD::SHL_PARTS: |
2178 | return LowerShiftLeftParts(Op, DAG); |
2179 | case ISD::SRA_PARTS: |
2180 | case ISD::SRL_PARTS: |
2181 | return LowerShiftRightParts(Op, DAG); |
2182 | case ISD::SELECT: |
2183 | return LowerSelect(Op, DAG); |
2184 | case ISD::FROUND: |
2185 | return LowerFROUND(Op, DAG); |
2186 | default: |
2187 | llvm_unreachable("Custom lowering not defined for operation"); |
2188 | } |
2189 | } |
2190 | |
2191 | SDValue NVPTXTargetLowering::LowerSelect(SDValue Op, SelectionDAG &DAG) const { |
2192 | SDValue Op0 = Op->getOperand(0); |
2193 | SDValue Op1 = Op->getOperand(1); |
2194 | SDValue Op2 = Op->getOperand(2); |
2195 | SDLoc DL(Op.getNode()); |
2196 | |
2197 | assert(Op.getValueType() == MVT::i1 && "Custom lowering enabled only for i1"); |
2198 | |
2199 | Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op1); |
2200 | Op2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op2); |
2201 | SDValue Select = DAG.getNode(ISD::SELECT, DL, MVT::i32, Op0, Op1, Op2); |
2202 | SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Select); |
2203 | |
2204 | return Trunc; |
2205 | } |
2206 | |
2207 | SDValue NVPTXTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { |
2208 | if (Op.getValueType() == MVT::i1) |
2209 | return LowerLOADi1(Op, DAG); |
2210 | |
2211 | |
2212 | |
2213 | if (Op.getValueType() == MVT::v2f16) { |
2214 | LoadSDNode *Load = cast<LoadSDNode>(Op); |
2215 | EVT MemVT = Load->getMemoryVT(); |
2216 | if (!allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(), |
2217 | MemVT, *Load->getMemOperand())) { |
2218 | SDValue Ops[2]; |
2219 | std::tie(Ops[0], Ops[1]) = expandUnalignedLoad(Load, DAG); |
2220 | return DAG.getMergeValues(Ops, SDLoc(Op)); |
2221 | } |
2222 | } |
2223 | |
2224 | return SDValue(); |
2225 | } |
2226 | |
2227 | |
2228 | |
2229 | |
2230 | |
2231 | SDValue NVPTXTargetLowering::LowerLOADi1(SDValue Op, SelectionDAG &DAG) const { |
2232 | SDNode *Node = Op.getNode(); |
2233 | LoadSDNode *LD = cast<LoadSDNode>(Node); |
2234 | SDLoc dl(Node); |
2235 | assert(LD->getExtensionType() == ISD::NON_EXTLOAD); |
2236 | assert(Node->getValueType(0) == MVT::i1 && |
2237 | "Custom lowering for i1 load only"); |
2238 | SDValue newLD = DAG.getLoad(MVT::i16, dl, LD->getChain(), LD->getBasePtr(), |
2239 | LD->getPointerInfo(), LD->getAlignment(), |
2240 | LD->getMemOperand()->getFlags()); |
2241 | SDValue result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, newLD); |
2242 | |
2243 | |
2244 | |
2245 | SDValue Ops[] = { result, LD->getChain() }; |
2246 | return DAG.getMergeValues(Ops, dl); |
2247 | } |
2248 | |
2249 | SDValue NVPTXTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { |
2250 | StoreSDNode *Store = cast<StoreSDNode>(Op); |
2251 | EVT VT = Store->getMemoryVT(); |
2252 | |
2253 | if (VT == MVT::i1) |
2254 | return LowerSTOREi1(Op, DAG); |
2255 | |
2256 | |
2257 | |
2258 | if (VT == MVT::v2f16 && |
2259 | !allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(), |
2260 | VT, *Store->getMemOperand())) |
2261 | return expandUnalignedStore(Store, DAG); |
2262 | |
2263 | if (VT.isVector()) |
2264 | return LowerSTOREVector(Op, DAG); |
2265 | |
2266 | return SDValue(); |
2267 | } |
2268 | |
2269 | SDValue |
2270 | NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const { |
2271 | SDNode *N = Op.getNode(); |
2272 | SDValue Val = N->getOperand(1); |
2273 | SDLoc DL(N); |
2274 | EVT ValVT = Val.getValueType(); |
2275 | |
2276 | if (ValVT.isVector()) { |
2277 | |
2278 | |
2279 | |
2280 | if (!ValVT.isSimple()) |
2281 | return SDValue(); |
2282 | switch (ValVT.getSimpleVT().SimpleTy) { |
2283 | default: |
2284 | return SDValue(); |
2285 | case MVT::v2i8: |
2286 | case MVT::v2i16: |
2287 | case MVT::v2i32: |
2288 | case MVT::v2i64: |
2289 | case MVT::v2f16: |
2290 | case MVT::v2f32: |
2291 | case MVT::v2f64: |
2292 | case MVT::v4i8: |
2293 | case MVT::v4i16: |
2294 | case MVT::v4i32: |
2295 | case MVT::v4f16: |
2296 | case MVT::v4f32: |
2297 | case MVT::v8f16: |
2298 | |
2299 | break; |
2300 | } |
2301 | |
2302 | MemSDNode *MemSD = cast<MemSDNode>(N); |
2303 | const DataLayout &TD = DAG.getDataLayout(); |
2304 | |
2305 | Align Alignment = MemSD->getAlign(); |
2306 | Align PrefAlign = |
2307 | TD.getPrefTypeAlign(ValVT.getTypeForEVT(*DAG.getContext())); |
2308 | if (Alignment < PrefAlign) { |
2309 | |
2310 | |
2311 | |
2312 | |
2313 | |
2314 | return SDValue(); |
2315 | } |
2316 | |
2317 | unsigned Opcode = 0; |
2318 | EVT EltVT = ValVT.getVectorElementType(); |
2319 | unsigned NumElts = ValVT.getVectorNumElements(); |
2320 | |
2321 | |
2322 | |
2323 | |
2324 | bool NeedExt = false; |
2325 | if (EltVT.getSizeInBits() < 16) |
2326 | NeedExt = true; |
2327 | |
2328 | bool StoreF16x2 = false; |
2329 | switch (NumElts) { |
2330 | default: |
2331 | return SDValue(); |
2332 | case 2: |
2333 | Opcode = NVPTXISD::StoreV2; |
2334 | break; |
2335 | case 4: |
2336 | Opcode = NVPTXISD::StoreV4; |
2337 | break; |
2338 | case 8: |
2339 | |
2340 | |
2341 | |
2342 | assert(EltVT == MVT::f16 && "Wrong type for the vector."); |
2343 | Opcode = NVPTXISD::StoreV4; |
2344 | StoreF16x2 = true; |
2345 | break; |
2346 | } |
2347 | |
2348 | SmallVector<SDValue, 8> Ops; |
2349 | |
2350 | |
2351 | Ops.push_back(N->getOperand(0)); |
2352 | |
2353 | if (StoreF16x2) { |
2354 | |
2355 | NumElts /= 2; |
2356 | for (unsigned i = 0; i < NumElts; ++i) { |
2357 | SDValue E0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f16, Val, |
2358 | DAG.getIntPtrConstant(i * 2, DL)); |
2359 | SDValue E1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f16, Val, |
2360 | DAG.getIntPtrConstant(i * 2 + 1, DL)); |
2361 | SDValue V2 = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v2f16, E0, E1); |
2362 | Ops.push_back(V2); |
2363 | } |
2364 | } else { |
2365 | |
2366 | for (unsigned i = 0; i < NumElts; ++i) { |
2367 | SDValue ExtVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Val, |
2368 | DAG.getIntPtrConstant(i, DL)); |
2369 | if (NeedExt) |
2370 | ExtVal = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i16, ExtVal); |
2371 | Ops.push_back(ExtVal); |
2372 | } |
2373 | } |
2374 | |
2375 | |
2376 | Ops.append(N->op_begin() + 2, N->op_end()); |
2377 | |
2378 | SDValue NewSt = |
2379 | DAG.getMemIntrinsicNode(Opcode, DL, DAG.getVTList(MVT::Other), Ops, |
2380 | MemSD->getMemoryVT(), MemSD->getMemOperand()); |
2381 | |
2382 | |
2383 | return NewSt; |
2384 | } |
2385 | |
2386 | return SDValue(); |
2387 | } |
2388 | |
2389 | |
2390 | |
2391 | |
2392 | |
2393 | SDValue NVPTXTargetLowering::LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const { |
2394 | SDNode *Node = Op.getNode(); |
2395 | SDLoc dl(Node); |
2396 | StoreSDNode *ST = cast<StoreSDNode>(Node); |
2397 | SDValue Tmp1 = ST->getChain(); |
2398 | SDValue Tmp2 = ST->getBasePtr(); |
2399 | SDValue Tmp3 = ST->getValue(); |
2400 | assert(Tmp3.getValueType() == MVT::i1 && "Custom lowering for i1 store only"); |
2401 | Tmp3 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Tmp3); |
2402 | SDValue Result = |
2403 | DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), MVT::i8, |
2404 | ST->getAlignment(), ST->getMemOperand()->getFlags()); |
2405 | return Result; |
2406 | } |
2407 | |
2408 | SDValue |
2409 | NVPTXTargetLowering::getParamSymbol(SelectionDAG &DAG, int idx, EVT v) const { |
2410 | std::string ParamSym; |
2411 | raw_string_ostream ParamStr(ParamSym); |
2412 | |
2413 | ParamStr << DAG.getMachineFunction().getName() << "_param_" << idx; |
2414 | ParamStr.flush(); |
2415 | |
2416 | std::string *SavedStr = |
2417 | nvTM->getManagedStrPool()->getManagedString(ParamSym.c_str()); |
2418 | return DAG.getTargetExternalSymbol(SavedStr->c_str(), v); |
2419 | } |
2420 | |
2421 | |
2422 | |
2423 | static bool isImageOrSamplerVal(const Value *arg, const Module *context) { |
2424 | static const char *const specialTypes[] = { "struct._image2d_t", |
2425 | "struct._image3d_t", |
2426 | "struct._sampler_t" }; |
2427 | |
2428 | Type *Ty = arg->getType(); |
2429 | auto *PTy = dyn_cast<PointerType>(Ty); |
2430 | |
2431 | if (!PTy) |
2432 | return false; |
2433 | |
2434 | if (!context) |
2435 | return false; |
2436 | |
2437 | auto *STy = dyn_cast<StructType>(PTy->getElementType()); |
2438 | if (!STy || STy->isLiteral()) |
2439 | return false; |
2440 | |
2441 | return llvm::is_contained(specialTypes, STy->getName()); |
2442 | } |
2443 | |
2444 | SDValue NVPTXTargetLowering::LowerFormalArguments( |
2445 | SDValue Chain, CallingConv::ID CallConv, bool isVarArg, |
2446 | const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, |
2447 | SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { |
2448 | MachineFunction &MF = DAG.getMachineFunction(); |
2449 | const DataLayout &DL = DAG.getDataLayout(); |
2450 | auto PtrVT = getPointerTy(DAG.getDataLayout()); |
2451 | |
2452 | const Function *F = &MF.getFunction(); |
2453 | const AttributeList &PAL = F->getAttributes(); |
2454 | const TargetLowering *TLI = STI.getTargetLowering(); |
2455 | |
2456 | SDValue Root = DAG.getRoot(); |
2457 | std::vector<SDValue> OutChains; |
2458 | |
2459 | bool isABI = (STI.getSmVersion() >= 20); |
2460 | assert(isABI && "Non-ABI compilation is not supported"); |
2461 | if (!isABI) |
2462 | return Chain; |
2463 | |
2464 | std::vector<Type *> argTypes; |
2465 | std::vector<const Argument *> theArgs; |
2466 | for (const Argument &I : F->args()) { |
2467 | theArgs.push_back(&I); |
2468 | argTypes.push_back(I.getType()); |
2469 | } |
2470 | |
2471 | |
2472 | |
2473 | |
2474 | |
2475 | |
2476 | |
2477 | |
2478 | |
2479 | unsigned InsIdx = 0; |
2480 | |
2481 | int idx = 0; |
2482 | for (unsigned i = 0, e = theArgs.size(); i != e; ++i, ++idx, ++InsIdx) { |
2483 | Type *Ty = argTypes[i]; |
2484 | |
2485 | |
2486 | |
2487 | |
2488 | if (isImageOrSamplerVal( |
2489 | theArgs[i], |
2490 | (theArgs[i]->getParent() ? theArgs[i]->getParent()->getParent() |
2491 | : nullptr))) { |
2492 | assert(isKernelFunction(*F) && |
2493 | "Only kernels can have image/sampler params"); |
2494 | InVals.push_back(DAG.getConstant(i + 1, dl, MVT::i32)); |
2495 | continue; |
2496 | } |
2497 | |
2498 | if (theArgs[i]->use_empty()) { |
2499 | |
2500 | if (Ty->isAggregateType() || Ty->isIntegerTy(128)) { |
2501 | SmallVector<EVT, 16> vtparts; |
2502 | |
2503 | ComputePTXValueVTs(*this, DAG.getDataLayout(), Ty, vtparts); |
2504 | assert(vtparts.size() > 0 && "empty aggregate type not expected"); |
2505 | for (unsigned parti = 0, parte = vtparts.size(); parti != parte; |
2506 | ++parti) { |
2507 | InVals.push_back(DAG.getNode(ISD::UNDEF, dl, Ins[InsIdx].VT)); |
2508 | ++InsIdx; |
2509 | } |
2510 | if (vtparts.size() > 0) |
2511 | --InsIdx; |
2512 | continue; |
2513 | } |
2514 | if (Ty->isVectorTy()) { |
2515 | EVT ObjectVT = getValueType(DL, Ty); |
2516 | unsigned NumRegs = TLI->getNumRegisters(F->getContext(), ObjectVT); |
2517 | for (unsigned parti = 0; parti < NumRegs; ++parti) { |
2518 | InVals.push_back(DAG.getNode(ISD::UNDEF, dl, Ins[InsIdx].VT)); |
2519 | ++InsIdx; |
2520 | } |
2521 | if (NumRegs > 0) |
2522 | --InsIdx; |
2523 | continue; |
2524 | } |
2525 | InVals.push_back(DAG.getNode(ISD::UNDEF, dl, Ins[InsIdx].VT)); |
2526 | continue; |
2527 | } |
2528 | |
2529 | |
2530 | |
2531 | |
2532 | |
2533 | if (!PAL.hasParamAttr(i, Attribute::ByVal)) { |
2534 | bool aggregateIsPacked = false; |
2535 | if (StructType *STy = dyn_cast<StructType>(Ty)) |
2536 | aggregateIsPacked = STy->isPacked(); |
2537 | |
2538 | SmallVector<EVT, 16> VTs; |
2539 | SmallVector<uint64_t, 16> Offsets; |
2540 | ComputePTXValueVTs(*this, DL, Ty, VTs, &Offsets, 0); |
2541 | assert(VTs.size() > 0 && "Unexpected empty type."); |
2542 | auto VectorInfo = |
2543 | VectorizePTXValueVTs(VTs, Offsets, DL.getABITypeAlign(Ty)); |
2544 | |
2545 | SDValue Arg = getParamSymbol(DAG, idx, PtrVT); |
2546 | int VecIdx = -1; |
2547 | for (unsigned parti = 0, parte = VTs.size(); parti != parte; ++parti) { |
2548 | if (VectorInfo[parti] & PVF_FIRST) { |
2549 | assert(VecIdx == -1 && "Orphaned vector."); |
2550 | VecIdx = parti; |
2551 | } |
2552 | |
2553 | |
2554 | if (VectorInfo[parti] & PVF_LAST) { |
2555 | unsigned NumElts = parti - VecIdx + 1; |
2556 | EVT EltVT = VTs[parti]; |
2557 | |
2558 | EVT LoadVT = EltVT; |
2559 | if (EltVT == MVT::i1) |
2560 | LoadVT = MVT::i8; |
2561 | else if (EltVT == MVT::v2f16) |
2562 | |
2563 | |
2564 | |
2565 | LoadVT = MVT::i32; |
2566 | |
2567 | EVT VecVT = EVT::getVectorVT(F->getContext(), LoadVT, NumElts); |
2568 | SDValue VecAddr = |
2569 | DAG.getNode(ISD::ADD, dl, PtrVT, Arg, |
2570 | DAG.getConstant(Offsets[VecIdx], dl, PtrVT)); |
2571 | Value *srcValue = Constant::getNullValue(PointerType::get( |
2572 | EltVT.getTypeForEVT(F->getContext()), ADDRESS_SPACE_PARAM)); |
2573 | SDValue P = |
2574 | DAG.getLoad(VecVT, dl, Root, VecAddr, |
2575 | MachinePointerInfo(srcValue), aggregateIsPacked, |
2576 | MachineMemOperand::MODereferenceable | |
2577 | MachineMemOperand::MOInvariant); |
2578 | if (P.getNode()) |
2579 | P.getNode()->setIROrder(idx + 1); |
2580 | for (unsigned j = 0; j < NumElts; ++j) { |
2581 | SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, LoadVT, P, |
2582 | DAG.getIntPtrConstant(j, dl)); |
2583 | |
2584 | if (EltVT == MVT::i1) |
2585 | Elt = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Elt); |
2586 | |
2587 | else if (EltVT == MVT::v2f16) |
2588 | Elt = DAG.getNode(ISD::BITCAST, dl, MVT::v2f16, Elt); |
2589 | |
2590 | |
2591 | if (Ins[InsIdx].VT.isInteger() && |
2592 | Ins[InsIdx].VT.getFixedSizeInBits() > |
2593 | LoadVT.getFixedSizeInBits()) { |
2594 | unsigned Extend = Ins[InsIdx].Flags.isSExt() ? ISD::SIGN_EXTEND |
2595 | : ISD::ZERO_EXTEND; |
2596 | Elt = DAG.getNode(Extend, dl, Ins[InsIdx].VT, Elt); |
2597 | } |
2598 | InVals.push_back(Elt); |
2599 | } |
2600 | |
2601 | |
2602 | VecIdx = -1; |
2603 | } |
2604 | ++InsIdx; |
2605 | } |
2606 | if (VTs.size() > 0) |
2607 | --InsIdx; |
2608 | continue; |
2609 | } |
2610 | |
2611 | |
2612 | |
2613 | |
2614 | |
2615 | |
2616 | |
2617 | |
2618 | EVT ObjectVT = getValueType(DL, Ty); |
2619 | assert(ObjectVT == Ins[InsIdx].VT && |
2620 | "Ins type did not match function type"); |
2621 | SDValue Arg = getParamSymbol(DAG, idx, PtrVT); |
2622 | SDValue p = DAG.getNode(NVPTXISD::MoveParam, dl, ObjectVT, Arg); |
2623 | if (p.getNode()) |
2624 | p.getNode()->setIROrder(idx + 1); |
2625 | InVals.push_back(p); |
2626 | } |
2627 | |
2628 | |
2629 | |
2630 | |
2631 | |
2632 | |
2633 | |
2634 | |
2635 | |
2636 | if (!OutChains.empty()) |
2637 | DAG.setRoot(DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains)); |
2638 | |
2639 | return Chain; |
2640 | } |
2641 | |
2642 | SDValue |
2643 | NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, |
2644 | bool isVarArg, |
2645 | const SmallVectorImpl<ISD::OutputArg> &Outs, |
2646 | const SmallVectorImpl<SDValue> &OutVals, |
2647 | const SDLoc &dl, SelectionDAG &DAG) const { |
2648 | MachineFunction &MF = DAG.getMachineFunction(); |
2649 | Type *RetTy = MF.getFunction().getReturnType(); |
2650 | |
2651 | bool isABI = (STI.getSmVersion() >= 20); |
2652 | assert(isABI && "Non-ABI compilation is not supported"); |
2653 | if (!isABI) |
2654 | return Chain; |
2655 | |
2656 | const DataLayout &DL = DAG.getDataLayout(); |
2657 | SmallVector<EVT, 16> VTs; |
2658 | SmallVector<uint64_t, 16> Offsets; |
2659 | ComputePTXValueVTs(*this, DL, RetTy, VTs, &Offsets); |
2660 | assert(VTs.size() == OutVals.size() && "Bad return value decomposition"); |
2661 | |
2662 | auto VectorInfo = VectorizePTXValueVTs( |
2663 | VTs, Offsets, RetTy->isSized() ? DL.getABITypeAlign(RetTy) : Align(1)); |
2664 | |
2665 | |
2666 | |
2667 | |
2668 | bool ExtendIntegerRetVal = |
2669 | RetTy->isIntegerTy() && DL.getTypeAllocSizeInBits(RetTy) < 32; |
2670 | |
2671 | SmallVector<SDValue, 6> StoreOperands; |
2672 | for (unsigned i = 0, e = VTs.size(); i != e; ++i) { |
2673 | |
2674 | if (VectorInfo[i] & PVF_FIRST) { |
2675 | assert(StoreOperands.empty() && "Orphaned operand list."); |
2676 | StoreOperands.push_back(Chain); |
2677 | StoreOperands.push_back(DAG.getConstant(Offsets[i], dl, MVT::i32)); |
2678 | } |
2679 | |
2680 | SDValue RetVal = OutVals[i]; |
2681 | if (ExtendIntegerRetVal) { |
2682 | RetVal = DAG.getNode(Outs[i].Flags.isSExt() ? ISD::SIGN_EXTEND |
2683 | : ISD::ZERO_EXTEND, |
2684 | dl, MVT::i32, RetVal); |
2685 | } else if (RetVal.getValueSizeInBits() < 16) { |
2686 | |
2687 | |
2688 | RetVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, RetVal); |
2689 | } |
2690 | |
2691 | |
2692 | StoreOperands.push_back(RetVal); |
2693 | |
2694 | |
2695 | if (VectorInfo[i] & PVF_LAST) { |
2696 | NVPTXISD::NodeType Op; |
2697 | unsigned NumElts = StoreOperands.size() - 2; |
2698 | switch (NumElts) { |
2699 | case 1: |
2700 | Op = NVPTXISD::StoreRetval; |
2701 | break; |
2702 | case 2: |
2703 | Op = NVPTXISD::StoreRetvalV2; |
2704 | break; |
2705 | case 4: |
2706 | Op = NVPTXISD::StoreRetvalV4; |
2707 | break; |
2708 | default: |
2709 | llvm_unreachable("Invalid vector info."); |
2710 | } |
2711 | |
2712 | |
2713 | |
2714 | EVT TheStoreType = ExtendIntegerRetVal ? MVT::i32 : VTs[i]; |
2715 | Chain = DAG.getMemIntrinsicNode( |
2716 | Op, dl, DAG.getVTList(MVT::Other), StoreOperands, TheStoreType, |
2717 | MachinePointerInfo(), Align(1), MachineMemOperand::MOStore); |
2718 | |
2719 | StoreOperands.clear(); |
2720 | } |
2721 | } |
2722 | |
2723 | return DAG.getNode(NVPTXISD::RET_FLAG, dl, MVT::Other, Chain); |
2724 | } |
2725 | |
2726 | void NVPTXTargetLowering::LowerAsmOperandForConstraint( |
2727 | SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops, |
2728 | SelectionDAG &DAG) const { |
2729 | if (Constraint.length() > 1) |
2730 | return; |
2731 | else |
2732 | TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); |
2733 | } |
2734 | |
/// Map a texture / tld4 intrinsic ID to the NVPTXISD opcode returned for it.
///
/// \param Intrinsic an Intrinsic::nvvm_tex_* or Intrinsic::nvvm_tld4_* ID.
/// \returns the matching NVPTXISD opcode, or 0 when \p Intrinsic is not one
/// of the IDs listed below.
///
/// The table has two halves with the same layout: the nvvm_tex_* / nvvm_tld4_*
/// intrinsics first, then their nvvm_tex_unified_* / nvvm_tld4_unified_*
/// counterparts.
static unsigned getOpcForTextureInstr(unsigned Intrinsic) {
  switch (Intrinsic) {
  default:
    return 0;

  // tex.1d
  case Intrinsic::nvvm_tex_1d_v4f32_s32:
    return NVPTXISD::Tex1DFloatS32;
  case Intrinsic::nvvm_tex_1d_v4f32_f32:
    return NVPTXISD::Tex1DFloatFloat;
  case Intrinsic::nvvm_tex_1d_level_v4f32_f32:
    return NVPTXISD::Tex1DFloatFloatLevel;
  case Intrinsic::nvvm_tex_1d_grad_v4f32_f32:
    return NVPTXISD::Tex1DFloatFloatGrad;
  case Intrinsic::nvvm_tex_1d_v4s32_s32:
    return NVPTXISD::Tex1DS32S32;
  case Intrinsic::nvvm_tex_1d_v4s32_f32:
    return NVPTXISD::Tex1DS32Float;
  case Intrinsic::nvvm_tex_1d_level_v4s32_f32:
    return NVPTXISD::Tex1DS32FloatLevel;
  case Intrinsic::nvvm_tex_1d_grad_v4s32_f32:
    return NVPTXISD::Tex1DS32FloatGrad;
  case Intrinsic::nvvm_tex_1d_v4u32_s32:
    return NVPTXISD::Tex1DU32S32;
  case Intrinsic::nvvm_tex_1d_v4u32_f32:
    return NVPTXISD::Tex1DU32Float;
  case Intrinsic::nvvm_tex_1d_level_v4u32_f32:
    return NVPTXISD::Tex1DU32FloatLevel;
  case Intrinsic::nvvm_tex_1d_grad_v4u32_f32:
    return NVPTXISD::Tex1DU32FloatGrad;

  // tex.1d.array
  case Intrinsic::nvvm_tex_1d_array_v4f32_s32:
    return NVPTXISD::Tex1DArrayFloatS32;
  case Intrinsic::nvvm_tex_1d_array_v4f32_f32:
    return NVPTXISD::Tex1DArrayFloatFloat;
  case Intrinsic::nvvm_tex_1d_array_level_v4f32_f32:
    return NVPTXISD::Tex1DArrayFloatFloatLevel;
  case Intrinsic::nvvm_tex_1d_array_grad_v4f32_f32:
    return NVPTXISD::Tex1DArrayFloatFloatGrad;
  case Intrinsic::nvvm_tex_1d_array_v4s32_s32:
    return NVPTXISD::Tex1DArrayS32S32;
  case Intrinsic::nvvm_tex_1d_array_v4s32_f32:
    return NVPTXISD::Tex1DArrayS32Float;
  case Intrinsic::nvvm_tex_1d_array_level_v4s32_f32:
    return NVPTXISD::Tex1DArrayS32FloatLevel;
  case Intrinsic::nvvm_tex_1d_array_grad_v4s32_f32:
    return NVPTXISD::Tex1DArrayS32FloatGrad;
  case Intrinsic::nvvm_tex_1d_array_v4u32_s32:
    return NVPTXISD::Tex1DArrayU32S32;
  case Intrinsic::nvvm_tex_1d_array_v4u32_f32:
    return NVPTXISD::Tex1DArrayU32Float;
  case Intrinsic::nvvm_tex_1d_array_level_v4u32_f32:
    return NVPTXISD::Tex1DArrayU32FloatLevel;
  case Intrinsic::nvvm_tex_1d_array_grad_v4u32_f32:
    return NVPTXISD::Tex1DArrayU32FloatGrad;

  // tex.2d
  case Intrinsic::nvvm_tex_2d_v4f32_s32:
    return NVPTXISD::Tex2DFloatS32;
  case Intrinsic::nvvm_tex_2d_v4f32_f32:
    return NVPTXISD::Tex2DFloatFloat;
  case Intrinsic::nvvm_tex_2d_level_v4f32_f32:
    return NVPTXISD::Tex2DFloatFloatLevel;
  case Intrinsic::nvvm_tex_2d_grad_v4f32_f32:
    return NVPTXISD::Tex2DFloatFloatGrad;
  case Intrinsic::nvvm_tex_2d_v4s32_s32:
    return NVPTXISD::Tex2DS32S32;
  case Intrinsic::nvvm_tex_2d_v4s32_f32:
    return NVPTXISD::Tex2DS32Float;
  case Intrinsic::nvvm_tex_2d_level_v4s32_f32:
    return NVPTXISD::Tex2DS32FloatLevel;
  case Intrinsic::nvvm_tex_2d_grad_v4s32_f32:
    return NVPTXISD::Tex2DS32FloatGrad;
  case Intrinsic::nvvm_tex_2d_v4u32_s32:
    return NVPTXISD::Tex2DU32S32;
  case Intrinsic::nvvm_tex_2d_v4u32_f32:
    return NVPTXISD::Tex2DU32Float;
  case Intrinsic::nvvm_tex_2d_level_v4u32_f32:
    return NVPTXISD::Tex2DU32FloatLevel;
  case Intrinsic::nvvm_tex_2d_grad_v4u32_f32:
    return NVPTXISD::Tex2DU32FloatGrad;

  // tex.2d.array
  case Intrinsic::nvvm_tex_2d_array_v4f32_s32:
    return NVPTXISD::Tex2DArrayFloatS32;
  case Intrinsic::nvvm_tex_2d_array_v4f32_f32:
    return NVPTXISD::Tex2DArrayFloatFloat;
  case Intrinsic::nvvm_tex_2d_array_level_v4f32_f32:
    return NVPTXISD::Tex2DArrayFloatFloatLevel;
  case Intrinsic::nvvm_tex_2d_array_grad_v4f32_f32:
    return NVPTXISD::Tex2DArrayFloatFloatGrad;
  case Intrinsic::nvvm_tex_2d_array_v4s32_s32:
    return NVPTXISD::Tex2DArrayS32S32;
  case Intrinsic::nvvm_tex_2d_array_v4s32_f32:
    return NVPTXISD::Tex2DArrayS32Float;
  case Intrinsic::nvvm_tex_2d_array_level_v4s32_f32:
    return NVPTXISD::Tex2DArrayS32FloatLevel;
  case Intrinsic::nvvm_tex_2d_array_grad_v4s32_f32:
    return NVPTXISD::Tex2DArrayS32FloatGrad;
  case Intrinsic::nvvm_tex_2d_array_v4u32_s32:
    return NVPTXISD::Tex2DArrayU32S32;
  case Intrinsic::nvvm_tex_2d_array_v4u32_f32:
    return NVPTXISD::Tex2DArrayU32Float;
  case Intrinsic::nvvm_tex_2d_array_level_v4u32_f32:
    return NVPTXISD::Tex2DArrayU32FloatLevel;
  case Intrinsic::nvvm_tex_2d_array_grad_v4u32_f32:
    return NVPTXISD::Tex2DArrayU32FloatGrad;

  // tex.3d
  case Intrinsic::nvvm_tex_3d_v4f32_s32:
    return NVPTXISD::Tex3DFloatS32;
  case Intrinsic::nvvm_tex_3d_v4f32_f32:
    return NVPTXISD::Tex3DFloatFloat;
  case Intrinsic::nvvm_tex_3d_level_v4f32_f32:
    return NVPTXISD::Tex3DFloatFloatLevel;
  case Intrinsic::nvvm_tex_3d_grad_v4f32_f32:
    return NVPTXISD::Tex3DFloatFloatGrad;
  case Intrinsic::nvvm_tex_3d_v4s32_s32:
    return NVPTXISD::Tex3DS32S32;
  case Intrinsic::nvvm_tex_3d_v4s32_f32:
    return NVPTXISD::Tex3DS32Float;
  case Intrinsic::nvvm_tex_3d_level_v4s32_f32:
    return NVPTXISD::Tex3DS32FloatLevel;
  case Intrinsic::nvvm_tex_3d_grad_v4s32_f32:
    return NVPTXISD::Tex3DS32FloatGrad;
  case Intrinsic::nvvm_tex_3d_v4u32_s32:
    return NVPTXISD::Tex3DU32S32;
  case Intrinsic::nvvm_tex_3d_v4u32_f32:
    return NVPTXISD::Tex3DU32Float;
  case Intrinsic::nvvm_tex_3d_level_v4u32_f32:
    return NVPTXISD::Tex3DU32FloatLevel;
  case Intrinsic::nvvm_tex_3d_grad_v4u32_f32:
    return NVPTXISD::Tex3DU32FloatGrad;

  // tex.cube
  case Intrinsic::nvvm_tex_cube_v4f32_f32:
    return NVPTXISD::TexCubeFloatFloat;
  case Intrinsic::nvvm_tex_cube_level_v4f32_f32:
    return NVPTXISD::TexCubeFloatFloatLevel;
  case Intrinsic::nvvm_tex_cube_v4s32_f32:
    return NVPTXISD::TexCubeS32Float;
  case Intrinsic::nvvm_tex_cube_level_v4s32_f32:
    return NVPTXISD::TexCubeS32FloatLevel;
  case Intrinsic::nvvm_tex_cube_v4u32_f32:
    return NVPTXISD::TexCubeU32Float;
  case Intrinsic::nvvm_tex_cube_level_v4u32_f32:
    return NVPTXISD::TexCubeU32FloatLevel;

  // tex.cube.array
  case Intrinsic::nvvm_tex_cube_array_v4f32_f32:
    return NVPTXISD::TexCubeArrayFloatFloat;
  case Intrinsic::nvvm_tex_cube_array_level_v4f32_f32:
    return NVPTXISD::TexCubeArrayFloatFloatLevel;
  case Intrinsic::nvvm_tex_cube_array_v4s32_f32:
    return NVPTXISD::TexCubeArrayS32Float;
  case Intrinsic::nvvm_tex_cube_array_level_v4s32_f32:
    return NVPTXISD::TexCubeArrayS32FloatLevel;
  case Intrinsic::nvvm_tex_cube_array_v4u32_f32:
    return NVPTXISD::TexCubeArrayU32Float;
  case Intrinsic::nvvm_tex_cube_array_level_v4u32_f32:
    return NVPTXISD::TexCubeArrayU32FloatLevel;

  // tld4 (r/g/b/a component, 2d).
  // NOTE(review): the v4s32 / v4u32 intrinsics map to enumerators spelled
  // S64 / U64; presumably that is only the enumerator's name — confirm
  // against the NVPTXISD definition before renaming anything.
  case Intrinsic::nvvm_tld4_r_2d_v4f32_f32:
    return NVPTXISD::Tld4R2DFloatFloat;
  case Intrinsic::nvvm_tld4_g_2d_v4f32_f32:
    return NVPTXISD::Tld4G2DFloatFloat;
  case Intrinsic::nvvm_tld4_b_2d_v4f32_f32:
    return NVPTXISD::Tld4B2DFloatFloat;
  case Intrinsic::nvvm_tld4_a_2d_v4f32_f32:
    return NVPTXISD::Tld4A2DFloatFloat;
  case Intrinsic::nvvm_tld4_r_2d_v4s32_f32:
    return NVPTXISD::Tld4R2DS64Float;
  case Intrinsic::nvvm_tld4_g_2d_v4s32_f32:
    return NVPTXISD::Tld4G2DS64Float;
  case Intrinsic::nvvm_tld4_b_2d_v4s32_f32:
    return NVPTXISD::Tld4B2DS64Float;
  case Intrinsic::nvvm_tld4_a_2d_v4s32_f32:
    return NVPTXISD::Tld4A2DS64Float;
  case Intrinsic::nvvm_tld4_r_2d_v4u32_f32:
    return NVPTXISD::Tld4R2DU64Float;
  case Intrinsic::nvvm_tld4_g_2d_v4u32_f32:
    return NVPTXISD::Tld4G2DU64Float;
  case Intrinsic::nvvm_tld4_b_2d_v4u32_f32:
    return NVPTXISD::Tld4B2DU64Float;
  case Intrinsic::nvvm_tld4_a_2d_v4u32_f32:
    return NVPTXISD::Tld4A2DU64Float;

  // tex.unified.1d
  case Intrinsic::nvvm_tex_unified_1d_v4f32_s32:
    return NVPTXISD::TexUnified1DFloatS32;
  case Intrinsic::nvvm_tex_unified_1d_v4f32_f32:
    return NVPTXISD::TexUnified1DFloatFloat;
  case Intrinsic::nvvm_tex_unified_1d_level_v4f32_f32:
    return NVPTXISD::TexUnified1DFloatFloatLevel;
  case Intrinsic::nvvm_tex_unified_1d_grad_v4f32_f32:
    return NVPTXISD::TexUnified1DFloatFloatGrad;
  case Intrinsic::nvvm_tex_unified_1d_v4s32_s32:
    return NVPTXISD::TexUnified1DS32S32;
  case Intrinsic::nvvm_tex_unified_1d_v4s32_f32:
    return NVPTXISD::TexUnified1DS32Float;
  case Intrinsic::nvvm_tex_unified_1d_level_v4s32_f32:
    return NVPTXISD::TexUnified1DS32FloatLevel;
  case Intrinsic::nvvm_tex_unified_1d_grad_v4s32_f32:
    return NVPTXISD::TexUnified1DS32FloatGrad;
  case Intrinsic::nvvm_tex_unified_1d_v4u32_s32:
    return NVPTXISD::TexUnified1DU32S32;
  case Intrinsic::nvvm_tex_unified_1d_v4u32_f32:
    return NVPTXISD::TexUnified1DU32Float;
  case Intrinsic::nvvm_tex_unified_1d_level_v4u32_f32:
    return NVPTXISD::TexUnified1DU32FloatLevel;
  case Intrinsic::nvvm_tex_unified_1d_grad_v4u32_f32:
    return NVPTXISD::TexUnified1DU32FloatGrad;

  // tex.unified.1d.array
  case Intrinsic::nvvm_tex_unified_1d_array_v4f32_s32:
    return NVPTXISD::TexUnified1DArrayFloatS32;
  case Intrinsic::nvvm_tex_unified_1d_array_v4f32_f32:
    return NVPTXISD::TexUnified1DArrayFloatFloat;
  case Intrinsic::nvvm_tex_unified_1d_array_level_v4f32_f32:
    return NVPTXISD::TexUnified1DArrayFloatFloatLevel;
  case Intrinsic::nvvm_tex_unified_1d_array_grad_v4f32_f32:
    return NVPTXISD::TexUnified1DArrayFloatFloatGrad;
  case Intrinsic::nvvm_tex_unified_1d_array_v4s32_s32:
    return NVPTXISD::TexUnified1DArrayS32S32;
  case Intrinsic::nvvm_tex_unified_1d_array_v4s32_f32:
    return NVPTXISD::TexUnified1DArrayS32Float;
  case Intrinsic::nvvm_tex_unified_1d_array_level_v4s32_f32:
    return NVPTXISD::TexUnified1DArrayS32FloatLevel;
  case Intrinsic::nvvm_tex_unified_1d_array_grad_v4s32_f32:
    return NVPTXISD::TexUnified1DArrayS32FloatGrad;
  case Intrinsic::nvvm_tex_unified_1d_array_v4u32_s32:
    return NVPTXISD::TexUnified1DArrayU32S32;
  case Intrinsic::nvvm_tex_unified_1d_array_v4u32_f32:
    return NVPTXISD::TexUnified1DArrayU32Float;
  case Intrinsic::nvvm_tex_unified_1d_array_level_v4u32_f32:
    return NVPTXISD::TexUnified1DArrayU32FloatLevel;
  case Intrinsic::nvvm_tex_unified_1d_array_grad_v4u32_f32:
    return NVPTXISD::TexUnified1DArrayU32FloatGrad;

  // tex.unified.2d
  case Intrinsic::nvvm_tex_unified_2d_v4f32_s32:
    return NVPTXISD::TexUnified2DFloatS32;
  case Intrinsic::nvvm_tex_unified_2d_v4f32_f32:
    return NVPTXISD::TexUnified2DFloatFloat;
  case Intrinsic::nvvm_tex_unified_2d_level_v4f32_f32:
    return NVPTXISD::TexUnified2DFloatFloatLevel;
  case Intrinsic::nvvm_tex_unified_2d_grad_v4f32_f32:
    return NVPTXISD::TexUnified2DFloatFloatGrad;
  case Intrinsic::nvvm_tex_unified_2d_v4s32_s32:
    return NVPTXISD::TexUnified2DS32S32;
  case Intrinsic::nvvm_tex_unified_2d_v4s32_f32:
    return NVPTXISD::TexUnified2DS32Float;
  case Intrinsic::nvvm_tex_unified_2d_level_v4s32_f32:
    return NVPTXISD::TexUnified2DS32FloatLevel;
  case Intrinsic::nvvm_tex_unified_2d_grad_v4s32_f32:
    return NVPTXISD::TexUnified2DS32FloatGrad;
  case Intrinsic::nvvm_tex_unified_2d_v4u32_s32:
    return NVPTXISD::TexUnified2DU32S32;
  case Intrinsic::nvvm_tex_unified_2d_v4u32_f32:
    return NVPTXISD::TexUnified2DU32Float;
  case Intrinsic::nvvm_tex_unified_2d_level_v4u32_f32:
    return NVPTXISD::TexUnified2DU32FloatLevel;
  case Intrinsic::nvvm_tex_unified_2d_grad_v4u32_f32:
    return NVPTXISD::TexUnified2DU32FloatGrad;

  // tex.unified.2d.array
  case Intrinsic::nvvm_tex_unified_2d_array_v4f32_s32:
    return NVPTXISD::TexUnified2DArrayFloatS32;
  case Intrinsic::nvvm_tex_unified_2d_array_v4f32_f32:
    return NVPTXISD::TexUnified2DArrayFloatFloat;
  case Intrinsic::nvvm_tex_unified_2d_array_level_v4f32_f32:
    return NVPTXISD::TexUnified2DArrayFloatFloatLevel;
  case Intrinsic::nvvm_tex_unified_2d_array_grad_v4f32_f32:
    return NVPTXISD::TexUnified2DArrayFloatFloatGrad;
  case Intrinsic::nvvm_tex_unified_2d_array_v4s32_s32:
    return NVPTXISD::TexUnified2DArrayS32S32;
  case Intrinsic::nvvm_tex_unified_2d_array_v4s32_f32:
    return NVPTXISD::TexUnified2DArrayS32Float;
  case Intrinsic::nvvm_tex_unified_2d_array_level_v4s32_f32:
    return NVPTXISD::TexUnified2DArrayS32FloatLevel;
  case Intrinsic::nvvm_tex_unified_2d_array_grad_v4s32_f32:
    return NVPTXISD::TexUnified2DArrayS32FloatGrad;
  case Intrinsic::nvvm_tex_unified_2d_array_v4u32_s32:
    return NVPTXISD::TexUnified2DArrayU32S32;
  case Intrinsic::nvvm_tex_unified_2d_array_v4u32_f32:
    return NVPTXISD::TexUnified2DArrayU32Float;
  case Intrinsic::nvvm_tex_unified_2d_array_level_v4u32_f32:
    return NVPTXISD::TexUnified2DArrayU32FloatLevel;
  case Intrinsic::nvvm_tex_unified_2d_array_grad_v4u32_f32:
    return NVPTXISD::TexUnified2DArrayU32FloatGrad;

  // tex.unified.3d
  case Intrinsic::nvvm_tex_unified_3d_v4f32_s32:
    return NVPTXISD::TexUnified3DFloatS32;
  case Intrinsic::nvvm_tex_unified_3d_v4f32_f32:
    return NVPTXISD::TexUnified3DFloatFloat;
  case Intrinsic::nvvm_tex_unified_3d_level_v4f32_f32:
    return NVPTXISD::TexUnified3DFloatFloatLevel;
  case Intrinsic::nvvm_tex_unified_3d_grad_v4f32_f32:
    return NVPTXISD::TexUnified3DFloatFloatGrad;
  case Intrinsic::nvvm_tex_unified_3d_v4s32_s32:
    return NVPTXISD::TexUnified3DS32S32;
  case Intrinsic::nvvm_tex_unified_3d_v4s32_f32:
    return NVPTXISD::TexUnified3DS32Float;
  case Intrinsic::nvvm_tex_unified_3d_level_v4s32_f32:
    return NVPTXISD::TexUnified3DS32FloatLevel;
  case Intrinsic::nvvm_tex_unified_3d_grad_v4s32_f32:
    return NVPTXISD::TexUnified3DS32FloatGrad;
  case Intrinsic::nvvm_tex_unified_3d_v4u32_s32:
    return NVPTXISD::TexUnified3DU32S32;
  case Intrinsic::nvvm_tex_unified_3d_v4u32_f32:
    return NVPTXISD::TexUnified3DU32Float;
  case Intrinsic::nvvm_tex_unified_3d_level_v4u32_f32:
    return NVPTXISD::TexUnified3DU32FloatLevel;
  case Intrinsic::nvvm_tex_unified_3d_grad_v4u32_f32:
    return NVPTXISD::TexUnified3DU32FloatGrad;

  // tex.unified.cube
  case Intrinsic::nvvm_tex_unified_cube_v4f32_f32:
    return NVPTXISD::TexUnifiedCubeFloatFloat;
  case Intrinsic::nvvm_tex_unified_cube_level_v4f32_f32:
    return NVPTXISD::TexUnifiedCubeFloatFloatLevel;
  case Intrinsic::nvvm_tex_unified_cube_v4s32_f32:
    return NVPTXISD::TexUnifiedCubeS32Float;
  case Intrinsic::nvvm_tex_unified_cube_level_v4s32_f32:
    return NVPTXISD::TexUnifiedCubeS32FloatLevel;
  case Intrinsic::nvvm_tex_unified_cube_v4u32_f32:
    return NVPTXISD::TexUnifiedCubeU32Float;
  case Intrinsic::nvvm_tex_unified_cube_level_v4u32_f32:
    return NVPTXISD::TexUnifiedCubeU32FloatLevel;

  // tex.unified.cube.array
  case Intrinsic::nvvm_tex_unified_cube_array_v4f32_f32:
    return NVPTXISD::TexUnifiedCubeArrayFloatFloat;
  case Intrinsic::nvvm_tex_unified_cube_array_level_v4f32_f32:
    return NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel;
  case Intrinsic::nvvm_tex_unified_cube_array_v4s32_f32:
    return NVPTXISD::TexUnifiedCubeArrayS32Float;
  case Intrinsic::nvvm_tex_unified_cube_array_level_v4s32_f32:
    return NVPTXISD::TexUnifiedCubeArrayS32FloatLevel;
  case Intrinsic::nvvm_tex_unified_cube_array_v4u32_f32:
    return NVPTXISD::TexUnifiedCubeArrayU32Float;
  case Intrinsic::nvvm_tex_unified_cube_array_level_v4u32_f32:
    return NVPTXISD::TexUnifiedCubeArrayU32FloatLevel;

  // tld4.unified (r/g/b/a component, 2d); same S64 / U64 enumerator spelling
  // for the v4s32 / v4u32 intrinsics as in the non-unified group.
  case Intrinsic::nvvm_tld4_unified_r_2d_v4f32_f32:
    return NVPTXISD::Tld4UnifiedR2DFloatFloat;
  case Intrinsic::nvvm_tld4_unified_g_2d_v4f32_f32:
    return NVPTXISD::Tld4UnifiedG2DFloatFloat;
  case Intrinsic::nvvm_tld4_unified_b_2d_v4f32_f32:
    return NVPTXISD::Tld4UnifiedB2DFloatFloat;
  case Intrinsic::nvvm_tld4_unified_a_2d_v4f32_f32:
    return NVPTXISD::Tld4UnifiedA2DFloatFloat;
  case Intrinsic::nvvm_tld4_unified_r_2d_v4s32_f32:
    return NVPTXISD::Tld4UnifiedR2DS64Float;
  case Intrinsic::nvvm_tld4_unified_g_2d_v4s32_f32:
    return NVPTXISD::Tld4UnifiedG2DS64Float;
  case Intrinsic::nvvm_tld4_unified_b_2d_v4s32_f32:
    return NVPTXISD::Tld4UnifiedB2DS64Float;
  case Intrinsic::nvvm_tld4_unified_a_2d_v4s32_f32:
    return NVPTXISD::Tld4UnifiedA2DS64Float;
  case Intrinsic::nvvm_tld4_unified_r_2d_v4u32_f32:
    return NVPTXISD::Tld4UnifiedR2DU64Float;
  case Intrinsic::nvvm_tld4_unified_g_2d_v4u32_f32:
    return NVPTXISD::Tld4UnifiedG2DU64Float;
  case Intrinsic::nvvm_tld4_unified_b_2d_v4u32_f32:
    return NVPTXISD::Tld4UnifiedB2DU64Float;
  case Intrinsic::nvvm_tld4_unified_a_2d_v4u32_f32:
    return NVPTXISD::Tld4UnifiedA2DU64Float;
  }
}
3093 | |
3094 | static unsigned getOpcForSurfaceInstr(unsigned Intrinsic) { |
3095 | switch (Intrinsic) { |
3096 | default: |
3097 | return 0; |
3098 | case Intrinsic::nvvm_suld_1d_i8_clamp: |
3099 | return NVPTXISD::Suld1DI8Clamp; |
3100 | case Intrinsic::nvvm_suld_1d_i16_clamp: |
3101 | return NVPTXISD::Suld1DI16Clamp; |
3102 | case Intrinsic::nvvm_suld_1d_i32_clamp: |
3103 | return NVPTXISD::Suld1DI32Clamp; |
3104 | case Intrinsic::nvvm_suld_1d_i64_clamp: |
3105 | return NVPTXISD::Suld1DI64Clamp; |
3106 | case Intrinsic::nvvm_suld_1d_v2i8_clamp: |
3107 | return NVPTXISD::Suld1DV2I8Clamp; |
3108 | case Intrinsic::nvvm_suld_1d_v2i16_clamp: |
3109 | return NVPTXISD::Suld1DV2I16Clamp; |
3110 | case Intrinsic::nvvm_suld_1d_v2i32_clamp: |
3111 | return NVPTXISD::Suld1DV2I32Clamp; |
3112 | case Intrinsic::nvvm_suld_1d_v2i64_clamp: |
3113 | return NVPTXISD::Suld1DV2I64Clamp; |
3114 | case Intrinsic::nvvm_suld_1d_v4i8_clamp: |
3115 | return NVPTXISD::Suld1DV4I8Clamp; |
3116 | case Intrinsic::nvvm_suld_1d_v4i16_clamp: |
3117 | return NVPTXISD::Suld1DV4I16Clamp; |
3118 | case Intrinsic::nvvm_suld_1d_v4i32_clamp: |
3119 | return NVPTXISD::Suld1DV4I32Clamp; |
3120 | case Intrinsic::nvvm_suld_1d_array_i8_clamp: |
3121 | return NVPTXISD::Suld1DArrayI8Clamp; |
3122 | case Intrinsic::nvvm_suld_1d_array_i16_clamp: |
3123 | return NVPTXISD::Suld1DArrayI16Clamp; |
3124 | case Intrinsic::nvvm_suld_1d_array_i32_clamp: |
3125 | return NVPTXISD::Suld1DArrayI32Clamp; |
3126 | case Intrinsic::nvvm_suld_1d_array_i64_clamp: |
3127 | return NVPTXISD::Suld1DArrayI64Clamp; |
3128 | case Intrinsic::nvvm_suld_1d_array_v2i8_clamp: |
3129 | return NVPTXISD::Suld1DArrayV2I8Clamp; |
3130 | case Intrinsic::nvvm_suld_1d_array_v2i16_clamp: |
3131 | return NVPTXISD::Suld1DArrayV2I16Clamp; |
3132 | case Intrinsic::nvvm_suld_1d_array_v2i32_clamp: |
3133 | return NVPTXISD::Suld1DArrayV2I32Clamp; |
3134 | case Intrinsic::nvvm_suld_1d_array_v2i64_clamp: |
3135 | return NVPTXISD::Suld1DArrayV2I64Clamp; |
3136 | case Intrinsic::nvvm_suld_1d_array_v4i8_clamp: |
3137 | return NVPTXISD::Suld1DArrayV4I8Clamp; |
3138 | case Intrinsic::nvvm_suld_1d_array_v4i16_clamp: |
3139 | return NVPTXISD::Suld1DArrayV4I16Clamp; |
3140 | case Intrinsic::nvvm_suld_1d_array_v4i32_clamp: |
3141 | return NVPTXISD::Suld1DArrayV4I32Clamp; |
3142 | case Intrinsic::nvvm_suld_2d_i8_clamp: |
3143 | return NVPTXISD::Suld2DI8Clamp; |
3144 | case Intrinsic::nvvm_suld_2d_i16_clamp: |
3145 | return NVPTXISD::Suld2DI16Clamp; |
3146 | case Intrinsic::nvvm_suld_2d_i32_clamp: |
3147 | return NVPTXISD::Suld2DI32Clamp; |
3148 | case Intrinsic::nvvm_suld_2d_i64_clamp: |
3149 | return NVPTXISD::Suld2DI64Clamp; |
3150 | case Intrinsic::nvvm_suld_2d_v2i8_clamp: |
3151 | return NVPTXISD::Suld2DV2I8Clamp; |
3152 | case Intrinsic::nvvm_suld_2d_v2i16_clamp: |
3153 | return NVPTXISD::Suld2DV2I16Clamp; |
3154 | case Intrinsic::nvvm_suld_2d_v2i32_clamp: |
3155 | return NVPTXISD::Suld2DV2I32Clamp; |
3156 | case Intrinsic::nvvm_suld_2d_v2i64_clamp: |
3157 | return NVPTXISD::Suld2DV2I64Clamp; |
3158 | case Intrinsic::nvvm_suld_2d_v4i8_clamp: |
3159 | return NVPTXISD::Suld2DV4I8Clamp; |
3160 | case Intrinsic::nvvm_suld_2d_v4i16_clamp: |
3161 | return NVPTXISD::Suld2DV4I16Clamp; |
3162 | case Intrinsic::nvvm_suld_2d_v4i32_clamp: |
3163 | return NVPTXISD::Suld2DV4I32Clamp; |
3164 | case Intrinsic::nvvm_suld_2d_array_i8_clamp: |
3165 | return NVPTXISD::Suld2DArrayI8Clamp; |
3166 | case Intrinsic::nvvm_suld_2d_array_i16_clamp: |
3167 | return NVPTXISD::Suld2DArrayI16Clamp; |
3168 | case Intrinsic::nvvm_suld_2d_array_i32_clamp: |
3169 | return NVPTXISD::Suld2DArrayI32Clamp; |
3170 | case Intrinsic::nvvm_suld_2d_array_i64_clamp: |
3171 | return NVPTXISD::Suld2DArrayI64Clamp; |
3172 | case Intrinsic::nvvm_suld_2d_array_v2i8_clamp: |
3173 | return NVPTXISD::Suld2DArrayV2I8Clamp; |
3174 | case Intrinsic::nvvm_suld_2d_array_v2i16_clamp: |
3175 | return NVPTXISD::Suld2DArrayV2I16Clamp; |
3176 | case Intrinsic::nvvm_suld_2d_array_v2i32_clamp: |
3177 | return NVPTXISD::Suld2DArrayV2I32Clamp; |
3178 | case Intrinsic::nvvm_suld_2d_array_v2i64_clamp: |
3179 | return NVPTXISD::Suld2DArrayV2I64Clamp; |
3180 | case Intrinsic::nvvm_suld_2d_array_v4i8_clamp: |
3181 | return NVPTXISD::Suld2DArrayV4I8Clamp; |
3182 | case Intrinsic::nvvm_suld_2d_array_v4i16_clamp: |
3183 | return NVPTXISD::Suld2DArrayV4I16Clamp; |
3184 | case Intrinsic::nvvm_suld_2d_array_v4i32_clamp: |
3185 | return NVPTXISD::Suld2DArrayV4I32Clamp; |
3186 | case Intrinsic::nvvm_suld_3d_i8_clamp: |
3187 | return NVPTXISD::Suld3DI8Clamp; |
3188 | case Intrinsic::nvvm_suld_3d_i16_clamp: |
3189 | return NVPTXISD::Suld3DI16Clamp; |
3190 | case Intrinsic::nvvm_suld_3d_i32_clamp: |
3191 | return NVPTXISD::Suld3DI32Clamp; |
3192 | case Intrinsic::nvvm_suld_3d_i64_clamp: |
3193 | return NVPTXISD::Suld3DI64Clamp; |
3194 | case Intrinsic::nvvm_suld_3d_v2i8_clamp: |
3195 | return NVPTXISD::Suld3DV2I8Clamp; |
3196 | case Intrinsic::nvvm_suld_3d_v2i16_clamp: |
3197 | return NVPTXISD::Suld3DV2I16Clamp; |
3198 | case Intrinsic::nvvm_suld_3d_v2i32_clamp: |
3199 | return NVPTXISD::Suld3DV2I32Clamp; |
3200 | case Intrinsic::nvvm_suld_3d_v2i64_clamp: |
3201 | return NVPTXISD::Suld3DV2I64Clamp; |
3202 | case Intrinsic::nvvm_suld_3d_v4i8_clamp: |
3203 | return NVPTXISD::Suld3DV4I8Clamp; |
3204 | case Intrinsic::nvvm_suld_3d_v4i16_clamp: |
3205 | return NVPTXISD::Suld3DV4I16Clamp; |
3206 | case Intrinsic::nvvm_suld_3d_v4i32_clamp: |
3207 | return NVPTXISD::Suld3DV4I32Clamp; |
3208 | case Intrinsic::nvvm_suld_1d_i8_trap: |
3209 | return NVPTXISD::Suld1DI8Trap; |
3210 | case Intrinsic::nvvm_suld_1d_i16_trap: |
3211 | return NVPTXISD::Suld1DI16Trap; |
3212 | case Intrinsic::nvvm_suld_1d_i32_trap: |
3213 | return NVPTXISD::Suld1DI32Trap; |
3214 | case Intrinsic::nvvm_suld_1d_i64_trap: |
3215 | return NVPTXISD::Suld1DI64Trap; |
3216 | case Intrinsic::nvvm_suld_1d_v2i8_trap: |
3217 | return NVPTXISD::Suld1DV2I8Trap; |
3218 | case Intrinsic::nvvm_suld_1d_v2i16_trap: |
3219 | return NVPTXISD::Suld1DV2I16Trap; |
3220 | case Intrinsic::nvvm_suld_1d_v2i32_trap: |
3221 | return NVPTXISD::Suld1DV2I32Trap; |
3222 | case Intrinsic::nvvm_suld_1d_v2i64_trap: |
3223 | return NVPTXISD::Suld1DV2I64Trap; |
3224 | case Intrinsic::nvvm_suld_1d_v4i8_trap: |
3225 | return NVPTXISD::Suld1DV4I8Trap; |
3226 | case Intrinsic::nvvm_suld_1d_v4i16_trap: |
3227 | return NVPTXISD::Suld1DV4I16Trap; |
3228 | case Intrinsic::nvvm_suld_1d_v4i32_trap: |
3229 | return NVPTXISD::Suld1DV4I32Trap; |
3230 | case Intrinsic::nvvm_suld_1d_array_i8_trap: |
3231 | return NVPTXISD::Suld1DArrayI8Trap; |
3232 | case Intrinsic::nvvm_suld_1d_array_i16_trap: |
3233 | return NVPTXISD::Suld1DArrayI16Trap; |
3234 | case Intrinsic::nvvm_suld_1d_array_i32_trap: |
3235 | return NVPTXISD::Suld1DArrayI32Trap; |
3236 | case Intrinsic::nvvm_suld_1d_array_i64_trap: |
3237 | return NVPTXISD::Suld1DArrayI64Trap; |
3238 | case Intrinsic::nvvm_suld_1d_array_v2i8_trap: |
3239 | return NVPTXISD::Suld1DArrayV2I8Trap; |
3240 | case Intrinsic::nvvm_suld_1d_array_v2i16_trap: |
3241 | return NVPTXISD::Suld1DArrayV2I16Trap; |
3242 | case Intrinsic::nvvm_suld_1d_array_v2i32_trap: |
3243 | return NVPTXISD::Suld1DArrayV2I32Trap; |
3244 | case Intrinsic::nvvm_suld_1d_array_v2i64_trap: |
3245 | return NVPTXISD::Suld1DArrayV2I64Trap; |
3246 | case Intrinsic::nvvm_suld_1d_array_v4i8_trap: |
3247 | return NVPTXISD::Suld1DArrayV4I8Trap; |
3248 | case Intrinsic::nvvm_suld_1d_array_v4i16_trap: |
3249 | return NVPTXISD::Suld1DArrayV4I16Trap; |
3250 | case Intrinsic::nvvm_suld_1d_array_v4i32_trap: |
3251 | return NVPTXISD::Suld1DArrayV4I32Trap; |
3252 | case Intrinsic::nvvm_suld_2d_i8_trap: |
3253 | return NVPTXISD::Suld2DI8Trap; |
3254 | case Intrinsic::nvvm_suld_2d_i16_trap: |
3255 | return NVPTXISD::Suld2DI16Trap; |
3256 | case Intrinsic::nvvm_suld_2d_i32_trap: |
3257 | return NVPTXISD::Suld2DI32Trap; |
3258 | case Intrinsic::nvvm_suld_2d_i64_trap: |
3259 | return NVPTXISD::Suld2DI64Trap; |
3260 | case Intrinsic::nvvm_suld_2d_v2i8_trap: |
3261 | return NVPTXISD::Suld2DV2I8Trap; |
3262 | case Intrinsic::nvvm_suld_2d_v2i16_trap: |
3263 | return NVPTXISD::Suld2DV2I16Trap; |
3264 | case Intrinsic::nvvm_suld_2d_v2i32_trap: |
3265 | return NVPTXISD::Suld2DV2I32Trap; |
3266 | case Intrinsic::nvvm_suld_2d_v2i64_trap: |
3267 | return NVPTXISD::Suld2DV2I64Trap; |
3268 | case Intrinsic::nvvm_suld_2d_v4i8_trap: |
3269 | return NVPTXISD::Suld2DV4I8Trap; |
3270 | case Intrinsic::nvvm_suld_2d_v4i16_trap: |
3271 | return NVPTXISD::Suld2DV4I16Trap; |
3272 | case Intrinsic::nvvm_suld_2d_v4i32_trap: |
3273 | return NVPTXISD::Suld2DV4I32Trap; |
3274 | case Intrinsic::nvvm_suld_2d_array_i8_trap: |
3275 | return NVPTXISD::Suld2DArrayI8Trap; |
3276 | case Intrinsic::nvvm_suld_2d_array_i16_trap: |
3277 | return NVPTXISD::Suld2DArrayI16Trap; |
3278 | case Intrinsic::nvvm_suld_2d_array_i32_trap: |
3279 | return NVPTXISD::Suld2DArrayI32Trap; |
3280 | case Intrinsic::nvvm_suld_2d_array_i64_trap: |
3281 | return NVPTXISD::Suld2DArrayI64Trap; |
3282 | case Intrinsic::nvvm_suld_2d_array_v2i8_trap: |
3283 | return NVPTXISD::Suld2DArrayV2I8Trap; |
3284 | case Intrinsic::nvvm_suld_2d_array_v2i16_trap: |
3285 | return NVPTXISD::Suld2DArrayV2I16Trap; |
3286 | case Intrinsic::nvvm_suld_2d_array_v2i32_trap: |
3287 | return NVPTXISD::Suld2DArrayV2I32Trap; |
3288 | case Intrinsic::nvvm_suld_2d_array_v2i64_trap: |
3289 | return NVPTXISD::Suld2DArrayV2I64Trap; |
3290 | case Intrinsic::nvvm_suld_2d_array_v4i8_trap: |
3291 | return NVPTXISD::Suld2DArrayV4I8Trap; |
3292 | case Intrinsic::nvvm_suld_2d_array_v4i16_trap: |
3293 | return NVPTXISD::Suld2DArrayV4I16Trap; |
3294 | case Intrinsic::nvvm_suld_2d_array_v4i32_trap: |
3295 | return NVPTXISD::Suld2DArrayV4I32Trap; |
3296 | case Intrinsic::nvvm_suld_3d_i8_trap: |
3297 | return NVPTXISD::Suld3DI8Trap; |
3298 | case Intrinsic::nvvm_suld_3d_i16_trap: |
3299 | return NVPTXISD::Suld3DI16Trap; |
3300 | case Intrinsic::nvvm_suld_3d_i32_trap: |
3301 | return NVPTXISD::Suld3DI32Trap; |
3302 | case Intrinsic::nvvm_suld_3d_i64_trap: |
3303 | return NVPTXISD::Suld3DI64Trap; |
3304 | case Intrinsic::nvvm_suld_3d_v2i8_trap: |
3305 | return NVPTXISD::Suld3DV2I8Trap; |
3306 | case Intrinsic::nvvm_suld_3d_v2i16_trap: |
3307 | return NVPTXISD::Suld3DV2I16Trap; |
3308 | case Intrinsic::nvvm_suld_3d_v2i32_trap: |
3309 | return NVPTXISD::Suld3DV2I32Trap; |
3310 | case Intrinsic::nvvm_suld_3d_v2i64_trap: |
3311 | return NVPTXISD::Suld3DV2I64Trap; |
3312 | case Intrinsic::nvvm_suld_3d_v4i8_trap: |
3313 | return NVPTXISD::Suld3DV4I8Trap; |
3314 | case Intrinsic::nvvm_suld_3d_v4i16_trap: |
3315 | return NVPTXISD::Suld3DV4I16Trap; |
3316 | case Intrinsic::nvvm_suld_3d_v4i32_trap: |
3317 | return NVPTXISD::Suld3DV4I32Trap; |
3318 | case Intrinsic::nvvm_suld_1d_i8_zero: |
3319 | return NVPTXISD::Suld1DI8Zero; |
3320 | case Intrinsic::nvvm_suld_1d_i16_zero: |
3321 | return NVPTXISD::Suld1DI16Zero; |
3322 | case Intrinsic::nvvm_suld_1d_i32_zero: |
3323 | return NVPTXISD::Suld1DI32Zero; |
3324 | case Intrinsic::nvvm_suld_1d_i64_zero: |
3325 | return NVPTXISD::Suld1DI64Zero; |
3326 | case Intrinsic::nvvm_suld_1d_v2i8_zero: |
3327 | return NVPTXISD::Suld1DV2I8Zero; |
3328 | case Intrinsic::nvvm_suld_1d_v2i16_zero: |
3329 | return NVPTXISD::Suld1DV2I16Zero; |
3330 | case Intrinsic::nvvm_suld_1d_v2i32_zero: |
3331 | return NVPTXISD::Suld1DV2I32Zero; |
3332 | case Intrinsic::nvvm_suld_1d_v2i64_zero: |
3333 | return NVPTXISD::Suld1DV2I64Zero; |
3334 | case Intrinsic::nvvm_suld_1d_v4i8_zero: |
3335 | return NVPTXISD::Suld1DV4I8Zero; |
3336 | case Intrinsic::nvvm_suld_1d_v4i16_zero: |
3337 | return NVPTXISD::Suld1DV4I16Zero; |
3338 | case Intrinsic::nvvm_suld_1d_v4i32_zero: |
3339 | return NVPTXISD::Suld1DV4I32Zero; |
3340 | case Intrinsic::nvvm_suld_1d_array_i8_zero: |
3341 | return NVPTXISD::Suld1DArrayI8Zero; |
3342 | case Intrinsic::nvvm_suld_1d_array_i16_zero: |
3343 | return NVPTXISD::Suld1DArrayI16Zero; |
3344 | case Intrinsic::nvvm_suld_1d_array_i32_zero: |
3345 | return NVPTXISD::Suld1DArrayI32Zero; |
3346 | case Intrinsic::nvvm_suld_1d_array_i64_zero: |
3347 | return NVPTXISD::Suld1DArrayI64Zero; |
3348 | case Intrinsic::nvvm_suld_1d_array_v2i8_zero: |
3349 | return NVPTXISD::Suld1DArrayV2I8Zero; |
3350 | case Intrinsic::nvvm_suld_1d_array_v2i16_zero: |
3351 | return NVPTXISD::Suld1DArrayV2I16Zero; |
3352 | case Intrinsic::nvvm_suld_1d_array_v2i32_zero: |
3353 | return NVPTXISD::Suld1DArrayV2I32Zero; |
3354 | case Intrinsic::nvvm_suld_1d_array_v2i64_zero: |
3355 | return NVPTXISD::Suld1DArrayV2I64Zero; |
3356 | case Intrinsic::nvvm_suld_1d_array_v4i8_zero: |
3357 | return NVPTXISD::Suld1DArrayV4I8Zero; |
3358 | case Intrinsic::nvvm_suld_1d_array_v4i16_zero: |
3359 | return NVPTXISD::Suld1DArrayV4I16Zero; |
3360 | case Intrinsic::nvvm_suld_1d_array_v4i32_zero: |
3361 | return NVPTXISD::Suld1DArrayV4I32Zero; |
3362 | case Intrinsic::nvvm_suld_2d_i8_zero: |
3363 | return NVPTXISD::Suld2DI8Zero; |
3364 | case Intrinsic::nvvm_suld_2d_i16_zero: |
3365 | return NVPTXISD::Suld2DI16Zero; |
3366 | case Intrinsic::nvvm_suld_2d_i32_zero: |
3367 | return NVPTXISD::Suld2DI32Zero; |
3368 | case Intrinsic::nvvm_suld_2d_i64_zero: |
3369 | return NVPTXISD::Suld2DI64Zero; |
3370 | case Intrinsic::nvvm_suld_2d_v2i8_zero: |
3371 | return NVPTXISD::Suld2DV2I8Zero; |
3372 | case Intrinsic::nvvm_suld_2d_v2i16_zero: |
3373 | return NVPTXISD::Suld2DV2I16Zero; |
3374 | case Intrinsic::nvvm_suld_2d_v2i32_zero: |
3375 | return NVPTXISD::Suld2DV2I32Zero; |
3376 | case Intrinsic::nvvm_suld_2d_v2i64_zero: |
3377 | return NVPTXISD::Suld2DV2I64Zero; |
3378 | case Intrinsic::nvvm_suld_2d_v4i8_zero: |
3379 | return NVPTXISD::Suld2DV4I8Zero; |
3380 | case Intrinsic::nvvm_suld_2d_v4i16_zero: |
3381 | return NVPTXISD::Suld2DV4I16Zero; |
3382 | case Intrinsic::nvvm_suld_2d_v4i32_zero: |
3383 | return NVPTXISD::Suld2DV4I32Zero; |
3384 | case Intrinsic::nvvm_suld_2d_array_i8_zero: |
3385 | return NVPTXISD::Suld2DArrayI8Zero; |
3386 | case Intrinsic::nvvm_suld_2d_array_i16_zero: |
3387 | return NVPTXISD::Suld2DArrayI16Zero; |
3388 | case Intrinsic::nvvm_suld_2d_array_i32_zero: |
3389 | return NVPTXISD::Suld2DArrayI32Zero; |
3390 | case Intrinsic::nvvm_suld_2d_array_i64_zero: |
3391 | return NVPTXISD::Suld2DArrayI64Zero; |
3392 | case Intrinsic::nvvm_suld_2d_array_v2i8_zero: |
3393 | return NVPTXISD::Suld2DArrayV2I8Zero; |
3394 | case Intrinsic::nvvm_suld_2d_array_v2i16_zero: |
3395 | return NVPTXISD::Suld2DArrayV2I16Zero; |
3396 | case Intrinsic::nvvm_suld_2d_array_v2i32_zero: |
3397 | return NVPTXISD::Suld2DArrayV2I32Zero; |
3398 | case Intrinsic::nvvm_suld_2d_array_v2i64_zero: |
3399 | return NVPTXISD::Suld2DArrayV2I64Zero; |
3400 | case Intrinsic::nvvm_suld_2d_array_v4i8_zero: |
3401 | return NVPTXISD::Suld2DArrayV4I8Zero; |
3402 | case Intrinsic::nvvm_suld_2d_array_v4i16_zero: |
3403 | return NVPTXISD::Suld2DArrayV4I16Zero; |
3404 | case Intrinsic::nvvm_suld_2d_array_v4i32_zero: |
3405 | return NVPTXISD::Suld2DArrayV4I32Zero; |
3406 | case Intrinsic::nvvm_suld_3d_i8_zero: |
3407 | return NVPTXISD::Suld3DI8Zero; |
3408 | case Intrinsic::nvvm_suld_3d_i16_zero: |
3409 | return NVPTXISD::Suld3DI16Zero; |
3410 | case Intrinsic::nvvm_suld_3d_i32_zero: |
3411 | return NVPTXISD::Suld3DI32Zero; |
3412 | case Intrinsic::nvvm_suld_3d_i64_zero: |
3413 | return NVPTXISD::Suld3DI64Zero; |
3414 | case Intrinsic::nvvm_suld_3d_v2i8_zero: |
3415 | return NVPTXISD::Suld3DV2I8Zero; |
3416 | case Intrinsic::nvvm_suld_3d_v2i16_zero: |
3417 | return NVPTXISD::Suld3DV2I16Zero; |
3418 | case Intrinsic::nvvm_suld_3d_v2i32_zero: |
3419 | return NVPTXISD::Suld3DV2I32Zero; |
3420 | case Intrinsic::nvvm_suld_3d_v2i64_zero: |
3421 | return NVPTXISD::Suld3DV2I64Zero; |
3422 | case Intrinsic::nvvm_suld_3d_v4i8_zero: |
3423 | return NVPTXISD::Suld3DV4I8Zero; |
3424 | case Intrinsic::nvvm_suld_3d_v4i16_zero: |
3425 | return NVPTXISD::Suld3DV4I16Zero; |
3426 | case Intrinsic::nvvm_suld_3d_v4i32_zero: |
3427 | return NVPTXISD::Suld3DV4I32Zero; |
3428 | } |
3429 | } |
3430 | |
3431 | |
3432 | |
3433 | |
3434 | |
3435 | |
3436 | bool NVPTXTargetLowering::getTgtMemIntrinsic( |
3437 | IntrinsicInfo &Info, const CallInst &I, |
3438 | MachineFunction &MF, unsigned Intrinsic) const { |
3439 | switch (Intrinsic) { |
3440 | default: |
3441 | return false; |
3442 | case Intrinsic::nvvm_match_all_sync_i32p: |
3443 | case Intrinsic::nvvm_match_all_sync_i64p: |
3444 | Info.opc = ISD::INTRINSIC_W_CHAIN; |
3445 | |
3446 | |
3447 | |
3448 | Info.memVT = MVT::i1; |
3449 | |
3450 | |
3451 | Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore; |
3452 | return true; |
3453 | case Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_col: |
3454 | case Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_row: |
3455 | case Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_col_stride: |
3456 | case Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_row_stride: |
3457 | case Intrinsic::nvvm_wmma_m16n16k16_load_b_f16_col: |
3458 | case Intrinsic::nvvm_wmma_m16n16k16_load_b_f16_row: |
3459 | case Intrinsic::nvvm_wmma_m16n16k16_load_b_f16_col_stride: |
3460 | case Intrinsic::nvvm_wmma_m16n16k16_load_b_f16_row_stride: |
3461 | case Intrinsic::nvvm_wmma_m32n8k16_load_a_f16_col: |
3462 | case Intrinsic::nvvm_wmma_m32n8k16_load_a_f16_row: |
3463 | case Intrinsic::nvvm_wmma_m32n8k16_load_a_f16_col_stride: |
3464 | case Intrinsic::nvvm_wmma_m32n8k16_load_a_f16_row_stride: |
3465 | case Intrinsic::nvvm_wmma_m32n8k16_load_b_f16_col: |
3466 | case Intrinsic::nvvm_wmma_m32n8k16_load_b_f16_row: |
3467 | case Intrinsic::nvvm_wmma_m32n8k16_load_b_f16_col_stride: |
3468 | case Intrinsic::nvvm_wmma_m32n8k16_load_b_f16_row_stride: |
3469 | case Intrinsic::nvvm_wmma_m8n32k16_load_a_f16_col: |
3470 | case Intrinsic::nvvm_wmma_m8n32k16_load_a_f16_row: |
3471 | case Intrinsic::nvvm_wmma_m8n32k16_load_a_f16_col_stride: |
3472 | case Intrinsic::nvvm_wmma_m8n32k16_load_a_f16_row_stride: |
3473 | case Intrinsic::nvvm_wmma_m8n32k16_load_b_f16_col: |
3474 | case Intrinsic::nvvm_wmma_m8n32k16_load_b_f16_row: |
3475 | case Intrinsic::nvvm_wmma_m8n32k16_load_b_f16_col_stride: |
3476 | case Intrinsic::nvvm_wmma_m8n32k16_load_b_f16_row_stride: { |
3477 | Info.opc = ISD::INTRINSIC_W_CHAIN; |
3478 | Info.memVT = MVT::v8f16; |
3479 | Info.ptrVal = I.getArgOperand(0); |
3480 | Info.offset = 0; |
3481 | Info.flags = MachineMemOperand::MOLoad; |
3482 | Info.align = Align(16); |
3483 | return true; |
3484 | } |
3485 | case Intrinsic::nvvm_wmma_m16n16k16_load_a_s8_col: |
3486 | case Intrinsic::nvvm_wmma_m16n16k16_load_a_s8_col_stride: |
3487 | case Intrinsic::nvvm_wmma_m16n16k16_load_a_u8_col_stride: |
3488 | case Intrinsic::nvvm_wmma_m16n16k16_load_a_u8_col: |
3489 | case Intrinsic::nvvm_wmma_m16n16k16_load_a_s8_row: |
3490 | case Intrinsic::nvvm_wmma_m16n16k16_load_a_s8_row_stride: |
3491 | case Intrinsic::nvvm_wmma_m16n16k16_load_a_u8_row_stride: |
3492 | case Intrinsic::nvvm_wmma_m16n16k16_load_a_u8_row: |
3493 | case Intrinsic::nvvm_wmma_m8n32k16_load_a_bf16_col: |
3494 | case Intrinsic::nvvm_wmma_m8n32k16_load_a_bf16_col_stride: |
3495 | case Intrinsic::nvvm_wmma_m8n32k16_load_a_bf16_row: |
3496 | case Intrinsic::nvvm_wmma_m8n32k16_load_a_bf16_row_stride: |
3497 | case Intrinsic::nvvm_wmma_m16n16k16_load_b_s8_col: |
3498 | case Intrinsic::nvvm_wmma_m16n16k16_load_b_s8_col_stride: |
3499 | case Intrinsic::nvvm_wmma_m16n16k16_load_b_u8_col_stride: |
3500 | case Intrinsic::nvvm_wmma_m16n16k16_load_b_u8_col: |
3501 | case Intrinsic::nvvm_wmma_m16n16k16_load_b_s8_row: |
3502 | case Intrinsic::nvvm_wmma_m16n16k16_load_b_s8_row_stride: |
3503 | case Intrinsic::nvvm_wmma_m16n16k16_load_b_u8_row_stride: |
3504 | case Intrinsic::nvvm_wmma_m16n16k16_load_b_u8_row: |
3505 | case Intrinsic::nvvm_wmma_m32n8k16_load_b_bf16_col: |
3506 | case Intrinsic::nvvm_wmma_m32n8k16_load_b_bf16_col_stride: |
3507 | case Intrinsic::nvvm_wmma_m32n8k16_load_b_bf16_row: |
3508 | case Intrinsic::nvvm_wmma_m32n8k16_load_b_bf16_row_stride: { |
3509 | Info.opc = ISD::INTRINSIC_W_CHAIN; |
3510 | Info.memVT = MVT::v2i32; |
3511 | Info.ptrVal = I.getArgOperand(0); |
3512 | Info.offset = 0; |
3513 | Info.flags = MachineMemOperand::MOLoad; |
3514 | Info.align = Align(8); |
3515 | return true; |
3516 | } |
3517 | |
3518 | case Intrinsic::nvvm_wmma_m32n8k16_load_a_s8_col: |
3519 | case Intrinsic::nvvm_wmma_m32n8k16_load_a_s8_col_stride: |
3520 | case Intrinsic::nvvm_wmma_m32n8k16_load_a_u8_col_stride: |
3521 | case Intrinsic::nvvm_wmma_m32n8k16_load_a_u8_col: |
3522 | case Intrinsic::nvvm_wmma_m32n8k16_load_a_s8_row: |
3523 | case Intrinsic::nvvm_wmma_m32n8k16_load_a_s8_row_stride: |
3524 | case Intrinsic::nvvm_wmma_m32n8k16_load_a_u8_row_stride: |
3525 | case Intrinsic::nvvm_wmma_m32n8k16_load_a_u8_row: |
3526 | case Intrinsic::nvvm_wmma_m16n16k16_load_a_bf16_col: |
3527 | case Intrinsic::nvvm_wmma_m16n16k16_load_a_bf16_col_stride: |
3528 | case Intrinsic::nvvm_wmma_m16n16k16_load_a_bf16_row: |
3529 | case Intrinsic::nvvm_wmma_m16n16k16_load_a_bf16_row_stride: |
3530 | case Intrinsic::nvvm_wmma_m16n16k8_load_a_tf32_col: |
3531 | case Intrinsic::nvvm_wmma_m16n16k8_load_a_tf32_col_stride: |
3532 | case Intrinsic::nvvm_wmma_m16n16k8_load_a_tf32_row: |
3533 | case Intrinsic::nvvm_wmma_m16n16k8_load_a_tf32_row_stride: |
3534 | |
3535 | case Intrinsic::nvvm_wmma_m8n32k16_load_b_s8_col: |
3536 | case Intrinsic::nvvm_wmma_m8n32k16_load_b_s8_col_stride: |
3537 | case Intrinsic::nvvm_wmma_m8n32k16_load_b_u8_col_stride: |
3538 | case Intrinsic::nvvm_wmma_m8n32k16_load_b_u8_col: |
3539 | case Intrinsic::nvvm_wmma_m8n32k16_load_b_s8_row: |
3540 | case Intrinsic::nvvm_wmma_m8n32k16_load_b_s8_row_stride: |
3541 | case Intrinsic::nvvm_wmma_m8n32k16_load_b_u8_row_stride: |
3542 | case Intrinsic::nvvm_wmma_m8n32k16_load_b_u8_row: |
3543 | case Intrinsic::nvvm_wmma_m16n16k16_load_b_bf16_col: |
3544 | case Intrinsic::nvvm_wmma_m16n16k16_load_b_bf16_col_stride: |
3545 | case Intrinsic::nvvm_wmma_m16n16k16_load_b_bf16_row: |
3546 | case Intrinsic::nvvm_wmma_m16n16k16_load_b_bf16_row_stride: |
3547 | case Intrinsic::nvvm_wmma_m16n16k8_load_b_tf32_col: |
3548 | case Intrinsic::nvvm_wmma_m16n16k8_load_b_tf32_col_stride: |
3549 | case Intrinsic::nvvm_wmma_m16n16k8_load_b_tf32_row: |
3550 | case Intrinsic::nvvm_wmma_m16n16k8_load_b_tf32_row_stride: |
3551 | case Intrinsic::nvvm_ldmatrix_sync_aligned_m8n8_x4_b16: |
3552 | case Intrinsic::nvvm_ldmatrix_sync_aligned_m8n8_x4_trans_b16: { |
3553 | Info.opc = ISD::INTRINSIC_W_CHAIN; |
3554 | Info.memVT = MVT::v4i32; |
3555 | Info.ptrVal = I.getArgOperand(0); |
3556 | Info.offset = 0; |
3557 | Info.flags = MachineMemOperand::MOLoad; |
3558 | Info.align = Align(16); |
3559 | return true; |
3560 | } |
3561 | |
3562 | case Intrinsic::nvvm_wmma_m32n8k16_load_b_s8_col: |
3563 | case Intrinsic::nvvm_wmma_m32n8k16_load_b_s8_col_stride: |
3564 | case Intrinsic::nvvm_wmma_m32n8k16_load_b_u8_col_stride: |
3565 | case Intrinsic::nvvm_wmma_m32n8k16_load_b_u8_col: |
3566 | case Intrinsic::nvvm_wmma_m32n8k16_load_b_s8_row: |
3567 | case Intrinsic::nvvm_wmma_m32n8k16_load_b_s8_row_stride: |
3568 | case Intrinsic::nvvm_wmma_m32n8k16_load_b_u8_row_stride: |
3569 | case Intrinsic::nvvm_wmma_m32n8k16_load_b_u8_row: |
3570 | |
3571 | case Intrinsic::nvvm_wmma_m8n32k16_load_a_s8_col: |
3572 | case Intrinsic::nvvm_wmma_m8n32k16_load_a_s8_col_stride: |
3573 | case Intrinsic::nvvm_wmma_m8n32k16_load_a_u8_col_stride: |
3574 | case Intrinsic::nvvm_wmma_m8n32k16_load_a_u8_col: |
3575 | case Intrinsic::nvvm_wmma_m8n32k16_load_a_s8_row: |
3576 | case Intrinsic::nvvm_wmma_m8n32k16_load_a_s8_row_stride: |
3577 | case Intrinsic::nvvm_wmma_m8n32k16_load_a_u8_row_stride: |
3578 | case Intrinsic::nvvm_wmma_m8n32k16_load_a_u8_row: |
3579 | case Intrinsic::nvvm_wmma_m8n8k128_load_a_b1_row: |
3580 | case Intrinsic::nvvm_wmma_m8n8k128_load_a_b1_row_stride: |
3581 | case Intrinsic::nvvm_wmma_m8n8k128_load_b_b1_col: |
3582 | case Intrinsic::nvvm_wmma_m8n8k128_load_b_b1_col_stride: |
3583 | case Intrinsic::nvvm_wmma_m8n8k32_load_a_s4_row: |
3584 | case Intrinsic::nvvm_wmma_m8n8k32_load_a_s4_row_stride: |
3585 | case Intrinsic::nvvm_wmma_m8n8k32_load_a_u4_row_stride: |
3586 | case Intrinsic::nvvm_wmma_m8n8k32_load_a_u4_row: |
3587 | case Intrinsic::nvvm_wmma_m8n8k32_load_b_s4_col: |
3588 | case Intrinsic::nvvm_wmma_m8n8k32_load_b_s4_col_stride: |
3589 | case Intrinsic::nvvm_wmma_m8n8k32_load_b_u4_col_stride: |
3590 | case Intrinsic::nvvm_wmma_m8n8k32_load_b_u4_col: |
3591 | case Intrinsic::nvvm_ldmatrix_sync_aligned_m8n8_x1_b16: |
3592 | case Intrinsic::nvvm_ldmatrix_sync_aligned_m8n8_x1_trans_b16: { |
3593 | Info.opc = ISD::INTRINSIC_W_CHAIN; |
3594 | Info.memVT = MVT::i32; |
3595 | Info.ptrVal = I.getArgOperand(0); |
3596 | Info.offset = 0; |
3597 | Info.flags = MachineMemOperand::MOLoad; |
3598 | Info.align = Align(4); |
3599 | return true; |
3600 | } |
3601 | |
3602 | case Intrinsic::nvvm_wmma_m16n16k16_load_c_f16_col: |
3603 | case Intrinsic::nvvm_wmma_m16n16k16_load_c_f16_row: |
3604 | case Intrinsic::nvvm_wmma_m16n16k16_load_c_f16_col_stride: |
3605 | case Intrinsic::nvvm_wmma_m16n16k16_load_c_f16_row_stride: |
3606 | case Intrinsic::nvvm_wmma_m32n8k16_load_c_f16_col: |
3607 | case Intrinsic::nvvm_wmma_m32n8k16_load_c_f16_row: |
3608 | case Intrinsic::nvvm_wmma_m32n8k16_load_c_f16_col_stride: |
3609 | case Intrinsic::nvvm_wmma_m32n8k16_load_c_f16_row_stride: |
3610 | case Intrinsic::nvvm_wmma_m8n32k16_load_c_f16_col: |
3611 | case Intrinsic::nvvm_wmma_m8n32k16_load_c_f16_row: |
3612 | case Intrinsic::nvvm_wmma_m8n32k16_load_c_f16_col_stride: |
3613 | case Intrinsic::nvvm_wmma_m8n32k16_load_c_f16_row_stride: { |
3614 | Info.opc = ISD::INTRINSIC_W_CHAIN; |
3615 | Info.memVT = MVT::v4f16; |
3616 | Info.ptrVal = I.getArgOperand(0); |
3617 | Info.offset = 0; |
3618 | Info.flags = MachineMemOperand::MOLoad; |
3619 | Info.align = Align(16); |
3620 | return true; |
3621 | } |
3622 | |
3623 | case Intrinsic::nvvm_wmma_m16n16k16_load_c_f32_col: |
3624 | case Intrinsic::nvvm_wmma_m16n16k16_load_c_f32_row: |
3625 | case Intrinsic::nvvm_wmma_m16n16k16_load_c_f32_col_stride: |
3626 | case Intrinsic::nvvm_wmma_m16n16k16_load_c_f32_row_stride: |
3627 | case Intrinsic::nvvm_wmma_m32n8k16_load_c_f32_col: |
3628 | case Intrinsic::nvvm_wmma_m32n8k16_load_c_f32_row: |
3629 | case Intrinsic::nvvm_wmma_m32n8k16_load_c_f32_col_stride: |
3630 | case Intrinsic::nvvm_wmma_m32n8k16_load_c_f32_row_stride: |
3631 | case Intrinsic::nvvm_wmma_m8n32k16_load_c_f32_col: |
3632 | case Intrinsic::nvvm_wmma_m8n32k16_load_c_f32_row: |
3633 | case Intrinsic::nvvm_wmma_m8n32k16_load_c_f32_col_stride: |
3634 | case Intrinsic::nvvm_wmma_m8n32k16_load_c_f32_row_stride: |
3635 | case Intrinsic::nvvm_wmma_m16n16k8_load_c_f32_col: |
3636 | case Intrinsic::nvvm_wmma_m16n16k8_load_c_f32_row: |
3637 | case Intrinsic::nvvm_wmma_m16n16k8_load_c_f32_col_stride: |
3638 | case Intrinsic::nvvm_wmma_m16n16k8_load_c_f32_row_stride: { |
3639 | Info.opc = ISD::INTRINSIC_W_CHAIN; |
3640 | Info.memVT = MVT::v8f32; |
3641 | Info.ptrVal = I.getArgOperand(0); |
3642 | Info.offset = 0; |
3643 | Info.flags = MachineMemOperand::MOLoad; |
3644 | Info.align = Align(16); |
3645 | return true; |
3646 | } |
3647 | |
3648 | case Intrinsic::nvvm_wmma_m32n8k16_load_a_bf16_col: |
3649 | case Intrinsic::nvvm_wmma_m32n8k16_load_a_bf16_col_stride: |
3650 | case Intrinsic::nvvm_wmma_m32n8k16_load_a_bf16_row: |
3651 | case Intrinsic::nvvm_wmma_m32n8k16_load_a_bf16_row_stride: |
3652 | |
3653 | case Intrinsic::nvvm_wmma_m8n32k16_load_b_bf16_col: |
3654 | case Intrinsic::nvvm_wmma_m8n32k16_load_b_bf16_col_stride: |
3655 | case Intrinsic::nvvm_wmma_m8n32k16_load_b_bf16_row: |
3656 | case Intrinsic::nvvm_wmma_m8n32k16_load_b_bf16_row_stride: |
3657 | |
3658 | case Intrinsic::nvvm_wmma_m16n16k16_load_c_s32_col: |
3659 | case Intrinsic::nvvm_wmma_m16n16k16_load_c_s32_col_stride: |
3660 | case Intrinsic::nvvm_wmma_m16n16k16_load_c_s32_row: |
3661 | case Intrinsic::nvvm_wmma_m16n16k16_load_c_s32_row_stride: |
3662 | case Intrinsic::nvvm_wmma_m32n8k16_load_c_s32_col: |
3663 | case Intrinsic::nvvm_wmma_m32n8k16_load_c_s32_col_stride: |
3664 | case Intrinsic::nvvm_wmma_m32n8k16_load_c_s32_row: |
3665 | case Intrinsic::nvvm_wmma_m32n8k16_load_c_s32_row_stride: |
3666 | case Intrinsic::nvvm_wmma_m8n32k16_load_c_s32_col: |
3667 | case Intrinsic::nvvm_wmma_m8n32k16_load_c_s32_col_stride: |
3668 | case Intrinsic::nvvm_wmma_m8n32k16_load_c_s32_row: |
3669 | case Intrinsic::nvvm_wmma_m8n32k16_load_c_s32_row_stride: { |
3670 | Info.opc = ISD::INTRINSIC_W_CHAIN; |
3671 | Info.memVT = MVT::v8i32; |
3672 | Info.ptrVal = I.getArgOperand(0); |
3673 | Info.offset = 0; |
3674 | Info.flags = MachineMemOperand::MOLoad; |
3675 | Info.align = Align(16); |
3676 | return true; |
3677 | } |
3678 | |
3679 | case Intrinsic::nvvm_wmma_m8n8k128_load_c_s32_col: |
3680 | case Intrinsic::nvvm_wmma_m8n8k128_load_c_s32_col_stride: |
3681 | case Intrinsic::nvvm_wmma_m8n8k128_load_c_s32_row: |
3682 | case Intrinsic::nvvm_wmma_m8n8k128_load_c_s32_row_stride: |
3683 | case Intrinsic::nvvm_wmma_m8n8k32_load_c_s32_col: |
3684 | case Intrinsic::nvvm_wmma_m8n8k32_load_c_s32_col_stride: |
3685 | case Intrinsic::nvvm_wmma_m8n8k32_load_c_s32_row: |
3686 | case Intrinsic::nvvm_wmma_m8n8k32_load_c_s32_row_stride: |
3687 | case Intrinsic::nvvm_ldmatrix_sync_aligned_m8n8_x2_b16: |
3688 | case Intrinsic::nvvm_ldmatrix_sync_aligned_m8n8_x2_trans_b16: { |
3689 | Info.opc = ISD::INTRINSIC_W_CHAIN; |
3690 | Info.memVT = MVT::v2i32; |
3691 | Info.ptrVal = I.getArgOperand(0); |
3692 | Info.offset = 0; |
3693 | Info.flags = MachineMemOperand::MOLoad; |
3694 | Info.align = Align(8); |
3695 | return true; |
3696 | } |
3697 | |
3698 | case Intrinsic::nvvm_wmma_m8n8k4_load_a_f64_col: |
3699 | case Intrinsic::nvvm_wmma_m8n8k4_load_a_f64_col_stride: |
3700 | case Intrinsic::nvvm_wmma_m8n8k4_load_a_f64_row: |
3701 | case Intrinsic::nvvm_wmma_m8n8k4_load_a_f64_row_stride: |
3702 | |
3703 | case Intrinsic::nvvm_wmma_m8n8k4_load_b_f64_col: |
3704 | case Intrinsic::nvvm_wmma_m8n8k4_load_b_f64_col_stride: |
3705 | case Intrinsic::nvvm_wmma_m8n8k4_load_b_f64_row: |
3706 | case Intrinsic::nvvm_wmma_m8n8k4_load_b_f64_row_stride: { |
3707 | Info.opc = ISD::INTRINSIC_W_CHAIN; |
3708 | Info.memVT = MVT::f64; |
3709 | Info.ptrVal = I.getArgOperand(0); |
3710 | Info.offset = 0; |
3711 | Info.flags = MachineMemOperand::MOLoad; |
3712 | Info.align = Align(8); |
3713 | return true; |
3714 | } |
3715 | |
3716 | case Intrinsic::nvvm_wmma_m8n8k4_load_c_f64_col: |
3717 | case Intrinsic::nvvm_wmma_m8n8k4_load_c_f64_col_stride: |
3718 | case Intrinsic::nvvm_wmma_m8n8k4_load_c_f64_row: |
3719 | case Intrinsic::nvvm_wmma_m8n8k4_load_c_f64_row_stride: { |
3720 | Info.opc = ISD::INTRINSIC_W_CHAIN; |
3721 | Info.memVT = MVT::v2f64; |
3722 | Info.ptrVal = I.getArgOperand(0); |
3723 | Info.offset = 0; |
3724 | Info.flags = MachineMemOperand::MOLoad; |
3725 | Info.align = Align(16); |
3726 | return true; |
3727 | } |
3728 | |
3729 | case Intrinsic::nvvm_wmma_m16n16k16_store_d_f16_col: |
3730 | case Intrinsic::nvvm_wmma_m16n16k16_store_d_f16_row: |
3731 | case Intrinsic::nvvm_wmma_m16n16k16_store_d_f16_col_stride: |
3732 | case Intrinsic::nvvm_wmma_m16n16k16_store_d_f16_row_stride: |
3733 | case Intrinsic::nvvm_wmma_m32n8k16_store_d_f16_col: |
3734 | case Intrinsic::nvvm_wmma_m32n8k16_store_d_f16_row: |
3735 | case Intrinsic::nvvm_wmma_m32n8k16_store_d_f16_col_stride: |
3736 | case Intrinsic::nvvm_wmma_m32n8k16_store_d_f16_row_stride: |
3737 | case Intrinsic::nvvm_wmma_m8n32k16_store_d_f16_col: |
3738 | case Intrinsic::nvvm_wmma_m8n32k16_store_d_f16_row: |
3739 | case Intrinsic::nvvm_wmma_m8n32k16_store_d_f16_col_stride: |
3740 | case Intrinsic::nvvm_wmma_m8n32k16_store_d_f16_row_stride: { |
3741 | Info.opc = ISD::INTRINSIC_VOID; |
3742 | Info.memVT = MVT::v4f16; |
3743 | Info.ptrVal = I.getArgOperand(0); |
3744 | Info.offset = 0; |
3745 | Info.flags = MachineMemOperand::MOStore; |
3746 | Info.align = Align(16); |
3747 | return true; |
3748 | } |
3749 | |
3750 | case Intrinsic::nvvm_wmma_m16n16k16_store_d_f32_col: |
3751 | case Intrinsic::nvvm_wmma_m16n16k16_store_d_f32_row: |
3752 | case Intrinsic::nvvm_wmma_m16n16k16_store_d_f32_col_stride: |
3753 | case Intrinsic::nvvm_wmma_m16n16k16_store_d_f32_row_stride: |
3754 | case Intrinsic::nvvm_wmma_m32n8k16_store_d_f32_col: |
3755 | case Intrinsic::nvvm_wmma_m32n8k16_store_d_f32_row: |
3756 | case Intrinsic::nvvm_wmma_m32n8k16_store_d_f32_col_stride: |
3757 | case Intrinsic::nvvm_wmma_m32n8k16_store_d_f32_row_stride: |
3758 | case Intrinsic::nvvm_wmma_m8n32k16_store_d_f32_col: |
3759 | case Intrinsic::nvvm_wmma_m8n32k16_store_d_f32_row: |
3760 | case Intrinsic::nvvm_wmma_m8n32k16_store_d_f32_col_stride: |
3761 | case Intrinsic::nvvm_wmma_m8n32k16_store_d_f32_row_stride: |
3762 | case Intrinsic::nvvm_wmma_m16n16k8_store_d_f32_col: |
3763 | case Intrinsic::nvvm_wmma_m16n16k8_store_d_f32_row: |
3764 | case Intrinsic::nvvm_wmma_m16n16k8_store_d_f32_col_stride: |
3765 | case Intrinsic::nvvm_wmma_m16n16k8_store_d_f32_row_stride: { |
3766 | Info.opc = ISD::INTRINSIC_VOID; |
3767 | Info.memVT = MVT::v8f32; |
3768 | Info.ptrVal = I.getArgOperand(0); |
3769 | Info.offset = 0; |
3770 | Info.flags = MachineMemOperand::MOStore; |
3771 | Info.align = Align(16); |
3772 | return true; |
3773 | } |
3774 | |
3775 | case Intrinsic::nvvm_wmma_m16n16k16_store_d_s32_col: |
3776 | case Intrinsic::nvvm_wmma_m16n16k16_store_d_s32_col_stride: |
3777 | case Intrinsic::nvvm_wmma_m16n16k16_store_d_s32_row: |
3778 | case Intrinsic::nvvm_wmma_m16n16k16_store_d_s32_row_stride: |
3779 | case Intrinsic::nvvm_wmma_m32n8k16_store_d_s32_col: |
3780 | case Intrinsic::nvvm_wmma_m32n8k16_store_d_s32_col_stride: |
3781 | case Intrinsic::nvvm_wmma_m32n8k16_store_d_s32_row: |
3782 | case Intrinsic::nvvm_wmma_m32n8k16_store_d_s32_row_stride: |
3783 | case Intrinsic::nvvm_wmma_m8n32k16_store_d_s32_col: |
3784 | case Intrinsic::nvvm_wmma_m8n32k16_store_d_s32_col_stride: |
3785 | case Intrinsic::nvvm_wmma_m8n32k16_store_d_s32_row: |
3786 | case Intrinsic::nvvm_wmma_m8n32k16_store_d_s32_row_stride: { |
3787 | Info.opc = ISD::INTRINSIC_VOID; |
3788 | Info.memVT = MVT::v8i32; |
3789 | Info.ptrVal = I.getArgOperand(0); |
3790 | Info.offset = 0; |
3791 | Info.flags = MachineMemOperand::MOStore; |
3792 | Info.align = Align(16); |
3793 | return true; |
3794 | } |
3795 | |
3796 | case Intrinsic::nvvm_wmma_m8n8k128_store_d_s32_col: |
3797 | case Intrinsic::nvvm_wmma_m8n8k128_store_d_s32_col_stride: |
3798 | case Intrinsic::nvvm_wmma_m8n8k128_store_d_s32_row: |
3799 | case Intrinsic::nvvm_wmma_m8n8k128_store_d_s32_row_stride: |
3800 | case Intrinsic::nvvm_wmma_m8n8k32_store_d_s32_col: |
3801 | case Intrinsic::nvvm_wmma_m8n8k32_store_d_s32_col_stride: |
3802 | case Intrinsic::nvvm_wmma_m8n8k32_store_d_s32_row: |
3803 | case Intrinsic::nvvm_wmma_m8n8k32_store_d_s32_row_stride: { |
3804 | Info.opc = ISD::INTRINSIC_VOID; |
3805 | Info.memVT = MVT::v2i32; |
3806 | Info.ptrVal = I.getArgOperand(0); |
3807 | Info.offset = 0; |
3808 | Info.flags = MachineMemOperand::MOStore; |
3809 | Info.align = Align(8); |
3810 | return true; |
3811 | } |
3812 | |
3813 | case Intrinsic::nvvm_wmma_m8n8k4_store_d_f64_col: |
3814 | case Intrinsic::nvvm_wmma_m8n8k4_store_d_f64_col_stride: |
3815 | case Intrinsic::nvvm_wmma_m8n8k4_store_d_f64_row: |
3816 | case Intrinsic::nvvm_wmma_m8n8k4_store_d_f64_row_stride: { |
3817 | Info.opc = ISD::INTRINSIC_VOID; |
3818 | Info.memVT = MVT::v2f64; |
3819 | Info.ptrVal = I.getArgOperand(0); |
3820 | Info.offset = 0; |
3821 | Info.flags = MachineMemOperand::MOStore; |
3822 | Info.align = Align(16); |
3823 | return true; |
3824 | } |
3825 | |
3826 | case Intrinsic::nvvm_atomic_load_inc_32: |
3827 | case Intrinsic::nvvm_atomic_load_dec_32: |
3828 | |
3829 | case Intrinsic::nvvm_atomic_add_gen_f_cta: |
3830 | case Intrinsic::nvvm_atomic_add_gen_f_sys: |
3831 | case Intrinsic::nvvm_atomic_add_gen_i_cta: |
3832 | case Intrinsic::nvvm_atomic_add_gen_i_sys: |
3833 | case Intrinsic::nvvm_atomic_and_gen_i_cta: |
3834 | case Intrinsic::nvvm_atomic_and_gen_i_sys: |
3835 | case Intrinsic::nvvm_atomic_cas_gen_i_cta: |
3836 | case Intrinsic::nvvm_atomic_cas_gen_i_sys: |
3837 | case Intrinsic::nvvm_atomic_dec_gen_i_cta: |
3838 | case Intrinsic::nvvm_atomic_dec_gen_i_sys: |
3839 | case Intrinsic::nvvm_atomic_inc_gen_i_cta: |
3840 | case Intrinsic::nvvm_atomic_inc_gen_i_sys: |
3841 | case Intrinsic::nvvm_atomic_max_gen_i_cta: |
3842 | case Intrinsic::nvvm_atomic_max_gen_i_sys: |
3843 | case Intrinsic::nvvm_atomic_min_gen_i_cta: |
3844 | case Intrinsic::nvvm_atomic_min_gen_i_sys: |
3845 | case Intrinsic::nvvm_atomic_or_gen_i_cta: |
3846 | case Intrinsic::nvvm_atomic_or_gen_i_sys: |
3847 | case Intrinsic::nvvm_atomic_exch_gen_i_cta: |
3848 | case Intrinsic::nvvm_atomic_exch_gen_i_sys: |
3849 | case Intrinsic::nvvm_atomic_xor_gen_i_cta: |
3850 | case Intrinsic::nvvm_atomic_xor_gen_i_sys: { |
3851 | auto &DL = I.getModule()->getDataLayout(); |
3852 | Info.opc = ISD::INTRINSIC_W_CHAIN; |
3853 | Info.memVT = getValueType(DL, I.getType()); |
3854 | Info.ptrVal = I.getArgOperand(0); |
3855 | Info.offset = 0; |
3856 | Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore; |
3857 | Info.align.reset(); |
3858 | return true; |
3859 | } |
3860 | |
3861 | case Intrinsic::nvvm_ldu_global_i: |
3862 | case Intrinsic::nvvm_ldu_global_f: |
3863 | case Intrinsic::nvvm_ldu_global_p: { |
3864 | auto &DL = I.getModule()->getDataLayout(); |
3865 | Info.opc = ISD::INTRINSIC_W_CHAIN; |
3866 | if (Intrinsic == Intrinsic::nvvm_ldu_global_i) |
3867 | Info.memVT = getValueType(DL, I.getType()); |
3868 | else if(Intrinsic == Intrinsic::nvvm_ldu_global_p) |
3869 | Info.memVT = getPointerTy(DL); |
3870 | else |
3871 | Info.memVT = getValueType(DL, I.getType()); |
3872 | Info.ptrVal = I.getArgOperand(0); |
3873 | Info.offset = 0; |
3874 | Info.flags = MachineMemOperand::MOLoad; |
3875 | Info.align = cast<ConstantInt>(I.getArgOperand(1))->getMaybeAlignValue(); |
3876 | |
3877 | return true; |
3878 | } |
3879 | case Intrinsic::nvvm_ldg_global_i: |
3880 | case Intrinsic::nvvm_ldg_global_f: |
3881 | case Intrinsic::nvvm_ldg_global_p: { |
3882 | auto &DL = I.getModule()->getDataLayout(); |
3883 | |
3884 | Info.opc = ISD::INTRINSIC_W_CHAIN; |
3885 | if (Intrinsic == Intrinsic::nvvm_ldg_global_i) |
3886 | Info.memVT = getValueType(DL, I.getType()); |
3887 | else if(Intrinsic == Intrinsic::nvvm_ldg_global_p) |
3888 | Info.memVT = getPointerTy(DL); |
3889 | else |
3890 | Info.memVT = getValueType(DL, I.getType()); |
3891 | Info.ptrVal = I.getArgOperand(0); |
3892 | Info.offset = 0; |
3893 | Info.flags = MachineMemOperand::MOLoad; |
3894 | Info.align = cast<ConstantInt>(I.getArgOperand(1))->getMaybeAlignValue(); |
3895 | |
3896 | return true; |
3897 | } |
3898 | |
3899 | case Intrinsic::nvvm_tex_1d_v4f32_s32: |
3900 | case Intrinsic::nvvm_tex_1d_v4f32_f32: |
3901 | case Intrinsic::nvvm_tex_1d_level_v4f32_f32: |
3902 | case Intrinsic::nvvm_tex_1d_grad_v4f32_f32: |
3903 | case Intrinsic::nvvm_tex_1d_array_v4f32_s32: |
3904 | case Intrinsic::nvvm_tex_1d_array_v4f32_f32: |
3905 | case Intrinsic::nvvm_tex_1d_array_level_v4f32_f32: |
3906 | case Intrinsic::nvvm_tex_1d_array_grad_v4f32_f32: |
3907 | case Intrinsic::nvvm_tex_2d_v4f32_s32: |
3908 | case Intrinsic::nvvm_tex_2d_v4f32_f32: |
3909 | case Intrinsic::nvvm_tex_2d_level_v4f32_f32: |
3910 | case Intrinsic::nvvm_tex_2d_grad_v4f32_f32: |
3911 | case Intrinsic::nvvm_tex_2d_array_v4f32_s32: |
3912 | case Intrinsic::nvvm_tex_2d_array_v4f32_f32: |
3913 | case Intrinsic::nvvm_tex_2d_array_level_v4f32_f32: |
3914 | case Intrinsic::nvvm_tex_2d_array_grad_v4f32_f32: |
3915 | case Intrinsic::nvvm_tex_3d_v4f32_s32: |
3916 | case Intrinsic::nvvm_tex_3d_v4f32_f32: |
3917 | case Intrinsic::nvvm_tex_3d_level_v4f32_f32: |
3918 | case Intrinsic::nvvm_tex_3d_grad_v4f32_f32: |
3919 | case Intrinsic::nvvm_tex_cube_v4f32_f32: |
3920 | case Intrinsic::nvvm_tex_cube_level_v4f32_f32: |
3921 | case Intrinsic::nvvm_tex_cube_array_v4f32_f32: |
3922 | case Intrinsic::nvvm_tex_cube_array_level_v4f32_f32: |
3923 | case Intrinsic::nvvm_tld4_r_2d_v4f32_f32: |
3924 | case Intrinsic::nvvm_tld4_g_2d_v4f32_f32: |
3925 | case Intrinsic::nvvm_tld4_b_2d_v4f32_f32: |
3926 | case Intrinsic::nvvm_tld4_a_2d_v4f32_f32: |
3927 | case Intrinsic::nvvm_tex_unified_1d_v4f32_s32: |
3928 | case Intrinsic::nvvm_tex_unified_1d_v4f32_f32: |
3929 | case Intrinsic::nvvm_tex_unified_1d_level_v4f32_f32: |
3930 | case Intrinsic::nvvm_tex_unified_1d_grad_v4f32_f32: |
3931 | case Intrinsic::nvvm_tex_unified_1d_array_v4f32_s32: |
3932 | case Intrinsic::nvvm_tex_unified_1d_array_v4f32_f32: |
3933 | case Intrinsic::nvvm_tex_unified_1d_array_level_v4f32_f32: |
3934 | case Intrinsic::nvvm_tex_unified_1d_array_grad_v4f32_f32: |
3935 | case Intrinsic::nvvm_tex_unified_2d_v4f32_s32: |
3936 | case Intrinsic::nvvm_tex_unified_2d_v4f32_f32: |
3937 | case Intrinsic::nvvm_tex_unified_2d_level_v4f32_f32: |
3938 | case Intrinsic::nvvm_tex_unified_2d_grad_v4f32_f32: |
3939 | case Intrinsic::nvvm_tex_unified_2d_array_v4f32_s32: |
3940 | case Intrinsic::nvvm_tex_unified_2d_array_v4f32_f32: |
3941 | case Intrinsic::nvvm_tex_unified_2d_array_level_v4f32_f32: |
3942 | case Intrinsic::nvvm_tex_unified_2d_array_grad_v4f32_f32: |
3943 | case Intrinsic::nvvm_tex_unified_3d_v4f32_s32: |
3944 | case Intrinsic::nvvm_tex_unified_3d_v4f32_f32: |
3945 | case Intrinsic::nvvm_tex_unified_3d_level_v4f32_f32: |
3946 | case Intrinsic::nvvm_tex_unified_3d_grad_v4f32_f32: |
3947 | case Intrinsic::nvvm_tex_unified_cube_v4f32_f32: |
3948 | case Intrinsic::nvvm_tex_unified_cube_level_v4f32_f32: |
3949 | case Intrinsic::nvvm_tex_unified_cube_array_v4f32_f32: |
3950 | case Intrinsic::nvvm_tex_unified_cube_array_level_v4f32_f32: |
3951 | case Intrinsic::nvvm_tld4_unified_r_2d_v4f32_f32: |
3952 | case Intrinsic::nvvm_tld4_unified_g_2d_v4f32_f32: |
3953 | case Intrinsic::nvvm_tld4_unified_b_2d_v4f32_f32: |
3954 | case Intrinsic::nvvm_tld4_unified_a_2d_v4f32_f32: |
3955 | Info.opc = getOpcForTextureInstr(Intrinsic); |
3956 | Info.memVT = MVT::v4f32; |
3957 | Info.ptrVal = nullptr; |
3958 | Info.offset = 0; |
3959 | Info.flags = MachineMemOperand::MOLoad; |
3960 | Info.align = Align(16); |
3961 | return true; |
3962 | |
3963 | case Intrinsic::nvvm_tex_1d_v4s32_s32: |
3964 | case Intrinsic::nvvm_tex_1d_v4s32_f32: |
3965 | case Intrinsic::nvvm_tex_1d_level_v4s32_f32: |
3966 | case Intrinsic::nvvm_tex_1d_grad_v4s32_f32: |
3967 | case Intrinsic::nvvm_tex_1d_array_v4s32_s32: |
3968 | case Intrinsic::nvvm_tex_1d_array_v4s32_f32: |
3969 | case Intrinsic::nvvm_tex_1d_array_level_v4s32_f32: |
3970 | case Intrinsic::nvvm_tex_1d_array_grad_v4s32_f32: |
3971 | case Intrinsic::nvvm_tex_2d_v4s32_s32: |
3972 | case Intrinsic::nvvm_tex_2d_v4s32_f32: |
3973 | case Intrinsic::nvvm_tex_2d_level_v4s32_f32: |
3974 | case Intrinsic::nvvm_tex_2d_grad_v4s32_f32: |
3975 | case Intrinsic::nvvm_tex_2d_array_v4s32_s32: |
3976 | case Intrinsic::nvvm_tex_2d_array_v4s32_f32: |
3977 | case Intrinsic::nvvm_tex_2d_array_level_v4s32_f32: |
3978 | case Intrinsic::nvvm_tex_2d_array_grad_v4s32_f32: |
3979 | case Intrinsic::nvvm_tex_3d_v4s32_s32: |
3980 | case Intrinsic::nvvm_tex_3d_v4s32_f32: |
3981 | case Intrinsic::nvvm_tex_3d_level_v4s32_f32: |
3982 | case Intrinsic::nvvm_tex_3d_grad_v4s32_f32: |
3983 | case Intrinsic::nvvm_tex_cube_v4s32_f32: |
3984 | case Intrinsic::nvvm_tex_cube_level_v4s32_f32: |
3985 | case Intrinsic::nvvm_tex_cube_array_v4s32_f32: |
3986 | case Intrinsic::nvvm_tex_cube_array_level_v4s32_f32: |
3987 | case Intrinsic::nvvm_tex_cube_v4u32_f32: |
3988 | case Intrinsic::nvvm_tex_cube_level_v4u32_f32: |
3989 | case Intrinsic::nvvm_tex_cube_array_v4u32_f32: |
3990 | case Intrinsic::nvvm_tex_cube_array_level_v4u32_f32: |
3991 | case Intrinsic::nvvm_tex_1d_v4u32_s32: |
3992 | case Intrinsic::nvvm_tex_1d_v4u32_f32: |
3993 | case Intrinsic::nvvm_tex_1d_level_v4u32_f32: |
3994 | case Intrinsic::nvvm_tex_1d_grad_v4u32_f32: |
3995 | case Intrinsic::nvvm_tex_1d_array_v4u32_s32: |
3996 | case Intrinsic::nvvm_tex_1d_array_v4u32_f32: |
3997 | case Intrinsic::nvvm_tex_1d_array_level_v4u32_f32: |
3998 | case Intrinsic::nvvm_tex_1d_array_grad_v4u32_f32: |
3999 | case Intrinsic::nvvm_tex_2d_v4u32_s32: |
4000 | case Intrinsic::nvvm_tex_2d_v4u32_f32: |
4001 | case Intrinsic::nvvm_tex_2d_level_v4u32_f32: |
4002 | case Intrinsic::nvvm_tex_2d_grad_v4u32_f32: |
4003 | case Intrinsic::nvvm_tex_2d_array_v4u32_s32: |
4004 | case Intrinsic::nvvm_tex_2d_array_v4u32_f32: |
4005 | case Intrinsic::nvvm_tex_2d_array_level_v4u32_f32: |
4006 | case Intrinsic::nvvm_tex_2d_array_grad_v4u32_f32: |
4007 | case Intrinsic::nvvm_tex_3d_v4u32_s32: |
4008 | case Intrinsic::nvvm_tex_3d_v4u32_f32: |
4009 | case Intrinsic::nvvm_tex_3d_level_v4u32_f32: |
4010 | case Intrinsic::nvvm_tex_3d_grad_v4u32_f32: |
4011 | case Intrinsic::nvvm_tld4_r_2d_v4s32_f32: |
4012 | case Intrinsic::nvvm_tld4_g_2d_v4s32_f32: |
4013 | case Intrinsic::nvvm_tld4_b_2d_v4s32_f32: |
4014 | case Intrinsic::nvvm_tld4_a_2d_v4s32_f32: |
4015 | case Intrinsic::nvvm_tld4_r_2d_v4u32_f32: |
4016 | case Intrinsic::nvvm_tld4_g_2d_v4u32_f32: |
4017 | case Intrinsic::nvvm_tld4_b_2d_v4u32_f32: |
4018 | case Intrinsic::nvvm_tld4_a_2d_v4u32_f32: |
4019 | case Intrinsic::nvvm_tex_unified_1d_v4s32_s32: |
4020 | case Intrinsic::nvvm_tex_unified_1d_v4s32_f32: |
4021 | case Intrinsic::nvvm_tex_unified_1d_level_v4s32_f32: |
4022 | case Intrinsic::nvvm_tex_unified_1d_grad_v4s32_f32: |
4023 | case Intrinsic::nvvm_tex_unified_1d_array_v4s32_s32: |
4024 | case Intrinsic::nvvm_tex_unified_1d_array_v4s32_f32: |
4025 | case Intrinsic::nvvm_tex_unified_1d_array_level_v4s32_f32: |
4026 | case Intrinsic::nvvm_tex_unified_1d_array_grad_v4s32_f32: |
4027 | case Intrinsic::nvvm_tex_unified_2d_v4s32_s32: |
4028 | case Intrinsic::nvvm_tex_unified_2d_v4s32_f32: |
4029 | case Intrinsic::nvvm_tex_unified_2d_level_v4s32_f32: |
4030 | case Intrinsic::nvvm_tex_unified_2d_grad_v4s32_f32: |
4031 | case Intrinsic::nvvm_tex_unified_2d_array_v4s32_s32: |
4032 | case Intrinsic::nvvm_tex_unified_2d_array_v4s32_f32: |
4033 | case Intrinsic::nvvm_tex_unified_2d_array_level_v4s32_f32: |
4034 | case Intrinsic::nvvm_tex_unified_2d_array_grad_v4s32_f32: |
4035 | case Intrinsic::nvvm_tex_unified_3d_v4s32_s32: |
4036 | case Intrinsic::nvvm_tex_unified_3d_v4s32_f32: |
4037 | case Intrinsic::nvvm_tex_unified_3d_level_v4s32_f32: |
4038 | case Intrinsic::nvvm_tex_unified_3d_grad_v4s32_f32: |
4039 | case Intrinsic::nvvm_tex_unified_1d_v4u32_s32: |
4040 | case Intrinsic::nvvm_tex_unified_1d_v4u32_f32: |
4041 | case Intrinsic::nvvm_tex_unified_1d_level_v4u32_f32: |
4042 | case Intrinsic::nvvm_tex_unified_1d_grad_v4u32_f32: |
4043 | case Intrinsic::nvvm_tex_unified_1d_array_v4u32_s32: |
4044 | case Intrinsic::nvvm_tex_unified_1d_array_v4u32_f32: |
4045 | case Intrinsic::nvvm_tex_unified_1d_array_level_v4u32_f32: |
4046 | case Intrinsic::nvvm_tex_unified_1d_array_grad_v4u32_f32: |
4047 | case Intrinsic::nvvm_tex_unified_2d_v4u32_s32: |
4048 | case Intrinsic::nvvm_tex_unified_2d_v4u32_f32: |
4049 | case Intrinsic::nvvm_tex_unified_2d_level_v4u32_f32: |
4050 | case Intrinsic::nvvm_tex_unified_2d_grad_v4u32_f32: |
4051 | case Intrinsic::nvvm_tex_unified_2d_array_v4u32_s32: |
4052 | case Intrinsic::nvvm_tex_unified_2d_array_v4u32_f32: |
4053 | case Intrinsic::nvvm_tex_unified_2d_array_level_v4u32_f32: |
4054 | case Intrinsic::nvvm_tex_unified_2d_array_grad_v4u32_f32: |
4055 | case Intrinsic::nvvm_tex_unified_3d_v4u32_s32: |
4056 | case Intrinsic::nvvm_tex_unified_3d_v4u32_f32: |
4057 | case Intrinsic::nvvm_tex_unified_3d_level_v4u32_f32: |
4058 | case Intrinsic::nvvm_tex_unified_3d_grad_v4u32_f32: |
4059 | case Intrinsic::nvvm_tex_unified_cube_v4s32_f32: |
4060 | case Intrinsic::nvvm_tex_unified_cube_level_v4s32_f32: |
4061 | case Intrinsic::nvvm_tex_unified_cube_array_v4s32_f32: |
4062 | case Intrinsic::nvvm_tex_unified_cube_array_level_v4s32_f32: |
4063 | case Intrinsic::nvvm_tex_unified_cube_v4u32_f32: |
4064 | case Intrinsic::nvvm_tex_unified_cube_level_v4u32_f32: |
4065 | case Intrinsic::nvvm_tex_unified_cube_array_v4u32_f32: |
4066 | case Intrinsic::nvvm_tex_unified_cube_array_level_v4u32_f32: |
4067 | case Intrinsic::nvvm_tld4_unified_r_2d_v4s32_f32: |
4068 | case Intrinsic::nvvm_tld4_unified_g_2d_v4s32_f32: |
4069 | case Intrinsic::nvvm_tld4_unified_b_2d_v4s32_f32: |
4070 | case Intrinsic::nvvm_tld4_unified_a_2d_v4s32_f32: |
4071 | case Intrinsic::nvvm_tld4_unified_r_2d_v4u32_f32: |
4072 | case Intrinsic::nvvm_tld4_unified_g_2d_v4u32_f32: |
4073 | case Intrinsic::nvvm_tld4_unified_b_2d_v4u32_f32: |
4074 | case Intrinsic::nvvm_tld4_unified_a_2d_v4u32_f32: |
4075 | Info.opc = getOpcForTextureInstr(Intrinsic); |
4076 | Info.memVT = MVT::v4i32; |
4077 | Info.ptrVal = nullptr; |
4078 | Info.offset = 0; |
4079 | Info.flags = MachineMemOperand::MOLoad; |
4080 | Info.align = Align(16); |
4081 | return true; |
4082 | |
4083 | case Intrinsic::nvvm_suld_1d_i8_clamp: |
4084 | case Intrinsic::nvvm_suld_1d_v2i8_clamp: |
4085 | case Intrinsic::nvvm_suld_1d_v4i8_clamp: |
4086 | case Intrinsic::nvvm_suld_1d_array_i8_clamp: |
4087 | case Intrinsic::nvvm_suld_1d_array_v2i8_clamp: |
4088 | case Intrinsic::nvvm_suld_1d_array_v4i8_clamp: |
4089 | case Intrinsic::nvvm_suld_2d_i8_clamp: |
4090 | case Intrinsic::nvvm_suld_2d_v2i8_clamp: |
4091 | case Intrinsic::nvvm_suld_2d_v4i8_clamp: |
4092 | case Intrinsic::nvvm_suld_2d_array_i8_clamp: |
4093 | case Intrinsic::nvvm_suld_2d_array_v2i8_clamp: |
4094 | case Intrinsic::nvvm_suld_2d_array_v4i8_clamp: |
4095 | case Intrinsic::nvvm_suld_3d_i8_clamp: |
4096 | case Intrinsic::nvvm_suld_3d_v2i8_clamp: |
4097 | case Intrinsic::nvvm_suld_3d_v4i8_clamp: |
4098 | case Intrinsic::nvvm_suld_1d_i8_trap: |
4099 | case Intrinsic::nvvm_suld_1d_v2i8_trap: |
4100 | case Intrinsic::nvvm_suld_1d_v4i8_trap: |
4101 | case Intrinsic::nvvm_suld_1d_array_i8_trap: |
4102 | case Intrinsic::nvvm_suld_1d_array_v2i8_trap: |
4103 | case Intrinsic::nvvm_suld_1d_array_v4i8_trap: |
4104 | case Intrinsic::nvvm_suld_2d_i8_trap: |
4105 | case Intrinsic::nvvm_suld_2d_v2i8_trap: |
4106 | case Intrinsic::nvvm_suld_2d_v4i8_trap: |
4107 | case Intrinsic::nvvm_suld_2d_array_i8_trap: |
4108 | case Intrinsic::nvvm_suld_2d_array_v2i8_trap: |
4109 | case Intrinsic::nvvm_suld_2d_array_v4i8_trap: |
4110 | case Intrinsic::nvvm_suld_3d_i8_trap: |
4111 | case Intrinsic::nvvm_suld_3d_v2i8_trap: |
4112 | case Intrinsic::nvvm_suld_3d_v4i8_trap: |
4113 | case Intrinsic::nvvm_suld_1d_i8_zero: |
4114 | case Intrinsic::nvvm_suld_1d_v2i8_zero: |
4115 | case Intrinsic::nvvm_suld_1d_v4i8_zero: |
4116 | case Intrinsic::nvvm_suld_1d_array_i8_zero: |
4117 | case Intrinsic::nvvm_suld_1d_array_v2i8_zero: |
4118 | case Intrinsic::nvvm_suld_1d_array_v4i8_zero: |
4119 | case Intrinsic::nvvm_suld_2d_i8_zero: |
4120 | case Intrinsic::nvvm_suld_2d_v2i8_zero: |
4121 | case Intrinsic::nvvm_suld_2d_v4i8_zero: |
4122 | case Intrinsic::nvvm_suld_2d_array_i8_zero: |
4123 | case Intrinsic::nvvm_suld_2d_array_v2i8_zero: |
4124 | case Intrinsic::nvvm_suld_2d_array_v4i8_zero: |
4125 | case Intrinsic::nvvm_suld_3d_i8_zero: |
4126 | case Intrinsic::nvvm_suld_3d_v2i8_zero: |
4127 | case Intrinsic::nvvm_suld_3d_v4i8_zero: |
4128 | Info.opc = getOpcForSurfaceInstr(Intrinsic); |
4129 | Info.memVT = MVT::i8; |
4130 | Info.ptrVal = nullptr; |
4131 | Info.offset = 0; |
4132 | Info.flags = MachineMemOperand::MOLoad; |
4133 | Info.align = Align(16); |
4134 | return true; |
4135 | |
4136 | case Intrinsic::nvvm_suld_1d_i16_clamp: |
4137 | case Intrinsic::nvvm_suld_1d_v2i16_clamp: |
4138 | case Intrinsic::nvvm_suld_1d_v4i16_clamp: |
4139 | case Intrinsic::nvvm_suld_1d_array_i16_clamp: |
4140 | case Intrinsic::nvvm_suld_1d_array_v2i16_clamp: |
4141 | case Intrinsic::nvvm_suld_1d_array_v4i16_clamp: |
4142 | case Intrinsic::nvvm_suld_2d_i16_clamp: |
4143 | case Intrinsic::nvvm_suld_2d_v2i16_clamp: |
4144 | case Intrinsic::nvvm_suld_2d_v4i16_clamp: |
4145 | case Intrinsic::nvvm_suld_2d_array_i16_clamp: |
4146 | case Intrinsic::nvvm_suld_2d_array_v2i16_clamp: |
4147 | case Intrinsic::nvvm_suld_2d_array_v4i16_clamp: |
4148 | case Intrinsic::nvvm_suld_3d_i16_clamp: |
4149 | case Intrinsic::nvvm_suld_3d_v2i16_clamp: |
4150 | case Intrinsic::nvvm_suld_3d_v4i16_clamp: |
4151 | case Intrinsic::nvvm_suld_1d_i16_trap: |
4152 | case Intrinsic::nvvm_suld_1d_v2i16_trap: |
4153 | case Intrinsic::nvvm_suld_1d_v4i16_trap: |
4154 | case Intrinsic::nvvm_suld_1d_array_i16_trap: |
4155 | case Intrinsic::nvvm_suld_1d_array_v2i16_trap: |
4156 | case Intrinsic::nvvm_suld_1d_array_v4i16_trap: |
4157 | case Intrinsic::nvvm_suld_2d_i16_trap: |
4158 | case Intrinsic::nvvm_suld_2d_v2i16_trap: |
4159 | case Intrinsic::nvvm_suld_2d_v4i16_trap: |
4160 | case Intrinsic::nvvm_suld_2d_array_i16_trap: |
4161 | case Intrinsic::nvvm_suld_2d_array_v2i16_trap: |
4162 | case Intrinsic::nvvm_suld_2d_array_v4i16_trap: |
4163 | case Intrinsic::nvvm_suld_3d_i16_trap: |
4164 | case Intrinsic::nvvm_suld_3d_v2i16_trap: |
4165 | case Intrinsic::nvvm_suld_3d_v4i16_trap: |
4166 | case Intrinsic::nvvm_suld_1d_i16_zero: |
4167 | case Intrinsic::nvvm_suld_1d_v2i16_zero: |
4168 | case Intrinsic::nvvm_suld_1d_v4i16_zero: |
4169 | case Intrinsic::nvvm_suld_1d_array_i16_zero: |
4170 | case Intrinsic::nvvm_suld_1d_array_v2i16_zero: |
4171 | case Intrinsic::nvvm_suld_1d_array_v4i16_zero: |
4172 | case Intrinsic::nvvm_suld_2d_i16_zero: |
4173 | case Intrinsic::nvvm_suld_2d_v2i16_zero: |
4174 | case Intrinsic::nvvm_suld_2d_v4i16_zero: |
4175 | case Intrinsic::nvvm_suld_2d_array_i16_zero: |
4176 | case Intrinsic::nvvm_suld_2d_array_v2i16_zero: |
4177 | case Intrinsic::nvvm_suld_2d_array_v4i16_zero: |
4178 | case Intrinsic::nvvm_suld_3d_i16_zero: |
4179 | case Intrinsic::nvvm_suld_3d_v2i16_zero: |
4180 | case Intrinsic::nvvm_suld_3d_v4i16_zero: |
4181 | Info.opc = getOpcForSurfaceInstr(Intrinsic); |
4182 | Info.memVT = MVT::i16; |
4183 | Info.ptrVal = nullptr; |
4184 | Info.offset = 0; |
4185 | Info.flags = MachineMemOperand::MOLoad; |
4186 | Info.align = Align(16); |
4187 | return true; |
4188 | |
4189 | case Intrinsic::nvvm_suld_1d_i32_clamp: |
4190 | case Intrinsic::nvvm_suld_1d_v2i32_clamp: |
4191 | case Intrinsic::nvvm_suld_1d_v4i32_clamp: |
4192 | case Intrinsic::nvvm_suld_1d_array_i32_clamp: |
4193 | case Intrinsic::nvvm_suld_1d_array_v2i32_clamp: |
4194 | case Intrinsic::nvvm_suld_1d_array_v4i32_clamp: |
4195 | case Intrinsic::nvvm_suld_2d_i32_clamp: |
4196 | case Intrinsic::nvvm_suld_2d_v2i32_clamp: |
4197 | case Intrinsic::nvvm_suld_2d_v4i32_clamp: |
4198 | case Intrinsic::nvvm_suld_2d_array_i32_clamp: |
4199 | case Intrinsic::nvvm_suld_2d_array_v2i32_clamp: |
4200 | case Intrinsic::nvvm_suld_2d_array_v4i32_clamp: |
4201 | case Intrinsic::nvvm_suld_3d_i32_clamp: |
4202 | case Intrinsic::nvvm_suld_3d_v2i32_clamp: |
4203 | case Intrinsic::nvvm_suld_3d_v4i32_clamp: |
4204 | case Intrinsic::nvvm_suld_1d_i32_trap: |
4205 | case Intrinsic::nvvm_suld_1d_v2i32_trap: |
4206 | case Intrinsic::nvvm_suld_1d_v4i32_trap: |
4207 | case Intrinsic::nvvm_suld_1d_array_i32_trap: |
4208 | case Intrinsic::nvvm_suld_1d_array_v2i32_trap: |
4209 | case Intrinsic::nvvm_suld_1d_array_v4i32_trap: |
4210 | case Intrinsic::nvvm_suld_2d_i32_trap: |
4211 | case Intrinsic::nvvm_suld_2d_v2i32_trap: |
4212 | case Intrinsic::nvvm_suld_2d_v4i32_trap: |
4213 | case Intrinsic::nvvm_suld_2d_array_i32_trap: |
4214 | case Intrinsic::nvvm_suld_2d_array_v2i32_trap: |
4215 | case Intrinsic::nvvm_suld_2d_array_v4i32_trap: |
4216 | case Intrinsic::nvvm_suld_3d_i32_trap: |
4217 | case Intrinsic::nvvm_suld_3d_v2i32_trap: |
4218 | case Intrinsic::nvvm_suld_3d_v4i32_trap: |
4219 | case Intrinsic::nvvm_suld_1d_i32_zero: |
4220 | case Intrinsic::nvvm_suld_1d_v2i32_zero: |
4221 | case Intrinsic::nvvm_suld_1d_v4i32_zero: |
4222 | case Intrinsic::nvvm_suld_1d_array_i32_zero: |
4223 | case Intrinsic::nvvm_suld_1d_array_v2i32_zero: |
4224 | case Intrinsic::nvvm_suld_1d_array_v4i32_zero: |
4225 | case Intrinsic::nvvm_suld_2d_i32_zero: |
4226 | case Intrinsic::nvvm_suld_2d_v2i32_zero: |
4227 | case Intrinsic::nvvm_suld_2d_v4i32_zero: |
4228 | case Intrinsic::nvvm_suld_2d_array_i32_zero: |
4229 | case Intrinsic::nvvm_suld_2d_array_v2i32_zero: |
4230 | case Intrinsic::nvvm_suld_2d_array_v4i32_zero: |
4231 | case Intrinsic::nvvm_suld_3d_i32_zero: |
4232 | case Intrinsic::nvvm_suld_3d_v2i32_zero: |
4233 | case Intrinsic::nvvm_suld_3d_v4i32_zero: |
4234 | Info.opc = getOpcForSurfaceInstr(Intrinsic); |
4235 | Info.memVT = MVT::i32; |
4236 | Info.ptrVal = nullptr; |
4237 | Info.offset = 0; |
4238 | Info.flags = MachineMemOperand::MOLoad; |
4239 | Info.align = Align(16); |
4240 | return true; |
4241 | |
4242 | case Intrinsic::nvvm_suld_1d_i64_clamp: |
4243 | case Intrinsic::nvvm_suld_1d_v2i64_clamp: |
4244 | case Intrinsic::nvvm_suld_1d_array_i64_clamp: |
4245 | case Intrinsic::nvvm_suld_1d_array_v2i64_clamp: |
4246 | case Intrinsic::nvvm_suld_2d_i64_clamp: |
4247 | case Intrinsic::nvvm_suld_2d_v2i64_clamp: |
4248 | case Intrinsic::nvvm_suld_2d_array_i64_clamp: |
4249 | case Intrinsic::nvvm_suld_2d_array_v2i64_clamp: |
4250 | case Intrinsic::nvvm_suld_3d_i64_clamp: |
4251 | case Intrinsic::nvvm_suld_3d_v2i64_clamp: |
4252 | case Intrinsic::nvvm_suld_1d_i64_trap: |
4253 | case Intrinsic::nvvm_suld_1d_v2i64_trap: |
4254 | case Intrinsic::nvvm_suld_1d_array_i64_trap: |
4255 | case Intrinsic::nvvm_suld_1d_array_v2i64_trap: |
4256 | case Intrinsic::nvvm_suld_2d_i64_trap: |
4257 | case Intrinsic::nvvm_suld_2d_v2i64_trap: |
4258 | case Intrinsic::nvvm_suld_2d_array_i64_trap: |
4259 | case Intrinsic::nvvm_suld_2d_array_v2i64_trap: |
4260 | case Intrinsic::nvvm_suld_3d_i64_trap: |
4261 | case Intrinsic::nvvm_suld_3d_v2i64_trap: |
4262 | case Intrinsic::nvvm_suld_1d_i64_zero: |
4263 | case Intrinsic::nvvm_suld_1d_v2i64_zero: |
4264 | case Intrinsic::nvvm_suld_1d_array_i64_zero: |
4265 | case Intrinsic::nvvm_suld_1d_array_v2i64_zero: |
4266 | case Intrinsic::nvvm_suld_2d_i64_zero: |
4267 | case Intrinsic::nvvm_suld_2d_v2i64_zero: |
4268 | case Intrinsic::nvvm_suld_2d_array_i64_zero: |
4269 | case Intrinsic::nvvm_suld_2d_array_v2i64_zero: |
4270 | case Intrinsic::nvvm_suld_3d_i64_zero: |
4271 | case Intrinsic::nvvm_suld_3d_v2i64_zero: |
4272 | Info.opc = getOpcForSurfaceInstr(Intrinsic); |
4273 | Info.memVT = MVT::i64; |
4274 | Info.ptrVal = nullptr; |
4275 | Info.offset = 0; |
4276 | Info.flags = MachineMemOperand::MOLoad; |
4277 | Info.align = Align(16); |
4278 | return true; |
4279 | } |
4280 | return false; |
4281 | } |
4282 | |
4283 | |
4284 | |
4285 | |
4286 | |
4287 | |
4288 | bool NVPTXTargetLowering::isLegalAddressingMode(const DataLayout &DL, |
4289 | const AddrMode &AM, Type *Ty, |
4290 | unsigned AS, Instruction *I) const { |
4291 | |
4292 | |
4293 | |
4294 | |
4295 | |
4296 | |
4297 | |
4298 | |
4299 | |
4300 | if (AM.BaseGV) { |
4301 | return !AM.BaseOffs && !AM.HasBaseReg && !AM.Scale; |
4302 | } |
4303 | |
4304 | switch (AM.Scale) { |
4305 | case 0: |
4306 | break; |
4307 | case 1: |
4308 | if (AM.HasBaseReg) |
4309 | return false; |
4310 | |
4311 | break; |
4312 | default: |
4313 | |
4314 | return false; |
4315 | } |
4316 | return true; |
4317 | } |
4318 | |
4319 | |
4320 | |
4321 | |
4322 | |
4323 | |
4324 | |
4325 | NVPTXTargetLowering::ConstraintType |
4326 | NVPTXTargetLowering::getConstraintType(StringRef Constraint) const { |
4327 | if (Constraint.size() == 1) { |
4328 | switch (Constraint[0]) { |
4329 | default: |
4330 | break; |
4331 | case 'b': |
4332 | case 'r': |
4333 | case 'h': |
4334 | case 'c': |
4335 | case 'l': |
4336 | case 'f': |
4337 | case 'd': |
4338 | case '0': |
4339 | case 'N': |
4340 | return C_RegisterClass; |
4341 | } |
4342 | } |
4343 | return TargetLowering::getConstraintType(Constraint); |
4344 | } |
4345 | |
4346 | std::pair<unsigned, const TargetRegisterClass *> |
4347 | NVPTXTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, |
4348 | StringRef Constraint, |
4349 | MVT VT) const { |
4350 | if (Constraint.size() == 1) { |
4351 | switch (Constraint[0]) { |
4352 | case 'b': |
4353 | return std::make_pair(0U, &NVPTX::Int1RegsRegClass); |
4354 | case 'c': |
4355 | return std::make_pair(0U, &NVPTX::Int16RegsRegClass); |
4356 | case 'h': |
4357 | return std::make_pair(0U, &NVPTX::Int16RegsRegClass); |
4358 | case 'r': |
4359 | return std::make_pair(0U, &NVPTX::Int32RegsRegClass); |
4360 | case 'l': |
4361 | case 'N': |
4362 | return std::make_pair(0U, &NVPTX::Int64RegsRegClass); |
4363 | case 'f': |
4364 | return std::make_pair(0U, &NVPTX::Float32RegsRegClass); |
4365 | case 'd': |
4366 | return std::make_pair(0U, &NVPTX::Float64RegsRegClass); |
4367 | } |
4368 | } |
4369 | return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); |
4370 | } |
4371 | |
4372 | |
4373 | |
4374 | |
4375 | |
4376 | bool NVPTXTargetLowering::allowFMA(MachineFunction &MF, |
4377 | CodeGenOpt::Level OptLevel) const { |
4378 | |
4379 | if (FMAContractLevelOpt.getNumOccurrences() > 0) |
4380 | return FMAContractLevelOpt > 0; |
4381 | |
4382 | |
4383 | if (OptLevel == 0) |
4384 | return false; |
4385 | |
4386 | |
4387 | if (MF.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast) |
4388 | return true; |
4389 | |
4390 | return allowUnsafeFPMath(MF); |
4391 | } |
4392 | |
4393 | bool NVPTXTargetLowering::allowUnsafeFPMath(MachineFunction &MF) const { |
4394 | |
4395 | if (MF.getTarget().Options.UnsafeFPMath) |
4396 | return true; |
4397 | |
4398 | |
4399 | const Function &F = MF.getFunction(); |
4400 | return F.getFnAttribute("unsafe-fp-math").getValueAsBool(); |
4401 | } |
4402 | |
4403 | |
4404 | |
4405 | |
4406 | |
4407 | static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1, |
4408 | TargetLowering::DAGCombinerInfo &DCI, |
4409 | const NVPTXSubtarget &Subtarget, |
4410 | CodeGenOpt::Level OptLevel) { |
4411 | SelectionDAG &DAG = DCI.DAG; |
4412 | |
4413 | EVT VT=N0.getValueType(); |
4414 | if (VT.isVector()) |
4415 | return SDValue(); |
4416 | |
4417 | |
4418 | |
4419 | if (N0.getOpcode() == ISD::MUL) { |
4420 | assert (VT.isInteger()); |
4421 | |
4422 | |
4423 | |
4424 | |
4425 | if (OptLevel==CodeGenOpt::None || VT != MVT::i32 || |
4426 | !N0.getNode()->hasOneUse()) |
4427 | return SDValue(); |
4428 | |
4429 | |
4430 | return DAG.getNode(NVPTXISD::IMAD, SDLoc(N), VT, |
4431 | N0.getOperand(0), N0.getOperand(1), N1); |
4432 | } |
4433 | else if (N0.getOpcode() == ISD::FMUL) { |
4434 | if (VT == MVT::f32 || VT == MVT::f64) { |
4435 | const auto *TLI = static_cast<const NVPTXTargetLowering *>( |
4436 | &DAG.getTargetLoweringInfo()); |
4437 | if (!TLI->allowFMA(DAG.getMachineFunction(), OptLevel)) |
4438 | return SDValue(); |
4439 | |
4440 | |
4441 | |
4442 | |
4443 | |
4444 | |
4445 | |
4446 | |
4447 | |
4448 | int numUses = 0; |
4449 | int nonAddCount = 0; |
4450 | for (SDNode::use_iterator UI = N0.getNode()->use_begin(), |
4451 | UE = N0.getNode()->use_end(); |
4452 | UI != UE; ++UI) { |
4453 | numUses++; |
4454 | SDNode *User = *UI; |
4455 | if (User->getOpcode() != ISD::FADD) |
4456 | ++nonAddCount; |
4457 | } |
4458 | if (numUses >= 5) |
4459 | return SDValue(); |
4460 | if (nonAddCount) { |
4461 | int orderNo = N->getIROrder(); |
4462 | int orderNo2 = N0.getNode()->getIROrder(); |
4463 | |
4464 | |
4465 | |
4466 | |
4467 | if (orderNo - orderNo2 < 500) |
4468 | return SDValue(); |
4469 | |
4470 | |
4471 | |
4472 | bool opIsLive = false; |
4473 | const SDNode *left = N0.getOperand(0).getNode(); |
4474 | const SDNode *right = N0.getOperand(1).getNode(); |
4475 | |
4476 | if (isa<ConstantSDNode>(left) || isa<ConstantSDNode>(right)) |
4477 | opIsLive = true; |
4478 | |
4479 | if (!opIsLive) |
4480 | for (SDNode::use_iterator UI = left->use_begin(), UE = left->use_end(); UI != UE; ++UI) { |
4481 | SDNode *User = *UI; |
4482 | int orderNo3 = User->getIROrder(); |
4483 | if (orderNo3 > orderNo) { |
4484 | opIsLive = true; |
4485 | break; |
4486 | } |
4487 | } |
4488 | |
4489 | if (!opIsLive) |
4490 | for (SDNode::use_iterator UI = right->use_begin(), UE = right->use_end(); UI != UE; ++UI) { |
4491 | SDNode *User = *UI; |
4492 | int orderNo3 = User->getIROrder(); |
4493 | if (orderNo3 > orderNo) { |
4494 | opIsLive = true; |
4495 | break; |
4496 | } |
4497 | } |
4498 | |
4499 | if (!opIsLive) |
4500 | return SDValue(); |
4501 | } |
4502 | |
4503 | return DAG.getNode(ISD::FMA, SDLoc(N), VT, |
4504 | N0.getOperand(0), N0.getOperand(1), N1); |
4505 | } |
4506 | } |
4507 | |
4508 | return SDValue(); |
4509 | } |
4510 | |
4511 | |
4512 | |
4513 | static SDValue PerformADDCombine(SDNode *N, |
4514 | TargetLowering::DAGCombinerInfo &DCI, |
4515 | const NVPTXSubtarget &Subtarget, |
4516 | CodeGenOpt::Level OptLevel) { |
4517 | SDValue N0 = N->getOperand(0); |
4518 | SDValue N1 = N->getOperand(1); |
4519 | |
4520 | |
4521 | if (SDValue Result = |
4522 | PerformADDCombineWithOperands(N, N0, N1, DCI, Subtarget, OptLevel)) |
4523 | return Result; |
4524 | |
4525 | |
4526 | return PerformADDCombineWithOperands(N, N1, N0, DCI, Subtarget, OptLevel); |
4527 | } |
4528 | |
4529 | static SDValue PerformANDCombine(SDNode *N, |
4530 | TargetLowering::DAGCombinerInfo &DCI) { |
4531 | |
4532 | |
4533 | |
4534 | |
4535 | |
4536 | SDValue Val = N->getOperand(0); |
4537 | SDValue Mask = N->getOperand(1); |
4538 | |
4539 | if (isa<ConstantSDNode>(Val)) { |
4540 | std::swap(Val, Mask); |
4541 | } |
4542 | |
4543 | SDValue AExt; |
4544 | |
4545 | if (Val.getOpcode() == ISD::ANY_EXTEND) { |
4546 | AExt = Val; |
4547 | Val = Val->getOperand(0); |
4548 | } |
4549 | |
4550 | if (Val->isMachineOpcode() && Val->getMachineOpcode() == NVPTX::IMOV16rr) { |
4551 | Val = Val->getOperand(0); |
4552 | } |
4553 | |
4554 | if (Val->getOpcode() == NVPTXISD::LoadV2 || |
4555 | Val->getOpcode() == NVPTXISD::LoadV4) { |
4556 | ConstantSDNode *MaskCnst = dyn_cast<ConstantSDNode>(Mask); |
4557 | if (!MaskCnst) { |
4558 | |
4559 | return SDValue(); |
4560 | } |
4561 | |
4562 | uint64_t MaskVal = MaskCnst->getZExtValue(); |
4563 | if (MaskVal != 0xff) { |
4564 | |
4565 | return SDValue(); |
4566 | } |
4567 | |
4568 | MemSDNode *Mem = dyn_cast<MemSDNode>(Val); |
4569 | if (!Mem) { |
4570 | |
4571 | return SDValue(); |
4572 | } |
4573 | |
4574 | EVT MemVT = Mem->getMemoryVT(); |
4575 | if (MemVT != MVT::v2i8 && MemVT != MVT::v4i8) { |
4576 | |
4577 | return SDValue(); |
4578 | } |
4579 | |
4580 | unsigned ExtType = |
4581 | cast<ConstantSDNode>(Val->getOperand(Val->getNumOperands()-1))-> |
4582 | getZExtValue(); |
4583 | if (ExtType == ISD::SEXTLOAD) { |
4584 | |
4585 | |
4586 | return SDValue(); |
4587 | } |
4588 | |
4589 | bool AddTo = false; |
4590 | if (AExt.getNode() != nullptr) { |
4591 | |
4592 | Val = DCI.DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), |
4593 | AExt.getValueType(), Val); |
4594 | AddTo = true; |
4595 | } |
4596 | |
4597 | |
4598 | DCI.CombineTo(N, Val, AddTo); |
4599 | } |
4600 | |
4601 | return SDValue(); |
4602 | } |
4603 | |
4604 | static SDValue PerformREMCombine(SDNode *N, |
4605 | TargetLowering::DAGCombinerInfo &DCI, |
4606 | CodeGenOpt::Level OptLevel) { |
4607 | assert(N->getOpcode() == ISD::SREM || N->getOpcode() == ISD::UREM); |
4608 | |
4609 | |
4610 | if (OptLevel < CodeGenOpt::Default) |
4611 | return SDValue(); |
4612 | |
4613 | SelectionDAG &DAG = DCI.DAG; |
4614 | SDLoc DL(N); |
4615 | EVT VT = N->getValueType(0); |
4616 | bool IsSigned = N->getOpcode() == ISD::SREM; |
4617 | unsigned DivOpc = IsSigned ? ISD::SDIV : ISD::UDIV; |
4618 | |
4619 | const SDValue &Num = N->getOperand(0); |
4620 | const SDValue &Den = N->getOperand(1); |
4621 | |
4622 | for (const SDNode *U : Num->uses()) { |
4623 | if (U->getOpcode() == DivOpc && U->getOperand(0) == Num && |
4624 | U->getOperand(1) == Den) { |
4625 | |
4626 | return DAG.getNode(ISD::SUB, DL, VT, Num, |
4627 | DAG.getNode(ISD::MUL, DL, VT, |
4628 | DAG.getNode(DivOpc, DL, VT, Num, Den), |
4629 | Den)); |
4630 | } |
4631 | } |
4632 | return SDValue(); |
4633 | } |
4634 | |
/// Classification of how a multiply operand was extended, as reported by
/// IsMulWideOperandDemotable.
enum OperandSignedness {
  Signed = 0,   // Operand is a sign extension.
  Unsigned = 1, // Operand is a zero extension.
  Unknown = 2   // Operand is neither / not classified.
};
4640 | |
4641 | |
4642 | |
4643 | |
4644 | static bool IsMulWideOperandDemotable(SDValue Op, |
4645 | unsigned OptSize, |
4646 | OperandSignedness &S) { |
4647 | S = Unknown; |
4648 | |
4649 | if (Op.getOpcode() == ISD::SIGN_EXTEND || |
4650 | Op.getOpcode() == ISD::SIGN_EXTEND_INREG) { |
4651 | EVT OrigVT = Op.getOperand(0).getValueType(); |
4652 | if (OrigVT.getFixedSizeInBits() <= OptSize) { |
4653 | S = Signed; |
4654 | return true; |
4655 | } |
4656 | } else if (Op.getOpcode() == ISD::ZERO_EXTEND) { |
4657 | EVT OrigVT = Op.getOperand(0).getValueType(); |
4658 | if (OrigVT.getFixedSizeInBits() <= OptSize) { |
4659 | S = Unsigned; |
4660 | return true; |
4661 | } |
4662 | } |
4663 | |
4664 | return false; |
4665 | } |
4666 | |
4667 | |
4668 | |
4669 | |
4670 | |
4671 | static bool AreMulWideOperandsDemotable(SDValue LHS, SDValue RHS, |
4672 | unsigned OptSize, |
4673 | bool &IsSigned) { |
4674 | OperandSignedness LHSSign; |
4675 | |
4676 | |
4677 | if (!IsMulWideOperandDemotable(LHS, OptSize, LHSSign)) |
4678 | return false; |
4679 | |
4680 | |
4681 | if (LHSSign == Unknown) |
4682 | return false; |
4683 | |
4684 | IsSigned = (LHSSign == Signed); |
4685 | |
4686 | |
4687 | if (ConstantSDNode *CI = dyn_cast<ConstantSDNode>(RHS)) { |
4688 | const APInt &Val = CI->getAPIntValue(); |
4689 | if (LHSSign == Unsigned) { |
4690 | return Val.isIntN(OptSize); |
4691 | } else { |
4692 | return Val.isSignedIntN(OptSize); |
4693 | } |
4694 | } else { |
4695 | OperandSignedness RHSSign; |
4696 | if (!IsMulWideOperandDemotable(RHS, OptSize, RHSSign)) |
4697 | return false; |
4698 | |
4699 | return LHSSign == RHSSign; |
4700 | } |
4701 | } |
4702 | |
4703 | |
4704 | |
4705 | |
4706 | |
4707 | static SDValue TryMULWIDECombine(SDNode *N, |
4708 | TargetLowering::DAGCombinerInfo &DCI) { |
4709 | EVT MulType = N->getValueType(0); |
4710 | if (MulType != MVT::i32 && MulType != MVT::i64) { |
4711 | return SDValue(); |
4712 | } |
4713 | |
4714 | SDLoc DL(N); |
4715 | unsigned OptSize = MulType.getSizeInBits() >> 1; |
4716 | SDValue LHS = N->getOperand(0); |
4717 | SDValue RHS = N->getOperand(1); |
4718 | |
4719 | |
4720 | if (N->getOpcode() == ISD::MUL) { |
4721 | if (isa<ConstantSDNode>(LHS)) { |
4722 | std::swap(LHS, RHS); |
4723 | } |
4724 | } |
4725 | |
4726 | |
4727 | if (N->getOpcode() == ISD::SHL) { |
4728 | ConstantSDNode *ShlRHS = dyn_cast<ConstantSDNode>(RHS); |
4729 | if (!ShlRHS) { |
4730 | return SDValue(); |
4731 | } |
4732 | |
4733 | APInt ShiftAmt = ShlRHS->getAPIntValue(); |
4734 | unsigned BitWidth = MulType.getSizeInBits(); |
4735 | if (ShiftAmt.sge(0) && ShiftAmt.slt(BitWidth)) { |
4736 | APInt MulVal = APInt(BitWidth, 1) << ShiftAmt; |
4737 | RHS = DCI.DAG.getConstant(MulVal, DL, MulType); |
4738 | } else { |
4739 | return SDValue(); |
4740 | } |
4741 | } |
4742 | |
4743 | bool Signed; |
4744 | |
4745 | if (!AreMulWideOperandsDemotable(LHS, RHS, OptSize, Signed)) { |
4746 | return SDValue(); |
4747 | } |
4748 | |
4749 | EVT DemotedVT; |
4750 | if (MulType == MVT::i32) { |
4751 | DemotedVT = MVT::i16; |
4752 | } else { |
4753 | DemotedVT = MVT::i32; |
4754 | } |
4755 | |
4756 | |
4757 | |
4758 | SDValue TruncLHS = |
4759 | DCI.DAG.getNode(ISD::TRUNCATE, DL, DemotedVT, LHS); |
4760 | SDValue TruncRHS = |
4761 | DCI.DAG.getNode(ISD::TRUNCATE, DL, DemotedVT, RHS); |
4762 | |
4763 | unsigned Opc; |
4764 | if (Signed) { |
4765 | Opc = NVPTXISD::MUL_WIDE_SIGNED; |
4766 | } else { |
4767 | Opc = NVPTXISD::MUL_WIDE_UNSIGNED; |
4768 | } |
4769 | |
4770 | return DCI.DAG.getNode(Opc, DL, MulType, TruncLHS, TruncRHS); |
4771 | } |
4772 | |
4773 | |
4774 | static SDValue PerformMULCombine(SDNode *N, |
4775 | TargetLowering::DAGCombinerInfo &DCI, |
4776 | CodeGenOpt::Level OptLevel) { |
4777 | if (OptLevel > 0) { |
4778 | |
4779 | if (SDValue Ret = TryMULWIDECombine(N, DCI)) |
4780 | return Ret; |
4781 | } |
4782 | |
4783 | return SDValue(); |
4784 | } |
4785 | |
4786 | |
4787 | static SDValue PerformSHLCombine(SDNode *N, |
4788 | TargetLowering::DAGCombinerInfo &DCI, |
4789 | CodeGenOpt::Level OptLevel) { |
4790 | if (OptLevel > 0) { |
4791 | |
4792 | if (SDValue Ret = TryMULWIDECombine(N, DCI)) |
4793 | return Ret; |
4794 | } |
4795 | |
4796 | return SDValue(); |
4797 | } |
4798 | |
4799 | static SDValue PerformSETCCCombine(SDNode *N, |
4800 | TargetLowering::DAGCombinerInfo &DCI) { |
4801 | EVT CCType = N->getValueType(0); |
4802 | SDValue A = N->getOperand(0); |
4803 | SDValue B = N->getOperand(1); |
4804 | |
4805 | if (CCType != MVT::v2i1 || A.getValueType() != MVT::v2f16) |
4806 | return SDValue(); |
4807 | |
4808 | SDLoc DL(N); |
4809 | |
4810 | |
4811 | |
4812 | |
4813 | SDValue CCNode = DCI.DAG.getNode(NVPTXISD::SETP_F16X2, DL, |
4814 | DCI.DAG.getVTList(MVT::i1, MVT::i1), |
4815 | {A, B, N->getOperand(2)}); |
4816 | return DCI.DAG.getNode(ISD::BUILD_VECTOR, DL, CCType, CCNode.getValue(0), |
4817 | CCNode.getValue(1)); |
4818 | } |
4819 | |
4820 | SDValue NVPTXTargetLowering::PerformDAGCombine(SDNode *N, |
4821 | DAGCombinerInfo &DCI) const { |
4822 | CodeGenOpt::Level OptLevel = getTargetMachine().getOptLevel(); |
4823 | switch (N->getOpcode()) { |
4824 | default: break; |
4825 | case ISD::ADD: |
4826 | case ISD::FADD: |
4827 | return PerformADDCombine(N, DCI, STI, OptLevel); |
4828 | case ISD::MUL: |
4829 | return PerformMULCombine(N, DCI, OptLevel); |
4830 | case ISD::SHL: |
4831 | return PerformSHLCombine(N, DCI, OptLevel); |
4832 | case ISD::AND: |
4833 | return PerformANDCombine(N, DCI); |
4834 | case ISD::UREM: |
4835 | case ISD::SREM: |
4836 | return PerformREMCombine(N, DCI, OptLevel); |
4837 | case ISD::SETCC: |
4838 | return PerformSETCCCombine(N, DCI); |
4839 | } |
4840 | return SDValue(); |
4841 | } |
4842 | |
4843 | |
4844 | static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG, |
4845 | SmallVectorImpl<SDValue> &Results) { |
4846 | EVT ResVT = N->getValueType(0); |
4847 | SDLoc DL(N); |
4848 | |
4849 | assert(ResVT.isVector() && "Vector load must have vector type"); |
4850 | |
4851 | |
4852 | |
4853 | |
4854 | assert(ResVT.isSimple() && "Can only handle simple types"); |
4855 | switch (ResVT.getSimpleVT().SimpleTy) { |
4856 | default: |
4857 | return; |
4858 | case MVT::v2i8: |
4859 | case MVT::v2i16: |
4860 | case MVT::v2i32: |
4861 | case MVT::v2i64: |
4862 | case MVT::v2f16: |
4863 | case MVT::v2f32: |
4864 | case MVT::v2f64: |
4865 | case MVT::v4i8: |
4866 | case MVT::v4i16: |
4867 | case MVT::v4i32: |
4868 | case MVT::v4f16: |
4869 | case MVT::v4f32: |
4870 | case MVT::v8f16: |
4871 | |
4872 | break; |
4873 | } |
4874 | |
4875 | LoadSDNode *LD = cast<LoadSDNode>(N); |
4876 | |
4877 | Align Alignment = LD->getAlign(); |
4878 | auto &TD = DAG.getDataLayout(); |
4879 | Align PrefAlign = TD.getPrefTypeAlign(ResVT.getTypeForEVT(*DAG.getContext())); |
4880 | if (Alignment < PrefAlign) { |
4881 | |
4882 | |
4883 | |
4884 | |
4885 | |
4886 | return; |
4887 | } |
4888 | |
4889 | EVT EltVT = ResVT.getVectorElementType(); |
4890 | unsigned NumElts = ResVT.getVectorNumElements(); |
4891 | |
4892 | |
4893 | |
4894 | |
4895 | bool NeedTrunc = false; |
4896 | if (EltVT.getSizeInBits() < 16) { |
4897 | EltVT = MVT::i16; |
4898 | NeedTrunc = true; |
4899 | } |
4900 | |
4901 | unsigned Opcode = 0; |
4902 | SDVTList LdResVTs; |
4903 | bool LoadF16x2 = false; |
4904 | |
4905 | switch (NumElts) { |
4906 | default: |
4907 | return; |
4908 | case 2: |
4909 | Opcode = NVPTXISD::LoadV2; |
4910 | LdResVTs = DAG.getVTList(EltVT, EltVT, MVT::Other); |
4911 | break; |
4912 | case 4: { |
4913 | Opcode = NVPTXISD::LoadV4; |
4914 | EVT ListVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other }; |
4915 | LdResVTs = DAG.getVTList(ListVTs); |
4916 | break; |
4917 | } |
4918 | case 8: { |
4919 | |
4920 | |
4921 | |
4922 | assert(EltVT == MVT::f16 && "Unsupported v8 vector type."); |
4923 | LoadF16x2 = true; |
4924 | Opcode = NVPTXISD::LoadV4; |
4925 | EVT ListVTs[] = {MVT::v2f16, MVT::v2f16, MVT::v2f16, MVT::v2f16, |
4926 | MVT::Other}; |
4927 | LdResVTs = DAG.getVTList(ListVTs); |
4928 | break; |
4929 | } |
4930 | } |
4931 | |
4932 | |
4933 | SmallVector<SDValue, 8> OtherOps(N->op_begin(), N->op_end()); |
4934 | |
4935 | |
4936 | |
4937 | OtherOps.push_back(DAG.getIntPtrConstant(LD->getExtensionType(), DL)); |
4938 | |
4939 | SDValue NewLD = DAG.getMemIntrinsicNode(Opcode, DL, LdResVTs, OtherOps, |
4940 | LD->getMemoryVT(), |
4941 | LD->getMemOperand()); |
4942 | |
4943 | SmallVector<SDValue, 8> ScalarRes; |
4944 | if (LoadF16x2) { |
4945 | |
4946 | NumElts /= 2; |
4947 | for (unsigned i = 0; i < NumElts; ++i) { |
4948 | SDValue SubVector = NewLD.getValue(i); |
4949 | SDValue E0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, SubVector, |
4950 | DAG.getIntPtrConstant(0, DL)); |
4951 | SDValue E1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, SubVector, |
4952 | DAG.getIntPtrConstant(1, DL)); |
4953 | ScalarRes.push_back(E0); |
4954 | ScalarRes.push_back(E1); |
4955 | } |
4956 | } else { |
4957 | for (unsigned i = 0; i < NumElts; ++i) { |
4958 | SDValue Res = NewLD.getValue(i); |
4959 | if (NeedTrunc) |
4960 | Res = DAG.getNode(ISD::TRUNCATE, DL, ResVT.getVectorElementType(), Res); |
4961 | ScalarRes.push_back(Res); |
4962 | } |
4963 | } |
4964 | |
4965 | SDValue LoadChain = NewLD.getValue(NumElts); |
4966 | |
4967 | SDValue BuildVec = DAG.getBuildVector(ResVT, DL, ScalarRes); |
4968 | |
4969 | Results.push_back(BuildVec); |
4970 | Results.push_back(LoadChain); |
4971 | } |
4972 | |
4973 | static void ReplaceINTRINSIC_W_CHAIN(SDNode *N, SelectionDAG &DAG, |
4974 | SmallVectorImpl<SDValue> &Results) { |
4975 | SDValue Chain = N->getOperand(0); |
4976 | SDValue Intrin = N->getOperand(1); |
4977 | SDLoc DL(N); |
4978 | |
4979 | |
4980 | unsigned IntrinNo = cast<ConstantSDNode>(Intrin.getNode())->getZExtValue(); |
4981 | switch (IntrinNo) { |
4982 | default: |
4983 | return; |
4984 | case Intrinsic::nvvm_ldg_global_i: |
4985 | case Intrinsic::nvvm_ldg_global_f: |
4986 | case Intrinsic::nvvm_ldg_global_p: |
4987 | case Intrinsic::nvvm_ldu_global_i: |
4988 | case Intrinsic::nvvm_ldu_global_f: |
4989 | case Intrinsic::nvvm_ldu_global_p: { |
4990 | EVT ResVT = N->getValueType(0); |
4991 | |
4992 | if (ResVT.isVector()) { |
4993 | |
4994 | |
4995 | unsigned NumElts = ResVT.getVectorNumElements(); |
4996 | EVT EltVT = ResVT.getVectorElementType(); |
4997 | |
4998 | |
4999 | |
5000 | |
5001 | |
5002 | bool NeedTrunc = false; |
5003 | if (EltVT.getSizeInBits() < 16) { |
5004 | EltVT = MVT::i16; |
5005 | NeedTrunc = true; |
5006 | } |
5007 | |
5008 | unsigned Opcode = 0; |
5009 | SDVTList LdResVTs; |
5010 | |
5011 | switch (NumElts) { |
5012 | default: |
5013 | return; |
5014 | case 2: |
5015 | switch (IntrinNo) { |
5016 | default: |
5017 | return; |
5018 | case Intrinsic::nvvm_ldg_global_i: |
5019 | case Intrinsic::nvvm_ldg_global_f: |
5020 | case Intrinsic::nvvm_ldg_global_p: |
5021 | Opcode = NVPTXISD::LDGV2; |
5022 | break; |
5023 | case Intrinsic::nvvm_ldu_global_i: |
5024 | case Intrinsic::nvvm_ldu_global_f: |
5025 | case Intrinsic::nvvm_ldu_global_p: |
5026 | Opcode = NVPTXISD::LDUV2; |
5027 | break; |
5028 | } |
5029 | LdResVTs = DAG.getVTList(EltVT, EltVT, MVT::Other); |
5030 | break; |
5031 | case 4: { |
5032 | switch (IntrinNo) { |
5033 | default: |
5034 | return; |
5035 | case Intrinsic::nvvm_ldg_global_i: |
5036 | case Intrinsic::nvvm_ldg_global_f: |
5037 | case Intrinsic::nvvm_ldg_global_p: |
5038 | Opcode = NVPTXISD::LDGV4; |
5039 | break; |
5040 | case Intrinsic::nvvm_ldu_global_i: |
5041 | case Intrinsic::nvvm_ldu_global_f: |
5042 | case Intrinsic::nvvm_ldu_global_p: |
5043 | Opcode = NVPTXISD::LDUV4; |
5044 | break; |
5045 | } |
5046 | EVT ListVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other }; |
5047 | LdResVTs = DAG.getVTList(ListVTs); |
5048 | break; |
5049 | } |
5050 | } |
5051 | |
5052 | SmallVector<SDValue, 8> OtherOps; |
5053 | |
5054 | |
5055 | |
5056 | OtherOps.push_back(Chain); |
5057 | |
5058 | |
5059 | OtherOps.append(N->op_begin() + 2, N->op_end()); |
5060 | |
5061 | MemIntrinsicSDNode *MemSD = cast<MemIntrinsicSDNode>(N); |
5062 | |
5063 | SDValue NewLD = DAG.getMemIntrinsicNode(Opcode, DL, LdResVTs, OtherOps, |
5064 | MemSD->getMemoryVT(), |
5065 | MemSD->getMemOperand()); |
5066 | |
5067 | SmallVector<SDValue, 4> ScalarRes; |
5068 | |
5069 | for (unsigned i = 0; i < NumElts; ++i) { |
5070 | SDValue Res = NewLD.getValue(i); |
5071 | if (NeedTrunc) |
5072 | Res = |
5073 | DAG.getNode(ISD::TRUNCATE, DL, ResVT.getVectorElementType(), Res); |
5074 | ScalarRes.push_back(Res); |
5075 | } |
5076 | |
5077 | SDValue LoadChain = NewLD.getValue(NumElts); |
5078 | |
5079 | SDValue BuildVec = |
5080 | DAG.getBuildVector(ResVT, DL, ScalarRes); |
5081 | |
5082 | Results.push_back(BuildVec); |
5083 | Results.push_back(LoadChain); |
5084 | } else { |
5085 | |
5086 | assert(ResVT.isSimple() && ResVT.getSimpleVT().SimpleTy == MVT::i8 && |
5087 | "Custom handling of non-i8 ldu/ldg?"); |
5088 | |
5089 | |
5090 | SmallVector<SDValue, 4> Ops(N->op_begin(), N->op_end()); |
5091 | |
5092 | |
5093 | SDVTList LdResVTs = DAG.getVTList(MVT::i16, MVT::Other); |
5094 | |
5095 | MemIntrinsicSDNode *MemSD = cast<MemIntrinsicSDNode>(N); |
5096 | |
5097 | |
5098 | |
5099 | SDValue NewLD = |
5100 | DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, LdResVTs, Ops, |
5101 | MVT::i8, MemSD->getMemOperand()); |
5102 | |
5103 | Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, |
5104 | NewLD.getValue(0))); |
5105 | Results.push_back(NewLD.getValue(1)); |
5106 | } |
5107 | } |
5108 | } |
5109 | } |
5110 | |
5111 | void NVPTXTargetLowering::ReplaceNodeResults( |
5112 | SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const { |
5113 | switch (N->getOpcode()) { |
5114 | default: |
5115 | report_fatal_error("Unhandled custom legalization"); |
5116 | case ISD::LOAD: |
5117 | ReplaceLoadVector(N, DAG, Results); |
5118 | return; |
5119 | case ISD::INTRINSIC_W_CHAIN: |
5120 | ReplaceINTRINSIC_W_CHAIN(N, DAG, Results); |
5121 | return; |
5122 | } |
5123 | } |
5124 | |
5125 | |
5126 | NVPTXTargetObjectFile::~NVPTXTargetObjectFile() {} |
5127 | |
5128 | MCSection *NVPTXTargetObjectFile::SelectSectionForGlobal( |
5129 | const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const { |
5130 | return getDataSection(); |
5131 | } |