clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name NVPTXISelDAGToDAG.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/build-llvm/lib/Target/NVPTX -resource-dir /usr/lib/llvm-14/lib/clang/14.0.0 -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/build-llvm/lib/Target/NVPTX -I /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/llvm/lib/Target/NVPTX -I /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/build-llvm/include -I /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/llvm/include -D NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-14/lib/clang/14.0.0/include -internal-isystem 
/usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/build-llvm/lib/Target/NVPTX -fdebug-prefix-map=/build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e=. -ferror-limit 19 -fvisibility hidden -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2021-09-04-040900-46481-1 -x c++ /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
1 | |
2 | |
3 | |
4 | |
5 | |
6 | |
7 | |
8 | |
9 | |
10 | |
11 | |
12 | |
13 | #include "NVPTXISelDAGToDAG.h" |
14 | #include "MCTargetDesc/NVPTXBaseInfo.h" |
15 | #include "NVPTXUtilities.h" |
16 | #include "llvm/Analysis/ValueTracking.h" |
17 | #include "llvm/IR/GlobalValue.h" |
18 | #include "llvm/IR/Instructions.h" |
19 | #include "llvm/IR/IntrinsicsNVPTX.h" |
20 | #include "llvm/Support/AtomicOrdering.h" |
21 | #include "llvm/Support/CommandLine.h" |
22 | #include "llvm/Support/Debug.h" |
23 | #include "llvm/Support/ErrorHandling.h" |
24 | #include "llvm/Support/raw_ostream.h" |
25 | #include "llvm/Target/TargetIntrinsicInfo.h" |
26 | |
27 | using namespace llvm; |
28 | |
29 | #define DEBUG_TYPE "nvptx-isel" |
30 | |
31 | |
32 | |
33 | FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM, |
34 | llvm::CodeGenOpt::Level OptLevel) { |
35 | return new NVPTXDAGToDAGISel(TM, OptLevel); |
36 | } |
37 | |
38 | NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm, |
39 | CodeGenOpt::Level OptLevel) |
40 | : SelectionDAGISel(tm, OptLevel), TM(tm) { |
41 | doMulWide = (OptLevel > 0); |
42 | } |
43 | |
44 | bool NVPTXDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) { |
45 | Subtarget = &static_cast<const NVPTXSubtarget &>(MF.getSubtarget()); |
46 | return SelectionDAGISel::runOnMachineFunction(MF); |
47 | } |
48 | |
49 | int NVPTXDAGToDAGISel::getDivF32Level() const { |
50 | return Subtarget->getTargetLowering()->getDivF32Level(); |
51 | } |
52 | |
53 | bool NVPTXDAGToDAGISel::usePrecSqrtF32() const { |
54 | return Subtarget->getTargetLowering()->usePrecSqrtF32(); |
55 | } |
56 | |
57 | bool NVPTXDAGToDAGISel::useF32FTZ() const { |
58 | return Subtarget->getTargetLowering()->useF32FTZ(*MF); |
59 | } |
60 | |
61 | bool NVPTXDAGToDAGISel::allowFMA() const { |
62 | const NVPTXTargetLowering *TL = Subtarget->getTargetLowering(); |
63 | return TL->allowFMA(*MF, OptLevel); |
64 | } |
65 | |
66 | bool NVPTXDAGToDAGISel::allowUnsafeFPMath() const { |
67 | const NVPTXTargetLowering *TL = Subtarget->getTargetLowering(); |
68 | return TL->allowUnsafeFPMath(*MF); |
69 | } |
70 | |
71 | bool NVPTXDAGToDAGISel::useShortPointers() const { |
72 | return TM.useShortPointers(); |
73 | } |
74 | |
75 | |
76 | |
// Top-level selection hook for a single DAG node.
//
// Nodes that already carry a machine opcode are tagged with node id -1 and
// left alone. Otherwise the node's opcode picks one of the custom try*/Select*
// routines. When a try* routine returns false, or the opcode has no custom
// case, control falls through to SelectCode(N) (presumably the generated
// matcher; its definition is not visible here).
77 | void NVPTXDAGToDAGISel::Select(SDNode *N) { |
78 | |
// Nothing to select for a node that is already a machine node.
79 | if (N->isMachineOpcode()) { |
80 | N->setNodeId(-1); |
81 | return; |
82 | } |
83 | |
84 | switch (N->getOpcode()) { |
85 | case ISD::LOAD: |
86 | case ISD::ATOMIC_LOAD: |
87 | if (tryLoad(N)) |
88 | return; |
89 | break; |
90 | case ISD::STORE: |
91 | case ISD::ATOMIC_STORE: |
92 | if (tryStore(N)) |
93 | return; |
94 | break; |
95 | case ISD::EXTRACT_VECTOR_ELT: |
96 | if (tryEXTRACT_VECTOR_ELEMENT(N)) |
97 | return; |
98 | break; |
// No fallback for this opcode: control returns right after the custom routine.
99 | case NVPTXISD::SETP_F16X2: |
100 | SelectSETP_F16X2(N); |
101 | return; |
102 | |
103 | case NVPTXISD::LoadV2: |
104 | case NVPTXISD::LoadV4: |
105 | if (tryLoadVector(N)) |
106 | return; |
107 | break; |
108 | case NVPTXISD::LDGV2: |
109 | case NVPTXISD::LDGV4: |
110 | case NVPTXISD::LDUV2: |
111 | case NVPTXISD::LDUV4: |
112 | if (tryLDGLDU(N)) |
113 | return; |
114 | break; |
115 | case NVPTXISD::StoreV2: |
116 | case NVPTXISD::StoreV4: |
117 | if (tryStoreVector(N)) |
118 | return; |
119 | break; |
120 | case NVPTXISD::LoadParam: |
121 | case NVPTXISD::LoadParamV2: |
122 | case NVPTXISD::LoadParamV4: |
123 | if (tryLoadParam(N)) |
124 | return; |
125 | break; |
126 | case NVPTXISD::StoreRetval: |
127 | case NVPTXISD::StoreRetvalV2: |
128 | case NVPTXISD::StoreRetvalV4: |
129 | if (tryStoreRetval(N)) |
130 | return; |
131 | break; |
132 | case NVPTXISD::StoreParam: |
133 | case NVPTXISD::StoreParamV2: |
134 | case NVPTXISD::StoreParamV4: |
135 | case NVPTXISD::StoreParamS32: |
136 | case NVPTXISD::StoreParamU32: |
137 | if (tryStoreParam(N)) |
138 | return; |
139 | break; |
140 | case ISD::INTRINSIC_WO_CHAIN: |
141 | if (tryIntrinsicNoChain(N)) |
142 | return; |
143 | break; |
144 | case ISD::INTRINSIC_W_CHAIN: |
145 | if (tryIntrinsicChain(N)) |
146 | return; |
147 | break; |
// Every Tex*, Tld4*, TexUnified* and Tld4Unified* opcode below shares one
// handler, tryTextureIntrinsic.
148 | case NVPTXISD::Tex1DFloatS32: |
149 | case NVPTXISD::Tex1DFloatFloat: |
150 | case NVPTXISD::Tex1DFloatFloatLevel: |
151 | case NVPTXISD::Tex1DFloatFloatGrad: |
152 | case NVPTXISD::Tex1DS32S32: |
153 | case NVPTXISD::Tex1DS32Float: |
154 | case NVPTXISD::Tex1DS32FloatLevel: |
155 | case NVPTXISD::Tex1DS32FloatGrad: |
156 | case NVPTXISD::Tex1DU32S32: |
157 | case NVPTXISD::Tex1DU32Float: |
158 | case NVPTXISD::Tex1DU32FloatLevel: |
159 | case NVPTXISD::Tex1DU32FloatGrad: |
160 | case NVPTXISD::Tex1DArrayFloatS32: |
161 | case NVPTXISD::Tex1DArrayFloatFloat: |
162 | case NVPTXISD::Tex1DArrayFloatFloatLevel: |
163 | case NVPTXISD::Tex1DArrayFloatFloatGrad: |
164 | case NVPTXISD::Tex1DArrayS32S32: |
165 | case NVPTXISD::Tex1DArrayS32Float: |
166 | case NVPTXISD::Tex1DArrayS32FloatLevel: |
167 | case NVPTXISD::Tex1DArrayS32FloatGrad: |
168 | case NVPTXISD::Tex1DArrayU32S32: |
169 | case NVPTXISD::Tex1DArrayU32Float: |
170 | case NVPTXISD::Tex1DArrayU32FloatLevel: |
171 | case NVPTXISD::Tex1DArrayU32FloatGrad: |
172 | case NVPTXISD::Tex2DFloatS32: |
173 | case NVPTXISD::Tex2DFloatFloat: |
174 | case NVPTXISD::Tex2DFloatFloatLevel: |
175 | case NVPTXISD::Tex2DFloatFloatGrad: |
176 | case NVPTXISD::Tex2DS32S32: |
177 | case NVPTXISD::Tex2DS32Float: |
178 | case NVPTXISD::Tex2DS32FloatLevel: |
179 | case NVPTXISD::Tex2DS32FloatGrad: |
180 | case NVPTXISD::Tex2DU32S32: |
181 | case NVPTXISD::Tex2DU32Float: |
182 | case NVPTXISD::Tex2DU32FloatLevel: |
183 | case NVPTXISD::Tex2DU32FloatGrad: |
184 | case NVPTXISD::Tex2DArrayFloatS32: |
185 | case NVPTXISD::Tex2DArrayFloatFloat: |
186 | case NVPTXISD::Tex2DArrayFloatFloatLevel: |
187 | case NVPTXISD::Tex2DArrayFloatFloatGrad: |
188 | case NVPTXISD::Tex2DArrayS32S32: |
189 | case NVPTXISD::Tex2DArrayS32Float: |
190 | case NVPTXISD::Tex2DArrayS32FloatLevel: |
191 | case NVPTXISD::Tex2DArrayS32FloatGrad: |
192 | case NVPTXISD::Tex2DArrayU32S32: |
193 | case NVPTXISD::Tex2DArrayU32Float: |
194 | case NVPTXISD::Tex2DArrayU32FloatLevel: |
195 | case NVPTXISD::Tex2DArrayU32FloatGrad: |
196 | case NVPTXISD::Tex3DFloatS32: |
197 | case NVPTXISD::Tex3DFloatFloat: |
198 | case NVPTXISD::Tex3DFloatFloatLevel: |
199 | case NVPTXISD::Tex3DFloatFloatGrad: |
200 | case NVPTXISD::Tex3DS32S32: |
201 | case NVPTXISD::Tex3DS32Float: |
202 | case NVPTXISD::Tex3DS32FloatLevel: |
203 | case NVPTXISD::Tex3DS32FloatGrad: |
204 | case NVPTXISD::Tex3DU32S32: |
205 | case NVPTXISD::Tex3DU32Float: |
206 | case NVPTXISD::Tex3DU32FloatLevel: |
207 | case NVPTXISD::Tex3DU32FloatGrad: |
208 | case NVPTXISD::TexCubeFloatFloat: |
209 | case NVPTXISD::TexCubeFloatFloatLevel: |
210 | case NVPTXISD::TexCubeS32Float: |
211 | case NVPTXISD::TexCubeS32FloatLevel: |
212 | case NVPTXISD::TexCubeU32Float: |
213 | case NVPTXISD::TexCubeU32FloatLevel: |
214 | case NVPTXISD::TexCubeArrayFloatFloat: |
215 | case NVPTXISD::TexCubeArrayFloatFloatLevel: |
216 | case NVPTXISD::TexCubeArrayS32Float: |
217 | case NVPTXISD::TexCubeArrayS32FloatLevel: |
218 | case NVPTXISD::TexCubeArrayU32Float: |
219 | case NVPTXISD::TexCubeArrayU32FloatLevel: |
220 | case NVPTXISD::Tld4R2DFloatFloat: |
221 | case NVPTXISD::Tld4G2DFloatFloat: |
222 | case NVPTXISD::Tld4B2DFloatFloat: |
223 | case NVPTXISD::Tld4A2DFloatFloat: |
224 | case NVPTXISD::Tld4R2DS64Float: |
225 | case NVPTXISD::Tld4G2DS64Float: |
226 | case NVPTXISD::Tld4B2DS64Float: |
227 | case NVPTXISD::Tld4A2DS64Float: |
228 | case NVPTXISD::Tld4R2DU64Float: |
229 | case NVPTXISD::Tld4G2DU64Float: |
230 | case NVPTXISD::Tld4B2DU64Float: |
231 | case NVPTXISD::Tld4A2DU64Float: |
232 | case NVPTXISD::TexUnified1DFloatS32: |
233 | case NVPTXISD::TexUnified1DFloatFloat: |
234 | case NVPTXISD::TexUnified1DFloatFloatLevel: |
235 | case NVPTXISD::TexUnified1DFloatFloatGrad: |
236 | case NVPTXISD::TexUnified1DS32S32: |
237 | case NVPTXISD::TexUnified1DS32Float: |
238 | case NVPTXISD::TexUnified1DS32FloatLevel: |
239 | case NVPTXISD::TexUnified1DS32FloatGrad: |
240 | case NVPTXISD::TexUnified1DU32S32: |
241 | case NVPTXISD::TexUnified1DU32Float: |
242 | case NVPTXISD::TexUnified1DU32FloatLevel: |
243 | case NVPTXISD::TexUnified1DU32FloatGrad: |
244 | case NVPTXISD::TexUnified1DArrayFloatS32: |
245 | case NVPTXISD::TexUnified1DArrayFloatFloat: |
246 | case NVPTXISD::TexUnified1DArrayFloatFloatLevel: |
247 | case NVPTXISD::TexUnified1DArrayFloatFloatGrad: |
248 | case NVPTXISD::TexUnified1DArrayS32S32: |
249 | case NVPTXISD::TexUnified1DArrayS32Float: |
250 | case NVPTXISD::TexUnified1DArrayS32FloatLevel: |
251 | case NVPTXISD::TexUnified1DArrayS32FloatGrad: |
252 | case NVPTXISD::TexUnified1DArrayU32S32: |
253 | case NVPTXISD::TexUnified1DArrayU32Float: |
254 | case NVPTXISD::TexUnified1DArrayU32FloatLevel: |
255 | case NVPTXISD::TexUnified1DArrayU32FloatGrad: |
256 | case NVPTXISD::TexUnified2DFloatS32: |
257 | case NVPTXISD::TexUnified2DFloatFloat: |
258 | case NVPTXISD::TexUnified2DFloatFloatLevel: |
259 | case NVPTXISD::TexUnified2DFloatFloatGrad: |
260 | case NVPTXISD::TexUnified2DS32S32: |
261 | case NVPTXISD::TexUnified2DS32Float: |
262 | case NVPTXISD::TexUnified2DS32FloatLevel: |
263 | case NVPTXISD::TexUnified2DS32FloatGrad: |
264 | case NVPTXISD::TexUnified2DU32S32: |
265 | case NVPTXISD::TexUnified2DU32Float: |
266 | case NVPTXISD::TexUnified2DU32FloatLevel: |
267 | case NVPTXISD::TexUnified2DU32FloatGrad: |
268 | case NVPTXISD::TexUnified2DArrayFloatS32: |
269 | case NVPTXISD::TexUnified2DArrayFloatFloat: |
270 | case NVPTXISD::TexUnified2DArrayFloatFloatLevel: |
271 | case NVPTXISD::TexUnified2DArrayFloatFloatGrad: |
272 | case NVPTXISD::TexUnified2DArrayS32S32: |
273 | case NVPTXISD::TexUnified2DArrayS32Float: |
274 | case NVPTXISD::TexUnified2DArrayS32FloatLevel: |
275 | case NVPTXISD::TexUnified2DArrayS32FloatGrad: |
276 | case NVPTXISD::TexUnified2DArrayU32S32: |
277 | case NVPTXISD::TexUnified2DArrayU32Float: |
278 | case NVPTXISD::TexUnified2DArrayU32FloatLevel: |
279 | case NVPTXISD::TexUnified2DArrayU32FloatGrad: |
280 | case NVPTXISD::TexUnified3DFloatS32: |
281 | case NVPTXISD::TexUnified3DFloatFloat: |
282 | case NVPTXISD::TexUnified3DFloatFloatLevel: |
283 | case NVPTXISD::TexUnified3DFloatFloatGrad: |
284 | case NVPTXISD::TexUnified3DS32S32: |
285 | case NVPTXISD::TexUnified3DS32Float: |
286 | case NVPTXISD::TexUnified3DS32FloatLevel: |
287 | case NVPTXISD::TexUnified3DS32FloatGrad: |
288 | case NVPTXISD::TexUnified3DU32S32: |
289 | case NVPTXISD::TexUnified3DU32Float: |
290 | case NVPTXISD::TexUnified3DU32FloatLevel: |
291 | case NVPTXISD::TexUnified3DU32FloatGrad: |
292 | case NVPTXISD::TexUnifiedCubeFloatFloat: |
293 | case NVPTXISD::TexUnifiedCubeFloatFloatLevel: |
294 | case NVPTXISD::TexUnifiedCubeS32Float: |
295 | case NVPTXISD::TexUnifiedCubeS32FloatLevel: |
296 | case NVPTXISD::TexUnifiedCubeU32Float: |
297 | case NVPTXISD::TexUnifiedCubeU32FloatLevel: |
298 | case NVPTXISD::TexUnifiedCubeArrayFloatFloat: |
299 | case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel: |
300 | case NVPTXISD::TexUnifiedCubeArrayS32Float: |
301 | case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel: |
302 | case NVPTXISD::TexUnifiedCubeArrayU32Float: |
303 | case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel: |
304 | case NVPTXISD::Tld4UnifiedR2DFloatFloat: |
305 | case NVPTXISD::Tld4UnifiedG2DFloatFloat: |
306 | case NVPTXISD::Tld4UnifiedB2DFloatFloat: |
307 | case NVPTXISD::Tld4UnifiedA2DFloatFloat: |
308 | case NVPTXISD::Tld4UnifiedR2DS64Float: |
309 | case NVPTXISD::Tld4UnifiedG2DS64Float: |
310 | case NVPTXISD::Tld4UnifiedB2DS64Float: |
311 | case NVPTXISD::Tld4UnifiedA2DS64Float: |
312 | case NVPTXISD::Tld4UnifiedR2DU64Float: |
313 | case NVPTXISD::Tld4UnifiedG2DU64Float: |
314 | case NVPTXISD::Tld4UnifiedB2DU64Float: |
315 | case NVPTXISD::Tld4UnifiedA2DU64Float: |
316 | if (tryTextureIntrinsic(N)) |
317 | return; |
318 | break; |
// Every Suld* opcode below (Clamp, Trap and Zero variants) shares one handler,
// trySurfaceIntrinsic.
319 | case NVPTXISD::Suld1DI8Clamp: |
320 | case NVPTXISD::Suld1DI16Clamp: |
321 | case NVPTXISD::Suld1DI32Clamp: |
322 | case NVPTXISD::Suld1DI64Clamp: |
323 | case NVPTXISD::Suld1DV2I8Clamp: |
324 | case NVPTXISD::Suld1DV2I16Clamp: |
325 | case NVPTXISD::Suld1DV2I32Clamp: |
326 | case NVPTXISD::Suld1DV2I64Clamp: |
327 | case NVPTXISD::Suld1DV4I8Clamp: |
328 | case NVPTXISD::Suld1DV4I16Clamp: |
329 | case NVPTXISD::Suld1DV4I32Clamp: |
330 | case NVPTXISD::Suld1DArrayI8Clamp: |
331 | case NVPTXISD::Suld1DArrayI16Clamp: |
332 | case NVPTXISD::Suld1DArrayI32Clamp: |
333 | case NVPTXISD::Suld1DArrayI64Clamp: |
334 | case NVPTXISD::Suld1DArrayV2I8Clamp: |
335 | case NVPTXISD::Suld1DArrayV2I16Clamp: |
336 | case NVPTXISD::Suld1DArrayV2I32Clamp: |
337 | case NVPTXISD::Suld1DArrayV2I64Clamp: |
338 | case NVPTXISD::Suld1DArrayV4I8Clamp: |
339 | case NVPTXISD::Suld1DArrayV4I16Clamp: |
340 | case NVPTXISD::Suld1DArrayV4I32Clamp: |
341 | case NVPTXISD::Suld2DI8Clamp: |
342 | case NVPTXISD::Suld2DI16Clamp: |
343 | case NVPTXISD::Suld2DI32Clamp: |
344 | case NVPTXISD::Suld2DI64Clamp: |
345 | case NVPTXISD::Suld2DV2I8Clamp: |
346 | case NVPTXISD::Suld2DV2I16Clamp: |
347 | case NVPTXISD::Suld2DV2I32Clamp: |
348 | case NVPTXISD::Suld2DV2I64Clamp: |
349 | case NVPTXISD::Suld2DV4I8Clamp: |
350 | case NVPTXISD::Suld2DV4I16Clamp: |
351 | case NVPTXISD::Suld2DV4I32Clamp: |
352 | case NVPTXISD::Suld2DArrayI8Clamp: |
353 | case NVPTXISD::Suld2DArrayI16Clamp: |
354 | case NVPTXISD::Suld2DArrayI32Clamp: |
355 | case NVPTXISD::Suld2DArrayI64Clamp: |
356 | case NVPTXISD::Suld2DArrayV2I8Clamp: |
357 | case NVPTXISD::Suld2DArrayV2I16Clamp: |
358 | case NVPTXISD::Suld2DArrayV2I32Clamp: |
359 | case NVPTXISD::Suld2DArrayV2I64Clamp: |
360 | case NVPTXISD::Suld2DArrayV4I8Clamp: |
361 | case NVPTXISD::Suld2DArrayV4I16Clamp: |
362 | case NVPTXISD::Suld2DArrayV4I32Clamp: |
363 | case NVPTXISD::Suld3DI8Clamp: |
364 | case NVPTXISD::Suld3DI16Clamp: |
365 | case NVPTXISD::Suld3DI32Clamp: |
366 | case NVPTXISD::Suld3DI64Clamp: |
367 | case NVPTXISD::Suld3DV2I8Clamp: |
368 | case NVPTXISD::Suld3DV2I16Clamp: |
369 | case NVPTXISD::Suld3DV2I32Clamp: |
370 | case NVPTXISD::Suld3DV2I64Clamp: |
371 | case NVPTXISD::Suld3DV4I8Clamp: |
372 | case NVPTXISD::Suld3DV4I16Clamp: |
373 | case NVPTXISD::Suld3DV4I32Clamp: |
374 | case NVPTXISD::Suld1DI8Trap: |
375 | case NVPTXISD::Suld1DI16Trap: |
376 | case NVPTXISD::Suld1DI32Trap: |
377 | case NVPTXISD::Suld1DI64Trap: |
378 | case NVPTXISD::Suld1DV2I8Trap: |
379 | case NVPTXISD::Suld1DV2I16Trap: |
380 | case NVPTXISD::Suld1DV2I32Trap: |
381 | case NVPTXISD::Suld1DV2I64Trap: |
382 | case NVPTXISD::Suld1DV4I8Trap: |
383 | case NVPTXISD::Suld1DV4I16Trap: |
384 | case NVPTXISD::Suld1DV4I32Trap: |
385 | case NVPTXISD::Suld1DArrayI8Trap: |
386 | case NVPTXISD::Suld1DArrayI16Trap: |
387 | case NVPTXISD::Suld1DArrayI32Trap: |
388 | case NVPTXISD::Suld1DArrayI64Trap: |
389 | case NVPTXISD::Suld1DArrayV2I8Trap: |
390 | case NVPTXISD::Suld1DArrayV2I16Trap: |
391 | case NVPTXISD::Suld1DArrayV2I32Trap: |
392 | case NVPTXISD::Suld1DArrayV2I64Trap: |
393 | case NVPTXISD::Suld1DArrayV4I8Trap: |
394 | case NVPTXISD::Suld1DArrayV4I16Trap: |
395 | case NVPTXISD::Suld1DArrayV4I32Trap: |
396 | case NVPTXISD::Suld2DI8Trap: |
397 | case NVPTXISD::Suld2DI16Trap: |
398 | case NVPTXISD::Suld2DI32Trap: |
399 | case NVPTXISD::Suld2DI64Trap: |
400 | case NVPTXISD::Suld2DV2I8Trap: |
401 | case NVPTXISD::Suld2DV2I16Trap: |
402 | case NVPTXISD::Suld2DV2I32Trap: |
403 | case NVPTXISD::Suld2DV2I64Trap: |
404 | case NVPTXISD::Suld2DV4I8Trap: |
405 | case NVPTXISD::Suld2DV4I16Trap: |
406 | case NVPTXISD::Suld2DV4I32Trap: |
407 | case NVPTXISD::Suld2DArrayI8Trap: |
408 | case NVPTXISD::Suld2DArrayI16Trap: |
409 | case NVPTXISD::Suld2DArrayI32Trap: |
410 | case NVPTXISD::Suld2DArrayI64Trap: |
411 | case NVPTXISD::Suld2DArrayV2I8Trap: |
412 | case NVPTXISD::Suld2DArrayV2I16Trap: |
413 | case NVPTXISD::Suld2DArrayV2I32Trap: |
414 | case NVPTXISD::Suld2DArrayV2I64Trap: |
415 | case NVPTXISD::Suld2DArrayV4I8Trap: |
416 | case NVPTXISD::Suld2DArrayV4I16Trap: |
417 | case NVPTXISD::Suld2DArrayV4I32Trap: |
418 | case NVPTXISD::Suld3DI8Trap: |
419 | case NVPTXISD::Suld3DI16Trap: |
420 | case NVPTXISD::Suld3DI32Trap: |
421 | case NVPTXISD::Suld3DI64Trap: |
422 | case NVPTXISD::Suld3DV2I8Trap: |
423 | case NVPTXISD::Suld3DV2I16Trap: |
424 | case NVPTXISD::Suld3DV2I32Trap: |
425 | case NVPTXISD::Suld3DV2I64Trap: |
426 | case NVPTXISD::Suld3DV4I8Trap: |
427 | case NVPTXISD::Suld3DV4I16Trap: |
428 | case NVPTXISD::Suld3DV4I32Trap: |
429 | case NVPTXISD::Suld1DI8Zero: |
430 | case NVPTXISD::Suld1DI16Zero: |
431 | case NVPTXISD::Suld1DI32Zero: |
432 | case NVPTXISD::Suld1DI64Zero: |
433 | case NVPTXISD::Suld1DV2I8Zero: |
434 | case NVPTXISD::Suld1DV2I16Zero: |
435 | case NVPTXISD::Suld1DV2I32Zero: |
436 | case NVPTXISD::Suld1DV2I64Zero: |
437 | case NVPTXISD::Suld1DV4I8Zero: |
438 | case NVPTXISD::Suld1DV4I16Zero: |
439 | case NVPTXISD::Suld1DV4I32Zero: |
440 | case NVPTXISD::Suld1DArrayI8Zero: |
441 | case NVPTXISD::Suld1DArrayI16Zero: |
442 | case NVPTXISD::Suld1DArrayI32Zero: |
443 | case NVPTXISD::Suld1DArrayI64Zero: |
444 | case NVPTXISD::Suld1DArrayV2I8Zero: |
445 | case NVPTXISD::Suld1DArrayV2I16Zero: |
446 | case NVPTXISD::Suld1DArrayV2I32Zero: |
447 | case NVPTXISD::Suld1DArrayV2I64Zero: |
448 | case NVPTXISD::Suld1DArrayV4I8Zero: |
449 | case NVPTXISD::Suld1DArrayV4I16Zero: |
450 | case NVPTXISD::Suld1DArrayV4I32Zero: |
451 | case NVPTXISD::Suld2DI8Zero: |
452 | case NVPTXISD::Suld2DI16Zero: |
453 | case NVPTXISD::Suld2DI32Zero: |
454 | case NVPTXISD::Suld2DI64Zero: |
455 | case NVPTXISD::Suld2DV2I8Zero: |
456 | case NVPTXISD::Suld2DV2I16Zero: |
457 | case NVPTXISD::Suld2DV2I32Zero: |
458 | case NVPTXISD::Suld2DV2I64Zero: |
459 | case NVPTXISD::Suld2DV4I8Zero: |
460 | case NVPTXISD::Suld2DV4I16Zero: |
461 | case NVPTXISD::Suld2DV4I32Zero: |
462 | case NVPTXISD::Suld2DArrayI8Zero: |
463 | case NVPTXISD::Suld2DArrayI16Zero: |
464 | case NVPTXISD::Suld2DArrayI32Zero: |
465 | case NVPTXISD::Suld2DArrayI64Zero: |
466 | case NVPTXISD::Suld2DArrayV2I8Zero: |
467 | case NVPTXISD::Suld2DArrayV2I16Zero: |
468 | case NVPTXISD::Suld2DArrayV2I32Zero: |
469 | case NVPTXISD::Suld2DArrayV2I64Zero: |
470 | case NVPTXISD::Suld2DArrayV4I8Zero: |
471 | case NVPTXISD::Suld2DArrayV4I16Zero: |
472 | case NVPTXISD::Suld2DArrayV4I32Zero: |
473 | case NVPTXISD::Suld3DI8Zero: |
474 | case NVPTXISD::Suld3DI16Zero: |
475 | case NVPTXISD::Suld3DI32Zero: |
476 | case NVPTXISD::Suld3DI64Zero: |
477 | case NVPTXISD::Suld3DV2I8Zero: |
478 | case NVPTXISD::Suld3DV2I16Zero: |
479 | case NVPTXISD::Suld3DV2I32Zero: |
480 | case NVPTXISD::Suld3DV2I64Zero: |
481 | case NVPTXISD::Suld3DV4I8Zero: |
482 | case NVPTXISD::Suld3DV4I16Zero: |
483 | case NVPTXISD::Suld3DV4I32Zero: |
484 | if (trySurfaceIntrinsic(N)) |
485 | return; |
486 | break; |
487 | case ISD::AND: |
488 | case ISD::SRA: |
489 | case ISD::SRL: |
490 | |
// tryBFE gets the first attempt at AND / SRA / SRL nodes.
491 | if (tryBFE(N)) |
492 | return; |
493 | break; |
// No fallback for address-space casts: control returns right after the call.
494 | case ISD::ADDRSPACECAST: |
495 | SelectAddrSpaceCast(N); |
496 | return; |
497 | case ISD::ConstantFP: |
498 | if (tryConstantFP16(N)) |
499 | return; |
500 | break; |
501 | default: |
502 | break; |
503 | } |
// Reached when no custom routine claimed the node.
504 | SelectCode(N); |
505 | } |
506 | |
507 | bool NVPTXDAGToDAGISel::tryIntrinsicChain(SDNode *N) { |
508 | unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); |
509 | switch (IID) { |
510 | default: |
511 | return false; |
512 | case Intrinsic::nvvm_ldg_global_f: |
513 | case Intrinsic::nvvm_ldg_global_i: |
514 | case Intrinsic::nvvm_ldg_global_p: |
515 | case Intrinsic::nvvm_ldu_global_f: |
516 | case Intrinsic::nvvm_ldu_global_i: |
517 | case Intrinsic::nvvm_ldu_global_p: |
518 | return tryLDGLDU(N); |
519 | } |
520 | } |
521 | |
522 | |
523 | |
524 | bool NVPTXDAGToDAGISel::tryConstantFP16(SDNode *N) { |
525 | if (N->getValueType(0) != MVT::f16) |
526 | return false; |
527 | SDValue Val = CurDAG->getTargetConstantFP( |
528 | cast<ConstantFPSDNode>(N)->getValueAPF(), SDLoc(N), MVT::f16); |
529 | SDNode *LoadConstF16 = |
530 | CurDAG->getMachineNode(NVPTX::LOAD_CONST_F16, SDLoc(N), MVT::f16, Val); |
531 | ReplaceNode(N, LoadConstF16); |
532 | return true; |
533 | } |
534 | |
535 | |
536 | |
537 | static unsigned getPTXCmpMode(const CondCodeSDNode &CondCode, bool FTZ) { |
538 | using NVPTX::PTXCmpMode::CmpMode; |
539 | unsigned PTXCmpMode = [](ISD::CondCode CC) { |
540 | switch (CC) { |
541 | default: |
542 | llvm_unreachable("Unexpected condition code."); |
543 | case ISD::SETOEQ: |
544 | return CmpMode::EQ; |
545 | case ISD::SETOGT: |
546 | return CmpMode::GT; |
547 | case ISD::SETOGE: |
548 | return CmpMode::GE; |
549 | case ISD::SETOLT: |
550 | return CmpMode::LT; |
551 | case ISD::SETOLE: |
552 | return CmpMode::LE; |
553 | case ISD::SETONE: |
554 | return CmpMode::NE; |
555 | case ISD::SETO: |
556 | return CmpMode::NUM; |
557 | case ISD::SETUO: |
558 | return CmpMode::NotANumber; |
559 | case ISD::SETUEQ: |
560 | return CmpMode::EQU; |
561 | case ISD::SETUGT: |
562 | return CmpMode::GTU; |
563 | case ISD::SETUGE: |
564 | return CmpMode::GEU; |
565 | case ISD::SETULT: |
566 | return CmpMode::LTU; |
567 | case ISD::SETULE: |
568 | return CmpMode::LEU; |
569 | case ISD::SETUNE: |
570 | return CmpMode::NEU; |
571 | case ISD::SETEQ: |
572 | return CmpMode::EQ; |
573 | case ISD::SETGT: |
574 | return CmpMode::GT; |
575 | case ISD::SETGE: |
576 | return CmpMode::GE; |
577 | case ISD::SETLT: |
578 | return CmpMode::LT; |
579 | case ISD::SETLE: |
580 | return CmpMode::LE; |
581 | case ISD::SETNE: |
582 | return CmpMode::NE; |
583 | } |
584 | }(CondCode.get()); |
585 | |
586 | if (FTZ) |
587 | PTXCmpMode |= NVPTX::PTXCmpMode::FTZ_FLAG; |
588 | |
589 | return PTXCmpMode; |
590 | } |
591 | |
592 | bool NVPTXDAGToDAGISel::SelectSETP_F16X2(SDNode *N) { |
593 | unsigned PTXCmpMode = |
594 | getPTXCmpMode(*cast<CondCodeSDNode>(N->getOperand(2)), useF32FTZ()); |
595 | SDLoc DL(N); |
596 | SDNode *SetP = CurDAG->getMachineNode( |
597 | NVPTX::SETP_f16x2rr, DL, MVT::i1, MVT::i1, N->getOperand(0), |
598 | N->getOperand(1), CurDAG->getTargetConstant(PTXCmpMode, DL, MVT::i32)); |
599 | ReplaceNode(N, SetP); |
600 | return true; |
601 | } |
602 | |
603 | |
604 | |
605 | bool NVPTXDAGToDAGISel::tryEXTRACT_VECTOR_ELEMENT(SDNode *N) { |
606 | SDValue Vector = N->getOperand(0); |
607 | |
608 | |
609 | |
610 | if (Vector.getSimpleValueType() != MVT::v2f16) |
611 | return false; |
612 | |
613 | |
614 | SmallVector<SDNode *, 4> E0, E1; |
615 | for (auto U : Vector.getNode()->uses()) { |
616 | if (U->getOpcode() != ISD::EXTRACT_VECTOR_ELT) |
617 | continue; |
618 | if (U->getOperand(0) != Vector) |
619 | continue; |
620 | if (const ConstantSDNode *IdxConst = |
621 | dyn_cast<ConstantSDNode>(U->getOperand(1))) { |
622 | if (IdxConst->getZExtValue() == 0) |
623 | E0.push_back(U); |
624 | else if (IdxConst->getZExtValue() == 1) |
625 | E1.push_back(U); |
626 | else |
627 | llvm_unreachable("Invalid vector index."); |
628 | } |
629 | } |
630 | |
631 | |
632 | |
633 | if (E0.empty() || E1.empty()) |
634 | return false; |
635 | |
636 | unsigned Op = NVPTX::SplitF16x2; |
637 | |
638 | |
639 | SDValue Source = Vector; |
640 | if (Vector->getOpcode() == ISD::BITCAST) { |
641 | Op = NVPTX::SplitI32toF16x2; |
642 | Source = Vector->getOperand(0); |
643 | } |
644 | |
645 | |
646 | SDNode *ScatterOp = |
647 | CurDAG->getMachineNode(Op, SDLoc(N), MVT::f16, MVT::f16, Source); |
648 | for (auto *Node : E0) |
649 | ReplaceUses(SDValue(Node, 0), SDValue(ScatterOp, 0)); |
650 | for (auto *Node : E1) |
651 | ReplaceUses(SDValue(Node, 0), SDValue(ScatterOp, 1)); |
652 | |
653 | return true; |
654 | } |
655 | |
656 | static unsigned int getCodeAddrSpace(MemSDNode *N) { |
657 | const Value *Src = N->getMemOperand()->getValue(); |
658 | |
659 | if (!Src) |
660 | return NVPTX::PTXLdStInstCode::GENERIC; |
661 | |
662 | if (auto *PT = dyn_cast<PointerType>(Src->getType())) { |
663 | switch (PT->getAddressSpace()) { |
664 | case llvm::ADDRESS_SPACE_LOCAL: return NVPTX::PTXLdStInstCode::LOCAL; |
665 | case llvm::ADDRESS_SPACE_GLOBAL: return NVPTX::PTXLdStInstCode::GLOBAL; |
666 | case llvm::ADDRESS_SPACE_SHARED: return NVPTX::PTXLdStInstCode::SHARED; |
667 | case llvm::ADDRESS_SPACE_GENERIC: return NVPTX::PTXLdStInstCode::GENERIC; |
668 | case llvm::ADDRESS_SPACE_PARAM: return NVPTX::PTXLdStInstCode::PARAM; |
669 | case llvm::ADDRESS_SPACE_CONST: return NVPTX::PTXLdStInstCode::CONSTANT; |
670 | default: break; |
671 | } |
672 | } |
673 | return NVPTX::PTXLdStInstCode::GENERIC; |
674 | } |
675 | |
676 | static bool canLowerToLDG(MemSDNode *N, const NVPTXSubtarget &Subtarget, |
677 | unsigned CodeAddrSpace, MachineFunction *F) { |
678 | |
679 | |
680 | |
681 | |
682 | |
683 | |
684 | |
685 | |
686 | |
687 | |
688 | |
689 | |
690 | |
691 | |
692 | |
693 | |
694 | |
695 | if (!Subtarget.hasLDG() || CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL) |
696 | return false; |
697 | |
698 | if (N->isInvariant()) |
699 | return true; |
700 | |
701 | bool IsKernelFn = isKernelFunction(F->getFunction()); |
702 | |
703 | |
704 | |
705 | |
706 | SmallVector<const Value *, 8> Objs; |
707 | getUnderlyingObjects(N->getMemOperand()->getValue(), Objs); |
708 | |
709 | return all_of(Objs, [&](const Value *V) { |
710 | if (auto *A = dyn_cast<const Argument>(V)) |
711 | return IsKernelFn && A->onlyReadsMemory() && A->hasNoAliasAttr(); |
712 | if (auto *GV = dyn_cast<const GlobalVariable>(V)) |
713 | return GV->isConstant(); |
714 | return false; |
715 | }); |
716 | } |
717 | |
718 | bool NVPTXDAGToDAGISel::tryIntrinsicNoChain(SDNode *N) { |
719 | unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); |
720 | switch (IID) { |
721 | default: |
722 | return false; |
723 | case Intrinsic::nvvm_texsurf_handle_internal: |
724 | SelectTexSurfHandle(N); |
725 | return true; |
726 | } |
727 | } |
728 | |
729 | void NVPTXDAGToDAGISel::SelectTexSurfHandle(SDNode *N) { |
730 | |
731 | SDValue Wrapper = N->getOperand(1); |
732 | SDValue GlobalVal = Wrapper.getOperand(0); |
733 | ReplaceNode(N, CurDAG->getMachineNode(NVPTX::texsurf_handles, SDLoc(N), |
734 | MVT::i64, GlobalVal)); |
735 | } |
736 | |
737 | void NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) { |
738 | SDValue Src = N->getOperand(0); |
739 | AddrSpaceCastSDNode *CastN = cast<AddrSpaceCastSDNode>(N); |
740 | unsigned SrcAddrSpace = CastN->getSrcAddressSpace(); |
741 | unsigned DstAddrSpace = CastN->getDestAddressSpace(); |
742 | assert(SrcAddrSpace != DstAddrSpace && |
743 | "addrspacecast must be between different address spaces"); |
744 | |
745 | if (DstAddrSpace == ADDRESS_SPACE_GENERIC) { |
746 | |
747 | unsigned Opc; |
748 | switch (SrcAddrSpace) { |
749 | default: report_fatal_error("Bad address space in addrspacecast"); |
750 | case ADDRESS_SPACE_GLOBAL: |
751 | Opc = TM.is64Bit() ? NVPTX::cvta_global_yes_64 : NVPTX::cvta_global_yes; |
752 | break; |
753 | case ADDRESS_SPACE_SHARED: |
754 | Opc = TM.is64Bit() ? (useShortPointers() ? NVPTX::cvta_shared_yes_6432 |
755 | : NVPTX::cvta_shared_yes_64) |
756 | : NVPTX::cvta_shared_yes; |
757 | break; |
758 | case ADDRESS_SPACE_CONST: |
759 | Opc = TM.is64Bit() ? (useShortPointers() ? NVPTX::cvta_const_yes_6432 |
760 | : NVPTX::cvta_const_yes_64) |
761 | : NVPTX::cvta_const_yes; |
762 | break; |
763 | case ADDRESS_SPACE_LOCAL: |
764 | Opc = TM.is64Bit() ? (useShortPointers() ? NVPTX::cvta_local_yes_6432 |
765 | : NVPTX::cvta_local_yes_64) |
766 | : NVPTX::cvta_local_yes; |
767 | break; |
768 | } |
769 | ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0), |
770 | Src)); |
771 | return; |
772 | } else { |
773 | |
774 | if (SrcAddrSpace != 0) |
775 | report_fatal_error("Cannot cast between two non-generic address spaces"); |
776 | unsigned Opc; |
777 | switch (DstAddrSpace) { |
778 | default: report_fatal_error("Bad address space in addrspacecast"); |
779 | case ADDRESS_SPACE_GLOBAL: |
780 | Opc = TM.is64Bit() ? NVPTX::cvta_to_global_yes_64 |
781 | : NVPTX::cvta_to_global_yes; |
782 | break; |
783 | case ADDRESS_SPACE_SHARED: |
784 | Opc = TM.is64Bit() ? (useShortPointers() ? NVPTX::cvta_to_shared_yes_3264 |
785 | : NVPTX::cvta_to_shared_yes_64) |
786 | : NVPTX::cvta_to_shared_yes; |
787 | break; |
788 | case ADDRESS_SPACE_CONST: |
789 | Opc = TM.is64Bit() ? (useShortPointers() ? NVPTX::cvta_to_const_yes_3264 |
790 | : NVPTX::cvta_to_const_yes_64) |
791 | : NVPTX::cvta_to_const_yes; |
792 | break; |
793 | case ADDRESS_SPACE_LOCAL: |
794 | Opc = TM.is64Bit() ? (useShortPointers() ? NVPTX::cvta_to_local_yes_3264 |
795 | : NVPTX::cvta_to_local_yes_64) |
796 | : NVPTX::cvta_to_local_yes; |
797 | break; |
798 | case ADDRESS_SPACE_PARAM: |
799 | Opc = TM.is64Bit() ? NVPTX::nvvm_ptr_gen_to_param_64 |
800 | : NVPTX::nvvm_ptr_gen_to_param; |
801 | break; |
802 | } |
803 | ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0), |
804 | Src)); |
805 | return; |
806 | } |
807 | } |
808 | |
809 | |
810 | |
811 | static Optional<unsigned> pickOpcodeForVT( |
812 | MVT::SimpleValueType VT, unsigned Opcode_i8, unsigned Opcode_i16, |
813 | unsigned Opcode_i32, Optional<unsigned> Opcode_i64, unsigned Opcode_f16, |
814 | unsigned Opcode_f16x2, unsigned Opcode_f32, Optional<unsigned> Opcode_f64) { |
815 | switch (VT) { |
816 | case MVT::i1: |
817 | case MVT::i8: |
818 | return Opcode_i8; |
819 | case MVT::i16: |
820 | return Opcode_i16; |
821 | case MVT::i32: |
822 | return Opcode_i32; |
823 | case MVT::i64: |
824 | return Opcode_i64; |
825 | case MVT::f16: |
826 | return Opcode_f16; |
827 | case MVT::v2f16: |
828 | return Opcode_f16x2; |
829 | case MVT::f32: |
830 | return Opcode_f32; |
831 | case MVT::f64: |
832 | return Opcode_f64; |
833 | default: |
834 | return None; |
835 | } |
836 | } |
837 | |
838 | bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) { |
839 | SDLoc dl(N); |
840 | MemSDNode *LD = cast<MemSDNode>(N); |
841 | assert(LD->readMem() && "Expected load"); |
842 | LoadSDNode *PlainLoad = dyn_cast<LoadSDNode>(N); |
843 | EVT LoadedVT = LD->getMemoryVT(); |
844 | SDNode *NVPTXLD = nullptr; |
845 | |
846 | |
847 | if (PlainLoad && PlainLoad->isIndexed()) |
848 | return false; |
849 | |
850 | if (!LoadedVT.isSimple()) |
851 | return false; |
852 | |
853 | AtomicOrdering Ordering = LD->getSuccessOrdering(); |
854 | |
855 | |
856 | |
857 | |
858 | if (isStrongerThanMonotonic(Ordering)) |
859 | return false; |
860 | |
861 | |
862 | unsigned int CodeAddrSpace = getCodeAddrSpace(LD); |
863 | if (canLowerToLDG(LD, *Subtarget, CodeAddrSpace, MF)) { |
864 | return tryLDGLDU(N); |
865 | } |
866 | |
867 | unsigned int PointerSize = |
868 | CurDAG->getDataLayout().getPointerSizeInBits(LD->getAddressSpace()); |
869 | |
870 | |
871 | |
872 | |
873 | bool isVolatile = LD->isVolatile() || Ordering == AtomicOrdering::Monotonic; |
874 | if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL && |
875 | CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED && |
876 | CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC) |
877 | isVolatile = false; |
878 | |
879 | |
880 | |
881 | |
882 | |
883 | |
884 | |
885 | MVT SimpleVT = LoadedVT.getSimpleVT(); |
886 | MVT ScalarVT = SimpleVT.getScalarType(); |
887 | |
888 | unsigned fromTypeWidth = std::max(8U, (unsigned)ScalarVT.getSizeInBits()); |
889 | unsigned int fromType; |
890 | |
891 | |
892 | unsigned vecType = NVPTX::PTXLdStInstCode::Scalar; |
893 | if (SimpleVT.isVector()) { |
894 | assert(LoadedVT == MVT::v2f16 && "Unexpected vector type"); |
895 | |
896 | fromTypeWidth = 32; |
897 | } |
898 | |
899 | if (PlainLoad && (PlainLoad->getExtensionType() == ISD::SEXTLOAD)) |
900 | fromType = NVPTX::PTXLdStInstCode::Signed; |
901 | else if (ScalarVT.isFloatingPoint()) |
902 | |
903 | fromType = ScalarVT.SimpleTy == MVT::f16 ? NVPTX::PTXLdStInstCode::Untyped |
904 | : NVPTX::PTXLdStInstCode::Float; |
905 | else |
906 | fromType = NVPTX::PTXLdStInstCode::Unsigned; |
907 | |
908 | |
909 | SDValue Chain = N->getOperand(0); |
910 | SDValue N1 = N->getOperand(1); |
911 | SDValue Addr; |
912 | SDValue Offset, Base; |
913 | Optional<unsigned> Opcode; |
914 | MVT::SimpleValueType TargetVT = LD->getSimpleValueType(0).SimpleTy; |
915 | |
916 | if (SelectDirectAddr(N1, Addr)) { |
917 | Opcode = pickOpcodeForVT( |
918 | TargetVT, NVPTX::LD_i8_avar, NVPTX::LD_i16_avar, NVPTX::LD_i32_avar, |
919 | NVPTX::LD_i64_avar, NVPTX::LD_f16_avar, NVPTX::LD_f16x2_avar, |
920 | NVPTX::LD_f32_avar, NVPTX::LD_f64_avar); |
921 | if (!Opcode) |
922 | return false; |
923 | SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl), |
924 | getI32Imm(vecType, dl), getI32Imm(fromType, dl), |
925 | getI32Imm(fromTypeWidth, dl), Addr, Chain }; |
926 | NVPTXLD = CurDAG->getMachineNode(Opcode.getValue(), dl, TargetVT, |
927 | MVT::Other, Ops); |
928 | } else if (PointerSize == 64 ? SelectADDRsi64(N1.getNode(), N1, Base, Offset) |
929 | : SelectADDRsi(N1.getNode(), N1, Base, Offset)) { |
930 | Opcode = pickOpcodeForVT(TargetVT, NVPTX::LD_i8_asi, NVPTX::LD_i16_asi, |
931 | NVPTX::LD_i32_asi, NVPTX::LD_i64_asi, |
932 | NVPTX::LD_f16_asi, NVPTX::LD_f16x2_asi, |
933 | NVPTX::LD_f32_asi, NVPTX::LD_f64_asi); |
934 | if (!Opcode) |
935 | return false; |
936 | SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl), |
937 | getI32Imm(vecType, dl), getI32Imm(fromType, dl), |
938 | getI32Imm(fromTypeWidth, dl), Base, Offset, Chain }; |
939 | NVPTXLD = CurDAG->getMachineNode(Opcode.getValue(), dl, TargetVT, |
940 | MVT::Other, Ops); |
941 | } else if (PointerSize == 64 ? SelectADDRri64(N1.getNode(), N1, Base, Offset) |
942 | : SelectADDRri(N1.getNode(), N1, Base, Offset)) { |
943 | if (PointerSize == 64) |
944 | Opcode = pickOpcodeForVT( |
945 | TargetVT, NVPTX::LD_i8_ari_64, NVPTX::LD_i16_ari_64, |
946 | NVPTX::LD_i32_ari_64, NVPTX::LD_i64_ari_64, NVPTX::LD_f16_ari_64, |
947 | NVPTX::LD_f16x2_ari_64, NVPTX::LD_f32_ari_64, NVPTX::LD_f64_ari_64); |
948 | else |
949 | Opcode = pickOpcodeForVT( |
950 | TargetVT, NVPTX::LD_i8_ari, NVPTX::LD_i16_ari, NVPTX::LD_i32_ari, |
951 | NVPTX::LD_i64_ari, NVPTX::LD_f16_ari, NVPTX::LD_f16x2_ari, |
952 | NVPTX::LD_f32_ari, NVPTX::LD_f64_ari); |
953 | if (!Opcode) |
954 | return false; |
955 | SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl), |
956 | getI32Imm(vecType, dl), getI32Imm(fromType, dl), |
957 | getI32Imm(fromTypeWidth, dl), Base, Offset, Chain }; |
958 | NVPTXLD = CurDAG->getMachineNode(Opcode.getValue(), dl, TargetVT, |
959 | MVT::Other, Ops); |
960 | } else { |
961 | if (PointerSize == 64) |
962 | Opcode = pickOpcodeForVT( |
963 | TargetVT, NVPTX::LD_i8_areg_64, NVPTX::LD_i16_areg_64, |
964 | NVPTX::LD_i32_areg_64, NVPTX::LD_i64_areg_64, NVPTX::LD_f16_areg_64, |
965 | NVPTX::LD_f16x2_areg_64, NVPTX::LD_f32_areg_64, |
966 | NVPTX::LD_f64_areg_64); |
967 | else |
968 | Opcode = pickOpcodeForVT( |
969 | TargetVT, NVPTX::LD_i8_areg, NVPTX::LD_i16_areg, NVPTX::LD_i32_areg, |
970 | NVPTX::LD_i64_areg, NVPTX::LD_f16_areg, NVPTX::LD_f16x2_areg, |
971 | NVPTX::LD_f32_areg, NVPTX::LD_f64_areg); |
972 | if (!Opcode) |
973 | return false; |
974 | SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl), |
975 | getI32Imm(vecType, dl), getI32Imm(fromType, dl), |
976 | getI32Imm(fromTypeWidth, dl), N1, Chain }; |
977 | NVPTXLD = CurDAG->getMachineNode(Opcode.getValue(), dl, TargetVT, |
978 | MVT::Other, Ops); |
979 | } |
980 | |
981 | if (!NVPTXLD) |
982 | return false; |
983 | |
984 | MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand(); |
985 | CurDAG->setNodeMemRefs(cast<MachineSDNode>(NVPTXLD), {MemRef}); |
986 | |
987 | ReplaceNode(N, NVPTXLD); |
988 | return true; |
989 | } |
990 | |
991 | bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) { |
992 | |
993 | SDValue Chain = N->getOperand(0); |
994 | SDValue Op1 = N->getOperand(1); |
995 | SDValue Addr, Offset, Base; |
996 | Optional<unsigned> Opcode; |
997 | SDLoc DL(N); |
998 | SDNode *LD; |
999 | MemSDNode *MemSD = cast<MemSDNode>(N); |
1000 | EVT LoadedVT = MemSD->getMemoryVT(); |
1001 | |
1002 | if (!LoadedVT.isSimple()) |
1003 | return false; |
1004 | |
1005 | |
1006 | unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD); |
1007 | if (canLowerToLDG(MemSD, *Subtarget, CodeAddrSpace, MF)) { |
1008 | return tryLDGLDU(N); |
1009 | } |
1010 | |
1011 | unsigned int PointerSize = |
1012 | CurDAG->getDataLayout().getPointerSizeInBits(MemSD->getAddressSpace()); |
1013 | |
1014 | |
1015 | |
1016 | bool IsVolatile = MemSD->isVolatile(); |
1017 | if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL && |
1018 | CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED && |
1019 | CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC) |
1020 | IsVolatile = false; |
1021 | |
1022 | |
1023 | MVT SimpleVT = LoadedVT.getSimpleVT(); |
1024 | |
1025 | |
1026 | |
1027 | |
1028 | |
1029 | |
1030 | |
1031 | MVT ScalarVT = SimpleVT.getScalarType(); |
1032 | |
1033 | unsigned FromTypeWidth = std::max(8U, (unsigned)ScalarVT.getSizeInBits()); |
1034 | unsigned int FromType; |
1035 | |
1036 | unsigned ExtensionType = cast<ConstantSDNode>( |
1037 | N->getOperand(N->getNumOperands() - 1))->getZExtValue(); |
1038 | if (ExtensionType == ISD::SEXTLOAD) |
1039 | FromType = NVPTX::PTXLdStInstCode::Signed; |
1040 | else if (ScalarVT.isFloatingPoint()) |
1041 | FromType = ScalarVT.SimpleTy == MVT::f16 ? NVPTX::PTXLdStInstCode::Untyped |
1042 | : NVPTX::PTXLdStInstCode::Float; |
1043 | else |
1044 | FromType = NVPTX::PTXLdStInstCode::Unsigned; |
1045 | |
1046 | unsigned VecType; |
1047 | |
1048 | switch (N->getOpcode()) { |
1049 | case NVPTXISD::LoadV2: |
1050 | VecType = NVPTX::PTXLdStInstCode::V2; |
1051 | break; |
1052 | case NVPTXISD::LoadV4: |
1053 | VecType = NVPTX::PTXLdStInstCode::V4; |
1054 | break; |
1055 | default: |
1056 | return false; |
1057 | } |
1058 | |
1059 | EVT EltVT = N->getValueType(0); |
1060 | |
1061 | |
1062 | |
1063 | |
1064 | if (EltVT == MVT::v2f16) { |
1065 | assert(N->getOpcode() == NVPTXISD::LoadV4 && "Unexpected load opcode."); |
1066 | EltVT = MVT::i32; |
1067 | FromType = NVPTX::PTXLdStInstCode::Untyped; |
1068 | FromTypeWidth = 32; |
1069 | } |
1070 | |
1071 | if (SelectDirectAddr(Op1, Addr)) { |
1072 | switch (N->getOpcode()) { |
1073 | default: |
1074 | return false; |
1075 | case NVPTXISD::LoadV2: |
1076 | Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, |
1077 | NVPTX::LDV_i8_v2_avar, NVPTX::LDV_i16_v2_avar, |
1078 | NVPTX::LDV_i32_v2_avar, NVPTX::LDV_i64_v2_avar, |
1079 | NVPTX::LDV_f16_v2_avar, NVPTX::LDV_f16x2_v2_avar, |
1080 | NVPTX::LDV_f32_v2_avar, NVPTX::LDV_f64_v2_avar); |
1081 | break; |
1082 | case NVPTXISD::LoadV4: |
1083 | Opcode = |
1084 | pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_avar, |
1085 | NVPTX::LDV_i16_v4_avar, NVPTX::LDV_i32_v4_avar, None, |
1086 | NVPTX::LDV_f16_v4_avar, NVPTX::LDV_f16x2_v4_avar, |
1087 | NVPTX::LDV_f32_v4_avar, None); |
1088 | break; |
1089 | } |
1090 | if (!Opcode) |
1091 | return false; |
1092 | SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL), |
1093 | getI32Imm(VecType, DL), getI32Imm(FromType, DL), |
1094 | getI32Imm(FromTypeWidth, DL), Addr, Chain }; |
1095 | LD = CurDAG->getMachineNode(Opcode.getValue(), DL, N->getVTList(), Ops); |
1096 | } else if (PointerSize == 64 |
1097 | ? SelectADDRsi64(Op1.getNode(), Op1, Base, Offset) |
1098 | : SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) { |
1099 | switch (N->getOpcode()) { |
1100 | default: |
1101 | return false; |
1102 | case NVPTXISD::LoadV2: |
1103 | Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, |
1104 | NVPTX::LDV_i8_v2_asi, NVPTX::LDV_i16_v2_asi, |
1105 | NVPTX::LDV_i32_v2_asi, NVPTX::LDV_i64_v2_asi, |
1106 | NVPTX::LDV_f16_v2_asi, NVPTX::LDV_f16x2_v2_asi, |
1107 | NVPTX::LDV_f32_v2_asi, NVPTX::LDV_f64_v2_asi); |
1108 | break; |
1109 | case NVPTXISD::LoadV4: |
1110 | Opcode = |
1111 | pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_asi, |
1112 | NVPTX::LDV_i16_v4_asi, NVPTX::LDV_i32_v4_asi, None, |
1113 | NVPTX::LDV_f16_v4_asi, NVPTX::LDV_f16x2_v4_asi, |
1114 | NVPTX::LDV_f32_v4_asi, None); |
1115 | break; |
1116 | } |
1117 | if (!Opcode) |
1118 | return false; |
1119 | SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL), |
1120 | getI32Imm(VecType, DL), getI32Imm(FromType, DL), |
1121 | getI32Imm(FromTypeWidth, DL), Base, Offset, Chain }; |
1122 | LD = CurDAG->getMachineNode(Opcode.getValue(), DL, N->getVTList(), Ops); |
1123 | } else if (PointerSize == 64 |
1124 | ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset) |
1125 | : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) { |
1126 | if (PointerSize == 64) { |
1127 | switch (N->getOpcode()) { |
1128 | default: |
1129 | return false; |
1130 | case NVPTXISD::LoadV2: |
1131 | Opcode = pickOpcodeForVT( |
1132 | EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v2_ari_64, |
1133 | NVPTX::LDV_i16_v2_ari_64, NVPTX::LDV_i32_v2_ari_64, |
1134 | NVPTX::LDV_i64_v2_ari_64, NVPTX::LDV_f16_v2_ari_64, |
1135 | NVPTX::LDV_f16x2_v2_ari_64, NVPTX::LDV_f32_v2_ari_64, |
1136 | NVPTX::LDV_f64_v2_ari_64); |
1137 | break; |
1138 | case NVPTXISD::LoadV4: |
1139 | Opcode = pickOpcodeForVT( |
1140 | EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_ari_64, |
1141 | NVPTX::LDV_i16_v4_ari_64, NVPTX::LDV_i32_v4_ari_64, None, |
1142 | NVPTX::LDV_f16_v4_ari_64, NVPTX::LDV_f16x2_v4_ari_64, |
1143 | NVPTX::LDV_f32_v4_ari_64, None); |
1144 | break; |
1145 | } |
1146 | } else { |
1147 | switch (N->getOpcode()) { |
1148 | default: |
1149 | return false; |
1150 | case NVPTXISD::LoadV2: |
1151 | Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, |
1152 | NVPTX::LDV_i8_v2_ari, NVPTX::LDV_i16_v2_ari, |
1153 | NVPTX::LDV_i32_v2_ari, NVPTX::LDV_i64_v2_ari, |
1154 | NVPTX::LDV_f16_v2_ari, NVPTX::LDV_f16x2_v2_ari, |
1155 | NVPTX::LDV_f32_v2_ari, NVPTX::LDV_f64_v2_ari); |
1156 | break; |
1157 | case NVPTXISD::LoadV4: |
1158 | Opcode = |
1159 | pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_ari, |
1160 | NVPTX::LDV_i16_v4_ari, NVPTX::LDV_i32_v4_ari, None, |
1161 | NVPTX::LDV_f16_v4_ari, NVPTX::LDV_f16x2_v4_ari, |
1162 | NVPTX::LDV_f32_v4_ari, None); |
1163 | break; |
1164 | } |
1165 | } |
1166 | if (!Opcode) |
1167 | return false; |
1168 | SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL), |
1169 | getI32Imm(VecType, DL), getI32Imm(FromType, DL), |
1170 | getI32Imm(FromTypeWidth, DL), Base, Offset, Chain }; |
1171 | |
1172 | LD = CurDAG->getMachineNode(Opcode.getValue(), DL, N->getVTList(), Ops); |
1173 | } else { |
1174 | if (PointerSize == 64) { |
1175 | switch (N->getOpcode()) { |
1176 | default: |
1177 | return false; |
1178 | case NVPTXISD::LoadV2: |
1179 | Opcode = pickOpcodeForVT( |
1180 | EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v2_areg_64, |
1181 | NVPTX::LDV_i16_v2_areg_64, NVPTX::LDV_i32_v2_areg_64, |
1182 | NVPTX::LDV_i64_v2_areg_64, NVPTX::LDV_f16_v2_areg_64, |
1183 | NVPTX::LDV_f16x2_v2_areg_64, NVPTX::LDV_f32_v2_areg_64, |
1184 | NVPTX::LDV_f64_v2_areg_64); |
1185 | break; |
1186 | case NVPTXISD::LoadV4: |
1187 | Opcode = pickOpcodeForVT( |
1188 | EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_areg_64, |
1189 | NVPTX::LDV_i16_v4_areg_64, NVPTX::LDV_i32_v4_areg_64, None, |
1190 | NVPTX::LDV_f16_v4_areg_64, NVPTX::LDV_f16x2_v4_areg_64, |
1191 | NVPTX::LDV_f32_v4_areg_64, None); |
1192 | break; |
1193 | } |
1194 | } else { |
1195 | switch (N->getOpcode()) { |
1196 | default: |
1197 | return false; |
1198 | case NVPTXISD::LoadV2: |
1199 | Opcode = |
1200 | pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v2_areg, |
1201 | NVPTX::LDV_i16_v2_areg, NVPTX::LDV_i32_v2_areg, |
1202 | NVPTX::LDV_i64_v2_areg, NVPTX::LDV_f16_v2_areg, |
1203 | NVPTX::LDV_f16x2_v2_areg, NVPTX::LDV_f32_v2_areg, |
1204 | NVPTX::LDV_f64_v2_areg); |
1205 | break; |
1206 | case NVPTXISD::LoadV4: |
1207 | Opcode = pickOpcodeForVT( |
1208 | EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_areg, |
1209 | NVPTX::LDV_i16_v4_areg, NVPTX::LDV_i32_v4_areg, None, |
1210 | NVPTX::LDV_f16_v4_areg, NVPTX::LDV_f16x2_v4_areg, |
1211 | NVPTX::LDV_f32_v4_areg, None); |
1212 | break; |
1213 | } |
1214 | } |
1215 | if (!Opcode) |
1216 | return false; |
1217 | SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL), |
1218 | getI32Imm(VecType, DL), getI32Imm(FromType, DL), |
1219 | getI32Imm(FromTypeWidth, DL), Op1, Chain }; |
1220 | LD = CurDAG->getMachineNode(Opcode.getValue(), DL, N->getVTList(), Ops); |
1221 | } |
1222 | |
1223 | MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand(); |
1224 | CurDAG->setNodeMemRefs(cast<MachineSDNode>(LD), {MemRef}); |
1225 | |
1226 | ReplaceNode(N, LD); |
1227 | return true; |
1228 | } |
1229 | |
1230 | bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) { |
1231 | |
1232 | SDValue Chain = N->getOperand(0); |
1233 | SDValue Op1; |
1234 | MemSDNode *Mem; |
1235 | bool IsLDG = true; |
1236 | |
1237 | |
1238 | |
1239 | if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) { |
1240 | Op1 = N->getOperand(2); |
1241 | Mem = cast<MemIntrinsicSDNode>(N); |
1242 | unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); |
1243 | switch (IID) { |
1244 | default: |
1245 | return false; |
1246 | case Intrinsic::nvvm_ldg_global_f: |
1247 | case Intrinsic::nvvm_ldg_global_i: |
1248 | case Intrinsic::nvvm_ldg_global_p: |
1249 | IsLDG = true; |
1250 | break; |
1251 | case Intrinsic::nvvm_ldu_global_f: |
1252 | case Intrinsic::nvvm_ldu_global_i: |
1253 | case Intrinsic::nvvm_ldu_global_p: |
1254 | IsLDG = false; |
1255 | break; |
1256 | } |
1257 | } else { |
1258 | Op1 = N->getOperand(1); |
1259 | Mem = cast<MemSDNode>(N); |
1260 | } |
1261 | |
1262 | Optional<unsigned> Opcode; |
1263 | SDLoc DL(N); |
1264 | SDNode *LD; |
1265 | SDValue Base, Offset, Addr; |
1266 | |
1267 | EVT EltVT = Mem->getMemoryVT(); |
1268 | unsigned NumElts = 1; |
1269 | if (EltVT.isVector()) { |
1270 | NumElts = EltVT.getVectorNumElements(); |
1271 | EltVT = EltVT.getVectorElementType(); |
1272 | |
1273 | if (EltVT == MVT::f16 && N->getValueType(0) == MVT::v2f16) { |
1274 | assert(NumElts % 2 == 0 && "Vector must have even number of elements"); |
1275 | EltVT = MVT::v2f16; |
1276 | NumElts /= 2; |
1277 | } |
1278 | } |
1279 | |
1280 | |
1281 | |
1282 | |
1283 | EVT NodeVT = (EltVT == MVT::i8) ? MVT::i16 : EltVT; |
1284 | SmallVector<EVT, 5> InstVTs; |
1285 | for (unsigned i = 0; i != NumElts; ++i) { |
1286 | InstVTs.push_back(NodeVT); |
1287 | } |
1288 | InstVTs.push_back(MVT::Other); |
1289 | SDVTList InstVTList = CurDAG->getVTList(InstVTs); |
1290 | |
1291 | if (SelectDirectAddr(Op1, Addr)) { |
1292 | switch (N->getOpcode()) { |
1293 | default: |
1294 | return false; |
1295 | case ISD::LOAD: |
1296 | case ISD::INTRINSIC_W_CHAIN: |
1297 | if (IsLDG) |
1298 | Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, |
1299 | NVPTX::INT_PTX_LDG_GLOBAL_i8avar, |
1300 | NVPTX::INT_PTX_LDG_GLOBAL_i16avar, |
1301 | NVPTX::INT_PTX_LDG_GLOBAL_i32avar, |
1302 | NVPTX::INT_PTX_LDG_GLOBAL_i64avar, |
1303 | NVPTX::INT_PTX_LDG_GLOBAL_f16avar, |
1304 | NVPTX::INT_PTX_LDG_GLOBAL_f16x2avar, |
1305 | NVPTX::INT_PTX_LDG_GLOBAL_f32avar, |
1306 | NVPTX::INT_PTX_LDG_GLOBAL_f64avar); |
1307 | else |
1308 | Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, |
1309 | NVPTX::INT_PTX_LDU_GLOBAL_i8avar, |
1310 | NVPTX::INT_PTX_LDU_GLOBAL_i16avar, |
1311 | NVPTX::INT_PTX_LDU_GLOBAL_i32avar, |
1312 | NVPTX::INT_PTX_LDU_GLOBAL_i64avar, |
1313 | NVPTX::INT_PTX_LDU_GLOBAL_f16avar, |
1314 | NVPTX::INT_PTX_LDU_GLOBAL_f16x2avar, |
1315 | NVPTX::INT_PTX_LDU_GLOBAL_f32avar, |
1316 | NVPTX::INT_PTX_LDU_GLOBAL_f64avar); |
1317 | break; |
1318 | case NVPTXISD::LoadV2: |
1319 | case NVPTXISD::LDGV2: |
1320 | Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, |
1321 | NVPTX::INT_PTX_LDG_G_v2i8_ELE_avar, |
1322 | NVPTX::INT_PTX_LDG_G_v2i16_ELE_avar, |
1323 | NVPTX::INT_PTX_LDG_G_v2i32_ELE_avar, |
1324 | NVPTX::INT_PTX_LDG_G_v2i64_ELE_avar, |
1325 | NVPTX::INT_PTX_LDG_G_v2f16_ELE_avar, |
1326 | NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_avar, |
1327 | NVPTX::INT_PTX_LDG_G_v2f32_ELE_avar, |
1328 | NVPTX::INT_PTX_LDG_G_v2f64_ELE_avar); |
1329 | break; |
1330 | case NVPTXISD::LDUV2: |
1331 | Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, |
1332 | NVPTX::INT_PTX_LDU_G_v2i8_ELE_avar, |
1333 | NVPTX::INT_PTX_LDU_G_v2i16_ELE_avar, |
1334 | NVPTX::INT_PTX_LDU_G_v2i32_ELE_avar, |
1335 | NVPTX::INT_PTX_LDU_G_v2i64_ELE_avar, |
1336 | NVPTX::INT_PTX_LDU_G_v2f16_ELE_avar, |
1337 | NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_avar, |
1338 | NVPTX::INT_PTX_LDU_G_v2f32_ELE_avar, |
1339 | NVPTX::INT_PTX_LDU_G_v2f64_ELE_avar); |
1340 | break; |
1341 | case NVPTXISD::LoadV4: |
1342 | case NVPTXISD::LDGV4: |
1343 | Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, |
1344 | NVPTX::INT_PTX_LDG_G_v4i8_ELE_avar, |
1345 | NVPTX::INT_PTX_LDG_G_v4i16_ELE_avar, |
1346 | NVPTX::INT_PTX_LDG_G_v4i32_ELE_avar, None, |
1347 | NVPTX::INT_PTX_LDG_G_v4f16_ELE_avar, |
1348 | NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_avar, |
1349 | NVPTX::INT_PTX_LDG_G_v4f32_ELE_avar, None); |
1350 | break; |
1351 | case NVPTXISD::LDUV4: |
1352 | Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, |
1353 | NVPTX::INT_PTX_LDU_G_v4i8_ELE_avar, |
1354 | NVPTX::INT_PTX_LDU_G_v4i16_ELE_avar, |
1355 | NVPTX::INT_PTX_LDU_G_v4i32_ELE_avar, None, |
1356 | NVPTX::INT_PTX_LDU_G_v4f16_ELE_avar, |
1357 | NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_avar, |
1358 | NVPTX::INT_PTX_LDU_G_v4f32_ELE_avar, None); |
1359 | break; |
1360 | } |
1361 | if (!Opcode) |
1362 | return false; |
1363 | SDValue Ops[] = { Addr, Chain }; |
1364 | LD = CurDAG->getMachineNode(Opcode.getValue(), DL, InstVTList, Ops); |
1365 | } else if (TM.is64Bit() ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset) |
1366 | : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) { |
1367 | if (TM.is64Bit()) { |
1368 | switch (N->getOpcode()) { |
1369 | default: |
1370 | return false; |
1371 | case ISD::LOAD: |
1372 | case ISD::INTRINSIC_W_CHAIN: |
1373 | if (IsLDG) |
1374 | Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, |
1375 | NVPTX::INT_PTX_LDG_GLOBAL_i8ari64, |
1376 | NVPTX::INT_PTX_LDG_GLOBAL_i16ari64, |
1377 | NVPTX::INT_PTX_LDG_GLOBAL_i32ari64, |
1378 | NVPTX::INT_PTX_LDG_GLOBAL_i64ari64, |
1379 | NVPTX::INT_PTX_LDG_GLOBAL_f16ari64, |
1380 | NVPTX::INT_PTX_LDG_GLOBAL_f16x2ari64, |
1381 | NVPTX::INT_PTX_LDG_GLOBAL_f32ari64, |
1382 | NVPTX::INT_PTX_LDG_GLOBAL_f64ari64); |
1383 | else |
1384 | Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, |
1385 | NVPTX::INT_PTX_LDU_GLOBAL_i8ari64, |
1386 | NVPTX::INT_PTX_LDU_GLOBAL_i16ari64, |
1387 | NVPTX::INT_PTX_LDU_GLOBAL_i32ari64, |
1388 | NVPTX::INT_PTX_LDU_GLOBAL_i64ari64, |
1389 | NVPTX::INT_PTX_LDU_GLOBAL_f16ari64, |
1390 | NVPTX::INT_PTX_LDU_GLOBAL_f16x2ari64, |
1391 | NVPTX::INT_PTX_LDU_GLOBAL_f32ari64, |
1392 | NVPTX::INT_PTX_LDU_GLOBAL_f64ari64); |
1393 | break; |
1394 | case NVPTXISD::LoadV2: |
1395 | case NVPTXISD::LDGV2: |
1396 | Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, |
1397 | NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari64, |
1398 | NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari64, |
1399 | NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari64, |
1400 | NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari64, |
1401 | NVPTX::INT_PTX_LDG_G_v2f16_ELE_ari64, |
1402 | NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_ari64, |
1403 | NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari64, |
1404 | NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari64); |
1405 | break; |
1406 | case NVPTXISD::LDUV2: |
1407 | Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, |
1408 | NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari64, |
1409 | NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari64, |
1410 | NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari64, |
1411 | NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari64, |
1412 | NVPTX::INT_PTX_LDU_G_v2f16_ELE_ari64, |
1413 | NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_ari64, |
1414 | NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari64, |
1415 | NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari64); |
1416 | break; |
1417 | case NVPTXISD::LoadV4: |
1418 | case NVPTXISD::LDGV4: |
1419 | Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, |
1420 | NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari64, |
1421 | NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari64, |
1422 | NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari64, None, |
1423 | NVPTX::INT_PTX_LDG_G_v4f16_ELE_ari64, |
1424 | NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_ari64, |
1425 | NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari64, None); |
1426 | break; |
1427 | case NVPTXISD::LDUV4: |
1428 | Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, |
1429 | NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari64, |
1430 | NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari64, |
1431 | NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari64, None, |
1432 | NVPTX::INT_PTX_LDU_G_v4f16_ELE_ari64, |
1433 | NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_ari64, |
1434 | NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari64, None); |
1435 | break; |
1436 | } |
1437 | } else { |
1438 | switch (N->getOpcode()) { |
1439 | default: |
1440 | return false; |
1441 | case ISD::LOAD: |
1442 | case ISD::INTRINSIC_W_CHAIN: |
1443 | if (IsLDG) |
1444 | Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, |
1445 | NVPTX::INT_PTX_LDG_GLOBAL_i8ari, |
1446 | NVPTX::INT_PTX_LDG_GLOBAL_i16ari, |
1447 | NVPTX::INT_PTX_LDG_GLOBAL_i32ari, |
1448 | NVPTX::INT_PTX_LDG_GLOBAL_i64ari, |
1449 | NVPTX::INT_PTX_LDG_GLOBAL_f16ari, |
1450 | NVPTX::INT_PTX_LDG_GLOBAL_f16x2ari, |
1451 | NVPTX::INT_PTX_LDG_GLOBAL_f32ari, |
1452 | NVPTX::INT_PTX_LDG_GLOBAL_f64ari); |
1453 | else |
1454 | Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, |
1455 | NVPTX::INT_PTX_LDU_GLOBAL_i8ari, |
1456 | NVPTX::INT_PTX_LDU_GLOBAL_i16ari, |
1457 | NVPTX::INT_PTX_LDU_GLOBAL_i32ari, |
1458 | NVPTX::INT_PTX_LDU_GLOBAL_i64ari, |
1459 | NVPTX::INT_PTX_LDU_GLOBAL_f16ari, |
1460 | NVPTX::INT_PTX_LDU_GLOBAL_f16x2ari, |
1461 | NVPTX::INT_PTX_LDU_GLOBAL_f32ari, |
1462 | NVPTX::INT_PTX_LDU_GLOBAL_f64ari); |
1463 | break; |
1464 | case NVPTXISD::LoadV2: |
1465 | case NVPTXISD::LDGV2: |
1466 | Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, |
1467 | NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari32, |
1468 | NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari32, |
1469 | NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari32, |
1470 | NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari32, |
1471 | NVPTX::INT_PTX_LDG_G_v2f16_ELE_ari32, |
1472 | NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_ari32, |
1473 | NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari32, |
1474 | NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari32); |
1475 | break; |
1476 | case NVPTXISD::LDUV2: |
1477 | Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, |
1478 | NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari32, |
1479 | NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari32, |
1480 | NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari32, |
1481 | NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari32, |
1482 | NVPTX::INT_PTX_LDU_G_v2f16_ELE_ari32, |
1483 | NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_ari32, |
1484 | NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari32, |
1485 | NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari32); |
1486 | break; |
1487 | case NVPTXISD::LoadV4: |
1488 | case NVPTXISD::LDGV4: |
1489 | Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, |
1490 | NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari32, |
1491 | NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari32, |
1492 | NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari32, None, |
1493 | NVPTX::INT_PTX_LDG_G_v4f16_ELE_ari32, |
1494 | NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_ari32, |
1495 | NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari32, None); |
1496 | break; |
1497 | case NVPTXISD::LDUV4: |
1498 | Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, |
1499 | NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari32, |
1500 | NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari32, |
1501 | NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari32, None, |
1502 | NVPTX::INT_PTX_LDU_G_v4f16_ELE_ari32, |
1503 | NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_ari32, |
1504 | NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari32, None); |
1505 | break; |
1506 | } |
1507 | } |
1508 | if (!Opcode) |
1509 | return false; |
1510 | SDValue Ops[] = {Base, Offset, Chain}; |
1511 | LD = CurDAG->getMachineNode(Opcode.getValue(), DL, InstVTList, Ops); |
1512 | } else { |
1513 | if (TM.is64Bit()) { |
1514 | switch (N->getOpcode()) { |
1515 | default: |
1516 | return false; |
1517 | case ISD::LOAD: |
1518 | case ISD::INTRINSIC_W_CHAIN: |
1519 | if (IsLDG) |
1520 | Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, |
1521 | NVPTX::INT_PTX_LDG_GLOBAL_i8areg64, |
1522 | NVPTX::INT_PTX_LDG_GLOBAL_i16areg64, |
1523 | NVPTX::INT_PTX_LDG_GLOBAL_i32areg64, |
1524 | NVPTX::INT_PTX_LDG_GLOBAL_i64areg64, |
1525 | NVPTX::INT_PTX_LDG_GLOBAL_f16areg64, |
1526 | NVPTX::INT_PTX_LDG_GLOBAL_f16x2areg64, |
1527 | NVPTX::INT_PTX_LDG_GLOBAL_f32areg64, |
1528 | NVPTX::INT_PTX_LDG_GLOBAL_f64areg64); |
1529 | else |
1530 | Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, |
1531 | NVPTX::INT_PTX_LDU_GLOBAL_i8areg64, |
1532 | NVPTX::INT_PTX_LDU_GLOBAL_i16areg64, |
1533 | NVPTX::INT_PTX_LDU_GLOBAL_i32areg64, |
1534 | NVPTX::INT_PTX_LDU_GLOBAL_i64areg64, |
1535 | NVPTX::INT_PTX_LDU_GLOBAL_f16areg64, |
1536 | NVPTX::INT_PTX_LDU_GLOBAL_f16x2areg64, |
1537 | NVPTX::INT_PTX_LDU_GLOBAL_f32areg64, |
1538 | NVPTX::INT_PTX_LDU_GLOBAL_f64areg64); |
1539 | break; |
1540 | case NVPTXISD::LoadV2: |
1541 | case NVPTXISD::LDGV2: |
1542 | Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, |
1543 | NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg64, |
1544 | NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg64, |
1545 | NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg64, |
1546 | NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg64, |
1547 | NVPTX::INT_PTX_LDG_G_v2f16_ELE_areg64, |
1548 | NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_areg64, |
1549 | NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg64, |
1550 | NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg64); |
1551 | break; |
1552 | case NVPTXISD::LDUV2: |
1553 | Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, |
1554 | NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg64, |
1555 | NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg64, |
1556 | NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg64, |
1557 | NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg64, |
1558 | NVPTX::INT_PTX_LDU_G_v2f16_ELE_areg64, |
1559 | NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_areg64, |
1560 | NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg64, |
1561 | NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg64); |
1562 | break; |
1563 | case NVPTXISD::LoadV4: |
1564 | case NVPTXISD::LDGV4: |
1565 | Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, |
1566 | NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg64, |
1567 | NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg64, |
1568 | NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg64, None, |
1569 | NVPTX::INT_PTX_LDG_G_v4f16_ELE_areg64, |
1570 | NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_areg64, |
1571 | NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg64, None); |
1572 | break; |
1573 | case NVPTXISD::LDUV4: |
1574 | Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, |
1575 | NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg64, |
1576 | NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg64, |
1577 | NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg64, None, |
1578 | NVPTX::INT_PTX_LDU_G_v4f16_ELE_areg64, |
1579 | NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_areg64, |
1580 | NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg64, None); |
1581 | break; |
1582 | } |
1583 | } else { |
1584 | switch (N->getOpcode()) { |
1585 | default: |
1586 | return false; |
1587 | case ISD::LOAD: |
1588 | case ISD::INTRINSIC_W_CHAIN: |
1589 | if (IsLDG) |
1590 | Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, |
1591 | NVPTX::INT_PTX_LDG_GLOBAL_i8areg, |
1592 | NVPTX::INT_PTX_LDG_GLOBAL_i16areg, |
1593 | NVPTX::INT_PTX_LDG_GLOBAL_i32areg, |
1594 | NVPTX::INT_PTX_LDG_GLOBAL_i64areg, |
1595 | NVPTX::INT_PTX_LDG_GLOBAL_f16areg, |
1596 | NVPTX::INT_PTX_LDG_GLOBAL_f16x2areg, |
1597 | NVPTX::INT_PTX_LDG_GLOBAL_f32areg, |
1598 | NVPTX::INT_PTX_LDG_GLOBAL_f64areg); |
1599 | else |
1600 | Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, |
1601 | NVPTX::INT_PTX_LDU_GLOBAL_i8areg, |
1602 | NVPTX::INT_PTX_LDU_GLOBAL_i16areg, |
1603 | NVPTX::INT_PTX_LDU_GLOBAL_i32areg, |
1604 | NVPTX::INT_PTX_LDU_GLOBAL_i64areg, |
1605 | NVPTX::INT_PTX_LDU_GLOBAL_f16areg, |
1606 | NVPTX::INT_PTX_LDU_GLOBAL_f16x2areg, |
1607 | NVPTX::INT_PTX_LDU_GLOBAL_f32areg, |
1608 | NVPTX::INT_PTX_LDU_GLOBAL_f64areg); |
1609 | break; |
1610 | case NVPTXISD::LoadV2: |
1611 | case NVPTXISD::LDGV2: |
1612 | Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, |
1613 | NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg32, |
1614 | NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg32, |
1615 | NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg32, |
1616 | NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg32, |
1617 | NVPTX::INT_PTX_LDG_G_v2f16_ELE_areg32, |
1618 | NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_areg32, |
1619 | NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg32, |
1620 | NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg32); |
1621 | break; |
1622 | case NVPTXISD::LDUV2: |
1623 | Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, |
1624 | NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg32, |
1625 | NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg32, |
1626 | NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg32, |
1627 | NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg32, |
1628 | NVPTX::INT_PTX_LDU_G_v2f16_ELE_areg32, |
1629 | NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_areg32, |
1630 | NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg32, |
1631 | NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg32); |
1632 | break; |
1633 | case NVPTXISD::LoadV4: |
1634 | case NVPTXISD::LDGV4: |
1635 | Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, |
1636 | NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg32, |
1637 | NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg32, |
1638 | NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg32, None, |
1639 | NVPTX::INT_PTX_LDG_G_v4f16_ELE_areg32, |
1640 | NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_areg32, |
1641 | NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg32, None); |
1642 | break; |
1643 | case NVPTXISD::LDUV4: |
1644 | Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, |
1645 | NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg32, |
1646 | NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg32, |
1647 | NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg32, None, |
1648 | NVPTX::INT_PTX_LDU_G_v4f16_ELE_areg32, |
1649 | NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_areg32, |
1650 | NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg32, None); |
1651 | break; |
1652 | } |
1653 | } |
1654 | if (!Opcode) |
1655 | return false; |
1656 | SDValue Ops[] = { Op1, Chain }; |
1657 | LD = CurDAG->getMachineNode(Opcode.getValue(), DL, InstVTList, Ops); |
1658 | } |
1659 | |
1660 | MachineMemOperand *MemRef = Mem->getMemOperand(); |
1661 | CurDAG->setNodeMemRefs(cast<MachineSDNode>(LD), {MemRef}); |
1662 | |
1663 | |
1664 | |
1665 | |
1666 | |
1667 | |
1668 | |
1669 | |
1670 | |
1671 | |
1672 | |
1673 | |
1674 | EVT OrigType = N->getValueType(0); |
1675 | LoadSDNode *LdNode = dyn_cast<LoadSDNode>(N); |
1676 | |
1677 | if (OrigType != EltVT && LdNode) { |
1678 | |
1679 | |
1680 | |
1681 | bool IsSigned = LdNode->getExtensionType() == ISD::SEXTLOAD; |
1682 | unsigned CvtOpc = GetConvertOpcode(OrigType.getSimpleVT(), |
1683 | EltVT.getSimpleVT(), IsSigned); |
1684 | |
1685 | |
1686 | |
1687 | for (unsigned i = 0; i != NumElts; ++i) { |
1688 | SDValue Res(LD, i); |
1689 | SDValue OrigVal(N, i); |
1690 | |
1691 | SDNode *CvtNode = |
1692 | CurDAG->getMachineNode(CvtOpc, DL, OrigType, Res, |
1693 | CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, |
1694 | DL, MVT::i32)); |
1695 | ReplaceUses(OrigVal, SDValue(CvtNode, 0)); |
1696 | } |
1697 | } |
1698 | |
1699 | ReplaceNode(N, LD); |
1700 | return true; |
1701 | } |
1702 | |
1703 | bool NVPTXDAGToDAGISel::tryStore(SDNode *N) { |
1704 | SDLoc dl(N); |
1705 | MemSDNode *ST = cast<MemSDNode>(N); |
| |
1706 | assert(ST->writeMem() && "Expected store"); |
1707 | StoreSDNode *PlainStore = dyn_cast<StoreSDNode>(N); |
| 2 | | Assuming 'N' is not a 'StoreSDNode' | |
|
1708 | AtomicSDNode *AtomicStore = dyn_cast<AtomicSDNode>(N); |
| 3 | | Assuming 'N' is not a 'AtomicSDNode' | |
|
| 4 | | 'AtomicStore' initialized to a null pointer value | |
|
1709 | assert((PlainStore || AtomicStore) && "Expected store"); |
1710 | EVT StoreVT = ST->getMemoryVT(); |
1711 | SDNode *NVPTXST = nullptr; |
1712 | |
1713 | |
1714 | if (PlainStore && PlainStore->isIndexed()) |
1715 | return false; |
1716 | |
1717 | if (!StoreVT.isSimple()) |
| |
| 8 | | Returning from 'EVT::isSimple' | |
|
| |
1718 | return false; |
1719 | |
1720 | AtomicOrdering Ordering = ST->getSuccessOrdering(); |
1721 | |
1722 | |
1723 | |
1724 | |
1725 | if (isStrongerThanMonotonic(Ordering)) |
| 10 | | Assuming the condition is false | |
|
| |
1726 | return false; |
1727 | |
1728 | |
1729 | unsigned int CodeAddrSpace = getCodeAddrSpace(ST); |
1730 | unsigned int PointerSize = |
1731 | CurDAG->getDataLayout().getPointerSizeInBits(ST->getAddressSpace()); |
1732 | |
1733 | |
1734 | |
1735 | |
1736 | bool isVolatile = ST->isVolatile() || Ordering == AtomicOrdering::Monotonic; |
| 12 | | Assuming the condition is true | |
|
1737 | if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL && |
| |
1738 | CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED && |
1739 | CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC) |
1740 | isVolatile = false; |
1741 | |
1742 | |
1743 | MVT SimpleVT = StoreVT.getSimpleVT(); |
1744 | unsigned vecType = NVPTX::PTXLdStInstCode::Scalar; |
1745 | |
1746 | |
1747 | |
1748 | |
1749 | MVT ScalarVT = SimpleVT.getScalarType(); |
1750 | unsigned toTypeWidth = ScalarVT.getSizeInBits(); |
1751 | if (SimpleVT.isVector()) { |
| |
1752 | assert(StoreVT == MVT::v2f16 && "Unexpected vector type"); |
1753 | |
1754 | toTypeWidth = 32; |
1755 | } |
1756 | |
1757 | unsigned int toType; |
1758 | if (ScalarVT.isFloatingPoint()) |
| |
1759 | |
1760 | toType = ScalarVT.SimpleTy == MVT::f16 ? NVPTX::PTXLdStInstCode::Untyped |
1761 | : NVPTX::PTXLdStInstCode::Float; |
1762 | else |
1763 | toType = NVPTX::PTXLdStInstCode::Unsigned; |
1764 | |
1765 | |
1766 | SDValue Chain = ST->getChain(); |
1767 | SDValue Value = PlainStore ? PlainStore->getValue() : AtomicStore->getVal(); |
| |
| 17 | | Called C++ object pointer is null |
|
1768 | SDValue BasePtr = ST->getBasePtr(); |
1769 | SDValue Addr; |
1770 | SDValue Offset, Base; |
1771 | Optional<unsigned> Opcode; |
1772 | MVT::SimpleValueType SourceVT = |
1773 | Value.getNode()->getSimpleValueType(0).SimpleTy; |
1774 | |
1775 | if (SelectDirectAddr(BasePtr, Addr)) { |
1776 | Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_avar, NVPTX::ST_i16_avar, |
1777 | NVPTX::ST_i32_avar, NVPTX::ST_i64_avar, |
1778 | NVPTX::ST_f16_avar, NVPTX::ST_f16x2_avar, |
1779 | NVPTX::ST_f32_avar, NVPTX::ST_f64_avar); |
1780 | if (!Opcode) |
1781 | return false; |
1782 | SDValue Ops[] = {Value, |
1783 | getI32Imm(isVolatile, dl), |
1784 | getI32Imm(CodeAddrSpace, dl), |
1785 | getI32Imm(vecType, dl), |
1786 | getI32Imm(toType, dl), |
1787 | getI32Imm(toTypeWidth, dl), |
1788 | Addr, |
1789 | Chain}; |
1790 | NVPTXST = CurDAG->getMachineNode(Opcode.getValue(), dl, MVT::Other, Ops); |
1791 | } else if (PointerSize == 64 |
1792 | ? SelectADDRsi64(BasePtr.getNode(), BasePtr, Base, Offset) |
1793 | : SelectADDRsi(BasePtr.getNode(), BasePtr, Base, Offset)) { |
1794 | Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_asi, NVPTX::ST_i16_asi, |
1795 | NVPTX::ST_i32_asi, NVPTX::ST_i64_asi, |
1796 | NVPTX::ST_f16_asi, NVPTX::ST_f16x2_asi, |
1797 | NVPTX::ST_f32_asi, NVPTX::ST_f64_asi); |
1798 | if (!Opcode) |
1799 | return false; |
1800 | SDValue Ops[] = {Value, |
1801 | getI32Imm(isVolatile, dl), |
1802 | getI32Imm(CodeAddrSpace, dl), |
1803 | getI32Imm(vecType, dl), |
1804 | getI32Imm(toType, dl), |
1805 | getI32Imm(toTypeWidth, dl), |
1806 | Base, |
1807 | Offset, |
1808 | Chain}; |
1809 | NVPTXST = CurDAG->getMachineNode(Opcode.getValue(), dl, MVT::Other, Ops); |
1810 | } else if (PointerSize == 64 |
1811 | ? SelectADDRri64(BasePtr.getNode(), BasePtr, Base, Offset) |
1812 | : SelectADDRri(BasePtr.getNode(), BasePtr, Base, Offset)) { |
1813 | if (PointerSize == 64) |
1814 | Opcode = pickOpcodeForVT( |
1815 | SourceVT, NVPTX::ST_i8_ari_64, NVPTX::ST_i16_ari_64, |
1816 | NVPTX::ST_i32_ari_64, NVPTX::ST_i64_ari_64, NVPTX::ST_f16_ari_64, |
1817 | NVPTX::ST_f16x2_ari_64, NVPTX::ST_f32_ari_64, NVPTX::ST_f64_ari_64); |
1818 | else |
1819 | Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_ari, NVPTX::ST_i16_ari, |
1820 | NVPTX::ST_i32_ari, NVPTX::ST_i64_ari, |
1821 | NVPTX::ST_f16_ari, NVPTX::ST_f16x2_ari, |
1822 | NVPTX::ST_f32_ari, NVPTX::ST_f64_ari); |
1823 | if (!Opcode) |
1824 | return false; |
1825 | |
1826 | SDValue Ops[] = {Value, |
1827 | getI32Imm(isVolatile, dl), |
1828 | getI32Imm(CodeAddrSpace, dl), |
1829 | getI32Imm(vecType, dl), |
1830 | getI32Imm(toType, dl), |
1831 | getI32Imm(toTypeWidth, dl), |
1832 | Base, |
1833 | Offset, |
1834 | Chain}; |
1835 | NVPTXST = CurDAG->getMachineNode(Opcode.getValue(), dl, MVT::Other, Ops); |
1836 | } else { |
1837 | if (PointerSize == 64) |
1838 | Opcode = |
1839 | pickOpcodeForVT(SourceVT, NVPTX::ST_i8_areg_64, NVPTX::ST_i16_areg_64, |
1840 | NVPTX::ST_i32_areg_64, NVPTX::ST_i64_areg_64, |
1841 | NVPTX::ST_f16_areg_64, NVPTX::ST_f16x2_areg_64, |
1842 | NVPTX::ST_f32_areg_64, NVPTX::ST_f64_areg_64); |
1843 | else |
1844 | Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_areg, NVPTX::ST_i16_areg, |
1845 | NVPTX::ST_i32_areg, NVPTX::ST_i64_areg, |
1846 | NVPTX::ST_f16_areg, NVPTX::ST_f16x2_areg, |
1847 | NVPTX::ST_f32_areg, NVPTX::ST_f64_areg); |
1848 | if (!Opcode) |
1849 | return false; |
1850 | SDValue Ops[] = {Value, |
1851 | getI32Imm(isVolatile, dl), |
1852 | getI32Imm(CodeAddrSpace, dl), |
1853 | getI32Imm(vecType, dl), |
1854 | getI32Imm(toType, dl), |
1855 | getI32Imm(toTypeWidth, dl), |
1856 | BasePtr, |
1857 | Chain}; |
1858 | NVPTXST = CurDAG->getMachineNode(Opcode.getValue(), dl, MVT::Other, Ops); |
1859 | } |
1860 | |
1861 | if (!NVPTXST) |
1862 | return false; |
1863 | |
1864 | MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand(); |
1865 | CurDAG->setNodeMemRefs(cast<MachineSDNode>(NVPTXST), {MemRef}); |
1866 | ReplaceNode(N, NVPTXST); |
1867 | return true; |
1868 | } |
1869 | |
1870 | bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) { |
1871 | SDValue Chain = N->getOperand(0); |
1872 | SDValue Op1 = N->getOperand(1); |
1873 | SDValue Addr, Offset, Base; |
1874 | Optional<unsigned> Opcode; |
1875 | SDLoc DL(N); |
1876 | SDNode *ST; |
1877 | EVT EltVT = Op1.getValueType(); |
1878 | MemSDNode *MemSD = cast<MemSDNode>(N); |
1879 | EVT StoreVT = MemSD->getMemoryVT(); |
1880 | |
1881 | |
1882 | unsigned CodeAddrSpace = getCodeAddrSpace(MemSD); |
1883 | if (CodeAddrSpace == NVPTX::PTXLdStInstCode::CONSTANT) { |
1884 | report_fatal_error("Cannot store to pointer that points to constant " |
1885 | "memory space"); |
1886 | } |
1887 | unsigned int PointerSize = |
1888 | CurDAG->getDataLayout().getPointerSizeInBits(MemSD->getAddressSpace()); |
1889 | |
1890 | |
1891 | |
1892 | bool IsVolatile = MemSD->isVolatile(); |
1893 | if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL && |
1894 | CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED && |
1895 | CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC) |
1896 | IsVolatile = false; |
1897 | |
1898 | |
1899 | |
1900 | assert(StoreVT.isSimple() && "Store value is not simple"); |
1901 | MVT ScalarVT = StoreVT.getSimpleVT().getScalarType(); |
1902 | unsigned ToTypeWidth = ScalarVT.getSizeInBits(); |
1903 | unsigned ToType; |
1904 | if (ScalarVT.isFloatingPoint()) |
1905 | ToType = ScalarVT.SimpleTy == MVT::f16 ? NVPTX::PTXLdStInstCode::Untyped |
1906 | : NVPTX::PTXLdStInstCode::Float; |
1907 | else |
1908 | ToType = NVPTX::PTXLdStInstCode::Unsigned; |
1909 | |
1910 | SmallVector<SDValue, 12> StOps; |
1911 | SDValue N2; |
1912 | unsigned VecType; |
1913 | |
1914 | switch (N->getOpcode()) { |
1915 | case NVPTXISD::StoreV2: |
1916 | VecType = NVPTX::PTXLdStInstCode::V2; |
1917 | StOps.push_back(N->getOperand(1)); |
1918 | StOps.push_back(N->getOperand(2)); |
1919 | N2 = N->getOperand(3); |
1920 | break; |
1921 | case NVPTXISD::StoreV4: |
1922 | VecType = NVPTX::PTXLdStInstCode::V4; |
1923 | StOps.push_back(N->getOperand(1)); |
1924 | StOps.push_back(N->getOperand(2)); |
1925 | StOps.push_back(N->getOperand(3)); |
1926 | StOps.push_back(N->getOperand(4)); |
1927 | N2 = N->getOperand(5); |
1928 | break; |
1929 | default: |
1930 | return false; |
1931 | } |
1932 | |
1933 | |
1934 | |
1935 | |
1936 | if (EltVT == MVT::v2f16) { |
1937 | assert(N->getOpcode() == NVPTXISD::StoreV4 && "Unexpected load opcode."); |
1938 | EltVT = MVT::i32; |
1939 | ToType = NVPTX::PTXLdStInstCode::Untyped; |
1940 | ToTypeWidth = 32; |
1941 | } |
1942 | |
1943 | StOps.push_back(getI32Imm(IsVolatile, DL)); |
1944 | StOps.push_back(getI32Imm(CodeAddrSpace, DL)); |
1945 | StOps.push_back(getI32Imm(VecType, DL)); |
1946 | StOps.push_back(getI32Imm(ToType, DL)); |
1947 | StOps.push_back(getI32Imm(ToTypeWidth, DL)); |
1948 | |
1949 | if (SelectDirectAddr(N2, Addr)) { |
1950 | switch (N->getOpcode()) { |
1951 | default: |
1952 | return false; |
1953 | case NVPTXISD::StoreV2: |
1954 | Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, |
1955 | NVPTX::STV_i8_v2_avar, NVPTX::STV_i16_v2_avar, |
1956 | NVPTX::STV_i32_v2_avar, NVPTX::STV_i64_v2_avar, |
1957 | NVPTX::STV_f16_v2_avar, NVPTX::STV_f16x2_v2_avar, |
1958 | NVPTX::STV_f32_v2_avar, NVPTX::STV_f64_v2_avar); |
1959 | break; |
1960 | case NVPTXISD::StoreV4: |
1961 | Opcode = |
1962 | pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_avar, |
1963 | NVPTX::STV_i16_v4_avar, NVPTX::STV_i32_v4_avar, None, |
1964 | NVPTX::STV_f16_v4_avar, NVPTX::STV_f16x2_v4_avar, |
1965 | NVPTX::STV_f32_v4_avar, None); |
1966 | break; |
1967 | } |
1968 | StOps.push_back(Addr); |
1969 | } else if (PointerSize == 64 ? SelectADDRsi64(N2.getNode(), N2, Base, Offset) |
1970 | : SelectADDRsi(N2.getNode(), N2, Base, Offset)) { |
1971 | switch (N->getOpcode()) { |
1972 | default: |
1973 | return false; |
1974 | case NVPTXISD::StoreV2: |
1975 | Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, |
1976 | NVPTX::STV_i8_v2_asi, NVPTX::STV_i16_v2_asi, |
1977 | NVPTX::STV_i32_v2_asi, NVPTX::STV_i64_v2_asi, |
1978 | NVPTX::STV_f16_v2_asi, NVPTX::STV_f16x2_v2_asi, |
1979 | NVPTX::STV_f32_v2_asi, NVPTX::STV_f64_v2_asi); |
1980 | break; |
1981 | case NVPTXISD::StoreV4: |
1982 | Opcode = |
1983 | pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_asi, |
1984 | NVPTX::STV_i16_v4_asi, NVPTX::STV_i32_v4_asi, None, |
1985 | NVPTX::STV_f16_v4_asi, NVPTX::STV_f16x2_v4_asi, |
1986 | NVPTX::STV_f32_v4_asi, None); |
1987 | break; |
1988 | } |
1989 | StOps.push_back(Base); |
1990 | StOps.push_back(Offset); |
1991 | } else if (PointerSize == 64 ? SelectADDRri64(N2.getNode(), N2, Base, Offset) |
1992 | : SelectADDRri(N2.getNode(), N2, Base, Offset)) { |
1993 | if (PointerSize == 64) { |
1994 | switch (N->getOpcode()) { |
1995 | default: |
1996 | return false; |
1997 | case NVPTXISD::StoreV2: |
1998 | Opcode = pickOpcodeForVT( |
1999 | EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v2_ari_64, |
2000 | NVPTX::STV_i16_v2_ari_64, NVPTX::STV_i32_v2_ari_64, |
2001 | NVPTX::STV_i64_v2_ari_64, NVPTX::STV_f16_v2_ari_64, |
2002 | NVPTX::STV_f16x2_v2_ari_64, NVPTX::STV_f32_v2_ari_64, |
2003 | NVPTX::STV_f64_v2_ari_64); |
2004 | break; |
2005 | case NVPTXISD::StoreV4: |
2006 | Opcode = pickOpcodeForVT( |
2007 | EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_ari_64, |
2008 | NVPTX::STV_i16_v4_ari_64, NVPTX::STV_i32_v4_ari_64, None, |
2009 | NVPTX::STV_f16_v4_ari_64, NVPTX::STV_f16x2_v4_ari_64, |
2010 | NVPTX::STV_f32_v4_ari_64, None); |
2011 | break; |
2012 | } |
2013 | } else { |
2014 | switch (N->getOpcode()) { |
2015 | default: |
2016 | return false; |
2017 | case NVPTXISD::StoreV2: |
2018 | Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, |
2019 | NVPTX::STV_i8_v2_ari, NVPTX::STV_i16_v2_ari, |
2020 | NVPTX::STV_i32_v2_ari, NVPTX::STV_i64_v2_ari, |
2021 | NVPTX::STV_f16_v2_ari, NVPTX::STV_f16x2_v2_ari, |
2022 | NVPTX::STV_f32_v2_ari, NVPTX::STV_f64_v2_ari); |
2023 | break; |
2024 | case NVPTXISD::StoreV4: |
2025 | Opcode = |
2026 | pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_ari, |
2027 | NVPTX::STV_i16_v4_ari, NVPTX::STV_i32_v4_ari, None, |
2028 | NVPTX::STV_f16_v4_ari, NVPTX::STV_f16x2_v4_ari, |
2029 | NVPTX::STV_f32_v4_ari, None); |
2030 | break; |
2031 | } |
2032 | } |
2033 | StOps.push_back(Base); |
2034 | StOps.push_back(Offset); |
2035 | } else { |
2036 | if (PointerSize == 64) { |
2037 | switch (N->getOpcode()) { |
2038 | default: |
2039 | return false; |
2040 | case NVPTXISD::StoreV2: |
2041 | Opcode = pickOpcodeForVT( |
2042 | EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v2_areg_64, |
2043 | NVPTX::STV_i16_v2_areg_64, NVPTX::STV_i32_v2_areg_64, |
2044 | NVPTX::STV_i64_v2_areg_64, NVPTX::STV_f16_v2_areg_64, |
2045 | NVPTX::STV_f16x2_v2_areg_64, NVPTX::STV_f32_v2_areg_64, |
2046 | NVPTX::STV_f64_v2_areg_64); |
2047 | break; |
2048 | case NVPTXISD::StoreV4: |
2049 | Opcode = pickOpcodeForVT( |
2050 | EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_areg_64, |
2051 | NVPTX::STV_i16_v4_areg_64, NVPTX::STV_i32_v4_areg_64, None, |
2052 | NVPTX::STV_f16_v4_areg_64, NVPTX::STV_f16x2_v4_areg_64, |
2053 | NVPTX::STV_f32_v4_areg_64, None); |
2054 | break; |
2055 | } |
2056 | } else { |
2057 | switch (N->getOpcode()) { |
2058 | default: |
2059 | return false; |
2060 | case NVPTXISD::StoreV2: |
2061 | Opcode = |
2062 | pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v2_areg, |
2063 | NVPTX::STV_i16_v2_areg, NVPTX::STV_i32_v2_areg, |
2064 | NVPTX::STV_i64_v2_areg, NVPTX::STV_f16_v2_areg, |
2065 | NVPTX::STV_f16x2_v2_areg, NVPTX::STV_f32_v2_areg, |
2066 | NVPTX::STV_f64_v2_areg); |
2067 | break; |
2068 | case NVPTXISD::StoreV4: |
2069 | Opcode = |
2070 | pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_areg, |
2071 | NVPTX::STV_i16_v4_areg, NVPTX::STV_i32_v4_areg, None, |
2072 | NVPTX::STV_f16_v4_areg, NVPTX::STV_f16x2_v4_areg, |
2073 | NVPTX::STV_f32_v4_areg, None); |
2074 | break; |
2075 | } |
2076 | } |
2077 | StOps.push_back(N2); |
2078 | } |
2079 | |
2080 | if (!Opcode) |
2081 | return false; |
2082 | |
2083 | StOps.push_back(Chain); |
2084 | |
2085 | ST = CurDAG->getMachineNode(Opcode.getValue(), DL, MVT::Other, StOps); |
2086 | |
2087 | MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand(); |
2088 | CurDAG->setNodeMemRefs(cast<MachineSDNode>(ST), {MemRef}); |
2089 | |
2090 | ReplaceNode(N, ST); |
2091 | return true; |
2092 | } |
2093 | |
2094 | bool NVPTXDAGToDAGISel::tryLoadParam(SDNode *Node) { |
2095 | SDValue Chain = Node->getOperand(0); |
2096 | SDValue Offset = Node->getOperand(2); |
2097 | SDValue Flag = Node->getOperand(3); |
2098 | SDLoc DL(Node); |
2099 | MemSDNode *Mem = cast<MemSDNode>(Node); |
2100 | |
2101 | unsigned VecSize; |
2102 | switch (Node->getOpcode()) { |
2103 | default: |
2104 | return false; |
2105 | case NVPTXISD::LoadParam: |
2106 | VecSize = 1; |
2107 | break; |
2108 | case NVPTXISD::LoadParamV2: |
2109 | VecSize = 2; |
2110 | break; |
2111 | case NVPTXISD::LoadParamV4: |
2112 | VecSize = 4; |
2113 | break; |
2114 | } |
2115 | |
2116 | EVT EltVT = Node->getValueType(0); |
2117 | EVT MemVT = Mem->getMemoryVT(); |
2118 | |
2119 | Optional<unsigned> Opcode; |
2120 | |
2121 | switch (VecSize) { |
2122 | default: |
2123 | return false; |
2124 | case 1: |
2125 | Opcode = pickOpcodeForVT(MemVT.getSimpleVT().SimpleTy, |
2126 | NVPTX::LoadParamMemI8, NVPTX::LoadParamMemI16, |
2127 | NVPTX::LoadParamMemI32, NVPTX::LoadParamMemI64, |
2128 | NVPTX::LoadParamMemF16, NVPTX::LoadParamMemF16x2, |
2129 | NVPTX::LoadParamMemF32, NVPTX::LoadParamMemF64); |
2130 | break; |
2131 | case 2: |
2132 | Opcode = |
2133 | pickOpcodeForVT(MemVT.getSimpleVT().SimpleTy, NVPTX::LoadParamMemV2I8, |
2134 | NVPTX::LoadParamMemV2I16, NVPTX::LoadParamMemV2I32, |
2135 | NVPTX::LoadParamMemV2I64, NVPTX::LoadParamMemV2F16, |
2136 | NVPTX::LoadParamMemV2F16x2, NVPTX::LoadParamMemV2F32, |
2137 | NVPTX::LoadParamMemV2F64); |
2138 | break; |
2139 | case 4: |
2140 | Opcode = pickOpcodeForVT( |
2141 | MemVT.getSimpleVT().SimpleTy, NVPTX::LoadParamMemV4I8, |
2142 | NVPTX::LoadParamMemV4I16, NVPTX::LoadParamMemV4I32, None, |
2143 | NVPTX::LoadParamMemV4F16, NVPTX::LoadParamMemV4F16x2, |
2144 | NVPTX::LoadParamMemV4F32, None); |
2145 | break; |
2146 | } |
2147 | if (!Opcode) |
2148 | return false; |
2149 | |
2150 | SDVTList VTs; |
2151 | if (VecSize == 1) { |
2152 | VTs = CurDAG->getVTList(EltVT, MVT::Other, MVT::Glue); |
2153 | } else if (VecSize == 2) { |
2154 | VTs = CurDAG->getVTList(EltVT, EltVT, MVT::Other, MVT::Glue); |
2155 | } else { |
2156 | EVT EVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other, MVT::Glue }; |
2157 | VTs = CurDAG->getVTList(EVTs); |
2158 | } |
2159 | |
2160 | unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue(); |
2161 | |
2162 | SmallVector<SDValue, 2> Ops; |
2163 | Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32)); |
2164 | Ops.push_back(Chain); |
2165 | Ops.push_back(Flag); |
2166 | |
2167 | ReplaceNode(Node, CurDAG->getMachineNode(Opcode.getValue(), DL, VTs, Ops)); |
2168 | return true; |
2169 | } |
2170 | |
2171 | bool NVPTXDAGToDAGISel::tryStoreRetval(SDNode *N) { |
2172 | SDLoc DL(N); |
2173 | SDValue Chain = N->getOperand(0); |
2174 | SDValue Offset = N->getOperand(1); |
2175 | unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue(); |
2176 | MemSDNode *Mem = cast<MemSDNode>(N); |
2177 | |
2178 | |
2179 | unsigned NumElts = 1; |
2180 | switch (N->getOpcode()) { |
2181 | default: |
2182 | return false; |
2183 | case NVPTXISD::StoreRetval: |
2184 | NumElts = 1; |
2185 | break; |
2186 | case NVPTXISD::StoreRetvalV2: |
2187 | NumElts = 2; |
2188 | break; |
2189 | case NVPTXISD::StoreRetvalV4: |
2190 | NumElts = 4; |
2191 | break; |
2192 | } |
2193 | |
2194 | |
2195 | SmallVector<SDValue, 6> Ops; |
2196 | for (unsigned i = 0; i < NumElts; ++i) |
2197 | Ops.push_back(N->getOperand(i + 2)); |
2198 | Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32)); |
2199 | Ops.push_back(Chain); |
2200 | |
2201 | |
2202 | |
2203 | |
2204 | Optional<unsigned> Opcode = 0; |
2205 | switch (NumElts) { |
2206 | default: |
2207 | return false; |
2208 | case 1: |
2209 | Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy, |
2210 | NVPTX::StoreRetvalI8, NVPTX::StoreRetvalI16, |
2211 | NVPTX::StoreRetvalI32, NVPTX::StoreRetvalI64, |
2212 | NVPTX::StoreRetvalF16, NVPTX::StoreRetvalF16x2, |
2213 | NVPTX::StoreRetvalF32, NVPTX::StoreRetvalF64); |
2214 | break; |
2215 | case 2: |
2216 | Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy, |
2217 | NVPTX::StoreRetvalV2I8, NVPTX::StoreRetvalV2I16, |
2218 | NVPTX::StoreRetvalV2I32, NVPTX::StoreRetvalV2I64, |
2219 | NVPTX::StoreRetvalV2F16, NVPTX::StoreRetvalV2F16x2, |
2220 | NVPTX::StoreRetvalV2F32, NVPTX::StoreRetvalV2F64); |
2221 | break; |
2222 | case 4: |
2223 | Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy, |
2224 | NVPTX::StoreRetvalV4I8, NVPTX::StoreRetvalV4I16, |
2225 | NVPTX::StoreRetvalV4I32, None, |
2226 | NVPTX::StoreRetvalV4F16, NVPTX::StoreRetvalV4F16x2, |
2227 | NVPTX::StoreRetvalV4F32, None); |
2228 | break; |
2229 | } |
2230 | if (!Opcode) |
2231 | return false; |
2232 | |
2233 | SDNode *Ret = CurDAG->getMachineNode(Opcode.getValue(), DL, MVT::Other, Ops); |
2234 | MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand(); |
2235 | CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ret), {MemRef}); |
2236 | |
2237 | ReplaceNode(N, Ret); |
2238 | return true; |
2239 | } |
2240 | |
2241 | bool NVPTXDAGToDAGISel::tryStoreParam(SDNode *N) { |
2242 | SDLoc DL(N); |
2243 | SDValue Chain = N->getOperand(0); |
2244 | SDValue Param = N->getOperand(1); |
2245 | unsigned ParamVal = cast<ConstantSDNode>(Param)->getZExtValue(); |
2246 | SDValue Offset = N->getOperand(2); |
2247 | unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue(); |
2248 | MemSDNode *Mem = cast<MemSDNode>(N); |
2249 | SDValue Flag = N->getOperand(N->getNumOperands() - 1); |
2250 | |
2251 | |
2252 | unsigned NumElts = 1; |
2253 | switch (N->getOpcode()) { |
2254 | default: |
2255 | return false; |
2256 | case NVPTXISD::StoreParamU32: |
2257 | case NVPTXISD::StoreParamS32: |
2258 | case NVPTXISD::StoreParam: |
2259 | NumElts = 1; |
2260 | break; |
2261 | case NVPTXISD::StoreParamV2: |
2262 | NumElts = 2; |
2263 | break; |
2264 | case NVPTXISD::StoreParamV4: |
2265 | NumElts = 4; |
2266 | break; |
2267 | } |
2268 | |
2269 | |
2270 | SmallVector<SDValue, 8> Ops; |
2271 | for (unsigned i = 0; i < NumElts; ++i) |
2272 | Ops.push_back(N->getOperand(i + 3)); |
2273 | Ops.push_back(CurDAG->getTargetConstant(ParamVal, DL, MVT::i32)); |
2274 | Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32)); |
2275 | Ops.push_back(Chain); |
2276 | Ops.push_back(Flag); |
2277 | |
2278 | |
2279 | |
2280 | |
2281 | Optional<unsigned> Opcode = 0; |
2282 | switch (N->getOpcode()) { |
2283 | default: |
2284 | switch (NumElts) { |
2285 | default: |
2286 | return false; |
2287 | case 1: |
2288 | Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy, |
2289 | NVPTX::StoreParamI8, NVPTX::StoreParamI16, |
2290 | NVPTX::StoreParamI32, NVPTX::StoreParamI64, |
2291 | NVPTX::StoreParamF16, NVPTX::StoreParamF16x2, |
2292 | NVPTX::StoreParamF32, NVPTX::StoreParamF64); |
2293 | break; |
2294 | case 2: |
2295 | Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy, |
2296 | NVPTX::StoreParamV2I8, NVPTX::StoreParamV2I16, |
2297 | NVPTX::StoreParamV2I32, NVPTX::StoreParamV2I64, |
2298 | NVPTX::StoreParamV2F16, NVPTX::StoreParamV2F16x2, |
2299 | NVPTX::StoreParamV2F32, NVPTX::StoreParamV2F64); |
2300 | break; |
2301 | case 4: |
2302 | Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy, |
2303 | NVPTX::StoreParamV4I8, NVPTX::StoreParamV4I16, |
2304 | NVPTX::StoreParamV4I32, None, |
2305 | NVPTX::StoreParamV4F16, NVPTX::StoreParamV4F16x2, |
2306 | NVPTX::StoreParamV4F32, None); |
2307 | break; |
2308 | } |
2309 | if (!Opcode) |
2310 | return false; |
2311 | break; |
2312 | |
2313 | |
2314 | |
2315 | case NVPTXISD::StoreParamU32: { |
2316 | Opcode = NVPTX::StoreParamI32; |
2317 | SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL, |
2318 | MVT::i32); |
2319 | SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_u32_u16, DL, |
2320 | MVT::i32, Ops[0], CvtNone); |
2321 | Ops[0] = SDValue(Cvt, 0); |
2322 | break; |
2323 | } |
2324 | case NVPTXISD::StoreParamS32: { |
2325 | Opcode = NVPTX::StoreParamI32; |
2326 | SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL, |
2327 | MVT::i32); |
2328 | SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_s32_s16, DL, |
2329 | MVT::i32, Ops[0], CvtNone); |
2330 | Ops[0] = SDValue(Cvt, 0); |
2331 | break; |
2332 | } |
2333 | } |
2334 | |
2335 | SDVTList RetVTs = CurDAG->getVTList(MVT::Other, MVT::Glue); |
2336 | SDNode *Ret = |
2337 | CurDAG->getMachineNode(Opcode.getValue(), DL, RetVTs, Ops); |
2338 | MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand(); |
2339 | CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ret), {MemRef}); |
2340 | |
2341 | ReplaceNode(N, Ret); |
2342 | return true; |
2343 | } |
2344 | |
2345 | bool NVPTXDAGToDAGISel::tryTextureIntrinsic(SDNode *N) { |
2346 | unsigned Opc = 0; |
2347 | |
2348 | switch (N->getOpcode()) { |
2349 | default: return false; |
2350 | case NVPTXISD::Tex1DFloatS32: |
2351 | Opc = NVPTX::TEX_1D_F32_S32; |
2352 | break; |
2353 | case NVPTXISD::Tex1DFloatFloat: |
2354 | Opc = NVPTX::TEX_1D_F32_F32; |
2355 | break; |
2356 | case NVPTXISD::Tex1DFloatFloatLevel: |
2357 | Opc = NVPTX::TEX_1D_F32_F32_LEVEL; |
2358 | break; |
2359 | case NVPTXISD::Tex1DFloatFloatGrad: |
2360 | Opc = NVPTX::TEX_1D_F32_F32_GRAD; |
2361 | break; |
2362 | case NVPTXISD::Tex1DS32S32: |
2363 | Opc = NVPTX::TEX_1D_S32_S32; |
2364 | break; |
2365 | case NVPTXISD::Tex1DS32Float: |
2366 | Opc = NVPTX::TEX_1D_S32_F32; |
2367 | break; |
2368 | case NVPTXISD::Tex1DS32FloatLevel: |
2369 | Opc = NVPTX::TEX_1D_S32_F32_LEVEL; |
2370 | break; |
2371 | case NVPTXISD::Tex1DS32FloatGrad: |
2372 | Opc = NVPTX::TEX_1D_S32_F32_GRAD; |
2373 | break; |
2374 | case NVPTXISD::Tex1DU32S32: |
2375 | Opc = NVPTX::TEX_1D_U32_S32; |
2376 | break; |
2377 | case NVPTXISD::Tex1DU32Float: |
2378 | Opc = NVPTX::TEX_1D_U32_F32; |
2379 | break; |
2380 | case NVPTXISD::Tex1DU32FloatLevel: |
2381 | Opc = NVPTX::TEX_1D_U32_F32_LEVEL; |
2382 | break; |
2383 | case NVPTXISD::Tex1DU32FloatGrad: |
2384 | Opc = NVPTX::TEX_1D_U32_F32_GRAD; |
2385 | break; |
2386 | case NVPTXISD::Tex1DArrayFloatS32: |
2387 | Opc = NVPTX::TEX_1D_ARRAY_F32_S32; |
2388 | break; |
2389 | case NVPTXISD::Tex1DArrayFloatFloat: |
2390 | Opc = NVPTX::TEX_1D_ARRAY_F32_F32; |
2391 | break; |
2392 | case NVPTXISD::Tex1DArrayFloatFloatLevel: |
2393 | Opc = NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL; |
2394 | break; |
2395 | case NVPTXISD::Tex1DArrayFloatFloatGrad: |
2396 | Opc = NVPTX::TEX_1D_ARRAY_F32_F32_GRAD; |
2397 | break; |
2398 | case NVPTXISD::Tex1DArrayS32S32: |
2399 | Opc = NVPTX::TEX_1D_ARRAY_S32_S32; |
2400 | break; |
2401 | case NVPTXISD::Tex1DArrayS32Float: |
2402 | Opc = NVPTX::TEX_1D_ARRAY_S32_F32; |
2403 | break; |
2404 | case NVPTXISD::Tex1DArrayS32FloatLevel: |
2405 | Opc = NVPTX::TEX_1D_ARRAY_S32_F32_LEVEL; |
2406 | break; |
2407 | case NVPTXISD::Tex1DArrayS32FloatGrad: |
2408 | Opc = NVPTX::TEX_1D_ARRAY_S32_F32_GRAD; |
2409 | break; |
2410 | case NVPTXISD::Tex1DArrayU32S32: |
2411 | Opc = NVPTX::TEX_1D_ARRAY_U32_S32; |
2412 | break; |
2413 | case NVPTXISD::Tex1DArrayU32Float: |
2414 | Opc = NVPTX::TEX_1D_ARRAY_U32_F32; |
2415 | break; |
2416 | case NVPTXISD::Tex1DArrayU32FloatLevel: |
2417 | Opc = NVPTX::TEX_1D_ARRAY_U32_F32_LEVEL; |
2418 | break; |
2419 | case NVPTXISD::Tex1DArrayU32FloatGrad: |
2420 | Opc = NVPTX::TEX_1D_ARRAY_U32_F32_GRAD; |
2421 | break; |
2422 | case NVPTXISD::Tex2DFloatS32: |
2423 | Opc = NVPTX::TEX_2D_F32_S32; |
2424 | break; |
2425 | case NVPTXISD::Tex2DFloatFloat: |
2426 | Opc = NVPTX::TEX_2D_F32_F32; |
2427 | break; |
2428 | case NVPTXISD::Tex2DFloatFloatLevel: |
2429 | Opc = NVPTX::TEX_2D_F32_F32_LEVEL; |
2430 | break; |
2431 | case NVPTXISD::Tex2DFloatFloatGrad: |
2432 | Opc = NVPTX::TEX_2D_F32_F32_GRAD; |
2433 | break; |
2434 | case NVPTXISD::Tex2DS32S32: |
2435 | Opc = NVPTX::TEX_2D_S32_S32; |
2436 | break; |
2437 | case NVPTXISD::Tex2DS32Float: |
2438 | Opc = NVPTX::TEX_2D_S32_F32; |
2439 | break; |
2440 | case NVPTXISD::Tex2DS32FloatLevel: |
2441 | Opc = NVPTX::TEX_2D_S32_F32_LEVEL; |
2442 | break; |
2443 | case NVPTXISD::Tex2DS32FloatGrad: |
2444 | Opc = NVPTX::TEX_2D_S32_F32_GRAD; |
2445 | break; |
2446 | case NVPTXISD::Tex2DU32S32: |
2447 | Opc = NVPTX::TEX_2D_U32_S32; |
2448 | break; |
2449 | case NVPTXISD::Tex2DU32Float: |
2450 | Opc = NVPTX::TEX_2D_U32_F32; |
2451 | break; |
2452 | case NVPTXISD::Tex2DU32FloatLevel: |
2453 | Opc = NVPTX::TEX_2D_U32_F32_LEVEL; |
2454 | break; |
2455 | case NVPTXISD::Tex2DU32FloatGrad: |
2456 | Opc = NVPTX::TEX_2D_U32_F32_GRAD; |
2457 | break; |
2458 | case NVPTXISD::Tex2DArrayFloatS32: |
2459 | Opc = NVPTX::TEX_2D_ARRAY_F32_S32; |
2460 | break; |
2461 | case NVPTXISD::Tex2DArrayFloatFloat: |
2462 | Opc = NVPTX::TEX_2D_ARRAY_F32_F32; |
2463 | break; |
2464 | case NVPTXISD::Tex2DArrayFloatFloatLevel: |
2465 | Opc = NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL; |
2466 | break; |
2467 | case NVPTXISD::Tex2DArrayFloatFloatGrad: |
2468 | Opc = NVPTX::TEX_2D_ARRAY_F32_F32_GRAD; |
2469 | break; |
2470 | case NVPTXISD::Tex2DArrayS32S32: |
2471 | Opc = NVPTX::TEX_2D_ARRAY_S32_S32; |
2472 | break; |
2473 | case NVPTXISD::Tex2DArrayS32Float: |
2474 | Opc = NVPTX::TEX_2D_ARRAY_S32_F32; |
2475 | break; |
2476 | case NVPTXISD::Tex2DArrayS32FloatLevel: |
2477 | Opc = NVPTX::TEX_2D_ARRAY_S32_F32_LEVEL; |
2478 | break; |
2479 | case NVPTXISD::Tex2DArrayS32FloatGrad: |
2480 | Opc = NVPTX::TEX_2D_ARRAY_S32_F32_GRAD; |
2481 | break; |
2482 | case NVPTXISD::Tex2DArrayU32S32: |
2483 | Opc = NVPTX::TEX_2D_ARRAY_U32_S32; |
2484 | break; |
2485 | case NVPTXISD::Tex2DArrayU32Float: |
2486 | Opc = NVPTX::TEX_2D_ARRAY_U32_F32; |
2487 | break; |
2488 | case NVPTXISD::Tex2DArrayU32FloatLevel: |
2489 | Opc = NVPTX::TEX_2D_ARRAY_U32_F32_LEVEL; |
2490 | break; |
2491 | case NVPTXISD::Tex2DArrayU32FloatGrad: |
2492 | Opc = NVPTX::TEX_2D_ARRAY_U32_F32_GRAD; |
2493 | break; |
2494 | case NVPTXISD::Tex3DFloatS32: |
2495 | Opc = NVPTX::TEX_3D_F32_S32; |
2496 | break; |
2497 | case NVPTXISD::Tex3DFloatFloat: |
2498 | Opc = NVPTX::TEX_3D_F32_F32; |
2499 | break; |
2500 | case NVPTXISD::Tex3DFloatFloatLevel: |
2501 | Opc = NVPTX::TEX_3D_F32_F32_LEVEL; |
2502 | break; |
2503 | case NVPTXISD::Tex3DFloatFloatGrad: |
2504 | Opc = NVPTX::TEX_3D_F32_F32_GRAD; |
2505 | break; |
2506 | case NVPTXISD::Tex3DS32S32: |
2507 | Opc = NVPTX::TEX_3D_S32_S32; |
2508 | break; |
2509 | case NVPTXISD::Tex3DS32Float: |
2510 | Opc = NVPTX::TEX_3D_S32_F32; |
2511 | break; |
2512 | case NVPTXISD::Tex3DS32FloatLevel: |
2513 | Opc = NVPTX::TEX_3D_S32_F32_LEVEL; |
2514 | break; |
2515 | case NVPTXISD::Tex3DS32FloatGrad: |
2516 | Opc = NVPTX::TEX_3D_S32_F32_GRAD; |
2517 | break; |
2518 | case NVPTXISD::Tex3DU32S32: |
2519 | Opc = NVPTX::TEX_3D_U32_S32; |
2520 | break; |
2521 | case NVPTXISD::Tex3DU32Float: |
2522 | Opc = NVPTX::TEX_3D_U32_F32; |
2523 | break; |
2524 | case NVPTXISD::Tex3DU32FloatLevel: |
2525 | Opc = NVPTX::TEX_3D_U32_F32_LEVEL; |
2526 | break; |
2527 | case NVPTXISD::Tex3DU32FloatGrad: |
2528 | Opc = NVPTX::TEX_3D_U32_F32_GRAD; |
2529 | break; |
2530 | case NVPTXISD::TexCubeFloatFloat: |
2531 | Opc = NVPTX::TEX_CUBE_F32_F32; |
2532 | break; |
2533 | case NVPTXISD::TexCubeFloatFloatLevel: |
2534 | Opc = NVPTX::TEX_CUBE_F32_F32_LEVEL; |
2535 | break; |
2536 | case NVPTXISD::TexCubeS32Float: |
2537 | Opc = NVPTX::TEX_CUBE_S32_F32; |
2538 | break; |
2539 | case NVPTXISD::TexCubeS32FloatLevel: |
2540 | Opc = NVPTX::TEX_CUBE_S32_F32_LEVEL; |
2541 | break; |
2542 | case NVPTXISD::TexCubeU32Float: |
2543 | Opc = NVPTX::TEX_CUBE_U32_F32; |
2544 | break; |
2545 | case NVPTXISD::TexCubeU32FloatLevel: |
2546 | Opc = NVPTX::TEX_CUBE_U32_F32_LEVEL; |
2547 | break; |
2548 | case NVPTXISD::TexCubeArrayFloatFloat: |
2549 | Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32; |
2550 | break; |
2551 | case NVPTXISD::TexCubeArrayFloatFloatLevel: |
2552 | Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32_LEVEL; |
2553 | break; |
2554 | case NVPTXISD::TexCubeArrayS32Float: |
2555 | Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32; |
2556 | break; |
2557 | case NVPTXISD::TexCubeArrayS32FloatLevel: |
2558 | Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32_LEVEL; |
2559 | break; |
2560 | case NVPTXISD::TexCubeArrayU32Float: |
2561 | Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32; |
2562 | break; |
2563 | case NVPTXISD::TexCubeArrayU32FloatLevel: |
2564 | Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32_LEVEL; |
2565 | break; |
2566 | case NVPTXISD::Tld4R2DFloatFloat: |
2567 | Opc = NVPTX::TLD4_R_2D_F32_F32; |
2568 | break; |
2569 | case NVPTXISD::Tld4G2DFloatFloat: |
2570 | Opc = NVPTX::TLD4_G_2D_F32_F32; |
2571 | break; |
2572 | case NVPTXISD::Tld4B2DFloatFloat: |
2573 | Opc = NVPTX::TLD4_B_2D_F32_F32; |
2574 | break; |
2575 | case NVPTXISD::Tld4A2DFloatFloat: |
2576 | Opc = NVPTX::TLD4_A_2D_F32_F32; |
2577 | break; |
2578 | case NVPTXISD::Tld4R2DS64Float: |
2579 | Opc = NVPTX::TLD4_R_2D_S32_F32; |
2580 | break; |
2581 | case NVPTXISD::Tld4G2DS64Float: |
2582 | Opc = NVPTX::TLD4_G_2D_S32_F32; |
2583 | break; |
2584 | case NVPTXISD::Tld4B2DS64Float: |
2585 | Opc = NVPTX::TLD4_B_2D_S32_F32; |
2586 | break; |
2587 | case NVPTXISD::Tld4A2DS64Float: |
2588 | Opc = NVPTX::TLD4_A_2D_S32_F32; |
2589 | break; |
2590 | case NVPTXISD::Tld4R2DU64Float: |
2591 | Opc = NVPTX::TLD4_R_2D_U32_F32; |
2592 | break; |
2593 | case NVPTXISD::Tld4G2DU64Float: |
2594 | Opc = NVPTX::TLD4_G_2D_U32_F32; |
2595 | break; |
2596 | case NVPTXISD::Tld4B2DU64Float: |
2597 | Opc = NVPTX::TLD4_B_2D_U32_F32; |
2598 | break; |
2599 | case NVPTXISD::Tld4A2DU64Float: |
2600 | Opc = NVPTX::TLD4_A_2D_U32_F32; |
2601 | break; |
2602 | case NVPTXISD::TexUnified1DFloatS32: |
2603 | Opc = NVPTX::TEX_UNIFIED_1D_F32_S32; |
2604 | break; |
2605 | case NVPTXISD::TexUnified1DFloatFloat: |
2606 | Opc = NVPTX::TEX_UNIFIED_1D_F32_F32; |
2607 | break; |
2608 | case NVPTXISD::TexUnified1DFloatFloatLevel: |
2609 | Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_LEVEL; |
2610 | break; |
2611 | case NVPTXISD::TexUnified1DFloatFloatGrad: |
2612 | Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_GRAD; |
2613 | break; |
2614 | case NVPTXISD::TexUnified1DS32S32: |
2615 | Opc = NVPTX::TEX_UNIFIED_1D_S32_S32; |
2616 | break; |
2617 | case NVPTXISD::TexUnified1DS32Float: |
2618 | Opc = NVPTX::TEX_UNIFIED_1D_S32_F32; |
2619 | break; |
2620 | case NVPTXISD::TexUnified1DS32FloatLevel: |
2621 | Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_LEVEL; |
2622 | break; |
2623 | case NVPTXISD::TexUnified1DS32FloatGrad: |
2624 | Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_GRAD; |
2625 | break; |
2626 | case NVPTXISD::TexUnified1DU32S32: |
2627 | Opc = NVPTX::TEX_UNIFIED_1D_U32_S32; |
2628 | break; |
2629 | case NVPTXISD::TexUnified1DU32Float: |
2630 | Opc = NVPTX::TEX_UNIFIED_1D_U32_F32; |
2631 | break; |
2632 | case NVPTXISD::TexUnified1DU32FloatLevel: |
2633 | Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_LEVEL; |
2634 | break; |
2635 | case NVPTXISD::TexUnified1DU32FloatGrad: |
2636 | Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_GRAD; |
2637 | break; |
2638 | case NVPTXISD::TexUnified1DArrayFloatS32: |
2639 | Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_S32; |
2640 | break; |
2641 | case NVPTXISD::TexUnified1DArrayFloatFloat: |
2642 | Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32; |
2643 | break; |
2644 | case NVPTXISD::TexUnified1DArrayFloatFloatLevel: |
2645 | Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL; |
2646 | break; |
2647 | case NVPTXISD::TexUnified1DArrayFloatFloatGrad: |
2648 | Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD; |
2649 | break; |
2650 | case NVPTXISD::TexUnified1DArrayS32S32: |
2651 | Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_S32; |
2652 | break; |
2653 | case NVPTXISD::TexUnified1DArrayS32Float: |
2654 | Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32; |
2655 | break; |
2656 | case NVPTXISD::TexUnified1DArrayS32FloatLevel: |
2657 | Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL; |
2658 | break; |
2659 | case NVPTXISD::TexUnified1DArrayS32FloatGrad: |
2660 | Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD; |
2661 | break; |
2662 | case NVPTXISD::TexUnified1DArrayU32S32: |
2663 | Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_S32; |
2664 | break; |
2665 | case NVPTXISD::TexUnified1DArrayU32Float: |
2666 | Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32; |
2667 | break; |
2668 | case NVPTXISD::TexUnified1DArrayU32FloatLevel: |
2669 | Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL; |
2670 | break; |
2671 | case NVPTXISD::TexUnified1DArrayU32FloatGrad: |
2672 | Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD; |
2673 | break; |
2674 | case NVPTXISD::TexUnified2DFloatS32: |
2675 | Opc = NVPTX::TEX_UNIFIED_2D_F32_S32; |
2676 | break; |
2677 | case NVPTXISD::TexUnified2DFloatFloat: |
2678 | Opc = NVPTX::TEX_UNIFIED_2D_F32_F32; |
2679 | break; |
2680 | case NVPTXISD::TexUnified2DFloatFloatLevel: |
2681 | Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_LEVEL; |
2682 | break; |
2683 | case NVPTXISD::TexUnified2DFloatFloatGrad: |
2684 | Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_GRAD; |
2685 | break; |
2686 | case NVPTXISD::TexUnified2DS32S32: |
2687 | Opc = NVPTX::TEX_UNIFIED_2D_S32_S32; |
2688 | break; |
2689 | case NVPTXISD::TexUnified2DS32Float: |
2690 | Opc = NVPTX::TEX_UNIFIED_2D_S32_F32; |
2691 | break; |
2692 | case NVPTXISD::TexUnified2DS32FloatLevel: |
2693 | Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_LEVEL; |
2694 | break; |
2695 | case NVPTXISD::TexUnified2DS32FloatGrad: |
2696 | Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_GRAD; |
2697 | break; |
2698 | case NVPTXISD::TexUnified2DU32S32: |
2699 | Opc = NVPTX::TEX_UNIFIED_2D_U32_S32; |
2700 | break; |
2701 | case NVPTXISD::TexUnified2DU32Float: |
2702 | Opc = NVPTX::TEX_UNIFIED_2D_U32_F32; |
2703 | break; |
2704 | case NVPTXISD::TexUnified2DU32FloatLevel: |
2705 | Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_LEVEL; |
2706 | break; |
2707 | case NVPTXISD::TexUnified2DU32FloatGrad: |
2708 | Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_GRAD; |
2709 | break; |
2710 | case NVPTXISD::TexUnified2DArrayFloatS32: |
2711 | Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_S32; |
2712 | break; |
2713 | case NVPTXISD::TexUnified2DArrayFloatFloat: |
2714 | Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32; |
2715 | break; |
2716 | case NVPTXISD::TexUnified2DArrayFloatFloatLevel: |
2717 | Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL; |
2718 | break; |
2719 | case NVPTXISD::TexUnified2DArrayFloatFloatGrad: |
2720 | Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD; |
2721 | break; |
2722 | case NVPTXISD::TexUnified2DArrayS32S32: |
2723 | Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_S32; |
2724 | break; |
2725 | case NVPTXISD::TexUnified2DArrayS32Float: |
2726 | Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32; |
2727 | break; |
2728 | case NVPTXISD::TexUnified2DArrayS32FloatLevel: |
2729 | Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL; |
2730 | break; |
2731 | case NVPTXISD::TexUnified2DArrayS32FloatGrad: |
2732 | Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD; |
2733 | break; |
2734 | case NVPTXISD::TexUnified2DArrayU32S32: |
2735 | Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_S32; |
2736 | break; |
2737 | case NVPTXISD::TexUnified2DArrayU32Float: |
2738 | Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32; |
2739 | break; |
2740 | case NVPTXISD::TexUnified2DArrayU32FloatLevel: |
2741 | Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL; |
2742 | break; |
2743 | case NVPTXISD::TexUnified2DArrayU32FloatGrad: |
2744 | Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD; |
2745 | break; |
2746 | case NVPTXISD::TexUnified3DFloatS32: |
2747 | Opc = NVPTX::TEX_UNIFIED_3D_F32_S32; |
2748 | break; |
2749 | case NVPTXISD::TexUnified3DFloatFloat: |
2750 | Opc = NVPTX::TEX_UNIFIED_3D_F32_F32; |
2751 | break; |
2752 | case NVPTXISD::TexUnified3DFloatFloatLevel: |
2753 | Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_LEVEL; |
2754 | break; |
2755 | case NVPTXISD::TexUnified3DFloatFloatGrad: |
2756 | Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_GRAD; |
2757 | break; |
2758 | case NVPTXISD::TexUnified3DS32S32: |
2759 | Opc = NVPTX::TEX_UNIFIED_3D_S32_S32; |
2760 | break; |
2761 | case NVPTXISD::TexUnified3DS32Float: |
2762 | Opc = NVPTX::TEX_UNIFIED_3D_S32_F32; |
2763 | break; |
2764 | case NVPTXISD::TexUnified3DS32FloatLevel: |
2765 | Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_LEVEL; |
2766 | break; |
2767 | case NVPTXISD::TexUnified3DS32FloatGrad: |
2768 | Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_GRAD; |
2769 | break; |
2770 | case NVPTXISD::TexUnified3DU32S32: |
2771 | Opc = NVPTX::TEX_UNIFIED_3D_U32_S32; |
2772 | break; |
2773 | case NVPTXISD::TexUnified3DU32Float: |
2774 | Opc = NVPTX::TEX_UNIFIED_3D_U32_F32; |
2775 | break; |
2776 | case NVPTXISD::TexUnified3DU32FloatLevel: |
2777 | Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_LEVEL; |
2778 | break; |
2779 | case NVPTXISD::TexUnified3DU32FloatGrad: |
2780 | Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_GRAD; |
2781 | break; |
2782 | case NVPTXISD::TexUnifiedCubeFloatFloat: |
2783 | Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32; |
2784 | break; |
2785 | case NVPTXISD::TexUnifiedCubeFloatFloatLevel: |
2786 | Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32_LEVEL; |
2787 | break; |
2788 | case NVPTXISD::TexUnifiedCubeS32Float: |
2789 | Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32; |
2790 | break; |
2791 | case NVPTXISD::TexUnifiedCubeS32FloatLevel: |
2792 | Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32_LEVEL; |
2793 | break; |
2794 | case NVPTXISD::TexUnifiedCubeU32Float: |
2795 | Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32; |
2796 | break; |
2797 | case NVPTXISD::TexUnifiedCubeU32FloatLevel: |
2798 | Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32_LEVEL; |
2799 | break; |
2800 | case NVPTXISD::TexUnifiedCubeArrayFloatFloat: |
2801 | Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32; |
2802 | break; |
2803 | case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel: |
2804 | Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL; |
2805 | break; |
2806 | case NVPTXISD::TexUnifiedCubeArrayS32Float: |
2807 | Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32; |
2808 | break; |
2809 | case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel: |
2810 | Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL; |
2811 | break; |
2812 | case NVPTXISD::TexUnifiedCubeArrayU32Float: |
2813 | Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32; |
2814 | break; |
2815 | case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel: |
2816 | Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL; |
2817 | break; |
2818 | case NVPTXISD::Tld4UnifiedR2DFloatFloat: |
2819 | Opc = NVPTX::TLD4_UNIFIED_R_2D_F32_F32; |
2820 | break; |
2821 | case NVPTXISD::Tld4UnifiedG2DFloatFloat: |
2822 | Opc = NVPTX::TLD4_UNIFIED_G_2D_F32_F32; |
2823 | break; |
2824 | case NVPTXISD::Tld4UnifiedB2DFloatFloat: |
2825 | Opc = NVPTX::TLD4_UNIFIED_B_2D_F32_F32; |
2826 | break; |
2827 | case NVPTXISD::Tld4UnifiedA2DFloatFloat: |
2828 | Opc = NVPTX::TLD4_UNIFIED_A_2D_F32_F32; |
2829 | break; |
2830 | case NVPTXISD::Tld4UnifiedR2DS64Float: |
2831 | Opc = NVPTX::TLD4_UNIFIED_R_2D_S32_F32; |
2832 | break; |
2833 | case NVPTXISD::Tld4UnifiedG2DS64Float: |
2834 | Opc = NVPTX::TLD4_UNIFIED_G_2D_S32_F32; |
2835 | break; |
2836 | case NVPTXISD::Tld4UnifiedB2DS64Float: |
2837 | Opc = NVPTX::TLD4_UNIFIED_B_2D_S32_F32; |
2838 | break; |
2839 | case NVPTXISD::Tld4UnifiedA2DS64Float: |
2840 | Opc = NVPTX::TLD4_UNIFIED_A_2D_S32_F32; |
2841 | break; |
2842 | case NVPTXISD::Tld4UnifiedR2DU64Float: |
2843 | Opc = NVPTX::TLD4_UNIFIED_R_2D_U32_F32; |
2844 | break; |
2845 | case NVPTXISD::Tld4UnifiedG2DU64Float: |
2846 | Opc = NVPTX::TLD4_UNIFIED_G_2D_U32_F32; |
2847 | break; |
2848 | case NVPTXISD::Tld4UnifiedB2DU64Float: |
2849 | Opc = NVPTX::TLD4_UNIFIED_B_2D_U32_F32; |
2850 | break; |
2851 | case NVPTXISD::Tld4UnifiedA2DU64Float: |
2852 | Opc = NVPTX::TLD4_UNIFIED_A_2D_U32_F32; |
2853 | break; |
2854 | } |
2855 | |
2856 | |
2857 | SmallVector<SDValue, 8> Ops(drop_begin(N->ops())); |
2858 | Ops.push_back(N->getOperand(0)); |
2859 | |
2860 | ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops)); |
2861 | return true; |
2862 | } |
2863 | |
2864 | bool NVPTXDAGToDAGISel::trySurfaceIntrinsic(SDNode *N) { |
2865 | unsigned Opc = 0; |
2866 | switch (N->getOpcode()) { |
2867 | default: return false; |
2868 | case NVPTXISD::Suld1DI8Clamp: |
2869 | Opc = NVPTX::SULD_1D_I8_CLAMP; |
2870 | break; |
2871 | case NVPTXISD::Suld1DI16Clamp: |
2872 | Opc = NVPTX::SULD_1D_I16_CLAMP; |
2873 | break; |
2874 | case NVPTXISD::Suld1DI32Clamp: |
2875 | Opc = NVPTX::SULD_1D_I32_CLAMP; |
2876 | break; |
2877 | case NVPTXISD::Suld1DI64Clamp: |
2878 | Opc = NVPTX::SULD_1D_I64_CLAMP; |
2879 | break; |
2880 | case NVPTXISD::Suld1DV2I8Clamp: |
2881 | Opc = NVPTX::SULD_1D_V2I8_CLAMP; |
2882 | break; |
2883 | case NVPTXISD::Suld1DV2I16Clamp: |
2884 | Opc = NVPTX::SULD_1D_V2I16_CLAMP; |
2885 | break; |
2886 | case NVPTXISD::Suld1DV2I32Clamp: |
2887 | Opc = NVPTX::SULD_1D_V2I32_CLAMP; |
2888 | break; |
2889 | case NVPTXISD::Suld1DV2I64Clamp: |
2890 | Opc = NVPTX::SULD_1D_V2I64_CLAMP; |
2891 | break; |
2892 | case NVPTXISD::Suld1DV4I8Clamp: |
2893 | Opc = NVPTX::SULD_1D_V4I8_CLAMP; |
2894 | break; |
2895 | case NVPTXISD::Suld1DV4I16Clamp: |
2896 | Opc = NVPTX::SULD_1D_V4I16_CLAMP; |
2897 | break; |
2898 | case NVPTXISD::Suld1DV4I32Clamp: |
2899 | Opc = NVPTX::SULD_1D_V4I32_CLAMP; |
2900 | break; |
2901 | case NVPTXISD::Suld1DArrayI8Clamp: |
2902 | Opc = NVPTX::SULD_1D_ARRAY_I8_CLAMP; |
2903 | break; |
2904 | case NVPTXISD::Suld1DArrayI16Clamp: |
2905 | Opc = NVPTX::SULD_1D_ARRAY_I16_CLAMP; |
2906 | break; |
2907 | case NVPTXISD::Suld1DArrayI32Clamp: |
2908 | Opc = NVPTX::SULD_1D_ARRAY_I32_CLAMP; |
2909 | break; |
2910 | case NVPTXISD::Suld1DArrayI64Clamp: |
2911 | Opc = NVPTX::SULD_1D_ARRAY_I64_CLAMP; |
2912 | break; |
2913 | case NVPTXISD::Suld1DArrayV2I8Clamp: |
2914 | Opc = NVPTX::SULD_1D_ARRAY_V2I8_CLAMP; |
2915 | break; |
2916 | case NVPTXISD::Suld1DArrayV2I16Clamp: |
2917 | Opc = NVPTX::SULD_1D_ARRAY_V2I16_CLAMP; |
2918 | break; |
2919 | case NVPTXISD::Suld1DArrayV2I32Clamp: |
2920 | Opc = NVPTX::SULD_1D_ARRAY_V2I32_CLAMP; |
2921 | break; |
2922 | case NVPTXISD::Suld1DArrayV2I64Clamp: |
2923 | Opc = NVPTX::SULD_1D_ARRAY_V2I64_CLAMP; |
2924 | break; |
2925 | case NVPTXISD::Suld1DArrayV4I8Clamp: |
2926 | Opc = NVPTX::SULD_1D_ARRAY_V4I8_CLAMP; |
2927 | break; |
2928 | case NVPTXISD::Suld1DArrayV4I16Clamp: |
2929 | Opc = NVPTX::SULD_1D_ARRAY_V4I16_CLAMP; |
2930 | break; |
2931 | case NVPTXISD::Suld1DArrayV4I32Clamp: |
2932 | Opc = NVPTX::SULD_1D_ARRAY_V4I32_CLAMP; |
2933 | break; |
2934 | case NVPTXISD::Suld2DI8Clamp: |
2935 | Opc = NVPTX::SULD_2D_I8_CLAMP; |
2936 | break; |
2937 | case NVPTXISD::Suld2DI16Clamp: |
2938 | Opc = NVPTX::SULD_2D_I16_CLAMP; |
2939 | break; |
2940 | case NVPTXISD::Suld2DI32Clamp: |
2941 | Opc = NVPTX::SULD_2D_I32_CLAMP; |
2942 | break; |
2943 | case NVPTXISD::Suld2DI64Clamp: |
2944 | Opc = NVPTX::SULD_2D_I64_CLAMP; |
2945 | break; |
2946 | case NVPTXISD::Suld2DV2I8Clamp: |
2947 | Opc = NVPTX::SULD_2D_V2I8_CLAMP; |
2948 | break; |
2949 | case NVPTXISD::Suld2DV2I16Clamp: |
2950 | Opc = NVPTX::SULD_2D_V2I16_CLAMP; |
2951 | break; |
2952 | case NVPTXISD::Suld2DV2I32Clamp: |
2953 | Opc = NVPTX::SULD_2D_V2I32_CLAMP; |
2954 | break; |
2955 | case NVPTXISD::Suld2DV2I64Clamp: |
2956 | Opc = NVPTX::SULD_2D_V2I64_CLAMP; |
2957 | break; |
2958 | case NVPTXISD::Suld2DV4I8Clamp: |
2959 | Opc = NVPTX::SULD_2D_V4I8_CLAMP; |
2960 | break; |
2961 | case NVPTXISD::Suld2DV4I16Clamp: |
2962 | Opc = NVPTX::SULD_2D_V4I16_CLAMP; |
2963 | break; |
2964 | case NVPTXISD::Suld2DV4I32Clamp: |
2965 | Opc = NVPTX::SULD_2D_V4I32_CLAMP; |
2966 | break; |
2967 | case NVPTXISD::Suld2DArrayI8Clamp: |
2968 | Opc = NVPTX::SULD_2D_ARRAY_I8_CLAMP; |
2969 | break; |
2970 | case NVPTXISD::Suld2DArrayI16Clamp: |
2971 | Opc = NVPTX::SULD_2D_ARRAY_I16_CLAMP; |
2972 | break; |
2973 | case NVPTXISD::Suld2DArrayI32Clamp: |
2974 | Opc = NVPTX::SULD_2D_ARRAY_I32_CLAMP; |
2975 | break; |
2976 | case NVPTXISD::Suld2DArrayI64Clamp: |
2977 | Opc = NVPTX::SULD_2D_ARRAY_I64_CLAMP; |
2978 | break; |
2979 | case NVPTXISD::Suld2DArrayV2I8Clamp: |
2980 | Opc = NVPTX::SULD_2D_ARRAY_V2I8_CLAMP; |
2981 | break; |
2982 | case NVPTXISD::Suld2DArrayV2I16Clamp: |
2983 | Opc = NVPTX::SULD_2D_ARRAY_V2I16_CLAMP; |
2984 | break; |
2985 | case NVPTXISD::Suld2DArrayV2I32Clamp: |
2986 | Opc = NVPTX::SULD_2D_ARRAY_V2I32_CLAMP; |
2987 | break; |
2988 | case NVPTXISD::Suld2DArrayV2I64Clamp: |
2989 | Opc = NVPTX::SULD_2D_ARRAY_V2I64_CLAMP; |
2990 | break; |
2991 | case NVPTXISD::Suld2DArrayV4I8Clamp: |
2992 | Opc = NVPTX::SULD_2D_ARRAY_V4I8_CLAMP; |
2993 | break; |
2994 | case NVPTXISD::Suld2DArrayV4I16Clamp: |
2995 | Opc = NVPTX::SULD_2D_ARRAY_V4I16_CLAMP; |
2996 | break; |
2997 | case NVPTXISD::Suld2DArrayV4I32Clamp: |
2998 | Opc = NVPTX::SULD_2D_ARRAY_V4I32_CLAMP; |
2999 | break; |
3000 | case NVPTXISD::Suld3DI8Clamp: |
3001 | Opc = NVPTX::SULD_3D_I8_CLAMP; |
3002 | break; |
3003 | case NVPTXISD::Suld3DI16Clamp: |
3004 | Opc = NVPTX::SULD_3D_I16_CLAMP; |
3005 | break; |
3006 | case NVPTXISD::Suld3DI32Clamp: |
3007 | Opc = NVPTX::SULD_3D_I32_CLAMP; |
3008 | break; |
3009 | case NVPTXISD::Suld3DI64Clamp: |
3010 | Opc = NVPTX::SULD_3D_I64_CLAMP; |
3011 | break; |
3012 | case NVPTXISD::Suld3DV2I8Clamp: |
3013 | Opc = NVPTX::SULD_3D_V2I8_CLAMP; |
3014 | break; |
3015 | case NVPTXISD::Suld3DV2I16Clamp: |
3016 | Opc = NVPTX::SULD_3D_V2I16_CLAMP; |
3017 | break; |
3018 | case NVPTXISD::Suld3DV2I32Clamp: |
3019 | Opc = NVPTX::SULD_3D_V2I32_CLAMP; |
3020 | break; |
3021 | case NVPTXISD::Suld3DV2I64Clamp: |
3022 | Opc = NVPTX::SULD_3D_V2I64_CLAMP; |
3023 | break; |
3024 | case NVPTXISD::Suld3DV4I8Clamp: |
3025 | Opc = NVPTX::SULD_3D_V4I8_CLAMP; |
3026 | break; |
3027 | case NVPTXISD::Suld3DV4I16Clamp: |
3028 | Opc = NVPTX::SULD_3D_V4I16_CLAMP; |
3029 | break; |
3030 | case NVPTXISD::Suld3DV4I32Clamp: |
3031 | Opc = NVPTX::SULD_3D_V4I32_CLAMP; |
3032 | break; |
3033 | case NVPTXISD::Suld1DI8Trap: |
3034 | Opc = NVPTX::SULD_1D_I8_TRAP; |
3035 | break; |
3036 | case NVPTXISD::Suld1DI16Trap: |
3037 | Opc = NVPTX::SULD_1D_I16_TRAP; |
3038 | break; |
3039 | case NVPTXISD::Suld1DI32Trap: |
3040 | Opc = NVPTX::SULD_1D_I32_TRAP; |
3041 | break; |
3042 | case NVPTXISD::Suld1DI64Trap: |
3043 | Opc = NVPTX::SULD_1D_I64_TRAP; |
3044 | break; |
3045 | case NVPTXISD::Suld1DV2I8Trap: |
3046 | Opc = NVPTX::SULD_1D_V2I8_TRAP; |
3047 | break; |
3048 | case NVPTXISD::Suld1DV2I16Trap: |
3049 | Opc = NVPTX::SULD_1D_V2I16_TRAP; |
3050 | break; |
3051 | case NVPTXISD::Suld1DV2I32Trap: |
3052 | Opc = NVPTX::SULD_1D_V2I32_TRAP; |
3053 | break; |
3054 | case NVPTXISD::Suld1DV2I64Trap: |
3055 | Opc = NVPTX::SULD_1D_V2I64_TRAP; |
3056 | break; |
3057 | case NVPTXISD::Suld1DV4I8Trap: |
3058 | Opc = NVPTX::SULD_1D_V4I8_TRAP; |
3059 | break; |
3060 | case NVPTXISD::Suld1DV4I16Trap: |
3061 | Opc = NVPTX::SULD_1D_V4I16_TRAP; |
3062 | break; |
3063 | case NVPTXISD::Suld1DV4I32Trap: |
3064 | Opc = NVPTX::SULD_1D_V4I32_TRAP; |
3065 | break; |
3066 | case NVPTXISD::Suld1DArrayI8Trap: |
3067 | Opc = NVPTX::SULD_1D_ARRAY_I8_TRAP; |
3068 | break; |
3069 | case NVPTXISD::Suld1DArrayI16Trap: |
3070 | Opc = NVPTX::SULD_1D_ARRAY_I16_TRAP; |
3071 | break; |
3072 | case NVPTXISD::Suld1DArrayI32Trap: |
3073 | Opc = NVPTX::SULD_1D_ARRAY_I32_TRAP; |
3074 | break; |
3075 | case NVPTXISD::Suld1DArrayI64Trap: |
3076 | Opc = NVPTX::SULD_1D_ARRAY_I64_TRAP; |
3077 | break; |
3078 | case NVPTXISD::Suld1DArrayV2I8Trap: |
3079 | Opc = NVPTX::SULD_1D_ARRAY_V2I8_TRAP; |
3080 | break; |
3081 | case NVPTXISD::Suld1DArrayV2I16Trap: |
3082 | Opc = NVPTX::SULD_1D_ARRAY_V2I16_TRAP; |
3083 | break; |
3084 | case NVPTXISD::Suld1DArrayV2I32Trap: |
3085 | Opc = NVPTX::SULD_1D_ARRAY_V2I32_TRAP; |
3086 | break; |
3087 | case NVPTXISD::Suld1DArrayV2I64Trap: |
3088 | Opc = NVPTX::SULD_1D_ARRAY_V2I64_TRAP; |
3089 | break; |
3090 | case NVPTXISD::Suld1DArrayV4I8Trap: |
3091 | Opc = NVPTX::SULD_1D_ARRAY_V4I8_TRAP; |
3092 | break; |
3093 | case NVPTXISD::Suld1DArrayV4I16Trap: |
3094 | Opc = NVPTX::SULD_1D_ARRAY_V4I16_TRAP; |
3095 | break; |
3096 | case NVPTXISD::Suld1DArrayV4I32Trap: |
3097 | Opc = NVPTX::SULD_1D_ARRAY_V4I32_TRAP; |
3098 | break; |
3099 | case NVPTXISD::Suld2DI8Trap: |
3100 | Opc = NVPTX::SULD_2D_I8_TRAP; |
3101 | break; |
3102 | case NVPTXISD::Suld2DI16Trap: |
3103 | Opc = NVPTX::SULD_2D_I16_TRAP; |
3104 | break; |
3105 | case NVPTXISD::Suld2DI32Trap: |
3106 | Opc = NVPTX::SULD_2D_I32_TRAP; |
3107 | break; |
3108 | case NVPTXISD::Suld2DI64Trap: |
3109 | Opc = NVPTX::SULD_2D_I64_TRAP; |
3110 | break; |
3111 | case NVPTXISD::Suld2DV2I8Trap: |
3112 | Opc = NVPTX::SULD_2D_V2I8_TRAP; |
3113 | break; |
3114 | case NVPTXISD::Suld2DV2I16Trap: |
3115 | Opc = NVPTX::SULD_2D_V2I16_TRAP; |
3116 | break; |
3117 | case NVPTXISD::Suld2DV2I32Trap: |
3118 | Opc = NVPTX::SULD_2D_V2I32_TRAP; |
3119 | break; |
3120 | case NVPTXISD::Suld2DV2I64Trap: |
3121 | Opc = NVPTX::SULD_2D_V2I64_TRAP; |
3122 | break; |
3123 | case NVPTXISD::Suld2DV4I8Trap: |
3124 | Opc = NVPTX::SULD_2D_V4I8_TRAP; |
3125 | break; |
3126 | case NVPTXISD::Suld2DV4I16Trap: |
3127 | Opc = NVPTX::SULD_2D_V4I16_TRAP; |
3128 | break; |
3129 | case NVPTXISD::Suld2DV4I32Trap: |
3130 | Opc = NVPTX::SULD_2D_V4I32_TRAP; |
3131 | break; |
3132 | case NVPTXISD::Suld2DArrayI8Trap: |
3133 | Opc = NVPTX::SULD_2D_ARRAY_I8_TRAP; |
3134 | break; |
3135 | case NVPTXISD::Suld2DArrayI16Trap: |
3136 | Opc = NVPTX::SULD_2D_ARRAY_I16_TRAP; |
3137 | break; |
3138 | case NVPTXISD::Suld2DArrayI32Trap: |
3139 | Opc = NVPTX::SULD_2D_ARRAY_I32_TRAP; |
3140 | break; |
3141 | case NVPTXISD::Suld2DArrayI64Trap: |
3142 | Opc = NVPTX::SULD_2D_ARRAY_I64_TRAP; |
3143 | break; |
3144 | case NVPTXISD::Suld2DArrayV2I8Trap: |
3145 | Opc = NVPTX::SULD_2D_ARRAY_V2I8_TRAP; |
3146 | break; |
3147 | case NVPTXISD::Suld2DArrayV2I16Trap: |
3148 | Opc = NVPTX::SULD_2D_ARRAY_V2I16_TRAP; |
3149 | break; |
3150 | case NVPTXISD::Suld2DArrayV2I32Trap: |
3151 | Opc = NVPTX::SULD_2D_ARRAY_V2I32_TRAP; |
3152 | break; |
3153 | case NVPTXISD::Suld2DArrayV2I64Trap: |
3154 | Opc = NVPTX::SULD_2D_ARRAY_V2I64_TRAP; |
3155 | break; |
3156 | case NVPTXISD::Suld2DArrayV4I8Trap: |
3157 | Opc = NVPTX::SULD_2D_ARRAY_V4I8_TRAP; |
3158 | break; |
3159 | case NVPTXISD::Suld2DArrayV4I16Trap: |
3160 | Opc = NVPTX::SULD_2D_ARRAY_V4I16_TRAP; |
3161 | break; |
3162 | case NVPTXISD::Suld2DArrayV4I32Trap: |
3163 | Opc = NVPTX::SULD_2D_ARRAY_V4I32_TRAP; |
3164 | break; |
3165 | case NVPTXISD::Suld3DI8Trap: |
3166 | Opc = NVPTX::SULD_3D_I8_TRAP; |
3167 | break; |
3168 | case NVPTXISD::Suld3DI16Trap: |
3169 | Opc = NVPTX::SULD_3D_I16_TRAP; |
3170 | break; |
3171 | case NVPTXISD::Suld3DI32Trap: |
3172 | Opc = NVPTX::SULD_3D_I32_TRAP; |
3173 | break; |
3174 | case NVPTXISD::Suld3DI64Trap: |
3175 | Opc = NVPTX::SULD_3D_I64_TRAP; |
3176 | break; |
3177 | case NVPTXISD::Suld3DV2I8Trap: |
3178 | Opc = NVPTX::SULD_3D_V2I8_TRAP; |
3179 | break; |
3180 | case NVPTXISD::Suld3DV2I16Trap: |
3181 | Opc = NVPTX::SULD_3D_V2I16_TRAP; |
3182 | break; |
3183 | case NVPTXISD::Suld3DV2I32Trap: |
3184 | Opc = NVPTX::SULD_3D_V2I32_TRAP; |
3185 | break; |
3186 | case NVPTXISD::Suld3DV2I64Trap: |
3187 | Opc = NVPTX::SULD_3D_V2I64_TRAP; |
3188 | break; |
3189 | case NVPTXISD::Suld3DV4I8Trap: |
3190 | Opc = NVPTX::SULD_3D_V4I8_TRAP; |
3191 | break; |
3192 | case NVPTXISD::Suld3DV4I16Trap: |
3193 | Opc = NVPTX::SULD_3D_V4I16_TRAP; |
3194 | break; |
3195 | case NVPTXISD::Suld3DV4I32Trap: |
3196 | Opc = NVPTX::SULD_3D_V4I32_TRAP; |
3197 | break; |
3198 | case NVPTXISD::Suld1DI8Zero: |
3199 | Opc = NVPTX::SULD_1D_I8_ZERO; |
3200 | break; |
3201 | case NVPTXISD::Suld1DI16Zero: |
3202 | Opc = NVPTX::SULD_1D_I16_ZERO; |
3203 | break; |
3204 | case NVPTXISD::Suld1DI32Zero: |
3205 | Opc = NVPTX::SULD_1D_I32_ZERO; |
3206 | break; |
3207 | case NVPTXISD::Suld1DI64Zero: |
3208 | Opc = NVPTX::SULD_1D_I64_ZERO; |
3209 | break; |
3210 | case NVPTXISD::Suld1DV2I8Zero: |
3211 | Opc = NVPTX::SULD_1D_V2I8_ZERO; |
3212 | break; |
3213 | case NVPTXISD::Suld1DV2I16Zero: |
3214 | Opc = NVPTX::SULD_1D_V2I16_ZERO; |
3215 | break; |
3216 | case NVPTXISD::Suld1DV2I32Zero: |
3217 | Opc = NVPTX::SULD_1D_V2I32_ZERO; |
3218 | break; |
3219 | case NVPTXISD::Suld1DV2I64Zero: |
3220 | Opc = NVPTX::SULD_1D_V2I64_ZERO; |
3221 | break; |
3222 | case NVPTXISD::Suld1DV4I8Zero: |
3223 | Opc = NVPTX::SULD_1D_V4I8_ZERO; |
3224 | break; |
3225 | case NVPTXISD::Suld1DV4I16Zero: |
3226 | Opc = NVPTX::SULD_1D_V4I16_ZERO; |
3227 | break; |
3228 | case NVPTXISD::Suld1DV4I32Zero: |
3229 | Opc = NVPTX::SULD_1D_V4I32_ZERO; |
3230 | break; |
3231 | case NVPTXISD::Suld1DArrayI8Zero: |
3232 | Opc = NVPTX::SULD_1D_ARRAY_I8_ZERO; |
3233 | break; |
3234 | case NVPTXISD::Suld1DArrayI16Zero: |
3235 | Opc = NVPTX::SULD_1D_ARRAY_I16_ZERO; |
3236 | break; |
3237 | case NVPTXISD::Suld1DArrayI32Zero: |
3238 | Opc = NVPTX::SULD_1D_ARRAY_I32_ZERO; |
3239 | break; |
3240 | case NVPTXISD::Suld1DArrayI64Zero: |
3241 | Opc = NVPTX::SULD_1D_ARRAY_I64_ZERO; |
3242 | break; |
3243 | case NVPTXISD::Suld1DArrayV2I8Zero: |
3244 | Opc = NVPTX::SULD_1D_ARRAY_V2I8_ZERO; |
3245 | break; |
3246 | case NVPTXISD::Suld1DArrayV2I16Zero: |
3247 | Opc = NVPTX::SULD_1D_ARRAY_V2I16_ZERO; |
3248 | break; |
3249 | case NVPTXISD::Suld1DArrayV2I32Zero: |
3250 | Opc = NVPTX::SULD_1D_ARRAY_V2I32_ZERO; |
3251 | break; |
3252 | case NVPTXISD::Suld1DArrayV2I64Zero: |
3253 | Opc = NVPTX::SULD_1D_ARRAY_V2I64_ZERO; |
3254 | break; |
3255 | case NVPTXISD::Suld1DArrayV4I8Zero: |
3256 | Opc = NVPTX::SULD_1D_ARRAY_V4I8_ZERO; |
3257 | break; |
3258 | case NVPTXISD::Suld1DArrayV4I16Zero: |
3259 | Opc = NVPTX::SULD_1D_ARRAY_V4I16_ZERO; |
3260 | break; |
3261 | case NVPTXISD::Suld1DArrayV4I32Zero: |
3262 | Opc = NVPTX::SULD_1D_ARRAY_V4I32_ZERO; |
3263 | break; |
3264 | case NVPTXISD::Suld2DI8Zero: |
3265 | Opc = NVPTX::SULD_2D_I8_ZERO; |
3266 | break; |
3267 | case NVPTXISD::Suld2DI16Zero: |
3268 | Opc = NVPTX::SULD_2D_I16_ZERO; |
3269 | break; |
3270 | case NVPTXISD::Suld2DI32Zero: |
3271 | Opc = NVPTX::SULD_2D_I32_ZERO; |
3272 | break; |
3273 | case NVPTXISD::Suld2DI64Zero: |
3274 | Opc = NVPTX::SULD_2D_I64_ZERO; |
3275 | break; |
3276 | case NVPTXISD::Suld2DV2I8Zero: |
3277 | Opc = NVPTX::SULD_2D_V2I8_ZERO; |
3278 | break; |
3279 | case NVPTXISD::Suld2DV2I16Zero: |
3280 | Opc = NVPTX::SULD_2D_V2I16_ZERO; |
3281 | break; |
3282 | case NVPTXISD::Suld2DV2I32Zero: |
3283 | Opc = NVPTX::SULD_2D_V2I32_ZERO; |
3284 | break; |
3285 | case NVPTXISD::Suld2DV2I64Zero: |
3286 | Opc = NVPTX::SULD_2D_V2I64_ZERO; |
3287 | break; |
3288 | case NVPTXISD::Suld2DV4I8Zero: |
3289 | Opc = NVPTX::SULD_2D_V4I8_ZERO; |
3290 | break; |
3291 | case NVPTXISD::Suld2DV4I16Zero: |
3292 | Opc = NVPTX::SULD_2D_V4I16_ZERO; |
3293 | break; |
3294 | case NVPTXISD::Suld2DV4I32Zero: |
3295 | Opc = NVPTX::SULD_2D_V4I32_ZERO; |
3296 | break; |
3297 | case NVPTXISD::Suld2DArrayI8Zero: |
3298 | Opc = NVPTX::SULD_2D_ARRAY_I8_ZERO; |
3299 | break; |
3300 | case NVPTXISD::Suld2DArrayI16Zero: |
3301 | Opc = NVPTX::SULD_2D_ARRAY_I16_ZERO; |
3302 | break; |
3303 | case NVPTXISD::Suld2DArrayI32Zero: |
3304 | Opc = NVPTX::SULD_2D_ARRAY_I32_ZERO; |
3305 | break; |
3306 | case NVPTXISD::Suld2DArrayI64Zero: |
3307 | Opc = NVPTX::SULD_2D_ARRAY_I64_ZERO; |
3308 | break; |
3309 | case NVPTXISD::Suld2DArrayV2I8Zero: |
3310 | Opc = NVPTX::SULD_2D_ARRAY_V2I8_ZERO; |
3311 | break; |
3312 | case NVPTXISD::Suld2DArrayV2I16Zero: |
3313 | Opc = NVPTX::SULD_2D_ARRAY_V2I16_ZERO; |
3314 | break; |
3315 | case NVPTXISD::Suld2DArrayV2I32Zero: |
3316 | Opc = NVPTX::SULD_2D_ARRAY_V2I32_ZERO; |
3317 | break; |
3318 | case NVPTXISD::Suld2DArrayV2I64Zero: |
3319 | Opc = NVPTX::SULD_2D_ARRAY_V2I64_ZERO; |
3320 | break; |
3321 | case NVPTXISD::Suld2DArrayV4I8Zero: |
3322 | Opc = NVPTX::SULD_2D_ARRAY_V4I8_ZERO; |
3323 | break; |
3324 | case NVPTXISD::Suld2DArrayV4I16Zero: |
3325 | Opc = NVPTX::SULD_2D_ARRAY_V4I16_ZERO; |
3326 | break; |
3327 | case NVPTXISD::Suld2DArrayV4I32Zero: |
3328 | Opc = NVPTX::SULD_2D_ARRAY_V4I32_ZERO; |
3329 | break; |
3330 | case NVPTXISD::Suld3DI8Zero: |
3331 | Opc = NVPTX::SULD_3D_I8_ZERO; |
3332 | break; |
3333 | case NVPTXISD::Suld3DI16Zero: |
3334 | Opc = NVPTX::SULD_3D_I16_ZERO; |
3335 | break; |
3336 | case NVPTXISD::Suld3DI32Zero: |
3337 | Opc = NVPTX::SULD_3D_I32_ZERO; |
3338 | break; |
3339 | case NVPTXISD::Suld3DI64Zero: |
3340 | Opc = NVPTX::SULD_3D_I64_ZERO; |
3341 | break; |
3342 | case NVPTXISD::Suld3DV2I8Zero: |
3343 | Opc = NVPTX::SULD_3D_V2I8_ZERO; |
3344 | break; |
3345 | case NVPTXISD::Suld3DV2I16Zero: |
3346 | Opc = NVPTX::SULD_3D_V2I16_ZERO; |
3347 | break; |
3348 | case NVPTXISD::Suld3DV2I32Zero: |
3349 | Opc = NVPTX::SULD_3D_V2I32_ZERO; |
3350 | break; |
3351 | case NVPTXISD::Suld3DV2I64Zero: |
3352 | Opc = NVPTX::SULD_3D_V2I64_ZERO; |
3353 | break; |
3354 | case NVPTXISD::Suld3DV4I8Zero: |
3355 | Opc = NVPTX::SULD_3D_V4I8_ZERO; |
3356 | break; |
3357 | case NVPTXISD::Suld3DV4I16Zero: |
3358 | Opc = NVPTX::SULD_3D_V4I16_ZERO; |
3359 | break; |
3360 | case NVPTXISD::Suld3DV4I32Zero: |
3361 | Opc = NVPTX::SULD_3D_V4I32_ZERO; |
3362 | break; |
3363 | } |
3364 | |
3365 | |
3366 | SmallVector<SDValue, 8> Ops(drop_begin(N->ops())); |
3367 | Ops.push_back(N->getOperand(0)); |
3368 | |
3369 | ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops)); |
3370 | return true; |
3371 | } |
3372 | |
3373 | |
3374 | |
3375 | |
3376 | bool NVPTXDAGToDAGISel::tryBFE(SDNode *N) { |
3377 | SDLoc DL(N); |
3378 | SDValue LHS = N->getOperand(0); |
3379 | SDValue RHS = N->getOperand(1); |
3380 | SDValue Len; |
3381 | SDValue Start; |
3382 | SDValue Val; |
3383 | bool IsSigned = false; |
3384 | |
3385 | if (N->getOpcode() == ISD::AND) { |
3386 | |
3387 | |
3388 | if (isa<ConstantSDNode>(LHS) && !isa<ConstantSDNode>(RHS)) { |
3389 | std::swap(LHS, RHS); |
3390 | } |
3391 | |
3392 | ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(RHS); |
3393 | if (!Mask) { |
3394 | |
3395 | return false; |
3396 | } |
3397 | |
3398 | |
3399 | uint64_t MaskVal = Mask->getZExtValue(); |
3400 | if (!isMask_64(MaskVal)) { |
3401 | |
3402 | |
3403 | |
3404 | return false; |
3405 | } |
3406 | |
3407 | |
3408 | uint64_t NumBits = countTrailingOnes(MaskVal); |
3409 | Len = CurDAG->getTargetConstant(NumBits, DL, MVT::i32); |
3410 | |
3411 | if (LHS.getOpcode() == ISD::SRL || LHS.getOpcode() == ISD::SRA) { |
3412 | |
3413 | Val = LHS.getNode()->getOperand(0); |
3414 | Start = LHS.getNode()->getOperand(1); |
3415 | ConstantSDNode *StartConst = dyn_cast<ConstantSDNode>(Start); |
3416 | if (StartConst) { |
3417 | uint64_t StartVal = StartConst->getZExtValue(); |
3418 | |
3419 | |
3420 | uint64_t GoodBits = Start.getValueSizeInBits() - StartVal; |
3421 | if (NumBits > GoodBits) { |
3422 | |
3423 | |
3424 | |
3425 | return false; |
3426 | } |
3427 | Start = CurDAG->getTargetConstant(StartVal, DL, MVT::i32); |
3428 | } else { |
3429 | |
3430 | |
3431 | |
3432 | |
3433 | return false; |
3434 | } |
3435 | } else { |
3436 | |
3437 | |
3438 | |
3439 | return false; |
3440 | } |
3441 | } else if (N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) { |
3442 | if (LHS->getOpcode() == ISD::AND) { |
3443 | ConstantSDNode *ShiftCnst = dyn_cast<ConstantSDNode>(RHS); |
3444 | if (!ShiftCnst) { |
3445 | |
3446 | return false; |
3447 | } |
3448 | |
3449 | uint64_t ShiftAmt = ShiftCnst->getZExtValue(); |
3450 | |
3451 | SDValue AndLHS = LHS->getOperand(0); |
3452 | SDValue AndRHS = LHS->getOperand(1); |
3453 | |
3454 | |
3455 | if (isa<ConstantSDNode>(AndLHS)) { |
3456 | std::swap(AndLHS, AndRHS); |
3457 | } |
3458 | |
3459 | ConstantSDNode *MaskCnst = dyn_cast<ConstantSDNode>(AndRHS); |
3460 | if (!MaskCnst) { |
3461 | |
3462 | return false; |
3463 | } |
3464 | |
3465 | uint64_t MaskVal = MaskCnst->getZExtValue(); |
3466 | uint64_t NumZeros; |
3467 | uint64_t NumBits; |
3468 | if (isMask_64(MaskVal)) { |
3469 | NumZeros = 0; |
3470 | |
3471 | |
3472 | NumBits = countTrailingOnes(MaskVal) - ShiftAmt; |
3473 | } else if (isShiftedMask_64(MaskVal)) { |
3474 | NumZeros = countTrailingZeros(MaskVal); |
3475 | unsigned NumOnes = countTrailingOnes(MaskVal >> NumZeros); |
3476 | |
3477 | |
3478 | |
3479 | NumBits = NumZeros + NumOnes - ShiftAmt; |
3480 | } else { |
3481 | |
3482 | return false; |
3483 | } |
3484 | |
3485 | if (ShiftAmt < NumZeros) { |
3486 | |
3487 | |
3488 | return false; |
3489 | } |
3490 | |
3491 | Val = AndLHS; |
3492 | Start = CurDAG->getTargetConstant(ShiftAmt, DL, MVT::i32); |
3493 | Len = CurDAG->getTargetConstant(NumBits, DL, MVT::i32); |
3494 | } else if (LHS->getOpcode() == ISD::SHL) { |
3495 | |
3496 | |
3497 | |
3498 | |
3499 | |
3500 | |
3501 | |
3502 | Val = LHS->getOperand(0); |
3503 | |
3504 | SDValue ShlRHS = LHS->getOperand(1); |
3505 | ConstantSDNode *ShlCnst = dyn_cast<ConstantSDNode>(ShlRHS); |
3506 | if (!ShlCnst) { |
3507 | |
3508 | return false; |
3509 | } |
3510 | uint64_t InnerShiftAmt = ShlCnst->getZExtValue(); |
3511 | |
3512 | SDValue ShrRHS = RHS; |
3513 | ConstantSDNode *ShrCnst = dyn_cast<ConstantSDNode>(ShrRHS); |
3514 | if (!ShrCnst) { |
3515 | |
3516 | return false; |
3517 | } |
3518 | uint64_t OuterShiftAmt = ShrCnst->getZExtValue(); |
3519 | |
3520 | |
3521 | if (OuterShiftAmt < InnerShiftAmt) { |
3522 | return false; |
3523 | } |
3524 | |
3525 | |
3526 | |
3527 | |
3528 | if (OuterShiftAmt >= Val.getValueSizeInBits()) { |
3529 | return false; |
3530 | } |
3531 | |
3532 | Start = CurDAG->getTargetConstant(OuterShiftAmt - InnerShiftAmt, DL, |
3533 | MVT::i32); |
3534 | Len = CurDAG->getTargetConstant(Val.getValueSizeInBits() - OuterShiftAmt, |
3535 | DL, MVT::i32); |
3536 | |
3537 | if (N->getOpcode() == ISD::SRA) { |
3538 | |
3539 | |
3540 | IsSigned = true; |
3541 | } |
3542 | } else { |
3543 | |
3544 | return false; |
3545 | } |
3546 | } else { |
3547 | |
3548 | return false; |
3549 | } |
3550 | |
3551 | |
3552 | unsigned Opc; |
3553 | |
3554 | |
3555 | if (Val.getValueType() == MVT::i32) { |
3556 | if (IsSigned) { |
3557 | Opc = NVPTX::BFE_S32rii; |
3558 | } else { |
3559 | Opc = NVPTX::BFE_U32rii; |
3560 | } |
3561 | } else if (Val.getValueType() == MVT::i64) { |
3562 | if (IsSigned) { |
3563 | Opc = NVPTX::BFE_S64rii; |
3564 | } else { |
3565 | Opc = NVPTX::BFE_U64rii; |
3566 | } |
3567 | } else { |
3568 | |
3569 | return false; |
3570 | } |
3571 | |
3572 | SDValue Ops[] = { |
3573 | Val, Start, Len |
3574 | }; |
3575 | |
3576 | ReplaceNode(N, CurDAG->getMachineNode(Opc, DL, N->getVTList(), Ops)); |
3577 | return true; |
3578 | } |
3579 | |
3580 | |
3581 | |
3582 | bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) { |
3583 | |
3584 | if (N.getOpcode() == ISD::TargetGlobalAddress || |
3585 | N.getOpcode() == ISD::TargetExternalSymbol) { |
3586 | Address = N; |
3587 | return true; |
3588 | } |
3589 | if (N.getOpcode() == NVPTXISD::Wrapper) { |
3590 | Address = N.getOperand(0); |
3591 | return true; |
3592 | } |
3593 | |
3594 | if (AddrSpaceCastSDNode *CastN = dyn_cast<AddrSpaceCastSDNode>(N)) { |
3595 | if (CastN->getSrcAddressSpace() == ADDRESS_SPACE_GENERIC && |
3596 | CastN->getDestAddressSpace() == ADDRESS_SPACE_PARAM && |
3597 | CastN->getOperand(0).getOpcode() == NVPTXISD::MoveParam) |
3598 | return SelectDirectAddr(CastN->getOperand(0).getOperand(0), Address); |
3599 | } |
3600 | return false; |
3601 | } |
3602 | |
3603 | |
3604 | bool NVPTXDAGToDAGISel::SelectADDRsi_imp( |
3605 | SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) { |
3606 | if (Addr.getOpcode() == ISD::ADD) { |
3607 | if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) { |
3608 | SDValue base = Addr.getOperand(0); |
3609 | if (SelectDirectAddr(base, Base)) { |
3610 | Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(OpNode), |
3611 | mvt); |
3612 | return true; |
3613 | } |
3614 | } |
3615 | } |
3616 | return false; |
3617 | } |
3618 | |
3619 | |
3620 | bool NVPTXDAGToDAGISel::SelectADDRsi(SDNode *OpNode, SDValue Addr, |
3621 | SDValue &Base, SDValue &Offset) { |
3622 | return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i32); |
3623 | } |
3624 | |
3625 | |
3626 | bool NVPTXDAGToDAGISel::SelectADDRsi64(SDNode *OpNode, SDValue Addr, |
3627 | SDValue &Base, SDValue &Offset) { |
3628 | return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i64); |
3629 | } |
3630 | |
3631 | |
3632 | bool NVPTXDAGToDAGISel::SelectADDRri_imp( |
3633 | SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) { |
3634 | if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { |
3635 | Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt); |
3636 | Offset = CurDAG->getTargetConstant(0, SDLoc(OpNode), mvt); |
3637 | return true; |
3638 | } |
3639 | if (Addr.getOpcode() == ISD::TargetExternalSymbol || |
3640 | Addr.getOpcode() == ISD::TargetGlobalAddress) |
3641 | return false; |
3642 | |
3643 | if (Addr.getOpcode() == ISD::ADD) { |
3644 | if (SelectDirectAddr(Addr.getOperand(0), Addr)) { |
3645 | return false; |
3646 | } |
3647 | if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) { |
3648 | if (FrameIndexSDNode *FIN = |
3649 | dyn_cast<FrameIndexSDNode>(Addr.getOperand(0))) |
3650 | |
3651 | Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt); |
3652 | else |
3653 | Base = Addr.getOperand(0); |
3654 | Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(OpNode), |
3655 | mvt); |
3656 | return true; |
3657 | } |
3658 | } |
3659 | return false; |
3660 | } |
3661 | |
3662 | |
3663 | bool NVPTXDAGToDAGISel::SelectADDRri(SDNode *OpNode, SDValue Addr, |
3664 | SDValue &Base, SDValue &Offset) { |
3665 | return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i32); |
3666 | } |
3667 | |
3668 | |
3669 | bool NVPTXDAGToDAGISel::SelectADDRri64(SDNode *OpNode, SDValue Addr, |
3670 | SDValue &Base, SDValue &Offset) { |
3671 | return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i64); |
3672 | } |
3673 | |
3674 | bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N, |
3675 | unsigned int spN) const { |
3676 | const Value *Src = nullptr; |
3677 | if (MemSDNode *mN = dyn_cast<MemSDNode>(N)) { |
3678 | if (spN == 0 && mN->getMemOperand()->getPseudoValue()) |
3679 | return true; |
3680 | Src = mN->getMemOperand()->getValue(); |
3681 | } |
3682 | if (!Src) |
3683 | return false; |
3684 | if (auto *PT = dyn_cast<PointerType>(Src->getType())) |
3685 | return (PT->getAddressSpace() == spN); |
3686 | return false; |
3687 | } |
3688 | |
3689 | |
3690 | |
3691 | bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand( |
3692 | const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) { |
3693 | SDValue Op0, Op1; |
3694 | switch (ConstraintID) { |
3695 | default: |
3696 | return true; |
3697 | case InlineAsm::Constraint_m: |
3698 | if (SelectDirectAddr(Op, Op0)) { |
3699 | OutOps.push_back(Op0); |
3700 | OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32)); |
3701 | return false; |
3702 | } |
3703 | if (SelectADDRri(Op.getNode(), Op, Op0, Op1)) { |
3704 | OutOps.push_back(Op0); |
3705 | OutOps.push_back(Op1); |
3706 | return false; |
3707 | } |
3708 | break; |
3709 | } |
3710 | return true; |
3711 | } |
3712 | |
3713 | |
3714 | |
3715 | unsigned NVPTXDAGToDAGISel::GetConvertOpcode(MVT DestTy, MVT SrcTy, |
3716 | bool IsSigned) { |
3717 | switch (SrcTy.SimpleTy) { |
3718 | default: |
3719 | llvm_unreachable("Unhandled source type"); |
3720 | case MVT::i8: |
3721 | switch (DestTy.SimpleTy) { |
3722 | default: |
3723 | llvm_unreachable("Unhandled dest type"); |
3724 | case MVT::i16: |
3725 | return IsSigned ? NVPTX::CVT_s16_s8 : NVPTX::CVT_u16_u8; |
3726 | case MVT::i32: |
3727 | return IsSigned ? NVPTX::CVT_s32_s8 : NVPTX::CVT_u32_u8; |
3728 | case MVT::i64: |
3729 | return IsSigned ? NVPTX::CVT_s64_s8 : NVPTX::CVT_u64_u8; |
3730 | } |
3731 | case MVT::i16: |
3732 | switch (DestTy.SimpleTy) { |
3733 | default: |
3734 | llvm_unreachable("Unhandled dest type"); |
3735 | case MVT::i8: |
3736 | return IsSigned ? NVPTX::CVT_s8_s16 : NVPTX::CVT_u8_u16; |
3737 | case MVT::i32: |
3738 | return IsSigned ? NVPTX::CVT_s32_s16 : NVPTX::CVT_u32_u16; |
3739 | case MVT::i64: |
3740 | return IsSigned ? NVPTX::CVT_s64_s16 : NVPTX::CVT_u64_u16; |
3741 | } |
3742 | case MVT::i32: |
3743 | switch (DestTy.SimpleTy) { |
3744 | default: |
3745 | llvm_unreachable("Unhandled dest type"); |
3746 | case MVT::i8: |
3747 | return IsSigned ? NVPTX::CVT_s8_s32 : NVPTX::CVT_u8_u32; |
3748 | case MVT::i16: |
3749 | return IsSigned ? NVPTX::CVT_s16_s32 : NVPTX::CVT_u16_u32; |
3750 | case MVT::i64: |
3751 | return IsSigned ? NVPTX::CVT_s64_s32 : NVPTX::CVT_u64_u32; |
3752 | } |
3753 | case MVT::i64: |
3754 | switch (DestTy.SimpleTy) { |
3755 | default: |
3756 | llvm_unreachable("Unhandled dest type"); |
3757 | case MVT::i8: |
3758 | return IsSigned ? NVPTX::CVT_s8_s64 : NVPTX::CVT_u8_u64; |
3759 | case MVT::i16: |
3760 | return IsSigned ? NVPTX::CVT_s16_s64 : NVPTX::CVT_u16_u64; |
3761 | case MVT::i32: |
3762 | return IsSigned ? NVPTX::CVT_s32_s64 : NVPTX::CVT_u32_u64; |
3763 | } |
3764 | } |
3765 | } |