File: | lib/Target/NVPTX/NVPTXISelLowering.cpp |
Location: | line 2187, column 11 |
Description: | Value stored to 'Ofst' is never read |
1 | // |
2 | // The LLVM Compiler Infrastructure |
3 | // |
4 | // This file is distributed under the University of Illinois Open Source |
5 | // License. See LICENSE.TXT for details. |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file defines the interfaces that NVPTX uses to lower LLVM code into a |
10 | // selection DAG. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "NVPTXISelLowering.h" |
15 | #include "NVPTX.h" |
16 | #include "NVPTXTargetMachine.h" |
17 | #include "NVPTXTargetObjectFile.h" |
18 | #include "NVPTXUtilities.h" |
19 | #include "llvm/CodeGen/Analysis.h" |
20 | #include "llvm/CodeGen/MachineFrameInfo.h" |
21 | #include "llvm/CodeGen/MachineFunction.h" |
22 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
23 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
24 | #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" |
25 | #include "llvm/IR/CallSite.h" |
26 | #include "llvm/IR/DerivedTypes.h" |
27 | #include "llvm/IR/Function.h" |
28 | #include "llvm/IR/GlobalValue.h" |
29 | #include "llvm/IR/IntrinsicInst.h" |
30 | #include "llvm/IR/Intrinsics.h" |
31 | #include "llvm/IR/Module.h" |
32 | #include "llvm/MC/MCSectionELF.h" |
33 | #include "llvm/Support/CommandLine.h" |
34 | #include "llvm/Support/Debug.h" |
35 | #include "llvm/Support/ErrorHandling.h" |
36 | #include "llvm/Support/MathExtras.h" |
37 | #include "llvm/Support/raw_ostream.h" |
38 | #include <sstream> |
39 | |
40 | #undef DEBUG_TYPE |
41 | #define DEBUG_TYPE "nvptx-lower" |
42 | |
43 | using namespace llvm; |
44 | /* File-local counter starting at 0; presumably used to number call sites uniquely (its uses are not visible in this excerpt -- confirm). */ |
45 | static unsigned int uniqueCallSite = 0; |
46 | /* Flag -nvptx-sched4reg (default false): when set, the NVPTXTargetLowering constructor selects Sched::RegPressure instead of Sched::Source. */ |
47 | static cl::opt<bool> sched4reg( |
48 | "nvptx-sched4reg", |
49 | cl::desc("NVPTX Specific: schedule for register pressue"), cl::init(false)); |
50 | /* Hidden flag -nvptx-fma-level (default 2): FMA contraction level; 0 = don't contract, 1 = contract, 2 = contract aggressively. */ |
51 | static cl::opt<unsigned> |
52 | FMAContractLevelOpt("nvptx-fma-level", cl::ZeroOrMore, cl::Hidden, |
53 | cl::desc("NVPTX Specific: FMA contraction (0: don't do it" |
54 | " 1: do it 2: do it aggressively"), |
55 | cl::init(2)); |
56 | /* Returns true iff VT is one of the vector types listed below (2- or 4-element i1/i8/i16/i32/f32, plus v2i64 and v2f64); the constructor uses this to pick the vector types whose LOAD/STORE get custom lowering. */ |
57 | static bool IsPTXVectorType(MVT VT) { |
58 | switch (VT.SimpleTy) { |
59 | default: |
60 | return false; |
61 | case MVT::v2i1: |
62 | case MVT::v4i1: |
63 | case MVT::v2i8: |
64 | case MVT::v4i8: |
65 | case MVT::v2i16: |
66 | case MVT::v4i16: |
67 | case MVT::v2i32: |
68 | case MVT::v4i32: |
69 | case MVT::v2i64: |
70 | case MVT::v2f32: |
71 | case MVT::v4f32: |
72 | case MVT::v2f64: |
73 | return true; |
74 | } |
75 | } |
76 | |
77 | /// ComputePTXValueVTs - For the given Type \p Ty, appends to \p ValueVTs the |
78 | /// primitive EVTs that compose it. Unlike ComputeValueVTs, each vector EVT is |
79 | /// split into one entry per element. If \p Offsets is non-null, a matching |
80 | /// offset is appended per entry (element j of a vector: Off + j * store size). |
81 | /// NOTE: This is a band-aid for code that expects ComputeValueVTs to return the |
82 | /// same number of types as the Ins/Outs arrays in LowerFormalArguments, LowerCall, and LowerReturn. |
83 | static void ComputePTXValueVTs(const TargetLowering &TLI, Type *Ty, |
84 | SmallVectorImpl<EVT> &ValueVTs, |
85 | SmallVectorImpl<uint64_t> *Offsets = nullptr, |
86 | uint64_t StartingOffset = 0) { |
87 | SmallVector<EVT, 16> TempVTs; |
88 | SmallVector<uint64_t, 16> TempOffsets; |
89 | |
90 | ComputeValueVTs(TLI, Ty, TempVTs, &TempOffsets, StartingOffset); |
91 | for (unsigned i = 0, e = TempVTs.size(); i != e; ++i) { |
92 | EVT VT = TempVTs[i]; |
93 | uint64_t Off = TempOffsets[i]; |
94 | if (VT.isVector()) |
95 | for (unsigned j = 0, je = VT.getVectorNumElements(); j != je; ++j) { |
96 | ValueVTs.push_back(VT.getVectorElementType()); |
97 | if (Offsets) |
98 | Offsets->push_back(Off+j*VT.getVectorElementType().getStoreSize()); |
99 | } |
100 | else { |
101 | ValueVTs.push_back(VT); |
102 | if (Offsets) |
103 | Offsets->push_back(Off); |
104 | } |
105 | } |
106 | } |
107 | |
108 | // NVPTXTargetLowering constructor: registers the NVPTX register classes, per-type operation actions, and target DAG combines. |
109 | NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM) |
110 | : TargetLowering(TM, new NVPTXTargetObjectFile()), nvTM(&TM), |
111 | nvptxSubtarget(TM.getSubtarget<NVPTXSubtarget>()) { |
112 | |
113 | // Always lower memset, memcpy, and memmove intrinsics to load/store |
114 | // instructions, rather than generating calls to memset, memcpy, or |
115 | // memmove; the store limits are therefore set to the maximum unsigned value. |
116 | MaxStoresPerMemset = (unsigned) 0xFFFFFFFF; |
117 | MaxStoresPerMemcpy = (unsigned) 0xFFFFFFFF; |
118 | MaxStoresPerMemmove = (unsigned) 0xFFFFFFFF; |
119 | |
120 | setBooleanContents(ZeroOrNegativeOneBooleanContent); |
121 | setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); |
122 | |
123 | // Jumps are expensive. Don't create extra control flow for 'and', 'or' |
124 | // condition branches. |
125 | setJumpIsExpensive(true); |
126 | |
127 | // Use Source scheduling unless -nvptx-sched4reg requests RegPressure. |
128 | if (sched4reg) |
129 | setSchedulingPreference(Sched::RegPressure); |
130 | else |
131 | setSchedulingPreference(Sched::Source); |
132 | |
133 | addRegisterClass(MVT::i1, &NVPTX::Int1RegsRegClass); |
134 | addRegisterClass(MVT::i16, &NVPTX::Int16RegsRegClass); |
135 | addRegisterClass(MVT::i32, &NVPTX::Int32RegsRegClass); |
136 | addRegisterClass(MVT::i64, &NVPTX::Int64RegsRegClass); |
137 | addRegisterClass(MVT::f32, &NVPTX::Float32RegsRegClass); |
138 | addRegisterClass(MVT::f64, &NVPTX::Float64RegsRegClass); |
139 | |
140 | // Operations not directly supported by NVPTX. |
141 | setOperationAction(ISD::SELECT_CC, MVT::f32, Expand); |
142 | setOperationAction(ISD::SELECT_CC, MVT::f64, Expand); |
143 | setOperationAction(ISD::SELECT_CC, MVT::i1, Expand); |
144 | setOperationAction(ISD::SELECT_CC, MVT::i8, Expand); |
145 | setOperationAction(ISD::SELECT_CC, MVT::i16, Expand); |
146 | setOperationAction(ISD::SELECT_CC, MVT::i32, Expand); |
147 | setOperationAction(ISD::SELECT_CC, MVT::i64, Expand); |
148 | setOperationAction(ISD::BR_CC, MVT::f32, Expand); |
149 | setOperationAction(ISD::BR_CC, MVT::f64, Expand); |
150 | setOperationAction(ISD::BR_CC, MVT::i1, Expand); |
151 | setOperationAction(ISD::BR_CC, MVT::i8, Expand); |
152 | setOperationAction(ISD::BR_CC, MVT::i16, Expand); |
153 | setOperationAction(ISD::BR_CC, MVT::i32, Expand); |
154 | setOperationAction(ISD::BR_CC, MVT::i64, Expand); |
155 | // Some SIGN_EXTEND_INREG can be done using the cvt instruction. |
156 | // For others we will expand to a SHL/SRA pair. |
157 | setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i64, Legal); |
158 | setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal); |
159 | setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Legal); |
160 | setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal); |
161 | setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); |
162 | |
163 | setOperationAction(ISD::SHL_PARTS, MVT::i32 , Custom); |
164 | setOperationAction(ISD::SRA_PARTS, MVT::i32 , Custom); |
165 | setOperationAction(ISD::SRL_PARTS, MVT::i32 , Custom); |
166 | setOperationAction(ISD::SHL_PARTS, MVT::i64 , Custom); |
167 | setOperationAction(ISD::SRA_PARTS, MVT::i64 , Custom); |
168 | setOperationAction(ISD::SRL_PARTS, MVT::i64 , Custom); |
169 | |
170 | if (nvptxSubtarget.hasROT64()) { |
171 | setOperationAction(ISD::ROTL, MVT::i64, Legal); |
172 | setOperationAction(ISD::ROTR, MVT::i64, Legal); |
173 | } else { |
174 | setOperationAction(ISD::ROTL, MVT::i64, Expand); |
175 | setOperationAction(ISD::ROTR, MVT::i64, Expand); |
176 | } |
177 | if (nvptxSubtarget.hasROT32()) { |
178 | setOperationAction(ISD::ROTL, MVT::i32, Legal); |
179 | setOperationAction(ISD::ROTR, MVT::i32, Legal); |
180 | } else { |
181 | setOperationAction(ISD::ROTL, MVT::i32, Expand); |
182 | setOperationAction(ISD::ROTR, MVT::i32, Expand); |
183 | } |
184 | |
185 | setOperationAction(ISD::ROTL, MVT::i16, Expand); |
186 | setOperationAction(ISD::ROTR, MVT::i16, Expand); |
187 | setOperationAction(ISD::ROTL, MVT::i8, Expand); |
188 | setOperationAction(ISD::ROTR, MVT::i8, Expand); |
189 | setOperationAction(ISD::BSWAP, MVT::i16, Expand); |
190 | setOperationAction(ISD::BSWAP, MVT::i32, Expand); |
191 | setOperationAction(ISD::BSWAP, MVT::i64, Expand); |
192 | |
193 | // Indirect branch is not supported. |
194 | // This also disables Jump Table creation. |
195 | setOperationAction(ISD::BR_JT, MVT::Other, Expand); |
196 | setOperationAction(ISD::BRIND, MVT::Other, Expand); |
197 | |
198 | setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); |
199 | setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); |
200 | |
201 | // We want to legalize constant related memmove and memcpy |
202 | // intrinsics. |
203 | setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); |
204 | |
205 | // Turn FP extload into load/fextend |
206 | setLoadExtAction(ISD::EXTLOAD, MVT::f16, Expand); |
207 | setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand); |
208 | // Turn FP truncstore into trunc + store. |
209 | setTruncStoreAction(MVT::f32, MVT::f16, Expand); |
210 | setTruncStoreAction(MVT::f64, MVT::f16, Expand); |
211 | setTruncStoreAction(MVT::f64, MVT::f32, Expand); |
212 | |
213 | // PTX does not support loading / storing predicate registers |
214 | setOperationAction(ISD::LOAD, MVT::i1, Custom); |
215 | setOperationAction(ISD::STORE, MVT::i1, Custom); |
216 | |
217 | setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote); |
218 | setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote); |
219 | setTruncStoreAction(MVT::i64, MVT::i1, Expand); |
220 | setTruncStoreAction(MVT::i32, MVT::i1, Expand); |
221 | setTruncStoreAction(MVT::i16, MVT::i1, Expand); |
222 | setTruncStoreAction(MVT::i8, MVT::i1, Expand); |
223 | |
224 | // FP constants are legal in NVPTX |
225 | setOperationAction(ISD::ConstantFP, MVT::f64, Legal); |
226 | setOperationAction(ISD::ConstantFP, MVT::f32, Legal); |
227 | |
228 | // TRAP can be lowered to PTX trap |
229 | setOperationAction(ISD::TRAP, MVT::Other, Legal); |
230 | |
231 | setOperationAction(ISD::ADDC, MVT::i64, Expand); |
232 | setOperationAction(ISD::ADDE, MVT::i64, Expand); |
233 | |
234 | // Register custom handling for vector loads/stores |
235 | for (int i = MVT::FIRST_VECTOR_VALUETYPE; i <= MVT::LAST_VECTOR_VALUETYPE; |
236 | ++i) { |
237 | MVT VT = (MVT::SimpleValueType) i; |
238 | if (IsPTXVectorType(VT)) { |
239 | setOperationAction(ISD::LOAD, VT, Custom); |
240 | setOperationAction(ISD::STORE, VT, Custom); |
241 | setOperationAction(ISD::INTRINSIC_W_CHAIN, VT, Custom); |
242 | } |
243 | } |
244 | |
245 | // Custom handling for i8 intrinsics |
246 | setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom); |
247 | |
248 | setOperationAction(ISD::CTLZ, MVT::i16, Legal); |
249 | setOperationAction(ISD::CTLZ, MVT::i32, Legal); |
250 | setOperationAction(ISD::CTLZ, MVT::i64, Legal); |
251 | setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16, Legal); |
252 | setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Legal); |
253 | setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Legal); |
254 | setOperationAction(ISD::CTTZ, MVT::i16, Expand); |
255 | setOperationAction(ISD::CTTZ, MVT::i32, Expand); |
256 | setOperationAction(ISD::CTTZ, MVT::i64, Expand); |
257 | setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16, Expand); |
258 | setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand); |
259 | setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand); |
260 | setOperationAction(ISD::CTPOP, MVT::i16, Legal); |
261 | setOperationAction(ISD::CTPOP, MVT::i32, Legal); |
262 | setOperationAction(ISD::CTPOP, MVT::i64, Legal); |
263 | |
264 | // We have some custom DAG combine patterns for these nodes |
265 | setTargetDAGCombine(ISD::ADD); |
266 | setTargetDAGCombine(ISD::AND); |
267 | setTargetDAGCombine(ISD::FADD); |
268 | setTargetDAGCombine(ISD::MUL); |
269 | setTargetDAGCombine(ISD::SHL); |
270 | |
271 | // Now deduce the information based on the above mentioned |
272 | // actions |
273 | computeRegisterProperties(); |
274 | } |
275 | |
276 | const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const { |
277 | switch (Opcode) { |
278 | default: |
279 | return nullptr; |
280 | case NVPTXISD::CALL: |
281 | return "NVPTXISD::CALL"; |
282 | case NVPTXISD::RET_FLAG: |
283 | return "NVPTXISD::RET_FLAG"; |
284 | case NVPTXISD::Wrapper: |
285 | return "NVPTXISD::Wrapper"; |
286 | case NVPTXISD::DeclareParam: |
287 | return "NVPTXISD::DeclareParam"; |
288 | case NVPTXISD::DeclareScalarParam: |
289 | return "NVPTXISD::DeclareScalarParam"; |
290 | case NVPTXISD::DeclareRet: |
291 | return "NVPTXISD::DeclareRet"; |
292 | case NVPTXISD::DeclareRetParam: |
293 | return "NVPTXISD::DeclareRetParam"; |
294 | case NVPTXISD::PrintCall: |
295 | return "NVPTXISD::PrintCall"; |
296 | case NVPTXISD::LoadParam: |
297 | return "NVPTXISD::LoadParam"; |
298 | case NVPTXISD::LoadParamV2: |
299 | return "NVPTXISD::LoadParamV2"; |
300 | case NVPTXISD::LoadParamV4: |
301 | return "NVPTXISD::LoadParamV4"; |
302 | case NVPTXISD::StoreParam: |
303 | return "NVPTXISD::StoreParam"; |
304 | case NVPTXISD::StoreParamV2: |
305 | return "NVPTXISD::StoreParamV2"; |
306 | case NVPTXISD::StoreParamV4: |
307 | return "NVPTXISD::StoreParamV4"; |
308 | case NVPTXISD::StoreParamS32: |
309 | return "NVPTXISD::StoreParamS32"; |
310 | case NVPTXISD::StoreParamU32: |
311 | return "NVPTXISD::StoreParamU32"; |
312 | case NVPTXISD::CallArgBegin: |
313 | return "NVPTXISD::CallArgBegin"; |
314 | case NVPTXISD::CallArg: |
315 | return "NVPTXISD::CallArg"; |
316 | case NVPTXISD::LastCallArg: |
317 | return "NVPTXISD::LastCallArg"; |
318 | case NVPTXISD::CallArgEnd: |
319 | return "NVPTXISD::CallArgEnd"; |
320 | case NVPTXISD::CallVoid: |
321 | return "NVPTXISD::CallVoid"; |
322 | case NVPTXISD::CallVal: |
323 | return "NVPTXISD::CallVal"; |
324 | case NVPTXISD::CallSymbol: |
325 | return "NVPTXISD::CallSymbol"; |
326 | case NVPTXISD::Prototype: |
327 | return "NVPTXISD::Prototype"; |
328 | case NVPTXISD::MoveParam: |
329 | return "NVPTXISD::MoveParam"; |
330 | case NVPTXISD::StoreRetval: |
331 | return "NVPTXISD::StoreRetval"; |
332 | case NVPTXISD::StoreRetvalV2: |
333 | return "NVPTXISD::StoreRetvalV2"; |
334 | case NVPTXISD::StoreRetvalV4: |
335 | return "NVPTXISD::StoreRetvalV4"; |
336 | case NVPTXISD::PseudoUseParam: |
337 | return "NVPTXISD::PseudoUseParam"; |
338 | case NVPTXISD::RETURN: |
339 | return "NVPTXISD::RETURN"; |
340 | case NVPTXISD::CallSeqBegin: |
341 | return "NVPTXISD::CallSeqBegin"; |
342 | case NVPTXISD::CallSeqEnd: |
343 | return "NVPTXISD::CallSeqEnd"; |
344 | case NVPTXISD::CallPrototype: |
345 | return "NVPTXISD::CallPrototype"; |
346 | case NVPTXISD::LoadV2: |
347 | return "NVPTXISD::LoadV2"; |
348 | case NVPTXISD::LoadV4: |
349 | return "NVPTXISD::LoadV4"; |
350 | case NVPTXISD::LDGV2: |
351 | return "NVPTXISD::LDGV2"; |
352 | case NVPTXISD::LDGV4: |
353 | return "NVPTXISD::LDGV4"; |
354 | case NVPTXISD::LDUV2: |
355 | return "NVPTXISD::LDUV2"; |
356 | case NVPTXISD::LDUV4: |
357 | return "NVPTXISD::LDUV4"; |
358 | case NVPTXISD::StoreV2: |
359 | return "NVPTXISD::StoreV2"; |
360 | case NVPTXISD::StoreV4: |
361 | return "NVPTXISD::StoreV4"; |
362 | case NVPTXISD::FUN_SHFL_CLAMP: |
363 | return "NVPTXISD::FUN_SHFL_CLAMP"; |
364 | case NVPTXISD::FUN_SHFR_CLAMP: |
365 | return "NVPTXISD::FUN_SHFR_CLAMP"; |
366 | case NVPTXISD::IMAD: |
367 | return "NVPTXISD::IMAD"; |
368 | case NVPTXISD::MUL_WIDE_SIGNED: |
369 | return "NVPTXISD::MUL_WIDE_SIGNED"; |
370 | case NVPTXISD::MUL_WIDE_UNSIGNED: |
371 | return "NVPTXISD::MUL_WIDE_UNSIGNED"; |
372 | case NVPTXISD::Tex1DFloatS32: return "NVPTXISD::Tex1DFloatS32"; |
373 | case NVPTXISD::Tex1DFloatFloat: return "NVPTXISD::Tex1DFloatFloat"; |
374 | case NVPTXISD::Tex1DFloatFloatLevel: |
375 | return "NVPTXISD::Tex1DFloatFloatLevel"; |
376 | case NVPTXISD::Tex1DFloatFloatGrad: |
377 | return "NVPTXISD::Tex1DFloatFloatGrad"; |
378 | case NVPTXISD::Tex1DS32S32: return "NVPTXISD::Tex1DS32S32"; |
379 | case NVPTXISD::Tex1DS32Float: return "NVPTXISD::Tex1DS32Float"; |
380 | case NVPTXISD::Tex1DS32FloatLevel: |
381 | return "NVPTXISD::Tex1DS32FloatLevel"; |
382 | case NVPTXISD::Tex1DS32FloatGrad: |
383 | return "NVPTXISD::Tex1DS32FloatGrad"; |
384 | case NVPTXISD::Tex1DU32S32: return "NVPTXISD::Tex1DU32S32"; |
385 | case NVPTXISD::Tex1DU32Float: return "NVPTXISD::Tex1DU32Float"; |
386 | case NVPTXISD::Tex1DU32FloatLevel: |
387 | return "NVPTXISD::Tex1DU32FloatLevel"; |
388 | case NVPTXISD::Tex1DU32FloatGrad: |
389 | return "NVPTXISD::Tex1DU32FloatGrad"; |
390 | case NVPTXISD::Tex1DArrayFloatS32: return "NVPTXISD::Tex1DArrayFloatS32"; |
391 | case NVPTXISD::Tex1DArrayFloatFloat: return "NVPTXISD::Tex1DArrayFloatFloat"; |
392 | case NVPTXISD::Tex1DArrayFloatFloatLevel: |
393 | return "NVPTXISD::Tex1DArrayFloatFloatLevel"; |
394 | case NVPTXISD::Tex1DArrayFloatFloatGrad: |
395 | return "NVPTXISD::Tex1DArrayFloatFloatGrad"; |
396 | case NVPTXISD::Tex1DArrayS32S32: return "NVPTXISD::Tex1DArrayS32S32"; |
397 | case NVPTXISD::Tex1DArrayS32Float: return "NVPTXISD::Tex1DArrayS32Float"; |
398 | case NVPTXISD::Tex1DArrayS32FloatLevel: |
399 | return "NVPTXISD::Tex1DArrayS32FloatLevel"; |
400 | case NVPTXISD::Tex1DArrayS32FloatGrad: |
401 | return "NVPTXISD::Tex1DArrayS32FloatGrad"; |
402 | case NVPTXISD::Tex1DArrayU32S32: return "NVPTXISD::Tex1DArrayU32S32"; |
403 | case NVPTXISD::Tex1DArrayU32Float: return "NVPTXISD::Tex1DArrayU32Float"; |
404 | case NVPTXISD::Tex1DArrayU32FloatLevel: |
405 | return "NVPTXISD::Tex1DArrayU32FloatLevel"; |
406 | case NVPTXISD::Tex1DArrayU32FloatGrad: |
407 | return "NVPTXISD::Tex1DArrayU32FloatGrad"; |
408 | case NVPTXISD::Tex2DFloatS32: return "NVPTXISD::Tex2DFloatS32"; |
409 | case NVPTXISD::Tex2DFloatFloat: return "NVPTXISD::Tex2DFloatFloat"; |
410 | case NVPTXISD::Tex2DFloatFloatLevel: |
411 | return "NVPTXISD::Tex2DFloatFloatLevel"; |
412 | case NVPTXISD::Tex2DFloatFloatGrad: |
413 | return "NVPTXISD::Tex2DFloatFloatGrad"; |
414 | case NVPTXISD::Tex2DS32S32: return "NVPTXISD::Tex2DS32S32"; |
415 | case NVPTXISD::Tex2DS32Float: return "NVPTXISD::Tex2DS32Float"; |
416 | case NVPTXISD::Tex2DS32FloatLevel: |
417 | return "NVPTXISD::Tex2DS32FloatLevel"; |
418 | case NVPTXISD::Tex2DS32FloatGrad: |
419 | return "NVPTXISD::Tex2DS32FloatGrad"; |
420 | case NVPTXISD::Tex2DU32S32: return "NVPTXISD::Tex2DU32S32"; |
421 | case NVPTXISD::Tex2DU32Float: return "NVPTXISD::Tex2DU32Float"; |
422 | case NVPTXISD::Tex2DU32FloatLevel: |
423 | return "NVPTXISD::Tex2DU32FloatLevel"; |
424 | case NVPTXISD::Tex2DU32FloatGrad: |
425 | return "NVPTXISD::Tex2DU32FloatGrad"; |
426 | case NVPTXISD::Tex2DArrayFloatS32: return "NVPTXISD::Tex2DArrayFloatS32"; |
427 | case NVPTXISD::Tex2DArrayFloatFloat: return "NVPTXISD::Tex2DArrayFloatFloat"; |
428 | case NVPTXISD::Tex2DArrayFloatFloatLevel: |
429 | return "NVPTXISD::Tex2DArrayFloatFloatLevel"; |
430 | case NVPTXISD::Tex2DArrayFloatFloatGrad: |
431 | return "NVPTXISD::Tex2DArrayFloatFloatGrad"; |
432 | case NVPTXISD::Tex2DArrayS32S32: return "NVPTXISD::Tex2DArrayS32S32"; |
433 | case NVPTXISD::Tex2DArrayS32Float: return "NVPTXISD::Tex2DArrayS32Float"; |
434 | case NVPTXISD::Tex2DArrayS32FloatLevel: |
435 | return "NVPTXISD::Tex2DArrayS32FloatLevel"; |
436 | case NVPTXISD::Tex2DArrayS32FloatGrad: |
437 | return "NVPTXISD::Tex2DArrayS32FloatGrad"; |
438 | case NVPTXISD::Tex2DArrayU32S32: return "NVPTXISD::Tex2DArrayU32S32"; |
439 | case NVPTXISD::Tex2DArrayU32Float: return "NVPTXISD::Tex2DArrayU32Float"; |
440 | case NVPTXISD::Tex2DArrayU32FloatLevel: |
441 | return "NVPTXISD::Tex2DArrayU32FloatLevel"; |
442 | case NVPTXISD::Tex2DArrayU32FloatGrad: |
443 | return "NVPTXISD::Tex2DArrayU32FloatGrad"; |
444 | case NVPTXISD::Tex3DFloatS32: return "NVPTXISD::Tex3DFloatS32"; |
445 | case NVPTXISD::Tex3DFloatFloat: return "NVPTXISD::Tex3DFloatFloat"; |
446 | case NVPTXISD::Tex3DFloatFloatLevel: |
447 | return "NVPTXISD::Tex3DFloatFloatLevel"; |
448 | case NVPTXISD::Tex3DFloatFloatGrad: |
449 | return "NVPTXISD::Tex3DFloatFloatGrad"; |
450 | case NVPTXISD::Tex3DS32S32: return "NVPTXISD::Tex3DS32S32"; |
451 | case NVPTXISD::Tex3DS32Float: return "NVPTXISD::Tex3DS32Float"; |
452 | case NVPTXISD::Tex3DS32FloatLevel: |
453 | return "NVPTXISD::Tex3DS32FloatLevel"; |
454 | case NVPTXISD::Tex3DS32FloatGrad: |
455 | return "NVPTXISD::Tex3DS32FloatGrad"; |
456 | case NVPTXISD::Tex3DU32S32: return "NVPTXISD::Tex3DU32S32"; |
457 | case NVPTXISD::Tex3DU32Float: return "NVPTXISD::Tex3DU32Float"; |
458 | case NVPTXISD::Tex3DU32FloatLevel: |
459 | return "NVPTXISD::Tex3DU32FloatLevel"; |
460 | case NVPTXISD::Tex3DU32FloatGrad: |
461 | return "NVPTXISD::Tex3DU32FloatGrad"; |
462 | case NVPTXISD::TexCubeFloatFloat: return "NVPTXISD::TexCubeFloatFloat"; |
463 | case NVPTXISD::TexCubeFloatFloatLevel: |
464 | return "NVPTXISD::TexCubeFloatFloatLevel"; |
465 | case NVPTXISD::TexCubeS32Float: return "NVPTXISD::TexCubeS32Float"; |
466 | case NVPTXISD::TexCubeS32FloatLevel: |
467 | return "NVPTXISD::TexCubeS32FloatLevel"; |
468 | case NVPTXISD::TexCubeU32Float: return "NVPTXISD::TexCubeU32Float"; |
469 | case NVPTXISD::TexCubeU32FloatLevel: |
470 | return "NVPTXISD::TexCubeU32FloatLevel"; |
471 | case NVPTXISD::TexCubeArrayFloatFloat: |
472 | return "NVPTXISD::TexCubeArrayFloatFloat"; |
473 | case NVPTXISD::TexCubeArrayFloatFloatLevel: |
474 | return "NVPTXISD::TexCubeArrayFloatFloatLevel"; |
475 | case NVPTXISD::TexCubeArrayS32Float: |
476 | return "NVPTXISD::TexCubeArrayS32Float"; |
477 | case NVPTXISD::TexCubeArrayS32FloatLevel: |
478 | return "NVPTXISD::TexCubeArrayS32FloatLevel"; |
479 | case NVPTXISD::TexCubeArrayU32Float: |
480 | return "NVPTXISD::TexCubeArrayU32Float"; |
481 | case NVPTXISD::TexCubeArrayU32FloatLevel: |
482 | return "NVPTXISD::TexCubeArrayU32FloatLevel"; |
483 | case NVPTXISD::Tld4R2DFloatFloat: |
484 | return "NVPTXISD::Tld4R2DFloatFloat"; |
485 | case NVPTXISD::Tld4G2DFloatFloat: |
486 | return "NVPTXISD::Tld4G2DFloatFloat"; |
487 | case NVPTXISD::Tld4B2DFloatFloat: |
488 | return "NVPTXISD::Tld4B2DFloatFloat"; |
489 | case NVPTXISD::Tld4A2DFloatFloat: |
490 | return "NVPTXISD::Tld4A2DFloatFloat"; |
491 | case NVPTXISD::Tld4R2DS64Float: |
492 | return "NVPTXISD::Tld4R2DS64Float"; |
493 | case NVPTXISD::Tld4G2DS64Float: |
494 | return "NVPTXISD::Tld4G2DS64Float"; |
495 | case NVPTXISD::Tld4B2DS64Float: |
496 | return "NVPTXISD::Tld4B2DS64Float"; |
497 | case NVPTXISD::Tld4A2DS64Float: |
498 | return "NVPTXISD::Tld4A2DS64Float"; |
499 | case NVPTXISD::Tld4R2DU64Float: |
500 | return "NVPTXISD::Tld4R2DU64Float"; |
501 | case NVPTXISD::Tld4G2DU64Float: |
502 | return "NVPTXISD::Tld4G2DU64Float"; |
503 | case NVPTXISD::Tld4B2DU64Float: |
504 | return "NVPTXISD::Tld4B2DU64Float"; |
505 | case NVPTXISD::Tld4A2DU64Float: |
506 | return "NVPTXISD::Tld4A2DU64Float"; |
507 | |
508 | case NVPTXISD::TexUnified1DFloatS32: |
509 | return "NVPTXISD::TexUnified1DFloatS32"; |
510 | case NVPTXISD::TexUnified1DFloatFloat: |
511 | return "NVPTXISD::TexUnified1DFloatFloat"; |
512 | case NVPTXISD::TexUnified1DFloatFloatLevel: |
513 | return "NVPTXISD::TexUnified1DFloatFloatLevel"; |
514 | case NVPTXISD::TexUnified1DFloatFloatGrad: |
515 | return "NVPTXISD::TexUnified1DFloatFloatGrad"; |
516 | case NVPTXISD::TexUnified1DS32S32: |
517 | return "NVPTXISD::TexUnified1DS32S32"; |
518 | case NVPTXISD::TexUnified1DS32Float: |
519 | return "NVPTXISD::TexUnified1DS32Float"; |
520 | case NVPTXISD::TexUnified1DS32FloatLevel: |
521 | return "NVPTXISD::TexUnified1DS32FloatLevel"; |
522 | case NVPTXISD::TexUnified1DS32FloatGrad: |
523 | return "NVPTXISD::TexUnified1DS32FloatGrad"; |
524 | case NVPTXISD::TexUnified1DU32S32: |
525 | return "NVPTXISD::TexUnified1DU32S32"; |
526 | case NVPTXISD::TexUnified1DU32Float: |
527 | return "NVPTXISD::TexUnified1DU32Float"; |
528 | case NVPTXISD::TexUnified1DU32FloatLevel: |
529 | return "NVPTXISD::TexUnified1DU32FloatLevel"; |
530 | case NVPTXISD::TexUnified1DU32FloatGrad: |
531 | return "NVPTXISD::TexUnified1DU32FloatGrad"; |
532 | case NVPTXISD::TexUnified1DArrayFloatS32: |
533 | return "NVPTXISD::TexUnified1DArrayFloatS32"; |
534 | case NVPTXISD::TexUnified1DArrayFloatFloat: |
535 | return "NVPTXISD::TexUnified1DArrayFloatFloat"; |
536 | case NVPTXISD::TexUnified1DArrayFloatFloatLevel: |
537 | return "NVPTXISD::TexUnified1DArrayFloatFloatLevel"; |
538 | case NVPTXISD::TexUnified1DArrayFloatFloatGrad: |
539 | return "NVPTXISD::TexUnified1DArrayFloatFloatGrad"; |
540 | case NVPTXISD::TexUnified1DArrayS32S32: |
541 | return "NVPTXISD::TexUnified1DArrayS32S32"; |
542 | case NVPTXISD::TexUnified1DArrayS32Float: |
543 | return "NVPTXISD::TexUnified1DArrayS32Float"; |
544 | case NVPTXISD::TexUnified1DArrayS32FloatLevel: |
545 | return "NVPTXISD::TexUnified1DArrayS32FloatLevel"; |
546 | case NVPTXISD::TexUnified1DArrayS32FloatGrad: |
547 | return "NVPTXISD::TexUnified1DArrayS32FloatGrad"; |
548 | case NVPTXISD::TexUnified1DArrayU32S32: |
549 | return "NVPTXISD::TexUnified1DArrayU32S32"; |
550 | case NVPTXISD::TexUnified1DArrayU32Float: |
551 | return "NVPTXISD::TexUnified1DArrayU32Float"; |
552 | case NVPTXISD::TexUnified1DArrayU32FloatLevel: |
553 | return "NVPTXISD::TexUnified1DArrayU32FloatLevel"; |
554 | case NVPTXISD::TexUnified1DArrayU32FloatGrad: |
555 | return "NVPTXISD::TexUnified1DArrayU32FloatGrad"; |
556 | case NVPTXISD::TexUnified2DFloatS32: |
557 | return "NVPTXISD::TexUnified2DFloatS32"; |
558 | case NVPTXISD::TexUnified2DFloatFloat: |
559 | return "NVPTXISD::TexUnified2DFloatFloat"; |
560 | case NVPTXISD::TexUnified2DFloatFloatLevel: |
561 | return "NVPTXISD::TexUnified2DFloatFloatLevel"; |
562 | case NVPTXISD::TexUnified2DFloatFloatGrad: |
563 | return "NVPTXISD::TexUnified2DFloatFloatGrad"; |
564 | case NVPTXISD::TexUnified2DS32S32: |
565 | return "NVPTXISD::TexUnified2DS32S32"; |
566 | case NVPTXISD::TexUnified2DS32Float: |
567 | return "NVPTXISD::TexUnified2DS32Float"; |
568 | case NVPTXISD::TexUnified2DS32FloatLevel: |
569 | return "NVPTXISD::TexUnified2DS32FloatLevel"; |
570 | case NVPTXISD::TexUnified2DS32FloatGrad: |
571 | return "NVPTXISD::TexUnified2DS32FloatGrad"; |
572 | case NVPTXISD::TexUnified2DU32S32: |
573 | return "NVPTXISD::TexUnified2DU32S32"; |
574 | case NVPTXISD::TexUnified2DU32Float: |
575 | return "NVPTXISD::TexUnified2DU32Float"; |
576 | case NVPTXISD::TexUnified2DU32FloatLevel: |
577 | return "NVPTXISD::TexUnified2DU32FloatLevel"; |
578 | case NVPTXISD::TexUnified2DU32FloatGrad: |
579 | return "NVPTXISD::TexUnified2DU32FloatGrad"; |
580 | case NVPTXISD::TexUnified2DArrayFloatS32: |
581 | return "NVPTXISD::TexUnified2DArrayFloatS32"; |
582 | case NVPTXISD::TexUnified2DArrayFloatFloat: |
583 | return "NVPTXISD::TexUnified2DArrayFloatFloat"; |
584 | case NVPTXISD::TexUnified2DArrayFloatFloatLevel: |
585 | return "NVPTXISD::TexUnified2DArrayFloatFloatLevel"; |
586 | case NVPTXISD::TexUnified2DArrayFloatFloatGrad: |
587 | return "NVPTXISD::TexUnified2DArrayFloatFloatGrad"; |
588 | case NVPTXISD::TexUnified2DArrayS32S32: |
589 | return "NVPTXISD::TexUnified2DArrayS32S32"; |
590 | case NVPTXISD::TexUnified2DArrayS32Float: |
591 | return "NVPTXISD::TexUnified2DArrayS32Float"; |
592 | case NVPTXISD::TexUnified2DArrayS32FloatLevel: |
593 | return "NVPTXISD::TexUnified2DArrayS32FloatLevel"; |
594 | case NVPTXISD::TexUnified2DArrayS32FloatGrad: |
595 | return "NVPTXISD::TexUnified2DArrayS32FloatGrad"; |
596 | case NVPTXISD::TexUnified2DArrayU32S32: |
597 | return "NVPTXISD::TexUnified2DArrayU32S32"; |
598 | case NVPTXISD::TexUnified2DArrayU32Float: |
599 | return "NVPTXISD::TexUnified2DArrayU32Float"; |
600 | case NVPTXISD::TexUnified2DArrayU32FloatLevel: |
601 | return "NVPTXISD::TexUnified2DArrayU32FloatLevel"; |
602 | case NVPTXISD::TexUnified2DArrayU32FloatGrad: |
603 | return "NVPTXISD::TexUnified2DArrayU32FloatGrad"; |
604 | case NVPTXISD::TexUnified3DFloatS32: |
605 | return "NVPTXISD::TexUnified3DFloatS32"; |
606 | case NVPTXISD::TexUnified3DFloatFloat: |
607 | return "NVPTXISD::TexUnified3DFloatFloat"; |
608 | case NVPTXISD::TexUnified3DFloatFloatLevel: |
609 | return "NVPTXISD::TexUnified3DFloatFloatLevel"; |
610 | case NVPTXISD::TexUnified3DFloatFloatGrad: |
611 | return "NVPTXISD::TexUnified3DFloatFloatGrad"; |
612 | case NVPTXISD::TexUnified3DS32S32: |
613 | return "NVPTXISD::TexUnified3DS32S32"; |
614 | case NVPTXISD::TexUnified3DS32Float: |
615 | return "NVPTXISD::TexUnified3DS32Float"; |
616 | case NVPTXISD::TexUnified3DS32FloatLevel: |
617 | return "NVPTXISD::TexUnified3DS32FloatLevel"; |
618 | case NVPTXISD::TexUnified3DS32FloatGrad: |
619 | return "NVPTXISD::TexUnified3DS32FloatGrad"; |
620 | case NVPTXISD::TexUnified3DU32S32: |
621 | return "NVPTXISD::TexUnified3DU32S32"; |
622 | case NVPTXISD::TexUnified3DU32Float: |
623 | return "NVPTXISD::TexUnified3DU32Float"; |
624 | case NVPTXISD::TexUnified3DU32FloatLevel: |
625 | return "NVPTXISD::TexUnified3DU32FloatLevel"; |
626 | case NVPTXISD::TexUnified3DU32FloatGrad: |
627 | return "NVPTXISD::TexUnified3DU32FloatGrad"; |
628 | case NVPTXISD::TexUnifiedCubeFloatFloat: |
629 | return "NVPTXISD::TexUnifiedCubeFloatFloat"; |
630 | case NVPTXISD::TexUnifiedCubeFloatFloatLevel: |
631 | return "NVPTXISD::TexUnifiedCubeFloatFloatLevel"; |
632 | case NVPTXISD::TexUnifiedCubeS32Float: |
633 | return "NVPTXISD::TexUnifiedCubeS32Float"; |
634 | case NVPTXISD::TexUnifiedCubeS32FloatLevel: |
635 | return "NVPTXISD::TexUnifiedCubeS32FloatLevel"; |
636 | case NVPTXISD::TexUnifiedCubeU32Float: |
637 | return "NVPTXISD::TexUnifiedCubeU32Float"; |
638 | case NVPTXISD::TexUnifiedCubeU32FloatLevel: |
639 | return "NVPTXISD::TexUnifiedCubeU32FloatLevel"; |
640 | case NVPTXISD::TexUnifiedCubeArrayFloatFloat: |
641 | return "NVPTXISD::TexUnifiedCubeArrayFloatFloat"; |
642 | case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel: |
643 | return "NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel"; |
644 | case NVPTXISD::TexUnifiedCubeArrayS32Float: |
645 | return "NVPTXISD::TexUnifiedCubeArrayS32Float"; |
646 | case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel: |
647 | return "NVPTXISD::TexUnifiedCubeArrayS32FloatLevel"; |
648 | case NVPTXISD::TexUnifiedCubeArrayU32Float: |
649 | return "NVPTXISD::TexUnifiedCubeArrayU32Float"; |
650 | case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel: |
651 | return "NVPTXISD::TexUnifiedCubeArrayU32FloatLevel"; |
652 | case NVPTXISD::Tld4UnifiedR2DFloatFloat: |
653 | return "NVPTXISD::Tld4UnifiedR2DFloatFloat"; |
654 | case NVPTXISD::Tld4UnifiedG2DFloatFloat: |
655 | return "NVPTXISD::Tld4UnifiedG2DFloatFloat"; |
656 | case NVPTXISD::Tld4UnifiedB2DFloatFloat: |
657 | return "NVPTXISD::Tld4UnifiedB2DFloatFloat"; |
658 | case NVPTXISD::Tld4UnifiedA2DFloatFloat: |
659 | return "NVPTXISD::Tld4UnifiedA2DFloatFloat"; |
660 | case NVPTXISD::Tld4UnifiedR2DS64Float: |
661 | return "NVPTXISD::Tld4UnifiedR2DS64Float"; |
662 | case NVPTXISD::Tld4UnifiedG2DS64Float: |
663 | return "NVPTXISD::Tld4UnifiedG2DS64Float"; |
664 | case NVPTXISD::Tld4UnifiedB2DS64Float: |
665 | return "NVPTXISD::Tld4UnifiedB2DS64Float"; |
666 | case NVPTXISD::Tld4UnifiedA2DS64Float: |
667 | return "NVPTXISD::Tld4UnifiedA2DS64Float"; |
668 | case NVPTXISD::Tld4UnifiedR2DU64Float: |
669 | return "NVPTXISD::Tld4UnifiedR2DU64Float"; |
670 | case NVPTXISD::Tld4UnifiedG2DU64Float: |
671 | return "NVPTXISD::Tld4UnifiedG2DU64Float"; |
672 | case NVPTXISD::Tld4UnifiedB2DU64Float: |
673 | return "NVPTXISD::Tld4UnifiedB2DU64Float"; |
674 | case NVPTXISD::Tld4UnifiedA2DU64Float: |
675 | return "NVPTXISD::Tld4UnifiedA2DU64Float"; |
676 | |
677 | case NVPTXISD::Suld1DI8Clamp: return "NVPTXISD::Suld1DI8Clamp"; |
678 | case NVPTXISD::Suld1DI16Clamp: return "NVPTXISD::Suld1DI16Clamp"; |
679 | case NVPTXISD::Suld1DI32Clamp: return "NVPTXISD::Suld1DI32Clamp"; |
680 | case NVPTXISD::Suld1DI64Clamp: return "NVPTXISD::Suld1DI64Clamp"; |
681 | case NVPTXISD::Suld1DV2I8Clamp: return "NVPTXISD::Suld1DV2I8Clamp"; |
682 | case NVPTXISD::Suld1DV2I16Clamp: return "NVPTXISD::Suld1DV2I16Clamp"; |
683 | case NVPTXISD::Suld1DV2I32Clamp: return "NVPTXISD::Suld1DV2I32Clamp"; |
684 | case NVPTXISD::Suld1DV2I64Clamp: return "NVPTXISD::Suld1DV2I64Clamp"; |
685 | case NVPTXISD::Suld1DV4I8Clamp: return "NVPTXISD::Suld1DV4I8Clamp"; |
686 | case NVPTXISD::Suld1DV4I16Clamp: return "NVPTXISD::Suld1DV4I16Clamp"; |
687 | case NVPTXISD::Suld1DV4I32Clamp: return "NVPTXISD::Suld1DV4I32Clamp"; |
688 | |
689 | case NVPTXISD::Suld1DArrayI8Clamp: return "NVPTXISD::Suld1DArrayI8Clamp"; |
690 | case NVPTXISD::Suld1DArrayI16Clamp: return "NVPTXISD::Suld1DArrayI16Clamp"; |
691 | case NVPTXISD::Suld1DArrayI32Clamp: return "NVPTXISD::Suld1DArrayI32Clamp"; |
692 | case NVPTXISD::Suld1DArrayI64Clamp: return "NVPTXISD::Suld1DArrayI64Clamp"; |
693 | case NVPTXISD::Suld1DArrayV2I8Clamp: return "NVPTXISD::Suld1DArrayV2I8Clamp"; |
694 | case NVPTXISD::Suld1DArrayV2I16Clamp:return "NVPTXISD::Suld1DArrayV2I16Clamp"; |
695 | case NVPTXISD::Suld1DArrayV2I32Clamp:return "NVPTXISD::Suld1DArrayV2I32Clamp"; |
696 | case NVPTXISD::Suld1DArrayV2I64Clamp:return "NVPTXISD::Suld1DArrayV2I64Clamp"; |
697 | case NVPTXISD::Suld1DArrayV4I8Clamp: return "NVPTXISD::Suld1DArrayV4I8Clamp"; |
698 | case NVPTXISD::Suld1DArrayV4I16Clamp:return "NVPTXISD::Suld1DArrayV4I16Clamp"; |
699 | case NVPTXISD::Suld1DArrayV4I32Clamp:return "NVPTXISD::Suld1DArrayV4I32Clamp"; |
700 | |
701 | case NVPTXISD::Suld2DI8Clamp: return "NVPTXISD::Suld2DI8Clamp"; |
702 | case NVPTXISD::Suld2DI16Clamp: return "NVPTXISD::Suld2DI16Clamp"; |
703 | case NVPTXISD::Suld2DI32Clamp: return "NVPTXISD::Suld2DI32Clamp"; |
704 | case NVPTXISD::Suld2DI64Clamp: return "NVPTXISD::Suld2DI64Clamp"; |
705 | case NVPTXISD::Suld2DV2I8Clamp: return "NVPTXISD::Suld2DV2I8Clamp"; |
706 | case NVPTXISD::Suld2DV2I16Clamp: return "NVPTXISD::Suld2DV2I16Clamp"; |
707 | case NVPTXISD::Suld2DV2I32Clamp: return "NVPTXISD::Suld2DV2I32Clamp"; |
708 | case NVPTXISD::Suld2DV2I64Clamp: return "NVPTXISD::Suld2DV2I64Clamp"; |
709 | case NVPTXISD::Suld2DV4I8Clamp: return "NVPTXISD::Suld2DV4I8Clamp"; |
710 | case NVPTXISD::Suld2DV4I16Clamp: return "NVPTXISD::Suld2DV4I16Clamp"; |
711 | case NVPTXISD::Suld2DV4I32Clamp: return "NVPTXISD::Suld2DV4I32Clamp"; |
712 | |
713 | case NVPTXISD::Suld2DArrayI8Clamp: return "NVPTXISD::Suld2DArrayI8Clamp"; |
714 | case NVPTXISD::Suld2DArrayI16Clamp: return "NVPTXISD::Suld2DArrayI16Clamp"; |
715 | case NVPTXISD::Suld2DArrayI32Clamp: return "NVPTXISD::Suld2DArrayI32Clamp"; |
716 | case NVPTXISD::Suld2DArrayI64Clamp: return "NVPTXISD::Suld2DArrayI64Clamp"; |
717 | case NVPTXISD::Suld2DArrayV2I8Clamp: return "NVPTXISD::Suld2DArrayV2I8Clamp"; |
718 | case NVPTXISD::Suld2DArrayV2I16Clamp:return "NVPTXISD::Suld2DArrayV2I16Clamp"; |
719 | case NVPTXISD::Suld2DArrayV2I32Clamp:return "NVPTXISD::Suld2DArrayV2I32Clamp"; |
720 | case NVPTXISD::Suld2DArrayV2I64Clamp:return "NVPTXISD::Suld2DArrayV2I64Clamp"; |
721 | case NVPTXISD::Suld2DArrayV4I8Clamp: return "NVPTXISD::Suld2DArrayV4I8Clamp"; |
722 | case NVPTXISD::Suld2DArrayV4I16Clamp:return "NVPTXISD::Suld2DArrayV4I16Clamp"; |
723 | case NVPTXISD::Suld2DArrayV4I32Clamp:return "NVPTXISD::Suld2DArrayV4I32Clamp"; |
724 | |
725 | case NVPTXISD::Suld3DI8Clamp: return "NVPTXISD::Suld3DI8Clamp"; |
726 | case NVPTXISD::Suld3DI16Clamp: return "NVPTXISD::Suld3DI16Clamp"; |
727 | case NVPTXISD::Suld3DI32Clamp: return "NVPTXISD::Suld3DI32Clamp"; |
728 | case NVPTXISD::Suld3DI64Clamp: return "NVPTXISD::Suld3DI64Clamp"; |
729 | case NVPTXISD::Suld3DV2I8Clamp: return "NVPTXISD::Suld3DV2I8Clamp"; |
730 | case NVPTXISD::Suld3DV2I16Clamp: return "NVPTXISD::Suld3DV2I16Clamp"; |
731 | case NVPTXISD::Suld3DV2I32Clamp: return "NVPTXISD::Suld3DV2I32Clamp"; |
732 | case NVPTXISD::Suld3DV2I64Clamp: return "NVPTXISD::Suld3DV2I64Clamp"; |
733 | case NVPTXISD::Suld3DV4I8Clamp: return "NVPTXISD::Suld3DV4I8Clamp"; |
734 | case NVPTXISD::Suld3DV4I16Clamp: return "NVPTXISD::Suld3DV4I16Clamp"; |
735 | case NVPTXISD::Suld3DV4I32Clamp: return "NVPTXISD::Suld3DV4I32Clamp"; |
736 | |
737 | case NVPTXISD::Suld1DI8Trap: return "NVPTXISD::Suld1DI8Trap"; |
738 | case NVPTXISD::Suld1DI16Trap: return "NVPTXISD::Suld1DI16Trap"; |
739 | case NVPTXISD::Suld1DI32Trap: return "NVPTXISD::Suld1DI32Trap"; |
740 | case NVPTXISD::Suld1DI64Trap: return "NVPTXISD::Suld1DI64Trap"; |
741 | case NVPTXISD::Suld1DV2I8Trap: return "NVPTXISD::Suld1DV2I8Trap"; |
742 | case NVPTXISD::Suld1DV2I16Trap: return "NVPTXISD::Suld1DV2I16Trap"; |
743 | case NVPTXISD::Suld1DV2I32Trap: return "NVPTXISD::Suld1DV2I32Trap"; |
744 | case NVPTXISD::Suld1DV2I64Trap: return "NVPTXISD::Suld1DV2I64Trap"; |
745 | case NVPTXISD::Suld1DV4I8Trap: return "NVPTXISD::Suld1DV4I8Trap"; |
746 | case NVPTXISD::Suld1DV4I16Trap: return "NVPTXISD::Suld1DV4I16Trap"; |
747 | case NVPTXISD::Suld1DV4I32Trap: return "NVPTXISD::Suld1DV4I32Trap"; |
748 | |
749 | case NVPTXISD::Suld1DArrayI8Trap: return "NVPTXISD::Suld1DArrayI8Trap"; |
750 | case NVPTXISD::Suld1DArrayI16Trap: return "NVPTXISD::Suld1DArrayI16Trap"; |
751 | case NVPTXISD::Suld1DArrayI32Trap: return "NVPTXISD::Suld1DArrayI32Trap"; |
752 | case NVPTXISD::Suld1DArrayI64Trap: return "NVPTXISD::Suld1DArrayI64Trap"; |
753 | case NVPTXISD::Suld1DArrayV2I8Trap: return "NVPTXISD::Suld1DArrayV2I8Trap"; |
754 | case NVPTXISD::Suld1DArrayV2I16Trap: return "NVPTXISD::Suld1DArrayV2I16Trap"; |
755 | case NVPTXISD::Suld1DArrayV2I32Trap: return "NVPTXISD::Suld1DArrayV2I32Trap"; |
756 | case NVPTXISD::Suld1DArrayV2I64Trap: return "NVPTXISD::Suld1DArrayV2I64Trap"; |
757 | case NVPTXISD::Suld1DArrayV4I8Trap: return "NVPTXISD::Suld1DArrayV4I8Trap"; |
758 | case NVPTXISD::Suld1DArrayV4I16Trap: return "NVPTXISD::Suld1DArrayV4I16Trap"; |
759 | case NVPTXISD::Suld1DArrayV4I32Trap: return "NVPTXISD::Suld1DArrayV4I32Trap"; |
760 | |
761 | case NVPTXISD::Suld2DI8Trap: return "NVPTXISD::Suld2DI8Trap"; |
762 | case NVPTXISD::Suld2DI16Trap: return "NVPTXISD::Suld2DI16Trap"; |
763 | case NVPTXISD::Suld2DI32Trap: return "NVPTXISD::Suld2DI32Trap"; |
764 | case NVPTXISD::Suld2DI64Trap: return "NVPTXISD::Suld2DI64Trap"; |
765 | case NVPTXISD::Suld2DV2I8Trap: return "NVPTXISD::Suld2DV2I8Trap"; |
766 | case NVPTXISD::Suld2DV2I16Trap: return "NVPTXISD::Suld2DV2I16Trap"; |
767 | case NVPTXISD::Suld2DV2I32Trap: return "NVPTXISD::Suld2DV2I32Trap"; |
768 | case NVPTXISD::Suld2DV2I64Trap: return "NVPTXISD::Suld2DV2I64Trap"; |
769 | case NVPTXISD::Suld2DV4I8Trap: return "NVPTXISD::Suld2DV4I8Trap"; |
770 | case NVPTXISD::Suld2DV4I16Trap: return "NVPTXISD::Suld2DV4I16Trap"; |
771 | case NVPTXISD::Suld2DV4I32Trap: return "NVPTXISD::Suld2DV4I32Trap"; |
772 | |
773 | case NVPTXISD::Suld2DArrayI8Trap: return "NVPTXISD::Suld2DArrayI8Trap"; |
774 | case NVPTXISD::Suld2DArrayI16Trap: return "NVPTXISD::Suld2DArrayI16Trap"; |
775 | case NVPTXISD::Suld2DArrayI32Trap: return "NVPTXISD::Suld2DArrayI32Trap"; |
776 | case NVPTXISD::Suld2DArrayI64Trap: return "NVPTXISD::Suld2DArrayI64Trap"; |
777 | case NVPTXISD::Suld2DArrayV2I8Trap: return "NVPTXISD::Suld2DArrayV2I8Trap"; |
778 | case NVPTXISD::Suld2DArrayV2I16Trap: return "NVPTXISD::Suld2DArrayV2I16Trap"; |
779 | case NVPTXISD::Suld2DArrayV2I32Trap: return "NVPTXISD::Suld2DArrayV2I32Trap"; |
780 | case NVPTXISD::Suld2DArrayV2I64Trap: return "NVPTXISD::Suld2DArrayV2I64Trap"; |
781 | case NVPTXISD::Suld2DArrayV4I8Trap: return "NVPTXISD::Suld2DArrayV4I8Trap"; |
782 | case NVPTXISD::Suld2DArrayV4I16Trap: return "NVPTXISD::Suld2DArrayV4I16Trap"; |
783 | case NVPTXISD::Suld2DArrayV4I32Trap: return "NVPTXISD::Suld2DArrayV4I32Trap"; |
784 | |
785 | case NVPTXISD::Suld3DI8Trap: return "NVPTXISD::Suld3DI8Trap"; |
786 | case NVPTXISD::Suld3DI16Trap: return "NVPTXISD::Suld3DI16Trap"; |
787 | case NVPTXISD::Suld3DI32Trap: return "NVPTXISD::Suld3DI32Trap"; |
788 | case NVPTXISD::Suld3DI64Trap: return "NVPTXISD::Suld3DI64Trap"; |
789 | case NVPTXISD::Suld3DV2I8Trap: return "NVPTXISD::Suld3DV2I8Trap"; |
790 | case NVPTXISD::Suld3DV2I16Trap: return "NVPTXISD::Suld3DV2I16Trap"; |
791 | case NVPTXISD::Suld3DV2I32Trap: return "NVPTXISD::Suld3DV2I32Trap"; |
792 | case NVPTXISD::Suld3DV2I64Trap: return "NVPTXISD::Suld3DV2I64Trap"; |
793 | case NVPTXISD::Suld3DV4I8Trap: return "NVPTXISD::Suld3DV4I8Trap"; |
794 | case NVPTXISD::Suld3DV4I16Trap: return "NVPTXISD::Suld3DV4I16Trap"; |
795 | case NVPTXISD::Suld3DV4I32Trap: return "NVPTXISD::Suld3DV4I32Trap"; |
796 | |
797 | case NVPTXISD::Suld1DI8Zero: return "NVPTXISD::Suld1DI8Zero"; |
798 | case NVPTXISD::Suld1DI16Zero: return "NVPTXISD::Suld1DI16Zero"; |
799 | case NVPTXISD::Suld1DI32Zero: return "NVPTXISD::Suld1DI32Zero"; |
800 | case NVPTXISD::Suld1DI64Zero: return "NVPTXISD::Suld1DI64Zero"; |
801 | case NVPTXISD::Suld1DV2I8Zero: return "NVPTXISD::Suld1DV2I8Zero"; |
802 | case NVPTXISD::Suld1DV2I16Zero: return "NVPTXISD::Suld1DV2I16Zero"; |
803 | case NVPTXISD::Suld1DV2I32Zero: return "NVPTXISD::Suld1DV2I32Zero"; |
804 | case NVPTXISD::Suld1DV2I64Zero: return "NVPTXISD::Suld1DV2I64Zero"; |
805 | case NVPTXISD::Suld1DV4I8Zero: return "NVPTXISD::Suld1DV4I8Zero"; |
806 | case NVPTXISD::Suld1DV4I16Zero: return "NVPTXISD::Suld1DV4I16Zero"; |
807 | case NVPTXISD::Suld1DV4I32Zero: return "NVPTXISD::Suld1DV4I32Zero"; |
808 | |
809 | case NVPTXISD::Suld1DArrayI8Zero: return "NVPTXISD::Suld1DArrayI8Zero"; |
810 | case NVPTXISD::Suld1DArrayI16Zero: return "NVPTXISD::Suld1DArrayI16Zero"; |
811 | case NVPTXISD::Suld1DArrayI32Zero: return "NVPTXISD::Suld1DArrayI32Zero"; |
812 | case NVPTXISD::Suld1DArrayI64Zero: return "NVPTXISD::Suld1DArrayI64Zero"; |
813 | case NVPTXISD::Suld1DArrayV2I8Zero: return "NVPTXISD::Suld1DArrayV2I8Zero"; |
814 | case NVPTXISD::Suld1DArrayV2I16Zero: return "NVPTXISD::Suld1DArrayV2I16Zero"; |
815 | case NVPTXISD::Suld1DArrayV2I32Zero: return "NVPTXISD::Suld1DArrayV2I32Zero"; |
816 | case NVPTXISD::Suld1DArrayV2I64Zero: return "NVPTXISD::Suld1DArrayV2I64Zero"; |
817 | case NVPTXISD::Suld1DArrayV4I8Zero: return "NVPTXISD::Suld1DArrayV4I8Zero"; |
818 | case NVPTXISD::Suld1DArrayV4I16Zero: return "NVPTXISD::Suld1DArrayV4I16Zero"; |
819 | case NVPTXISD::Suld1DArrayV4I32Zero: return "NVPTXISD::Suld1DArrayV4I32Zero"; |
820 | |
821 | case NVPTXISD::Suld2DI8Zero: return "NVPTXISD::Suld2DI8Zero"; |
822 | case NVPTXISD::Suld2DI16Zero: return "NVPTXISD::Suld2DI16Zero"; |
823 | case NVPTXISD::Suld2DI32Zero: return "NVPTXISD::Suld2DI32Zero"; |
824 | case NVPTXISD::Suld2DI64Zero: return "NVPTXISD::Suld2DI64Zero"; |
825 | case NVPTXISD::Suld2DV2I8Zero: return "NVPTXISD::Suld2DV2I8Zero"; |
826 | case NVPTXISD::Suld2DV2I16Zero: return "NVPTXISD::Suld2DV2I16Zero"; |
827 | case NVPTXISD::Suld2DV2I32Zero: return "NVPTXISD::Suld2DV2I32Zero"; |
828 | case NVPTXISD::Suld2DV2I64Zero: return "NVPTXISD::Suld2DV2I64Zero"; |
829 | case NVPTXISD::Suld2DV4I8Zero: return "NVPTXISD::Suld2DV4I8Zero"; |
830 | case NVPTXISD::Suld2DV4I16Zero: return "NVPTXISD::Suld2DV4I16Zero"; |
831 | case NVPTXISD::Suld2DV4I32Zero: return "NVPTXISD::Suld2DV4I32Zero"; |
832 | |
833 | case NVPTXISD::Suld2DArrayI8Zero: return "NVPTXISD::Suld2DArrayI8Zero"; |
834 | case NVPTXISD::Suld2DArrayI16Zero: return "NVPTXISD::Suld2DArrayI16Zero"; |
835 | case NVPTXISD::Suld2DArrayI32Zero: return "NVPTXISD::Suld2DArrayI32Zero"; |
836 | case NVPTXISD::Suld2DArrayI64Zero: return "NVPTXISD::Suld2DArrayI64Zero"; |
837 | case NVPTXISD::Suld2DArrayV2I8Zero: return "NVPTXISD::Suld2DArrayV2I8Zero"; |
838 | case NVPTXISD::Suld2DArrayV2I16Zero: return "NVPTXISD::Suld2DArrayV2I16Zero"; |
839 | case NVPTXISD::Suld2DArrayV2I32Zero: return "NVPTXISD::Suld2DArrayV2I32Zero"; |
840 | case NVPTXISD::Suld2DArrayV2I64Zero: return "NVPTXISD::Suld2DArrayV2I64Zero"; |
841 | case NVPTXISD::Suld2DArrayV4I8Zero: return "NVPTXISD::Suld2DArrayV4I8Zero"; |
842 | case NVPTXISD::Suld2DArrayV4I16Zero: return "NVPTXISD::Suld2DArrayV4I16Zero"; |
843 | case NVPTXISD::Suld2DArrayV4I32Zero: return "NVPTXISD::Suld2DArrayV4I32Zero"; |
844 | |
845 | case NVPTXISD::Suld3DI8Zero: return "NVPTXISD::Suld3DI8Zero"; |
846 | case NVPTXISD::Suld3DI16Zero: return "NVPTXISD::Suld3DI16Zero"; |
847 | case NVPTXISD::Suld3DI32Zero: return "NVPTXISD::Suld3DI32Zero"; |
848 | case NVPTXISD::Suld3DI64Zero: return "NVPTXISD::Suld3DI64Zero"; |
849 | case NVPTXISD::Suld3DV2I8Zero: return "NVPTXISD::Suld3DV2I8Zero"; |
850 | case NVPTXISD::Suld3DV2I16Zero: return "NVPTXISD::Suld3DV2I16Zero"; |
851 | case NVPTXISD::Suld3DV2I32Zero: return "NVPTXISD::Suld3DV2I32Zero"; |
852 | case NVPTXISD::Suld3DV2I64Zero: return "NVPTXISD::Suld3DV2I64Zero"; |
853 | case NVPTXISD::Suld3DV4I8Zero: return "NVPTXISD::Suld3DV4I8Zero"; |
854 | case NVPTXISD::Suld3DV4I16Zero: return "NVPTXISD::Suld3DV4I16Zero"; |
855 | case NVPTXISD::Suld3DV4I32Zero: return "NVPTXISD::Suld3DV4I32Zero"; |
856 | } |
857 | } |
858 | |
859 | TargetLoweringBase::LegalizeTypeAction |
860 | NVPTXTargetLowering::getPreferredVectorAction(EVT VT) const { |
861 | if (VT.getVectorNumElements() != 1 && VT.getScalarType() == MVT::i1) |
862 | return TypeSplitVector; |
863 | |
864 | return TargetLoweringBase::getPreferredVectorAction(VT); |
865 | } |
866 | |
867 | SDValue |
868 | NVPTXTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { |
869 | SDLoc dl(Op); |
870 | const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); |
871 | Op = DAG.getTargetGlobalAddress(GV, dl, getPointerTy()); |
872 | return DAG.getNode(NVPTXISD::Wrapper, dl, getPointerTy(), Op); |
873 | } |
874 | |
875 | std::string |
876 | NVPTXTargetLowering::getPrototype(Type *retTy, const ArgListTy &Args, |
877 | const SmallVectorImpl<ISD::OutputArg> &Outs, |
878 | unsigned retAlignment, |
879 | const ImmutableCallSite *CS) const { |
880 | |
881 | bool isABI = (nvptxSubtarget.getSmVersion() >= 20); |
882 | assert(isABI && "Non-ABI compilation is not supported")((isABI && "Non-ABI compilation is not supported") ? static_cast <void> (0) : __assert_fail ("isABI && \"Non-ABI compilation is not supported\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn216889/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 882, __PRETTY_FUNCTION__)); |
883 | if (!isABI) |
884 | return ""; |
885 | |
886 | std::stringstream O; |
887 | O << "prototype_" << uniqueCallSite << " : .callprototype "; |
888 | |
889 | if (retTy->getTypeID() == Type::VoidTyID) { |
890 | O << "()"; |
891 | } else { |
892 | O << "("; |
893 | if (retTy->isFloatingPointTy() || retTy->isIntegerTy()) { |
894 | unsigned size = 0; |
895 | if (const IntegerType *ITy = dyn_cast<IntegerType>(retTy)) { |
896 | size = ITy->getBitWidth(); |
897 | if (size < 32) |
898 | size = 32; |
899 | } else { |
900 | assert(retTy->isFloatingPointTy() &&((retTy->isFloatingPointTy() && "Floating point type expected here" ) ? static_cast<void> (0) : __assert_fail ("retTy->isFloatingPointTy() && \"Floating point type expected here\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn216889/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 901, __PRETTY_FUNCTION__)) |
901 | "Floating point type expected here")((retTy->isFloatingPointTy() && "Floating point type expected here" ) ? static_cast<void> (0) : __assert_fail ("retTy->isFloatingPointTy() && \"Floating point type expected here\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn216889/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 901, __PRETTY_FUNCTION__)); |
902 | size = retTy->getPrimitiveSizeInBits(); |
903 | } |
904 | |
905 | O << ".param .b" << size << " _"; |
906 | } else if (isa<PointerType>(retTy)) { |
907 | O << ".param .b" << getPointerTy().getSizeInBits() << " _"; |
908 | } else { |
909 | if((retTy->getTypeID() == Type::StructTyID) || |
910 | isa<VectorType>(retTy)) { |
911 | O << ".param .align " |
912 | << retAlignment |
913 | << " .b8 _[" |
914 | << getDataLayout()->getTypeAllocSize(retTy) << "]"; |
915 | } else { |
916 | assert(false && "Unknown return type")((false && "Unknown return type") ? static_cast<void > (0) : __assert_fail ("false && \"Unknown return type\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn216889/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 916, __PRETTY_FUNCTION__)); |
917 | } |
918 | } |
919 | O << ") "; |
920 | } |
921 | O << "_ ("; |
922 | |
923 | bool first = true; |
924 | MVT thePointerTy = getPointerTy(); |
925 | |
926 | unsigned OIdx = 0; |
927 | for (unsigned i = 0, e = Args.size(); i != e; ++i, ++OIdx) { |
928 | Type *Ty = Args[i].Ty; |
929 | if (!first) { |
930 | O << ", "; |
931 | } |
932 | first = false; |
933 | |
934 | if (Outs[OIdx].Flags.isByVal() == false) { |
935 | if (Ty->isAggregateType() || Ty->isVectorTy()) { |
936 | unsigned align = 0; |
937 | const CallInst *CallI = cast<CallInst>(CS->getInstruction()); |
938 | const DataLayout *TD = getDataLayout(); |
939 | // +1 because index 0 is reserved for return type alignment |
940 | if (!llvm::getAlign(*CallI, i + 1, align)) |
941 | align = TD->getABITypeAlignment(Ty); |
942 | unsigned sz = TD->getTypeAllocSize(Ty); |
943 | O << ".param .align " << align << " .b8 "; |
944 | O << "_"; |
945 | O << "[" << sz << "]"; |
946 | // update the index for Outs |
947 | SmallVector<EVT, 16> vtparts; |
948 | ComputeValueVTs(*this, Ty, vtparts); |
949 | if (unsigned len = vtparts.size()) |
950 | OIdx += len - 1; |
951 | continue; |
952 | } |
953 | // i8 types in IR will be i16 types in SDAG |
954 | assert((getValueType(Ty) == Outs[OIdx].VT ||(((getValueType(Ty) == Outs[OIdx].VT || (getValueType(Ty) == MVT ::i8 && Outs[OIdx].VT == MVT::i16)) && "type mismatch between callee prototype and arguments" ) ? static_cast<void> (0) : __assert_fail ("(getValueType(Ty) == Outs[OIdx].VT || (getValueType(Ty) == MVT::i8 && Outs[OIdx].VT == MVT::i16)) && \"type mismatch between callee prototype and arguments\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn216889/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 956, __PRETTY_FUNCTION__)) |
955 | (getValueType(Ty) == MVT::i8 && Outs[OIdx].VT == MVT::i16)) &&(((getValueType(Ty) == Outs[OIdx].VT || (getValueType(Ty) == MVT ::i8 && Outs[OIdx].VT == MVT::i16)) && "type mismatch between callee prototype and arguments" ) ? static_cast<void> (0) : __assert_fail ("(getValueType(Ty) == Outs[OIdx].VT || (getValueType(Ty) == MVT::i8 && Outs[OIdx].VT == MVT::i16)) && \"type mismatch between callee prototype and arguments\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn216889/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 956, __PRETTY_FUNCTION__)) |
956 | "type mismatch between callee prototype and arguments")(((getValueType(Ty) == Outs[OIdx].VT || (getValueType(Ty) == MVT ::i8 && Outs[OIdx].VT == MVT::i16)) && "type mismatch between callee prototype and arguments" ) ? static_cast<void> (0) : __assert_fail ("(getValueType(Ty) == Outs[OIdx].VT || (getValueType(Ty) == MVT::i8 && Outs[OIdx].VT == MVT::i16)) && \"type mismatch between callee prototype and arguments\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn216889/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 956, __PRETTY_FUNCTION__)); |
957 | // scalar type |
958 | unsigned sz = 0; |
959 | if (isa<IntegerType>(Ty)) { |
960 | sz = cast<IntegerType>(Ty)->getBitWidth(); |
961 | if (sz < 32) |
962 | sz = 32; |
963 | } else if (isa<PointerType>(Ty)) |
964 | sz = thePointerTy.getSizeInBits(); |
965 | else |
966 | sz = Ty->getPrimitiveSizeInBits(); |
967 | O << ".param .b" << sz << " "; |
968 | O << "_"; |
969 | continue; |
970 | } |
971 | const PointerType *PTy = dyn_cast<PointerType>(Ty); |
972 | assert(PTy && "Param with byval attribute should be a pointer type")((PTy && "Param with byval attribute should be a pointer type" ) ? static_cast<void> (0) : __assert_fail ("PTy && \"Param with byval attribute should be a pointer type\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn216889/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 972, __PRETTY_FUNCTION__)); |
973 | Type *ETy = PTy->getElementType(); |
974 | |
975 | unsigned align = Outs[OIdx].Flags.getByValAlign(); |
976 | unsigned sz = getDataLayout()->getTypeAllocSize(ETy); |
977 | O << ".param .align " << align << " .b8 "; |
978 | O << "_"; |
979 | O << "[" << sz << "]"; |
980 | } |
981 | O << ");"; |
982 | return O.str(); |
983 | } |
984 | |
985 | unsigned |
986 | NVPTXTargetLowering::getArgumentAlignment(SDValue Callee, |
987 | const ImmutableCallSite *CS, |
988 | Type *Ty, |
989 | unsigned Idx) const { |
990 | const DataLayout *TD = getDataLayout(); |
991 | unsigned Align = 0; |
992 | const Value *DirectCallee = CS->getCalledFunction(); |
993 | |
994 | if (!DirectCallee) { |
995 | // We don't have a direct function symbol, but that may be because of |
996 | // constant cast instructions in the call. |
997 | const Instruction *CalleeI = CS->getInstruction(); |
998 | assert(CalleeI && "Call target is not a function or derived value?")((CalleeI && "Call target is not a function or derived value?" ) ? static_cast<void> (0) : __assert_fail ("CalleeI && \"Call target is not a function or derived value?\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn216889/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 998, __PRETTY_FUNCTION__)); |
999 | |
1000 | // With bitcast'd call targets, the instruction will be the call |
1001 | if (isa<CallInst>(CalleeI)) { |
1002 | // Check if we have call alignment metadata |
1003 | if (llvm::getAlign(*cast<CallInst>(CalleeI), Idx, Align)) |
1004 | return Align; |
1005 | |
1006 | const Value *CalleeV = cast<CallInst>(CalleeI)->getCalledValue(); |
1007 | // Ignore any bitcast instructions |
1008 | while(isa<ConstantExpr>(CalleeV)) { |
1009 | const ConstantExpr *CE = cast<ConstantExpr>(CalleeV); |
1010 | if (!CE->isCast()) |
1011 | break; |
1012 | // Look through the bitcast |
1013 | CalleeV = cast<ConstantExpr>(CalleeV)->getOperand(0); |
1014 | } |
1015 | |
1016 | // We have now looked past all of the bitcasts. Do we finally have a |
1017 | // Function? |
1018 | if (isa<Function>(CalleeV)) |
1019 | DirectCallee = CalleeV; |
1020 | } |
1021 | } |
1022 | |
1023 | // Check for function alignment information if we found that the |
1024 | // ultimate target is a Function |
1025 | if (DirectCallee) |
1026 | if (llvm::getAlign(*cast<Function>(DirectCallee), Idx, Align)) |
1027 | return Align; |
1028 | |
1029 | // Call is indirect or alignment information is not available, fall back to |
1030 | // the ABI type alignment |
1031 | return TD->getABITypeAlignment(Ty); |
1032 | } |
1033 | |
1034 | SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, |
1035 | SmallVectorImpl<SDValue> &InVals) const { |
1036 | SelectionDAG &DAG = CLI.DAG; |
1037 | SDLoc dl = CLI.DL; |
1038 | SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; |
1039 | SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; |
1040 | SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; |
1041 | SDValue Chain = CLI.Chain; |
1042 | SDValue Callee = CLI.Callee; |
1043 | bool &isTailCall = CLI.IsTailCall; |
1044 | ArgListTy &Args = CLI.getArgs(); |
1045 | Type *retTy = CLI.RetTy; |
1046 | ImmutableCallSite *CS = CLI.CS; |
1047 | |
1048 | bool isABI = (nvptxSubtarget.getSmVersion() >= 20); |
1049 | assert(isABI && "Non-ABI compilation is not supported")((isABI && "Non-ABI compilation is not supported") ? static_cast <void> (0) : __assert_fail ("isABI && \"Non-ABI compilation is not supported\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn216889/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 1049, __PRETTY_FUNCTION__)); |
1050 | if (!isABI) |
1051 | return Chain; |
1052 | const DataLayout *TD = getDataLayout(); |
1053 | MachineFunction &MF = DAG.getMachineFunction(); |
1054 | const Function *F = MF.getFunction(); |
1055 | |
1056 | SDValue tempChain = Chain; |
1057 | Chain = |
1058 | DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(uniqueCallSite, true), |
1059 | dl); |
1060 | SDValue InFlag = Chain.getValue(1); |
1061 | |
1062 | unsigned paramCount = 0; |
1063 | // Args.size() and Outs.size() need not match. |
1064 | // Outs.size() will be larger |
1065 | // * if there is an aggregate argument with multiple fields (each field |
1066 | // showing up separately in Outs) |
1067 | // * if there is a vector argument with more than typical vector-length |
1068 | // elements (generally if more than 4) where each vector element is |
1069 | // individually present in Outs. |
1070 | // So a different index should be used for indexing into Outs/OutVals. |
1071 | // See similar issue in LowerFormalArguments. |
1072 | unsigned OIdx = 0; |
1073 | // Declare the .params or .reg need to pass values |
1074 | // to the function |
1075 | for (unsigned i = 0, e = Args.size(); i != e; ++i, ++OIdx) { |
1076 | EVT VT = Outs[OIdx].VT; |
1077 | Type *Ty = Args[i].Ty; |
1078 | |
1079 | if (Outs[OIdx].Flags.isByVal() == false) { |
1080 | if (Ty->isAggregateType()) { |
1081 | // aggregate |
1082 | SmallVector<EVT, 16> vtparts; |
1083 | SmallVector<uint64_t, 16> Offsets; |
1084 | ComputePTXValueVTs(*this, Ty, vtparts, &Offsets, 0); |
1085 | |
1086 | unsigned align = getArgumentAlignment(Callee, CS, Ty, paramCount + 1); |
1087 | // declare .param .align <align> .b8 .param<n>[<size>]; |
1088 | unsigned sz = TD->getTypeAllocSize(Ty); |
1089 | SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); |
1090 | SDValue DeclareParamOps[] = { Chain, DAG.getConstant(align, MVT::i32), |
1091 | DAG.getConstant(paramCount, MVT::i32), |
1092 | DAG.getConstant(sz, MVT::i32), InFlag }; |
1093 | Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs, |
1094 | DeclareParamOps); |
1095 | InFlag = Chain.getValue(1); |
1096 | for (unsigned j = 0, je = vtparts.size(); j != je; ++j) { |
1097 | EVT elemtype = vtparts[j]; |
1098 | unsigned ArgAlign = GreatestCommonDivisor64(align, Offsets[j]); |
1099 | if (elemtype.isInteger() && (sz < 8)) |
1100 | sz = 8; |
1101 | SDValue StVal = OutVals[OIdx]; |
1102 | if (elemtype.getSizeInBits() < 16) { |
1103 | StVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, StVal); |
1104 | } |
1105 | SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); |
1106 | SDValue CopyParamOps[] = { Chain, |
1107 | DAG.getConstant(paramCount, MVT::i32), |
1108 | DAG.getConstant(Offsets[j], MVT::i32), |
1109 | StVal, InFlag }; |
1110 | Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl, |
1111 | CopyParamVTs, CopyParamOps, |
1112 | elemtype, MachinePointerInfo(), |
1113 | ArgAlign); |
1114 | InFlag = Chain.getValue(1); |
1115 | ++OIdx; |
1116 | } |
1117 | if (vtparts.size() > 0) |
1118 | --OIdx; |
1119 | ++paramCount; |
1120 | continue; |
1121 | } |
1122 | if (Ty->isVectorTy()) { |
1123 | EVT ObjectVT = getValueType(Ty); |
1124 | unsigned align = getArgumentAlignment(Callee, CS, Ty, paramCount + 1); |
1125 | // declare .param .align <align> .b8 .param<n>[<size>]; |
1126 | unsigned sz = TD->getTypeAllocSize(Ty); |
1127 | SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); |
1128 | SDValue DeclareParamOps[] = { Chain, DAG.getConstant(align, MVT::i32), |
1129 | DAG.getConstant(paramCount, MVT::i32), |
1130 | DAG.getConstant(sz, MVT::i32), InFlag }; |
1131 | Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs, |
1132 | DeclareParamOps); |
1133 | InFlag = Chain.getValue(1); |
1134 | unsigned NumElts = ObjectVT.getVectorNumElements(); |
1135 | EVT EltVT = ObjectVT.getVectorElementType(); |
1136 | EVT MemVT = EltVT; |
1137 | bool NeedExtend = false; |
1138 | if (EltVT.getSizeInBits() < 16) { |
1139 | NeedExtend = true; |
1140 | EltVT = MVT::i16; |
1141 | } |
1142 | |
1143 | // V1 store |
1144 | if (NumElts == 1) { |
1145 | SDValue Elt = OutVals[OIdx++]; |
1146 | if (NeedExtend) |
1147 | Elt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Elt); |
1148 | |
1149 | SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); |
1150 | SDValue CopyParamOps[] = { Chain, |
1151 | DAG.getConstant(paramCount, MVT::i32), |
1152 | DAG.getConstant(0, MVT::i32), Elt, |
1153 | InFlag }; |
1154 | Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl, |
1155 | CopyParamVTs, CopyParamOps, |
1156 | MemVT, MachinePointerInfo()); |
1157 | InFlag = Chain.getValue(1); |
1158 | } else if (NumElts == 2) { |
1159 | SDValue Elt0 = OutVals[OIdx++]; |
1160 | SDValue Elt1 = OutVals[OIdx++]; |
1161 | if (NeedExtend) { |
1162 | Elt0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Elt0); |
1163 | Elt1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Elt1); |
1164 | } |
1165 | |
1166 | SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); |
1167 | SDValue CopyParamOps[] = { Chain, |
1168 | DAG.getConstant(paramCount, MVT::i32), |
1169 | DAG.getConstant(0, MVT::i32), Elt0, Elt1, |
1170 | InFlag }; |
1171 | Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParamV2, dl, |
1172 | CopyParamVTs, CopyParamOps, |
1173 | MemVT, MachinePointerInfo()); |
1174 | InFlag = Chain.getValue(1); |
1175 | } else { |
1176 | unsigned curOffset = 0; |
1177 | // V4 stores |
1178 | // We have at least 4 elements (<3 x Ty> expands to 4 elements) and |
1179 | // the |
1180 | // vector will be expanded to a power of 2 elements, so we know we can |
1181 | // always round up to the next multiple of 4 when creating the vector |
1182 | // stores. |
1183 | // e.g. 4 elem => 1 st.v4 |
1184 | // 6 elem => 2 st.v4 |
1185 | // 8 elem => 2 st.v4 |
1186 | // 11 elem => 3 st.v4 |
1187 | unsigned VecSize = 4; |
1188 | if (EltVT.getSizeInBits() == 64) |
1189 | VecSize = 2; |
1190 | |
1191 | // This is potentially only part of a vector, so assume all elements |
1192 | // are packed together. |
1193 | unsigned PerStoreOffset = MemVT.getStoreSizeInBits() / 8 * VecSize; |
1194 | |
1195 | for (unsigned i = 0; i < NumElts; i += VecSize) { |
1196 | // Get values |
1197 | SDValue StoreVal; |
1198 | SmallVector<SDValue, 8> Ops; |
1199 | Ops.push_back(Chain); |
1200 | Ops.push_back(DAG.getConstant(paramCount, MVT::i32)); |
1201 | Ops.push_back(DAG.getConstant(curOffset, MVT::i32)); |
1202 | |
1203 | unsigned Opc = NVPTXISD::StoreParamV2; |
1204 | |
1205 | StoreVal = OutVals[OIdx++]; |
1206 | if (NeedExtend) |
1207 | StoreVal = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal); |
1208 | Ops.push_back(StoreVal); |
1209 | |
1210 | if (i + 1 < NumElts) { |
1211 | StoreVal = OutVals[OIdx++]; |
1212 | if (NeedExtend) |
1213 | StoreVal = |
1214 | DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal); |
1215 | } else { |
1216 | StoreVal = DAG.getUNDEF(EltVT); |
1217 | } |
1218 | Ops.push_back(StoreVal); |
1219 | |
1220 | if (VecSize == 4) { |
1221 | Opc = NVPTXISD::StoreParamV4; |
1222 | if (i + 2 < NumElts) { |
1223 | StoreVal = OutVals[OIdx++]; |
1224 | if (NeedExtend) |
1225 | StoreVal = |
1226 | DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal); |
1227 | } else { |
1228 | StoreVal = DAG.getUNDEF(EltVT); |
1229 | } |
1230 | Ops.push_back(StoreVal); |
1231 | |
1232 | if (i + 3 < NumElts) { |
1233 | StoreVal = OutVals[OIdx++]; |
1234 | if (NeedExtend) |
1235 | StoreVal = |
1236 | DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal); |
1237 | } else { |
1238 | StoreVal = DAG.getUNDEF(EltVT); |
1239 | } |
1240 | Ops.push_back(StoreVal); |
1241 | } |
1242 | |
1243 | Ops.push_back(InFlag); |
1244 | |
1245 | SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); |
1246 | Chain = DAG.getMemIntrinsicNode(Opc, dl, CopyParamVTs, Ops, |
1247 | MemVT, MachinePointerInfo()); |
1248 | InFlag = Chain.getValue(1); |
1249 | curOffset += PerStoreOffset; |
1250 | } |
1251 | } |
1252 | ++paramCount; |
1253 | --OIdx; |
1254 | continue; |
1255 | } |
1256 | // Plain scalar |
1257 | // for ABI, declare .param .b<size> .param<n>; |
1258 | unsigned sz = VT.getSizeInBits(); |
1259 | bool needExtend = false; |
1260 | if (VT.isInteger()) { |
1261 | if (sz < 16) |
1262 | needExtend = true; |
1263 | if (sz < 32) |
1264 | sz = 32; |
1265 | } |
1266 | SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); |
1267 | SDValue DeclareParamOps[] = { Chain, |
1268 | DAG.getConstant(paramCount, MVT::i32), |
1269 | DAG.getConstant(sz, MVT::i32), |
1270 | DAG.getConstant(0, MVT::i32), InFlag }; |
1271 | Chain = DAG.getNode(NVPTXISD::DeclareScalarParam, dl, DeclareParamVTs, |
1272 | DeclareParamOps); |
1273 | InFlag = Chain.getValue(1); |
1274 | SDValue OutV = OutVals[OIdx]; |
1275 | if (needExtend) { |
1276 | // zext/sext i1 to i16 |
1277 | unsigned opc = ISD::ZERO_EXTEND; |
1278 | if (Outs[OIdx].Flags.isSExt()) |
1279 | opc = ISD::SIGN_EXTEND; |
1280 | OutV = DAG.getNode(opc, dl, MVT::i16, OutV); |
1281 | } |
1282 | SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); |
1283 | SDValue CopyParamOps[] = { Chain, DAG.getConstant(paramCount, MVT::i32), |
1284 | DAG.getConstant(0, MVT::i32), OutV, InFlag }; |
1285 | |
1286 | unsigned opcode = NVPTXISD::StoreParam; |
1287 | if (Outs[OIdx].Flags.isZExt()) |
1288 | opcode = NVPTXISD::StoreParamU32; |
1289 | else if (Outs[OIdx].Flags.isSExt()) |
1290 | opcode = NVPTXISD::StoreParamS32; |
1291 | Chain = DAG.getMemIntrinsicNode(opcode, dl, CopyParamVTs, CopyParamOps, |
1292 | VT, MachinePointerInfo()); |
1293 | |
1294 | InFlag = Chain.getValue(1); |
1295 | ++paramCount; |
1296 | continue; |
1297 | } |
1298 | // struct or vector |
1299 | SmallVector<EVT, 16> vtparts; |
1300 | SmallVector<uint64_t, 16> Offsets; |
1301 | const PointerType *PTy = dyn_cast<PointerType>(Args[i].Ty); |
1302 | assert(PTy && "Type of a byval parameter should be pointer")((PTy && "Type of a byval parameter should be pointer" ) ? static_cast<void> (0) : __assert_fail ("PTy && \"Type of a byval parameter should be pointer\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn216889/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 1302, __PRETTY_FUNCTION__)); |
1303 | ComputePTXValueVTs(*this, PTy->getElementType(), vtparts, &Offsets, 0); |
1304 | |
1305 | // declare .param .align <align> .b8 .param<n>[<size>]; |
1306 | unsigned sz = Outs[OIdx].Flags.getByValSize(); |
1307 | SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); |
1308 | unsigned ArgAlign = Outs[OIdx].Flags.getByValAlign(); |
1309 | // The ByValAlign in the Outs[OIdx].Flags is alway set at this point, |
1310 | // so we don't need to worry about natural alignment or not. |
1311 | // See TargetLowering::LowerCallTo(). |
1312 | SDValue DeclareParamOps[] = { |
1313 | Chain, DAG.getConstant(Outs[OIdx].Flags.getByValAlign(), MVT::i32), |
1314 | DAG.getConstant(paramCount, MVT::i32), DAG.getConstant(sz, MVT::i32), |
1315 | InFlag |
1316 | }; |
1317 | Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs, |
1318 | DeclareParamOps); |
1319 | InFlag = Chain.getValue(1); |
1320 | for (unsigned j = 0, je = vtparts.size(); j != je; ++j) { |
1321 | EVT elemtype = vtparts[j]; |
1322 | int curOffset = Offsets[j]; |
1323 | unsigned PartAlign = GreatestCommonDivisor64(ArgAlign, curOffset); |
1324 | SDValue srcAddr = |
1325 | DAG.getNode(ISD::ADD, dl, getPointerTy(), OutVals[OIdx], |
1326 | DAG.getConstant(curOffset, getPointerTy())); |
1327 | SDValue theVal = DAG.getLoad(elemtype, dl, tempChain, srcAddr, |
1328 | MachinePointerInfo(), false, false, false, |
1329 | PartAlign); |
1330 | if (elemtype.getSizeInBits() < 16) { |
1331 | theVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, theVal); |
1332 | } |
1333 | SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); |
1334 | SDValue CopyParamOps[] = { Chain, DAG.getConstant(paramCount, MVT::i32), |
1335 | DAG.getConstant(curOffset, MVT::i32), theVal, |
1336 | InFlag }; |
1337 | Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl, CopyParamVTs, |
1338 | CopyParamOps, elemtype, |
1339 | MachinePointerInfo()); |
1340 | |
1341 | InFlag = Chain.getValue(1); |
1342 | } |
1343 | ++paramCount; |
1344 | } |
1345 | |
1346 | GlobalAddressSDNode *Func = dyn_cast<GlobalAddressSDNode>(Callee.getNode()); |
1347 | unsigned retAlignment = 0; |
1348 | |
1349 | // Handle Result |
1350 | if (Ins.size() > 0) { |
1351 | SmallVector<EVT, 16> resvtparts; |
1352 | ComputeValueVTs(*this, retTy, resvtparts); |
1353 | |
1354 | // Declare |
1355 | // .param .align 16 .b8 retval0[<size-in-bytes>], or |
1356 | // .param .b<size-in-bits> retval0 |
1357 | unsigned resultsz = TD->getTypeAllocSizeInBits(retTy); |
1358 | if (retTy->isSingleValueType()) { |
1359 | // Scalar needs to be at least 32bit wide |
1360 | if (resultsz < 32) |
1361 | resultsz = 32; |
1362 | SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue); |
1363 | SDValue DeclareRetOps[] = { Chain, DAG.getConstant(1, MVT::i32), |
1364 | DAG.getConstant(resultsz, MVT::i32), |
1365 | DAG.getConstant(0, MVT::i32), InFlag }; |
1366 | Chain = DAG.getNode(NVPTXISD::DeclareRet, dl, DeclareRetVTs, |
1367 | DeclareRetOps); |
1368 | InFlag = Chain.getValue(1); |
1369 | } else { |
1370 | retAlignment = getArgumentAlignment(Callee, CS, retTy, 0); |
1371 | SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue); |
1372 | SDValue DeclareRetOps[] = { Chain, |
1373 | DAG.getConstant(retAlignment, MVT::i32), |
1374 | DAG.getConstant(resultsz / 8, MVT::i32), |
1375 | DAG.getConstant(0, MVT::i32), InFlag }; |
1376 | Chain = DAG.getNode(NVPTXISD::DeclareRetParam, dl, DeclareRetVTs, |
1377 | DeclareRetOps); |
1378 | InFlag = Chain.getValue(1); |
1379 | } |
1380 | } |
1381 | |
1382 | if (!Func) { |
1383 | // This is indirect function call case : PTX requires a prototype of the |
1384 | // form |
1385 | // proto_0 : .callprototype(.param .b32 _) _ (.param .b32 _); |
1386 | // to be emitted, and the label has to used as the last arg of call |
1387 | // instruction. |
1388 | // The prototype is embedded in a string and put as the operand for a |
1389 | // CallPrototype SDNode which will print out to the value of the string. |
1390 | SDVTList ProtoVTs = DAG.getVTList(MVT::Other, MVT::Glue); |
1391 | std::string Proto = getPrototype(retTy, Args, Outs, retAlignment, CS); |
1392 | const char *ProtoStr = |
1393 | nvTM->getManagedStrPool()->getManagedString(Proto.c_str())->c_str(); |
1394 | SDValue ProtoOps[] = { |
1395 | Chain, DAG.getTargetExternalSymbol(ProtoStr, MVT::i32), InFlag, |
1396 | }; |
1397 | Chain = DAG.getNode(NVPTXISD::CallPrototype, dl, ProtoVTs, ProtoOps); |
1398 | InFlag = Chain.getValue(1); |
1399 | } |
1400 | // Op to just print "call" |
1401 | SDVTList PrintCallVTs = DAG.getVTList(MVT::Other, MVT::Glue); |
1402 | SDValue PrintCallOps[] = { |
1403 | Chain, DAG.getConstant((Ins.size() == 0) ? 0 : 1, MVT::i32), InFlag |
1404 | }; |
1405 | Chain = DAG.getNode(Func ? (NVPTXISD::PrintCallUni) : (NVPTXISD::PrintCall), |
1406 | dl, PrintCallVTs, PrintCallOps); |
1407 | InFlag = Chain.getValue(1); |
1408 | |
1409 | // Ops to print out the function name |
1410 | SDVTList CallVoidVTs = DAG.getVTList(MVT::Other, MVT::Glue); |
1411 | SDValue CallVoidOps[] = { Chain, Callee, InFlag }; |
1412 | Chain = DAG.getNode(NVPTXISD::CallVoid, dl, CallVoidVTs, CallVoidOps); |
1413 | InFlag = Chain.getValue(1); |
1414 | |
1415 | // Ops to print out the param list |
1416 | SDVTList CallArgBeginVTs = DAG.getVTList(MVT::Other, MVT::Glue); |
1417 | SDValue CallArgBeginOps[] = { Chain, InFlag }; |
1418 | Chain = DAG.getNode(NVPTXISD::CallArgBegin, dl, CallArgBeginVTs, |
1419 | CallArgBeginOps); |
1420 | InFlag = Chain.getValue(1); |
1421 | |
1422 | for (unsigned i = 0, e = paramCount; i != e; ++i) { |
1423 | unsigned opcode; |
1424 | if (i == (e - 1)) |
1425 | opcode = NVPTXISD::LastCallArg; |
1426 | else |
1427 | opcode = NVPTXISD::CallArg; |
1428 | SDVTList CallArgVTs = DAG.getVTList(MVT::Other, MVT::Glue); |
1429 | SDValue CallArgOps[] = { Chain, DAG.getConstant(1, MVT::i32), |
1430 | DAG.getConstant(i, MVT::i32), InFlag }; |
1431 | Chain = DAG.getNode(opcode, dl, CallArgVTs, CallArgOps); |
1432 | InFlag = Chain.getValue(1); |
1433 | } |
1434 | SDVTList CallArgEndVTs = DAG.getVTList(MVT::Other, MVT::Glue); |
1435 | SDValue CallArgEndOps[] = { Chain, DAG.getConstant(Func ? 1 : 0, MVT::i32), |
1436 | InFlag }; |
1437 | Chain = DAG.getNode(NVPTXISD::CallArgEnd, dl, CallArgEndVTs, CallArgEndOps); |
1438 | InFlag = Chain.getValue(1); |
1439 | |
1440 | if (!Func) { |
1441 | SDVTList PrototypeVTs = DAG.getVTList(MVT::Other, MVT::Glue); |
1442 | SDValue PrototypeOps[] = { Chain, DAG.getConstant(uniqueCallSite, MVT::i32), |
1443 | InFlag }; |
1444 | Chain = DAG.getNode(NVPTXISD::Prototype, dl, PrototypeVTs, PrototypeOps); |
1445 | InFlag = Chain.getValue(1); |
1446 | } |
1447 | |
1448 | // Generate loads from param memory/moves from registers for result |
1449 | if (Ins.size() > 0) { |
1450 | if (retTy && retTy->isVectorTy()) { |
1451 | EVT ObjectVT = getValueType(retTy); |
1452 | unsigned NumElts = ObjectVT.getVectorNumElements(); |
1453 | EVT EltVT = ObjectVT.getVectorElementType(); |
1454 | assert(nvTM->getSubtargetImpl()->getTargetLowering()->getNumRegisters(((nvTM->getSubtargetImpl()->getTargetLowering()->getNumRegisters ( F->getContext(), ObjectVT) == NumElts && "Vector was not scalarized" ) ? static_cast<void> (0) : __assert_fail ("nvTM->getSubtargetImpl()->getTargetLowering()->getNumRegisters( F->getContext(), ObjectVT) == NumElts && \"Vector was not scalarized\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn216889/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 1456, __PRETTY_FUNCTION__)) |
1455 | F->getContext(), ObjectVT) == NumElts &&((nvTM->getSubtargetImpl()->getTargetLowering()->getNumRegisters ( F->getContext(), ObjectVT) == NumElts && "Vector was not scalarized" ) ? static_cast<void> (0) : __assert_fail ("nvTM->getSubtargetImpl()->getTargetLowering()->getNumRegisters( F->getContext(), ObjectVT) == NumElts && \"Vector was not scalarized\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn216889/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 1456, __PRETTY_FUNCTION__)) |
1456 | "Vector was not scalarized")((nvTM->getSubtargetImpl()->getTargetLowering()->getNumRegisters ( F->getContext(), ObjectVT) == NumElts && "Vector was not scalarized" ) ? static_cast<void> (0) : __assert_fail ("nvTM->getSubtargetImpl()->getTargetLowering()->getNumRegisters( F->getContext(), ObjectVT) == NumElts && \"Vector was not scalarized\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn216889/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 1456, __PRETTY_FUNCTION__)); |
1457 | unsigned sz = EltVT.getSizeInBits(); |
1458 | bool needTruncate = sz < 8 ? true : false; |
1459 | |
1460 | if (NumElts == 1) { |
1461 | // Just a simple load |
1462 | SmallVector<EVT, 4> LoadRetVTs; |
1463 | if (EltVT == MVT::i1 || EltVT == MVT::i8) { |
1464 | // If loading i1/i8 result, generate |
1465 | // load.b8 i16 |
1466 | // if i1 |
1467 | // trunc i16 to i1 |
1468 | LoadRetVTs.push_back(MVT::i16); |
1469 | } else |
1470 | LoadRetVTs.push_back(EltVT); |
1471 | LoadRetVTs.push_back(MVT::Other); |
1472 | LoadRetVTs.push_back(MVT::Glue); |
1473 | SmallVector<SDValue, 4> LoadRetOps; |
1474 | LoadRetOps.push_back(Chain); |
1475 | LoadRetOps.push_back(DAG.getConstant(1, MVT::i32)); |
1476 | LoadRetOps.push_back(DAG.getConstant(0, MVT::i32)); |
1477 | LoadRetOps.push_back(InFlag); |
1478 | SDValue retval = DAG.getMemIntrinsicNode( |
1479 | NVPTXISD::LoadParam, dl, |
1480 | DAG.getVTList(LoadRetVTs), LoadRetOps, EltVT, MachinePointerInfo()); |
1481 | Chain = retval.getValue(1); |
1482 | InFlag = retval.getValue(2); |
1483 | SDValue Ret0 = retval; |
1484 | if (needTruncate) |
1485 | Ret0 = DAG.getNode(ISD::TRUNCATE, dl, EltVT, Ret0); |
1486 | InVals.push_back(Ret0); |
1487 | } else if (NumElts == 2) { |
1488 | // LoadV2 |
1489 | SmallVector<EVT, 4> LoadRetVTs; |
1490 | if (EltVT == MVT::i1 || EltVT == MVT::i8) { |
1491 | // If loading i1/i8 result, generate |
1492 | // load.b8 i16 |
1493 | // if i1 |
1494 | // trunc i16 to i1 |
1495 | LoadRetVTs.push_back(MVT::i16); |
1496 | LoadRetVTs.push_back(MVT::i16); |
1497 | } else { |
1498 | LoadRetVTs.push_back(EltVT); |
1499 | LoadRetVTs.push_back(EltVT); |
1500 | } |
1501 | LoadRetVTs.push_back(MVT::Other); |
1502 | LoadRetVTs.push_back(MVT::Glue); |
1503 | SmallVector<SDValue, 4> LoadRetOps; |
1504 | LoadRetOps.push_back(Chain); |
1505 | LoadRetOps.push_back(DAG.getConstant(1, MVT::i32)); |
1506 | LoadRetOps.push_back(DAG.getConstant(0, MVT::i32)); |
1507 | LoadRetOps.push_back(InFlag); |
1508 | SDValue retval = DAG.getMemIntrinsicNode( |
1509 | NVPTXISD::LoadParamV2, dl, |
1510 | DAG.getVTList(LoadRetVTs), LoadRetOps, EltVT, MachinePointerInfo()); |
1511 | Chain = retval.getValue(2); |
1512 | InFlag = retval.getValue(3); |
1513 | SDValue Ret0 = retval.getValue(0); |
1514 | SDValue Ret1 = retval.getValue(1); |
1515 | if (needTruncate) { |
1516 | Ret0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ret0); |
1517 | InVals.push_back(Ret0); |
1518 | Ret1 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ret1); |
1519 | InVals.push_back(Ret1); |
1520 | } else { |
1521 | InVals.push_back(Ret0); |
1522 | InVals.push_back(Ret1); |
1523 | } |
1524 | } else { |
1525 | // Split into N LoadV4 |
1526 | unsigned Ofst = 0; |
1527 | unsigned VecSize = 4; |
1528 | unsigned Opc = NVPTXISD::LoadParamV4; |
1529 | if (EltVT.getSizeInBits() == 64) { |
1530 | VecSize = 2; |
1531 | Opc = NVPTXISD::LoadParamV2; |
1532 | } |
1533 | EVT VecVT = EVT::getVectorVT(F->getContext(), EltVT, VecSize); |
1534 | for (unsigned i = 0; i < NumElts; i += VecSize) { |
1535 | SmallVector<EVT, 8> LoadRetVTs; |
1536 | if (EltVT == MVT::i1 || EltVT == MVT::i8) { |
1537 | // If loading i1/i8 result, generate |
1538 | // load.b8 i16 |
1539 | // if i1 |
1540 | // trunc i16 to i1 |
1541 | for (unsigned j = 0; j < VecSize; ++j) |
1542 | LoadRetVTs.push_back(MVT::i16); |
1543 | } else { |
1544 | for (unsigned j = 0; j < VecSize; ++j) |
1545 | LoadRetVTs.push_back(EltVT); |
1546 | } |
1547 | LoadRetVTs.push_back(MVT::Other); |
1548 | LoadRetVTs.push_back(MVT::Glue); |
1549 | SmallVector<SDValue, 4> LoadRetOps; |
1550 | LoadRetOps.push_back(Chain); |
1551 | LoadRetOps.push_back(DAG.getConstant(1, MVT::i32)); |
1552 | LoadRetOps.push_back(DAG.getConstant(Ofst, MVT::i32)); |
1553 | LoadRetOps.push_back(InFlag); |
1554 | SDValue retval = DAG.getMemIntrinsicNode( |
1555 | Opc, dl, DAG.getVTList(LoadRetVTs), |
1556 | LoadRetOps, EltVT, MachinePointerInfo()); |
1557 | if (VecSize == 2) { |
1558 | Chain = retval.getValue(2); |
1559 | InFlag = retval.getValue(3); |
1560 | } else { |
1561 | Chain = retval.getValue(4); |
1562 | InFlag = retval.getValue(5); |
1563 | } |
1564 | |
1565 | for (unsigned j = 0; j < VecSize; ++j) { |
1566 | if (i + j >= NumElts) |
1567 | break; |
1568 | SDValue Elt = retval.getValue(j); |
1569 | if (needTruncate) |
1570 | Elt = DAG.getNode(ISD::TRUNCATE, dl, EltVT, Elt); |
1571 | InVals.push_back(Elt); |
1572 | } |
1573 | Ofst += TD->getTypeAllocSize(VecVT.getTypeForEVT(F->getContext())); |
1574 | } |
1575 | } |
1576 | } else { |
1577 | SmallVector<EVT, 16> VTs; |
1578 | SmallVector<uint64_t, 16> Offsets; |
1579 | ComputePTXValueVTs(*this, retTy, VTs, &Offsets, 0); |
1580 | assert(VTs.size() == Ins.size() && "Bad value decomposition")((VTs.size() == Ins.size() && "Bad value decomposition" ) ? static_cast<void> (0) : __assert_fail ("VTs.size() == Ins.size() && \"Bad value decomposition\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn216889/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 1580, __PRETTY_FUNCTION__)); |
1581 | unsigned RetAlign = getArgumentAlignment(Callee, CS, retTy, 0); |
1582 | for (unsigned i = 0, e = Ins.size(); i != e; ++i) { |
1583 | unsigned sz = VTs[i].getSizeInBits(); |
1584 | unsigned AlignI = GreatestCommonDivisor64(RetAlign, Offsets[i]); |
1585 | bool needTruncate = sz < 8 ? true : false; |
1586 | if (VTs[i].isInteger() && (sz < 8)) |
1587 | sz = 8; |
1588 | |
1589 | SmallVector<EVT, 4> LoadRetVTs; |
1590 | EVT TheLoadType = VTs[i]; |
1591 | if (retTy->isIntegerTy() && |
1592 | TD->getTypeAllocSizeInBits(retTy) < 32) { |
1593 | // This is for integer types only, and specifically not for |
1594 | // aggregates. |
1595 | LoadRetVTs.push_back(MVT::i32); |
1596 | TheLoadType = MVT::i32; |
1597 | } else if (sz < 16) { |
1598 | // If loading i1/i8 result, generate |
1599 | // load i8 (-> i16) |
1600 | // trunc i16 to i1/i8 |
1601 | LoadRetVTs.push_back(MVT::i16); |
1602 | } else |
1603 | LoadRetVTs.push_back(Ins[i].VT); |
1604 | LoadRetVTs.push_back(MVT::Other); |
1605 | LoadRetVTs.push_back(MVT::Glue); |
1606 | |
1607 | SmallVector<SDValue, 4> LoadRetOps; |
1608 | LoadRetOps.push_back(Chain); |
1609 | LoadRetOps.push_back(DAG.getConstant(1, MVT::i32)); |
1610 | LoadRetOps.push_back(DAG.getConstant(Offsets[i], MVT::i32)); |
1611 | LoadRetOps.push_back(InFlag); |
1612 | SDValue retval = DAG.getMemIntrinsicNode( |
1613 | NVPTXISD::LoadParam, dl, |
1614 | DAG.getVTList(LoadRetVTs), LoadRetOps, |
1615 | TheLoadType, MachinePointerInfo(), AlignI); |
1616 | Chain = retval.getValue(1); |
1617 | InFlag = retval.getValue(2); |
1618 | SDValue Ret0 = retval.getValue(0); |
1619 | if (needTruncate) |
1620 | Ret0 = DAG.getNode(ISD::TRUNCATE, dl, Ins[i].VT, Ret0); |
1621 | InVals.push_back(Ret0); |
1622 | } |
1623 | } |
1624 | } |
1625 | |
1626 | Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(uniqueCallSite, true), |
1627 | DAG.getIntPtrConstant(uniqueCallSite + 1, true), |
1628 | InFlag, dl); |
1629 | uniqueCallSite++; |
1630 | |
1631 | // set isTailCall to false for now, until we figure out how to express |
1632 | // tail call optimization in PTX |
1633 | isTailCall = false; |
1634 | return Chain; |
1635 | } |
1636 | |
1637 | // By default CONCAT_VECTORS is lowered by ExpandVectorBuildThroughStack() |
1638 | // (see LegalizeDAG.cpp). This is slow and uses local memory. |
1639 | // We use extract/insert/build vector just as what LegalizeOp() does in llvm 2.5 |
1640 | SDValue |
1641 | NVPTXTargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const { |
1642 | SDNode *Node = Op.getNode(); |
1643 | SDLoc dl(Node); |
1644 | SmallVector<SDValue, 8> Ops; |
1645 | unsigned NumOperands = Node->getNumOperands(); |
1646 | for (unsigned i = 0; i < NumOperands; ++i) { |
1647 | SDValue SubOp = Node->getOperand(i); |
1648 | EVT VVT = SubOp.getNode()->getValueType(0); |
1649 | EVT EltVT = VVT.getVectorElementType(); |
1650 | unsigned NumSubElem = VVT.getVectorNumElements(); |
1651 | for (unsigned j = 0; j < NumSubElem; ++j) { |
1652 | Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, SubOp, |
1653 | DAG.getIntPtrConstant(j))); |
1654 | } |
1655 | } |
1656 | return DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0), Ops); |
1657 | } |
1658 | |
1659 | /// LowerShiftRightParts - Lower SRL_PARTS, SRA_PARTS, which |
1660 | /// 1) returns two i32 values and take a 2 x i32 value to shift plus a shift |
1661 | /// amount, or |
1662 | /// 2) returns two i64 values and take a 2 x i64 value to shift plus a shift |
1663 | /// amount. |
1664 | SDValue NVPTXTargetLowering::LowerShiftRightParts(SDValue Op, |
1665 | SelectionDAG &DAG) const { |
1666 | assert(Op.getNumOperands() == 3 && "Not a double-shift!")((Op.getNumOperands() == 3 && "Not a double-shift!") ? static_cast<void> (0) : __assert_fail ("Op.getNumOperands() == 3 && \"Not a double-shift!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn216889/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 1666, __PRETTY_FUNCTION__)); |
1667 | assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS)((Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD:: SRL_PARTS) ? static_cast<void> (0) : __assert_fail ("Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS" , "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn216889/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 1667, __PRETTY_FUNCTION__)); |
1668 | |
1669 | EVT VT = Op.getValueType(); |
1670 | unsigned VTBits = VT.getSizeInBits(); |
1671 | SDLoc dl(Op); |
1672 | SDValue ShOpLo = Op.getOperand(0); |
1673 | SDValue ShOpHi = Op.getOperand(1); |
1674 | SDValue ShAmt = Op.getOperand(2); |
1675 | unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL; |
1676 | |
1677 | if (VTBits == 32 && nvptxSubtarget.getSmVersion() >= 35) { |
1678 | |
1679 | // For 32bit and sm35, we can use the funnel shift 'shf' instruction. |
1680 | // {dHi, dLo} = {aHi, aLo} >> Amt |
1681 | // dHi = aHi >> Amt |
1682 | // dLo = shf.r.clamp aLo, aHi, Amt |
1683 | |
1684 | SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt); |
1685 | SDValue Lo = DAG.getNode(NVPTXISD::FUN_SHFR_CLAMP, dl, VT, ShOpLo, ShOpHi, |
1686 | ShAmt); |
1687 | |
1688 | SDValue Ops[2] = { Lo, Hi }; |
1689 | return DAG.getMergeValues(Ops, dl); |
1690 | } |
1691 | else { |
1692 | |
1693 | // {dHi, dLo} = {aHi, aLo} >> Amt |
1694 | // - if (Amt>=size) then |
1695 | // dLo = aHi >> (Amt-size) |
1696 | // dHi = aHi >> Amt (this is either all 0 or all 1) |
1697 | // else |
1698 | // dLo = (aLo >>logic Amt) | (aHi << (size-Amt)) |
1699 | // dHi = aHi >> Amt |
1700 | |
1701 | SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, |
1702 | DAG.getConstant(VTBits, MVT::i32), ShAmt); |
1703 | SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt); |
1704 | SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt, |
1705 | DAG.getConstant(VTBits, MVT::i32)); |
1706 | SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt); |
1707 | SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); |
1708 | SDValue TrueVal = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt); |
1709 | |
1710 | SDValue Cmp = DAG.getSetCC(dl, MVT::i1, ShAmt, |
1711 | DAG.getConstant(VTBits, MVT::i32), ISD::SETGE); |
1712 | SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt); |
1713 | SDValue Lo = DAG.getNode(ISD::SELECT, dl, VT, Cmp, TrueVal, FalseVal); |
1714 | |
1715 | SDValue Ops[2] = { Lo, Hi }; |
1716 | return DAG.getMergeValues(Ops, dl); |
1717 | } |
1718 | } |
1719 | |
1720 | /// LowerShiftLeftParts - Lower SHL_PARTS, which |
1721 | /// 1) returns two i32 values and take a 2 x i32 value to shift plus a shift |
1722 | /// amount, or |
1723 | /// 2) returns two i64 values and take a 2 x i64 value to shift plus a shift |
1724 | /// amount. |
1725 | SDValue NVPTXTargetLowering::LowerShiftLeftParts(SDValue Op, |
1726 | SelectionDAG &DAG) const { |
1727 | assert(Op.getNumOperands() == 3 && "Not a double-shift!")((Op.getNumOperands() == 3 && "Not a double-shift!") ? static_cast<void> (0) : __assert_fail ("Op.getNumOperands() == 3 && \"Not a double-shift!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn216889/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 1727, __PRETTY_FUNCTION__)); |
1728 | assert(Op.getOpcode() == ISD::SHL_PARTS)((Op.getOpcode() == ISD::SHL_PARTS) ? static_cast<void> (0) : __assert_fail ("Op.getOpcode() == ISD::SHL_PARTS", "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn216889/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 1728, __PRETTY_FUNCTION__)); |
1729 | |
1730 | EVT VT = Op.getValueType(); |
1731 | unsigned VTBits = VT.getSizeInBits(); |
1732 | SDLoc dl(Op); |
1733 | SDValue ShOpLo = Op.getOperand(0); |
1734 | SDValue ShOpHi = Op.getOperand(1); |
1735 | SDValue ShAmt = Op.getOperand(2); |
1736 | |
1737 | if (VTBits == 32 && nvptxSubtarget.getSmVersion() >= 35) { |
1738 | |
1739 | // For 32bit and sm35, we can use the funnel shift 'shf' instruction. |
1740 | // {dHi, dLo} = {aHi, aLo} << Amt |
1741 | // dHi = shf.l.clamp aLo, aHi, Amt |
1742 | // dLo = aLo << Amt |
1743 | |
1744 | SDValue Hi = DAG.getNode(NVPTXISD::FUN_SHFL_CLAMP, dl, VT, ShOpLo, ShOpHi, |
1745 | ShAmt); |
1746 | SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt); |
1747 | |
1748 | SDValue Ops[2] = { Lo, Hi }; |
1749 | return DAG.getMergeValues(Ops, dl); |
1750 | } |
1751 | else { |
1752 | |
1753 | // {dHi, dLo} = {aHi, aLo} << Amt |
1754 | // - if (Amt>=size) then |
1755 | // dLo = aLo << Amt (all 0) |
1756 | // dLo = aLo << (Amt-size) |
1757 | // else |
1758 | // dLo = aLo << Amt |
1759 | // dHi = (aHi << Amt) | (aLo >> (size-Amt)) |
1760 | |
1761 | SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, |
1762 | DAG.getConstant(VTBits, MVT::i32), ShAmt); |
1763 | SDValue Tmp1 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt); |
1764 | SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt, |
1765 | DAG.getConstant(VTBits, MVT::i32)); |
1766 | SDValue Tmp2 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt); |
1767 | SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); |
1768 | SDValue TrueVal = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt); |
1769 | |
1770 | SDValue Cmp = DAG.getSetCC(dl, MVT::i1, ShAmt, |
1771 | DAG.getConstant(VTBits, MVT::i32), ISD::SETGE); |
1772 | SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt); |
1773 | SDValue Hi = DAG.getNode(ISD::SELECT, dl, VT, Cmp, TrueVal, FalseVal); |
1774 | |
1775 | SDValue Ops[2] = { Lo, Hi }; |
1776 | return DAG.getMergeValues(Ops, dl); |
1777 | } |
1778 | } |
1779 | |
1780 | SDValue |
1781 | NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { |
1782 | switch (Op.getOpcode()) { |
1783 | case ISD::RETURNADDR: |
1784 | return SDValue(); |
1785 | case ISD::FRAMEADDR: |
1786 | return SDValue(); |
1787 | case ISD::GlobalAddress: |
1788 | return LowerGlobalAddress(Op, DAG); |
1789 | case ISD::INTRINSIC_W_CHAIN: |
1790 | return Op; |
1791 | case ISD::BUILD_VECTOR: |
1792 | case ISD::EXTRACT_SUBVECTOR: |
1793 | return Op; |
1794 | case ISD::CONCAT_VECTORS: |
1795 | return LowerCONCAT_VECTORS(Op, DAG); |
1796 | case ISD::STORE: |
1797 | return LowerSTORE(Op, DAG); |
1798 | case ISD::LOAD: |
1799 | return LowerLOAD(Op, DAG); |
1800 | case ISD::SHL_PARTS: |
1801 | return LowerShiftLeftParts(Op, DAG); |
1802 | case ISD::SRA_PARTS: |
1803 | case ISD::SRL_PARTS: |
1804 | return LowerShiftRightParts(Op, DAG); |
1805 | default: |
1806 | llvm_unreachable("Custom lowering not defined for operation")::llvm::llvm_unreachable_internal("Custom lowering not defined for operation" , "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn216889/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 1806); |
1807 | } |
1808 | } |
1809 | |
1810 | SDValue NVPTXTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { |
1811 | if (Op.getValueType() == MVT::i1) |
1812 | return LowerLOADi1(Op, DAG); |
1813 | else |
1814 | return SDValue(); |
1815 | } |
1816 | |
1817 | // v = ld i1* addr |
1818 | // => |
1819 | // v1 = ld i8* addr (-> i16) |
1820 | // v = trunc i16 to i1 |
1821 | SDValue NVPTXTargetLowering::LowerLOADi1(SDValue Op, SelectionDAG &DAG) const { |
1822 | SDNode *Node = Op.getNode(); |
1823 | LoadSDNode *LD = cast<LoadSDNode>(Node); |
1824 | SDLoc dl(Node); |
1825 | assert(LD->getExtensionType() == ISD::NON_EXTLOAD)((LD->getExtensionType() == ISD::NON_EXTLOAD) ? static_cast <void> (0) : __assert_fail ("LD->getExtensionType() == ISD::NON_EXTLOAD" , "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn216889/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 1825, __PRETTY_FUNCTION__)); |
1826 | assert(Node->getValueType(0) == MVT::i1 &&((Node->getValueType(0) == MVT::i1 && "Custom lowering for i1 load only" ) ? static_cast<void> (0) : __assert_fail ("Node->getValueType(0) == MVT::i1 && \"Custom lowering for i1 load only\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn216889/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 1827, __PRETTY_FUNCTION__)) |
1827 | "Custom lowering for i1 load only")((Node->getValueType(0) == MVT::i1 && "Custom lowering for i1 load only" ) ? static_cast<void> (0) : __assert_fail ("Node->getValueType(0) == MVT::i1 && \"Custom lowering for i1 load only\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn216889/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 1827, __PRETTY_FUNCTION__)); |
1828 | SDValue newLD = |
1829 | DAG.getLoad(MVT::i16, dl, LD->getChain(), LD->getBasePtr(), |
1830 | LD->getPointerInfo(), LD->isVolatile(), LD->isNonTemporal(), |
1831 | LD->isInvariant(), LD->getAlignment()); |
1832 | SDValue result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, newLD); |
1833 | // The legalizer (the caller) is expecting two values from the legalized |
1834 | // load, so we build a MergeValues node for it. See ExpandUnalignedLoad() |
1835 | // in LegalizeDAG.cpp which also uses MergeValues. |
1836 | SDValue Ops[] = { result, LD->getChain() }; |
1837 | return DAG.getMergeValues(Ops, dl); |
1838 | } |
1839 | |
1840 | SDValue NVPTXTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { |
1841 | EVT ValVT = Op.getOperand(1).getValueType(); |
1842 | if (ValVT == MVT::i1) |
1843 | return LowerSTOREi1(Op, DAG); |
1844 | else if (ValVT.isVector()) |
1845 | return LowerSTOREVector(Op, DAG); |
1846 | else |
1847 | return SDValue(); |
1848 | } |
1849 | |
1850 | SDValue |
1851 | NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const { |
1852 | SDNode *N = Op.getNode(); |
1853 | SDValue Val = N->getOperand(1); |
1854 | SDLoc DL(N); |
1855 | EVT ValVT = Val.getValueType(); |
1856 | |
1857 | if (ValVT.isVector()) { |
1858 | // We only handle "native" vector sizes for now, e.g. <4 x double> is not |
1859 | // legal. We can (and should) split that into 2 stores of <2 x double> here |
1860 | // but I'm leaving that as a TODO for now. |
1861 | if (!ValVT.isSimple()) |
1862 | return SDValue(); |
1863 | switch (ValVT.getSimpleVT().SimpleTy) { |
1864 | default: |
1865 | return SDValue(); |
1866 | case MVT::v2i8: |
1867 | case MVT::v2i16: |
1868 | case MVT::v2i32: |
1869 | case MVT::v2i64: |
1870 | case MVT::v2f32: |
1871 | case MVT::v2f64: |
1872 | case MVT::v4i8: |
1873 | case MVT::v4i16: |
1874 | case MVT::v4i32: |
1875 | case MVT::v4f32: |
1876 | // This is a "native" vector type |
1877 | break; |
1878 | } |
1879 | |
1880 | MemSDNode *MemSD = cast<MemSDNode>(N); |
1881 | const DataLayout *TD = getDataLayout(); |
1882 | |
1883 | unsigned Align = MemSD->getAlignment(); |
1884 | unsigned PrefAlign = |
1885 | TD->getPrefTypeAlignment(ValVT.getTypeForEVT(*DAG.getContext())); |
1886 | if (Align < PrefAlign) { |
1887 | // This store is not sufficiently aligned, so bail out and let this vector |
1888 | // store be scalarized. Note that we may still be able to emit smaller |
1889 | // vector stores. For example, if we are storing a <4 x float> with an |
1890 | // alignment of 8, this check will fail but the legalizer will try again |
1891 | // with 2 x <2 x float>, which will succeed with an alignment of 8. |
1892 | return SDValue(); |
1893 | } |
1894 | |
1895 | unsigned Opcode = 0; |
1896 | EVT EltVT = ValVT.getVectorElementType(); |
1897 | unsigned NumElts = ValVT.getVectorNumElements(); |
1898 | |
1899 | // Since StoreV2 is a target node, we cannot rely on DAG type legalization. |
1900 | // Therefore, we must ensure the type is legal. For i1 and i8, we set the |
1901 | // stored type to i16 and propagate the "real" type as the memory type. |
1902 | bool NeedExt = false; |
1903 | if (EltVT.getSizeInBits() < 16) |
1904 | NeedExt = true; |
1905 | |
1906 | switch (NumElts) { |
1907 | default: |
1908 | return SDValue(); |
1909 | case 2: |
1910 | Opcode = NVPTXISD::StoreV2; |
1911 | break; |
1912 | case 4: { |
1913 | Opcode = NVPTXISD::StoreV4; |
1914 | break; |
1915 | } |
1916 | } |
1917 | |
1918 | SmallVector<SDValue, 8> Ops; |
1919 | |
1920 | // First is the chain |
1921 | Ops.push_back(N->getOperand(0)); |
1922 | |
1923 | // Then the split values |
1924 | for (unsigned i = 0; i < NumElts; ++i) { |
1925 | SDValue ExtVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Val, |
1926 | DAG.getIntPtrConstant(i)); |
1927 | if (NeedExt) |
1928 | ExtVal = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i16, ExtVal); |
1929 | Ops.push_back(ExtVal); |
1930 | } |
1931 | |
1932 | // Then any remaining arguments |
1933 | for (unsigned i = 2, e = N->getNumOperands(); i != e; ++i) { |
1934 | Ops.push_back(N->getOperand(i)); |
1935 | } |
1936 | |
1937 | SDValue NewSt = DAG.getMemIntrinsicNode( |
1938 | Opcode, DL, DAG.getVTList(MVT::Other), Ops, |
1939 | MemSD->getMemoryVT(), MemSD->getMemOperand()); |
1940 | |
1941 | //return DCI.CombineTo(N, NewSt, true); |
1942 | return NewSt; |
1943 | } |
1944 | |
1945 | return SDValue(); |
1946 | } |
1947 | |
1948 | // st i1 v, addr |
1949 | // => |
1950 | // v1 = zxt v to i16 |
1951 | // st.u8 i16, addr |
1952 | SDValue NVPTXTargetLowering::LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const { |
1953 | SDNode *Node = Op.getNode(); |
1954 | SDLoc dl(Node); |
1955 | StoreSDNode *ST = cast<StoreSDNode>(Node); |
1956 | SDValue Tmp1 = ST->getChain(); |
1957 | SDValue Tmp2 = ST->getBasePtr(); |
1958 | SDValue Tmp3 = ST->getValue(); |
1959 | assert(Tmp3.getValueType() == MVT::i1 && "Custom lowering for i1 store only")((Tmp3.getValueType() == MVT::i1 && "Custom lowering for i1 store only" ) ? static_cast<void> (0) : __assert_fail ("Tmp3.getValueType() == MVT::i1 && \"Custom lowering for i1 store only\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn216889/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 1959, __PRETTY_FUNCTION__)); |
1960 | unsigned Alignment = ST->getAlignment(); |
1961 | bool isVolatile = ST->isVolatile(); |
1962 | bool isNonTemporal = ST->isNonTemporal(); |
1963 | Tmp3 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Tmp3); |
1964 | SDValue Result = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, |
1965 | ST->getPointerInfo(), MVT::i8, isNonTemporal, |
1966 | isVolatile, Alignment); |
1967 | return Result; |
1968 | } |
1969 | |
1970 | SDValue NVPTXTargetLowering::getExtSymb(SelectionDAG &DAG, const char *inname, |
1971 | int idx, EVT v) const { |
1972 | std::string *name = nvTM->getManagedStrPool()->getManagedString(inname); |
1973 | std::stringstream suffix; |
1974 | suffix << idx; |
1975 | *name += suffix.str(); |
1976 | return DAG.getTargetExternalSymbol(name->c_str(), v); |
1977 | } |
1978 | |
1979 | SDValue |
1980 | NVPTXTargetLowering::getParamSymbol(SelectionDAG &DAG, int idx, EVT v) const { |
1981 | std::string ParamSym; |
1982 | raw_string_ostream ParamStr(ParamSym); |
1983 | |
1984 | ParamStr << DAG.getMachineFunction().getName() << "_param_" << idx; |
1985 | ParamStr.flush(); |
1986 | |
1987 | std::string *SavedStr = |
1988 | nvTM->getManagedStrPool()->getManagedString(ParamSym.c_str()); |
1989 | return DAG.getTargetExternalSymbol(SavedStr->c_str(), v); |
1990 | } |
1991 | |
1992 | SDValue NVPTXTargetLowering::getParamHelpSymbol(SelectionDAG &DAG, int idx) { |
1993 | return getExtSymb(DAG, ".HLPPARAM", idx); |
1994 | } |
1995 | |
1996 | // Check to see if the kernel argument is image*_t or sampler_t |
1997 | |
1998 | bool llvm::isImageOrSamplerVal(const Value *arg, const Module *context) { |
1999 | static const char *const specialTypes[] = { "struct._image2d_t", |
2000 | "struct._image3d_t", |
2001 | "struct._sampler_t" }; |
2002 | |
2003 | const Type *Ty = arg->getType(); |
2004 | const PointerType *PTy = dyn_cast<PointerType>(Ty); |
2005 | |
2006 | if (!PTy) |
2007 | return false; |
2008 | |
2009 | if (!context) |
2010 | return false; |
2011 | |
2012 | const StructType *STy = dyn_cast<StructType>(PTy->getElementType()); |
2013 | const std::string TypeName = STy && !STy->isLiteral() ? STy->getName() : ""; |
2014 | |
2015 | for (int i = 0, e = array_lengthof(specialTypes); i != e; ++i) |
2016 | if (TypeName == specialTypes[i]) |
2017 | return true; |
2018 | |
2019 | return false; |
2020 | } |
2021 | |
2022 | SDValue NVPTXTargetLowering::LowerFormalArguments( |
2023 | SDValue Chain, CallingConv::ID CallConv, bool isVarArg, |
2024 | const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc dl, SelectionDAG &DAG, |
2025 | SmallVectorImpl<SDValue> &InVals) const { |
2026 | MachineFunction &MF = DAG.getMachineFunction(); |
2027 | const DataLayout *TD = getDataLayout(); |
2028 | |
2029 | const Function *F = MF.getFunction(); |
2030 | const AttributeSet &PAL = F->getAttributes(); |
2031 | const TargetLowering *TLI = DAG.getSubtarget().getTargetLowering(); |
2032 | |
2033 | SDValue Root = DAG.getRoot(); |
2034 | std::vector<SDValue> OutChains; |
2035 | |
2036 | bool isKernel = llvm::isKernelFunction(*F); |
2037 | bool isABI = (nvptxSubtarget.getSmVersion() >= 20); |
2038 | assert(isABI && "Non-ABI compilation is not supported")((isABI && "Non-ABI compilation is not supported") ? static_cast <void> (0) : __assert_fail ("isABI && \"Non-ABI compilation is not supported\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn216889/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 2038, __PRETTY_FUNCTION__)); |
2039 | if (!isABI) |
2040 | return Chain; |
2041 | |
2042 | std::vector<Type *> argTypes; |
2043 | std::vector<const Argument *> theArgs; |
2044 | for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end(); |
2045 | I != E; ++I) { |
2046 | theArgs.push_back(I); |
2047 | argTypes.push_back(I->getType()); |
2048 | } |
2049 | // argTypes.size() (or theArgs.size()) and Ins.size() need not match. |
2050 | // Ins.size() will be larger |
2051 | // * if there is an aggregate argument with multiple fields (each field |
2052 | // showing up separately in Ins) |
2053 | // * if there is a vector argument with more than typical vector-length |
2054 | // elements (generally if more than 4) where each vector element is |
2055 | // individually present in Ins. |
2056 | // So a different index should be used for indexing into Ins. |
2057 | // See similar issue in LowerCall. |
2058 | unsigned InsIdx = 0; |
2059 | |
2060 | int idx = 0; |
2061 | for (unsigned i = 0, e = theArgs.size(); i != e; ++i, ++idx, ++InsIdx) { |
2062 | Type *Ty = argTypes[i]; |
2063 | |
2064 | // If the kernel argument is image*_t or sampler_t, convert it to |
2065 | // a i32 constant holding the parameter position. This can later |
2066 | // matched in the AsmPrinter to output the correct mangled name. |
2067 | if (isImageOrSamplerVal( |
2068 | theArgs[i], |
2069 | (theArgs[i]->getParent() ? theArgs[i]->getParent()->getParent() |
2070 | : nullptr))) { |
2071 | assert(isKernel && "Only kernels can have image/sampler params")((isKernel && "Only kernels can have image/sampler params" ) ? static_cast<void> (0) : __assert_fail ("isKernel && \"Only kernels can have image/sampler params\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn216889/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 2071, __PRETTY_FUNCTION__)); |
2072 | InVals.push_back(DAG.getConstant(i + 1, MVT::i32)); |
2073 | continue; |
2074 | } |
2075 | |
2076 | if (theArgs[i]->use_empty()) { |
2077 | // argument is dead |
2078 | if (Ty->isAggregateType()) { |
2079 | SmallVector<EVT, 16> vtparts; |
2080 | |
2081 | ComputePTXValueVTs(*this, Ty, vtparts); |
2082 | assert(vtparts.size() > 0 && "empty aggregate type not expected")((vtparts.size() > 0 && "empty aggregate type not expected" ) ? static_cast<void> (0) : __assert_fail ("vtparts.size() > 0 && \"empty aggregate type not expected\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn216889/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 2082, __PRETTY_FUNCTION__)); |
2083 | for (unsigned parti = 0, parte = vtparts.size(); parti != parte; |
2084 | ++parti) { |
2085 | InVals.push_back(DAG.getNode(ISD::UNDEF, dl, Ins[InsIdx].VT)); |
2086 | ++InsIdx; |
2087 | } |
2088 | if (vtparts.size() > 0) |
2089 | --InsIdx; |
2090 | continue; |
2091 | } |
2092 | if (Ty->isVectorTy()) { |
2093 | EVT ObjectVT = getValueType(Ty); |
2094 | unsigned NumRegs = TLI->getNumRegisters(F->getContext(), ObjectVT); |
2095 | for (unsigned parti = 0; parti < NumRegs; ++parti) { |
2096 | InVals.push_back(DAG.getNode(ISD::UNDEF, dl, Ins[InsIdx].VT)); |
2097 | ++InsIdx; |
2098 | } |
2099 | if (NumRegs > 0) |
2100 | --InsIdx; |
2101 | continue; |
2102 | } |
2103 | InVals.push_back(DAG.getNode(ISD::UNDEF, dl, Ins[InsIdx].VT)); |
2104 | continue; |
2105 | } |
2106 | |
2107 | // In the following cases, assign a node order of "idx+1" |
2108 | // to newly created nodes. The SDNodes for params have to |
2109 | // appear in the same order as their order of appearance |
2110 | // in the original function. "idx+1" holds that order. |
2111 | if (PAL.hasAttribute(i + 1, Attribute::ByVal) == false) { |
2112 | if (Ty->isAggregateType()) { |
2113 | SmallVector<EVT, 16> vtparts; |
2114 | SmallVector<uint64_t, 16> offsets; |
2115 | |
2116 | // NOTE: Here, we lose the ability to issue vector loads for vectors |
2117 | // that are a part of a struct. This should be investigated in the |
2118 | // future. |
2119 | ComputePTXValueVTs(*this, Ty, vtparts, &offsets, 0); |
2120 | assert(vtparts.size() > 0 && "empty aggregate type not expected")((vtparts.size() > 0 && "empty aggregate type not expected" ) ? static_cast<void> (0) : __assert_fail ("vtparts.size() > 0 && \"empty aggregate type not expected\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn216889/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 2120, __PRETTY_FUNCTION__)); |
2121 | bool aggregateIsPacked = false; |
2122 | if (StructType *STy = llvm::dyn_cast<StructType>(Ty)) |
2123 | aggregateIsPacked = STy->isPacked(); |
2124 | |
2125 | SDValue Arg = getParamSymbol(DAG, idx, getPointerTy()); |
2126 | for (unsigned parti = 0, parte = vtparts.size(); parti != parte; |
2127 | ++parti) { |
2128 | EVT partVT = vtparts[parti]; |
2129 | Value *srcValue = Constant::getNullValue( |
2130 | PointerType::get(partVT.getTypeForEVT(F->getContext()), |
2131 | llvm::ADDRESS_SPACE_PARAM)); |
2132 | SDValue srcAddr = |
2133 | DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, |
2134 | DAG.getConstant(offsets[parti], getPointerTy())); |
2135 | unsigned partAlign = |
2136 | aggregateIsPacked ? 1 |
2137 | : TD->getABITypeAlignment( |
2138 | partVT.getTypeForEVT(F->getContext())); |
2139 | SDValue p; |
2140 | if (Ins[InsIdx].VT.getSizeInBits() > partVT.getSizeInBits()) { |
2141 | ISD::LoadExtType ExtOp = Ins[InsIdx].Flags.isSExt() ? |
2142 | ISD::SEXTLOAD : ISD::ZEXTLOAD; |
2143 | p = DAG.getExtLoad(ExtOp, dl, Ins[InsIdx].VT, Root, srcAddr, |
2144 | MachinePointerInfo(srcValue), partVT, false, |
2145 | false, false, partAlign); |
2146 | } else { |
2147 | p = DAG.getLoad(partVT, dl, Root, srcAddr, |
2148 | MachinePointerInfo(srcValue), false, false, false, |
2149 | partAlign); |
2150 | } |
2151 | if (p.getNode()) |
2152 | p.getNode()->setIROrder(idx + 1); |
2153 | InVals.push_back(p); |
2154 | ++InsIdx; |
2155 | } |
2156 | if (vtparts.size() > 0) |
2157 | --InsIdx; |
2158 | continue; |
2159 | } |
2160 | if (Ty->isVectorTy()) { |
2161 | EVT ObjectVT = getValueType(Ty); |
2162 | SDValue Arg = getParamSymbol(DAG, idx, getPointerTy()); |
2163 | unsigned NumElts = ObjectVT.getVectorNumElements(); |
2164 | assert(TLI->getNumRegisters(F->getContext(), ObjectVT) == NumElts &&((TLI->getNumRegisters(F->getContext(), ObjectVT) == NumElts && "Vector was not scalarized") ? static_cast<void > (0) : __assert_fail ("TLI->getNumRegisters(F->getContext(), ObjectVT) == NumElts && \"Vector was not scalarized\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn216889/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 2165, __PRETTY_FUNCTION__)) |
2165 | "Vector was not scalarized")((TLI->getNumRegisters(F->getContext(), ObjectVT) == NumElts && "Vector was not scalarized") ? static_cast<void > (0) : __assert_fail ("TLI->getNumRegisters(F->getContext(), ObjectVT) == NumElts && \"Vector was not scalarized\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn216889/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 2165, __PRETTY_FUNCTION__)); |
2166 | unsigned Ofst = 0; |
2167 | EVT EltVT = ObjectVT.getVectorElementType(); |
2168 | |
2169 | // V1 load |
2170 | // f32 = load ... |
2171 | if (NumElts == 1) { |
2172 | // We only have one element, so just directly load it |
2173 | Value *SrcValue = Constant::getNullValue(PointerType::get( |
2174 | EltVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM)); |
2175 | SDValue SrcAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, |
2176 | DAG.getConstant(Ofst, getPointerTy())); |
2177 | SDValue P = DAG.getLoad( |
2178 | EltVT, dl, Root, SrcAddr, MachinePointerInfo(SrcValue), false, |
2179 | false, true, |
2180 | TD->getABITypeAlignment(EltVT.getTypeForEVT(F->getContext()))); |
2181 | if (P.getNode()) |
2182 | P.getNode()->setIROrder(idx + 1); |
2183 | |
2184 | if (Ins[InsIdx].VT.getSizeInBits() > EltVT.getSizeInBits()) |
2185 | P = DAG.getNode(ISD::ANY_EXTEND, dl, Ins[InsIdx].VT, P); |
2186 | InVals.push_back(P); |
2187 | Ofst += TD->getTypeAllocSize(EltVT.getTypeForEVT(F->getContext())); |
Value stored to 'Ofst' is never read | |
2188 | ++InsIdx; |
2189 | } else if (NumElts == 2) { |
2190 | // V2 load |
2191 | // f32,f32 = load ... |
2192 | EVT VecVT = EVT::getVectorVT(F->getContext(), EltVT, 2); |
2193 | Value *SrcValue = Constant::getNullValue(PointerType::get( |
2194 | VecVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM)); |
2195 | SDValue SrcAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, |
2196 | DAG.getConstant(Ofst, getPointerTy())); |
2197 | SDValue P = DAG.getLoad( |
2198 | VecVT, dl, Root, SrcAddr, MachinePointerInfo(SrcValue), false, |
2199 | false, true, |
2200 | TD->getABITypeAlignment(VecVT.getTypeForEVT(F->getContext()))); |
2201 | if (P.getNode()) |
2202 | P.getNode()->setIROrder(idx + 1); |
2203 | |
2204 | SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, P, |
2205 | DAG.getIntPtrConstant(0)); |
2206 | SDValue Elt1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, P, |
2207 | DAG.getIntPtrConstant(1)); |
2208 | |
2209 | if (Ins[InsIdx].VT.getSizeInBits() > EltVT.getSizeInBits()) { |
2210 | Elt0 = DAG.getNode(ISD::ANY_EXTEND, dl, Ins[InsIdx].VT, Elt0); |
2211 | Elt1 = DAG.getNode(ISD::ANY_EXTEND, dl, Ins[InsIdx].VT, Elt1); |
2212 | } |
2213 | |
2214 | InVals.push_back(Elt0); |
2215 | InVals.push_back(Elt1); |
2216 | Ofst += TD->getTypeAllocSize(VecVT.getTypeForEVT(F->getContext())); |
2217 | InsIdx += 2; |
2218 | } else { |
2219 | // V4 loads |
2220 | // We have at least 4 elements (<3 x Ty> expands to 4 elements) and |
2221 | // the |
2222 | // vector will be expanded to a power of 2 elements, so we know we can |
2223 | // always round up to the next multiple of 4 when creating the vector |
2224 | // loads. |
2225 | // e.g. 4 elem => 1 ld.v4 |
2226 | // 6 elem => 2 ld.v4 |
2227 | // 8 elem => 2 ld.v4 |
2228 | // 11 elem => 3 ld.v4 |
2229 | unsigned VecSize = 4; |
2230 | if (EltVT.getSizeInBits() == 64) { |
2231 | VecSize = 2; |
2232 | } |
2233 | EVT VecVT = EVT::getVectorVT(F->getContext(), EltVT, VecSize); |
2234 | for (unsigned i = 0; i < NumElts; i += VecSize) { |
2235 | Value *SrcValue = Constant::getNullValue( |
2236 | PointerType::get(VecVT.getTypeForEVT(F->getContext()), |
2237 | llvm::ADDRESS_SPACE_PARAM)); |
2238 | SDValue SrcAddr = |
2239 | DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, |
2240 | DAG.getConstant(Ofst, getPointerTy())); |
2241 | SDValue P = DAG.getLoad( |
2242 | VecVT, dl, Root, SrcAddr, MachinePointerInfo(SrcValue), false, |
2243 | false, true, |
2244 | TD->getABITypeAlignment(VecVT.getTypeForEVT(F->getContext()))); |
2245 | if (P.getNode()) |
2246 | P.getNode()->setIROrder(idx + 1); |
2247 | |
2248 | for (unsigned j = 0; j < VecSize; ++j) { |
2249 | if (i + j >= NumElts) |
2250 | break; |
2251 | SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, P, |
2252 | DAG.getIntPtrConstant(j)); |
2253 | if (Ins[InsIdx].VT.getSizeInBits() > EltVT.getSizeInBits()) |
2254 | Elt = DAG.getNode(ISD::ANY_EXTEND, dl, Ins[InsIdx].VT, Elt); |
2255 | InVals.push_back(Elt); |
2256 | } |
2257 | Ofst += TD->getTypeAllocSize(VecVT.getTypeForEVT(F->getContext())); |
2258 | } |
2259 | InsIdx += NumElts; |
2260 | } |
2261 | |
2262 | if (NumElts > 0) |
2263 | --InsIdx; |
2264 | continue; |
2265 | } |
2266 | // A plain scalar. |
2267 | EVT ObjectVT = getValueType(Ty); |
2268 | // If ABI, load from the param symbol |
2269 | SDValue Arg = getParamSymbol(DAG, idx, getPointerTy()); |
2270 | Value *srcValue = Constant::getNullValue(PointerType::get( |
2271 | ObjectVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM)); |
2272 | SDValue p; |
2273 | if (ObjectVT.getSizeInBits() < Ins[InsIdx].VT.getSizeInBits()) { |
2274 | ISD::LoadExtType ExtOp = Ins[InsIdx].Flags.isSExt() ? |
2275 | ISD::SEXTLOAD : ISD::ZEXTLOAD; |
2276 | p = DAG.getExtLoad(ExtOp, dl, Ins[InsIdx].VT, Root, Arg, |
2277 | MachinePointerInfo(srcValue), ObjectVT, false, false, |
2278 | false, |
2279 | TD->getABITypeAlignment(ObjectVT.getTypeForEVT(F->getContext()))); |
2280 | } else { |
2281 | p = DAG.getLoad(Ins[InsIdx].VT, dl, Root, Arg, |
2282 | MachinePointerInfo(srcValue), false, false, false, |
2283 | TD->getABITypeAlignment(ObjectVT.getTypeForEVT(F->getContext()))); |
2284 | } |
2285 | if (p.getNode()) |
2286 | p.getNode()->setIROrder(idx + 1); |
2287 | InVals.push_back(p); |
2288 | continue; |
2289 | } |
2290 | |
2291 | // Param has ByVal attribute |
2292 | // Return MoveParam(param symbol). |
2293 | // Ideally, the param symbol can be returned directly, |
2294 | // but when SDNode builder decides to use it in a CopyToReg(), |
2295 | // machine instruction fails because TargetExternalSymbol |
2296 | // (not lowered) is target dependent, and CopyToReg assumes |
2297 | // the source is lowered. |
2298 | EVT ObjectVT = getValueType(Ty); |
2299 | assert(ObjectVT == Ins[InsIdx].VT &&((ObjectVT == Ins[InsIdx].VT && "Ins type did not match function type" ) ? static_cast<void> (0) : __assert_fail ("ObjectVT == Ins[InsIdx].VT && \"Ins type did not match function type\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn216889/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 2300, __PRETTY_FUNCTION__)) |
2300 | "Ins type did not match function type")((ObjectVT == Ins[InsIdx].VT && "Ins type did not match function type" ) ? static_cast<void> (0) : __assert_fail ("ObjectVT == Ins[InsIdx].VT && \"Ins type did not match function type\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn216889/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 2300, __PRETTY_FUNCTION__)); |
2301 | SDValue Arg = getParamSymbol(DAG, idx, getPointerTy()); |
2302 | SDValue p = DAG.getNode(NVPTXISD::MoveParam, dl, ObjectVT, Arg); |
2303 | if (p.getNode()) |
2304 | p.getNode()->setIROrder(idx + 1); |
2305 | if (isKernel) |
2306 | InVals.push_back(p); |
2307 | else { |
2308 | SDValue p2 = DAG.getNode( |
2309 | ISD::INTRINSIC_WO_CHAIN, dl, ObjectVT, |
2310 | DAG.getConstant(Intrinsic::nvvm_ptr_local_to_gen, MVT::i32), p); |
2311 | InVals.push_back(p2); |
2312 | } |
2313 | } |
2314 | |
2315 | // Clang will check explicit VarArg and issue error if any. However, Clang |
2316 | // will let code with |
2317 | // implicit var arg like f() pass. See bug 617733. |
2318 | // We treat this case as if the arg list is empty. |
2319 | // if (F.isVarArg()) { |
2320 | // assert(0 && "VarArg not supported yet!"); |
2321 | //} |
2322 | |
2323 | if (!OutChains.empty()) |
2324 | DAG.setRoot(DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains)); |
2325 | |
2326 | return Chain; |
2327 | } |
2328 | |
2329 | |
2330 | SDValue |
2331 | NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, |
2332 | bool isVarArg, |
2333 | const SmallVectorImpl<ISD::OutputArg> &Outs, |
2334 | const SmallVectorImpl<SDValue> &OutVals, |
2335 | SDLoc dl, SelectionDAG &DAG) const { |
2336 | MachineFunction &MF = DAG.getMachineFunction(); |
2337 | const Function *F = MF.getFunction(); |
2338 | Type *RetTy = F->getReturnType(); |
2339 | const DataLayout *TD = getDataLayout(); |
2340 | |
2341 | bool isABI = (nvptxSubtarget.getSmVersion() >= 20); |
2342 | assert(isABI && "Non-ABI compilation is not supported")((isABI && "Non-ABI compilation is not supported") ? static_cast <void> (0) : __assert_fail ("isABI && \"Non-ABI compilation is not supported\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn216889/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 2342, __PRETTY_FUNCTION__)); |
2343 | if (!isABI) |
2344 | return Chain; |
2345 | |
2346 | if (VectorType *VTy = dyn_cast<VectorType>(RetTy)) { |
2347 | // If we have a vector type, the OutVals array will be the scalarized |
2348 | // components and we have combine them into 1 or more vector stores. |
2349 | unsigned NumElts = VTy->getNumElements(); |
2350 | assert(NumElts == Outs.size() && "Bad scalarization of return value")((NumElts == Outs.size() && "Bad scalarization of return value" ) ? static_cast<void> (0) : __assert_fail ("NumElts == Outs.size() && \"Bad scalarization of return value\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn216889/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 2350, __PRETTY_FUNCTION__)); |
2351 | |
2352 | // const_cast can be removed in later LLVM versions |
2353 | EVT EltVT = getValueType(RetTy).getVectorElementType(); |
2354 | bool NeedExtend = false; |
2355 | if (EltVT.getSizeInBits() < 16) |
2356 | NeedExtend = true; |
2357 | |
2358 | // V1 store |
2359 | if (NumElts == 1) { |
2360 | SDValue StoreVal = OutVals[0]; |
2361 | // We only have one element, so just directly store it |
2362 | if (NeedExtend) |
2363 | StoreVal = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal); |
2364 | SDValue Ops[] = { Chain, DAG.getConstant(0, MVT::i32), StoreVal }; |
2365 | Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreRetval, dl, |
2366 | DAG.getVTList(MVT::Other), Ops, |
2367 | EltVT, MachinePointerInfo()); |
2368 | |
2369 | } else if (NumElts == 2) { |
2370 | // V2 store |
2371 | SDValue StoreVal0 = OutVals[0]; |
2372 | SDValue StoreVal1 = OutVals[1]; |
2373 | |
2374 | if (NeedExtend) { |
2375 | StoreVal0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal0); |
2376 | StoreVal1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal1); |
2377 | } |
2378 | |
2379 | SDValue Ops[] = { Chain, DAG.getConstant(0, MVT::i32), StoreVal0, |
2380 | StoreVal1 }; |
2381 | Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreRetvalV2, dl, |
2382 | DAG.getVTList(MVT::Other), Ops, |
2383 | EltVT, MachinePointerInfo()); |
2384 | } else { |
2385 | // V4 stores |
2386 | // We have at least 4 elements (<3 x Ty> expands to 4 elements) and the |
2387 | // vector will be expanded to a power of 2 elements, so we know we can |
2388 | // always round up to the next multiple of 4 when creating the vector |
2389 | // stores. |
2390 | // e.g. 4 elem => 1 st.v4 |
2391 | // 6 elem => 2 st.v4 |
2392 | // 8 elem => 2 st.v4 |
2393 | // 11 elem => 3 st.v4 |
2394 | |
2395 | unsigned VecSize = 4; |
2396 | if (OutVals[0].getValueType().getSizeInBits() == 64) |
2397 | VecSize = 2; |
2398 | |
2399 | unsigned Offset = 0; |
2400 | |
2401 | EVT VecVT = |
2402 | EVT::getVectorVT(F->getContext(), EltVT, VecSize); |
2403 | unsigned PerStoreOffset = |
2404 | TD->getTypeAllocSize(VecVT.getTypeForEVT(F->getContext())); |
2405 | |
2406 | for (unsigned i = 0; i < NumElts; i += VecSize) { |
2407 | // Get values |
2408 | SDValue StoreVal; |
2409 | SmallVector<SDValue, 8> Ops; |
2410 | Ops.push_back(Chain); |
2411 | Ops.push_back(DAG.getConstant(Offset, MVT::i32)); |
2412 | unsigned Opc = NVPTXISD::StoreRetvalV2; |
2413 | EVT ExtendedVT = (NeedExtend) ? MVT::i16 : OutVals[0].getValueType(); |
2414 | |
2415 | StoreVal = OutVals[i]; |
2416 | if (NeedExtend) |
2417 | StoreVal = DAG.getNode(ISD::ZERO_EXTEND, dl, ExtendedVT, StoreVal); |
2418 | Ops.push_back(StoreVal); |
2419 | |
2420 | if (i + 1 < NumElts) { |
2421 | StoreVal = OutVals[i + 1]; |
2422 | if (NeedExtend) |
2423 | StoreVal = DAG.getNode(ISD::ZERO_EXTEND, dl, ExtendedVT, StoreVal); |
2424 | } else { |
2425 | StoreVal = DAG.getUNDEF(ExtendedVT); |
2426 | } |
2427 | Ops.push_back(StoreVal); |
2428 | |
2429 | if (VecSize == 4) { |
2430 | Opc = NVPTXISD::StoreRetvalV4; |
2431 | if (i + 2 < NumElts) { |
2432 | StoreVal = OutVals[i + 2]; |
2433 | if (NeedExtend) |
2434 | StoreVal = |
2435 | DAG.getNode(ISD::ZERO_EXTEND, dl, ExtendedVT, StoreVal); |
2436 | } else { |
2437 | StoreVal = DAG.getUNDEF(ExtendedVT); |
2438 | } |
2439 | Ops.push_back(StoreVal); |
2440 | |
2441 | if (i + 3 < NumElts) { |
2442 | StoreVal = OutVals[i + 3]; |
2443 | if (NeedExtend) |
2444 | StoreVal = |
2445 | DAG.getNode(ISD::ZERO_EXTEND, dl, ExtendedVT, StoreVal); |
2446 | } else { |
2447 | StoreVal = DAG.getUNDEF(ExtendedVT); |
2448 | } |
2449 | Ops.push_back(StoreVal); |
2450 | } |
2451 | |
2452 | // Chain = DAG.getNode(Opc, dl, MVT::Other, &Ops[0], Ops.size()); |
2453 | Chain = |
2454 | DAG.getMemIntrinsicNode(Opc, dl, DAG.getVTList(MVT::Other), Ops, |
2455 | EltVT, MachinePointerInfo()); |
2456 | Offset += PerStoreOffset; |
2457 | } |
2458 | } |
2459 | } else { |
2460 | SmallVector<EVT, 16> ValVTs; |
2461 | SmallVector<uint64_t, 16> Offsets; |
2462 | ComputePTXValueVTs(*this, RetTy, ValVTs, &Offsets, 0); |
2463 | assert(ValVTs.size() == OutVals.size() && "Bad return value decomposition")((ValVTs.size() == OutVals.size() && "Bad return value decomposition" ) ? static_cast<void> (0) : __assert_fail ("ValVTs.size() == OutVals.size() && \"Bad return value decomposition\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn216889/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 2463, __PRETTY_FUNCTION__)); |
2464 | |
2465 | for (unsigned i = 0, e = Outs.size(); i != e; ++i) { |
2466 | SDValue theVal = OutVals[i]; |
2467 | EVT TheValType = theVal.getValueType(); |
2468 | unsigned numElems = 1; |
2469 | if (TheValType.isVector()) |
2470 | numElems = TheValType.getVectorNumElements(); |
2471 | for (unsigned j = 0, je = numElems; j != je; ++j) { |
2472 | SDValue TmpVal = theVal; |
2473 | if (TheValType.isVector()) |
2474 | TmpVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, |
2475 | TheValType.getVectorElementType(), TmpVal, |
2476 | DAG.getIntPtrConstant(j)); |
2477 | EVT TheStoreType = ValVTs[i]; |
2478 | if (RetTy->isIntegerTy() && |
2479 | TD->getTypeAllocSizeInBits(RetTy) < 32) { |
2480 | // The following zero-extension is for integer types only, and |
2481 | // specifically not for aggregates. |
2482 | TmpVal = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, TmpVal); |
2483 | TheStoreType = MVT::i32; |
2484 | } |
2485 | else if (TmpVal.getValueType().getSizeInBits() < 16) |
2486 | TmpVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, TmpVal); |
2487 | |
2488 | SDValue Ops[] = { |
2489 | Chain, |
2490 | DAG.getConstant(Offsets[i], MVT::i32), |
2491 | TmpVal }; |
2492 | Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreRetval, dl, |
2493 | DAG.getVTList(MVT::Other), Ops, |
2494 | TheStoreType, |
2495 | MachinePointerInfo()); |
2496 | } |
2497 | } |
2498 | } |
2499 | |
2500 | return DAG.getNode(NVPTXISD::RET_FLAG, dl, MVT::Other, Chain); |
2501 | } |
2502 | |
2503 | |
2504 | void NVPTXTargetLowering::LowerAsmOperandForConstraint( |
2505 | SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops, |
2506 | SelectionDAG &DAG) const { |
2507 | if (Constraint.length() > 1) |
2508 | return; |
2509 | else |
2510 | TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); |
2511 | } |
2512 | |
2513 | // NVPTX suuport vector of legal types of any length in Intrinsics because the |
2514 | // NVPTX specific type legalizer |
2515 | // will legalize them to the PTX supported length. |
2516 | bool NVPTXTargetLowering::isTypeSupportedInIntrinsic(MVT VT) const { |
2517 | if (isTypeLegal(VT)) |
2518 | return true; |
2519 | if (VT.isVector()) { |
2520 | MVT eVT = VT.getVectorElementType(); |
2521 | if (isTypeLegal(eVT)) |
2522 | return true; |
2523 | } |
2524 | return false; |
2525 | } |
2526 | |
2527 | static unsigned getOpcForTextureInstr(unsigned Intrinsic) { |
2528 | switch (Intrinsic) { |
2529 | default: |
2530 | return 0; |
2531 | |
2532 | case Intrinsic::nvvm_tex_1d_v4f32_s32: |
2533 | return NVPTXISD::Tex1DFloatS32; |
2534 | case Intrinsic::nvvm_tex_1d_v4f32_f32: |
2535 | return NVPTXISD::Tex1DFloatFloat; |
2536 | case Intrinsic::nvvm_tex_1d_level_v4f32_f32: |
2537 | return NVPTXISD::Tex1DFloatFloatLevel; |
2538 | case Intrinsic::nvvm_tex_1d_grad_v4f32_f32: |
2539 | return NVPTXISD::Tex1DFloatFloatGrad; |
2540 | case Intrinsic::nvvm_tex_1d_v4s32_s32: |
2541 | return NVPTXISD::Tex1DS32S32; |
2542 | case Intrinsic::nvvm_tex_1d_v4s32_f32: |
2543 | return NVPTXISD::Tex1DS32Float; |
2544 | case Intrinsic::nvvm_tex_1d_level_v4s32_f32: |
2545 | return NVPTXISD::Tex1DS32FloatLevel; |
2546 | case Intrinsic::nvvm_tex_1d_grad_v4s32_f32: |
2547 | return NVPTXISD::Tex1DS32FloatGrad; |
2548 | case Intrinsic::nvvm_tex_1d_v4u32_s32: |
2549 | return NVPTXISD::Tex1DU32S32; |
2550 | case Intrinsic::nvvm_tex_1d_v4u32_f32: |
2551 | return NVPTXISD::Tex1DU32Float; |
2552 | case Intrinsic::nvvm_tex_1d_level_v4u32_f32: |
2553 | return NVPTXISD::Tex1DU32FloatLevel; |
2554 | case Intrinsic::nvvm_tex_1d_grad_v4u32_f32: |
2555 | return NVPTXISD::Tex1DU32FloatGrad; |
2556 | |
2557 | case Intrinsic::nvvm_tex_1d_array_v4f32_s32: |
2558 | return NVPTXISD::Tex1DArrayFloatS32; |
2559 | case Intrinsic::nvvm_tex_1d_array_v4f32_f32: |
2560 | return NVPTXISD::Tex1DArrayFloatFloat; |
2561 | case Intrinsic::nvvm_tex_1d_array_level_v4f32_f32: |
2562 | return NVPTXISD::Tex1DArrayFloatFloatLevel; |
2563 | case Intrinsic::nvvm_tex_1d_array_grad_v4f32_f32: |
2564 | return NVPTXISD::Tex1DArrayFloatFloatGrad; |
2565 | case Intrinsic::nvvm_tex_1d_array_v4s32_s32: |
2566 | return NVPTXISD::Tex1DArrayS32S32; |
2567 | case Intrinsic::nvvm_tex_1d_array_v4s32_f32: |
2568 | return NVPTXISD::Tex1DArrayS32Float; |
2569 | case Intrinsic::nvvm_tex_1d_array_level_v4s32_f32: |
2570 | return NVPTXISD::Tex1DArrayS32FloatLevel; |
2571 | case Intrinsic::nvvm_tex_1d_array_grad_v4s32_f32: |
2572 | return NVPTXISD::Tex1DArrayS32FloatGrad; |
2573 | case Intrinsic::nvvm_tex_1d_array_v4u32_s32: |
2574 | return NVPTXISD::Tex1DArrayU32S32; |
2575 | case Intrinsic::nvvm_tex_1d_array_v4u32_f32: |
2576 | return NVPTXISD::Tex1DArrayU32Float; |
2577 | case Intrinsic::nvvm_tex_1d_array_level_v4u32_f32: |
2578 | return NVPTXISD::Tex1DArrayU32FloatLevel; |
2579 | case Intrinsic::nvvm_tex_1d_array_grad_v4u32_f32: |
2580 | return NVPTXISD::Tex1DArrayU32FloatGrad; |
2581 | |
2582 | case Intrinsic::nvvm_tex_2d_v4f32_s32: |
2583 | return NVPTXISD::Tex2DFloatS32; |
2584 | case Intrinsic::nvvm_tex_2d_v4f32_f32: |
2585 | return NVPTXISD::Tex2DFloatFloat; |
2586 | case Intrinsic::nvvm_tex_2d_level_v4f32_f32: |
2587 | return NVPTXISD::Tex2DFloatFloatLevel; |
2588 | case Intrinsic::nvvm_tex_2d_grad_v4f32_f32: |
2589 | return NVPTXISD::Tex2DFloatFloatGrad; |
2590 | case Intrinsic::nvvm_tex_2d_v4s32_s32: |
2591 | return NVPTXISD::Tex2DS32S32; |
2592 | case Intrinsic::nvvm_tex_2d_v4s32_f32: |
2593 | return NVPTXISD::Tex2DS32Float; |
2594 | case Intrinsic::nvvm_tex_2d_level_v4s32_f32: |
2595 | return NVPTXISD::Tex2DS32FloatLevel; |
2596 | case Intrinsic::nvvm_tex_2d_grad_v4s32_f32: |
2597 | return NVPTXISD::Tex2DS32FloatGrad; |
2598 | case Intrinsic::nvvm_tex_2d_v4u32_s32: |
2599 | return NVPTXISD::Tex2DU32S32; |
2600 | case Intrinsic::nvvm_tex_2d_v4u32_f32: |
2601 | return NVPTXISD::Tex2DU32Float; |
2602 | case Intrinsic::nvvm_tex_2d_level_v4u32_f32: |
2603 | return NVPTXISD::Tex2DU32FloatLevel; |
2604 | case Intrinsic::nvvm_tex_2d_grad_v4u32_f32: |
2605 | return NVPTXISD::Tex2DU32FloatGrad; |
2606 | |
2607 | case Intrinsic::nvvm_tex_2d_array_v4f32_s32: |
2608 | return NVPTXISD::Tex2DArrayFloatS32; |
2609 | case Intrinsic::nvvm_tex_2d_array_v4f32_f32: |
2610 | return NVPTXISD::Tex2DArrayFloatFloat; |
2611 | case Intrinsic::nvvm_tex_2d_array_level_v4f32_f32: |
2612 | return NVPTXISD::Tex2DArrayFloatFloatLevel; |
2613 | case Intrinsic::nvvm_tex_2d_array_grad_v4f32_f32: |
2614 | return NVPTXISD::Tex2DArrayFloatFloatGrad; |
2615 | case Intrinsic::nvvm_tex_2d_array_v4s32_s32: |
2616 | return NVPTXISD::Tex2DArrayS32S32; |
2617 | case Intrinsic::nvvm_tex_2d_array_v4s32_f32: |
2618 | return NVPTXISD::Tex2DArrayS32Float; |
2619 | case Intrinsic::nvvm_tex_2d_array_level_v4s32_f32: |
2620 | return NVPTXISD::Tex2DArrayS32FloatLevel; |
2621 | case Intrinsic::nvvm_tex_2d_array_grad_v4s32_f32: |
2622 | return NVPTXISD::Tex2DArrayS32FloatGrad; |
2623 | case Intrinsic::nvvm_tex_2d_array_v4u32_s32: |
2624 | return NVPTXISD::Tex2DArrayU32S32; |
2625 | case Intrinsic::nvvm_tex_2d_array_v4u32_f32: |
2626 | return NVPTXISD::Tex2DArrayU32Float; |
2627 | case Intrinsic::nvvm_tex_2d_array_level_v4u32_f32: |
2628 | return NVPTXISD::Tex2DArrayU32FloatLevel; |
2629 | case Intrinsic::nvvm_tex_2d_array_grad_v4u32_f32: |
2630 | return NVPTXISD::Tex2DArrayU32FloatGrad; |
2631 | |
2632 | case Intrinsic::nvvm_tex_3d_v4f32_s32: |
2633 | return NVPTXISD::Tex3DFloatS32; |
2634 | case Intrinsic::nvvm_tex_3d_v4f32_f32: |
2635 | return NVPTXISD::Tex3DFloatFloat; |
2636 | case Intrinsic::nvvm_tex_3d_level_v4f32_f32: |
2637 | return NVPTXISD::Tex3DFloatFloatLevel; |
2638 | case Intrinsic::nvvm_tex_3d_grad_v4f32_f32: |
2639 | return NVPTXISD::Tex3DFloatFloatGrad; |
2640 | case Intrinsic::nvvm_tex_3d_v4s32_s32: |
2641 | return NVPTXISD::Tex3DS32S32; |
2642 | case Intrinsic::nvvm_tex_3d_v4s32_f32: |
2643 | return NVPTXISD::Tex3DS32Float; |
2644 | case Intrinsic::nvvm_tex_3d_level_v4s32_f32: |
2645 | return NVPTXISD::Tex3DS32FloatLevel; |
2646 | case Intrinsic::nvvm_tex_3d_grad_v4s32_f32: |
2647 | return NVPTXISD::Tex3DS32FloatGrad; |
2648 | case Intrinsic::nvvm_tex_3d_v4u32_s32: |
2649 | return NVPTXISD::Tex3DU32S32; |
2650 | case Intrinsic::nvvm_tex_3d_v4u32_f32: |
2651 | return NVPTXISD::Tex3DU32Float; |
2652 | case Intrinsic::nvvm_tex_3d_level_v4u32_f32: |
2653 | return NVPTXISD::Tex3DU32FloatLevel; |
2654 | case Intrinsic::nvvm_tex_3d_grad_v4u32_f32: |
2655 | return NVPTXISD::Tex3DU32FloatGrad; |
2656 | |
2657 | case Intrinsic::nvvm_tex_cube_v4f32_f32: |
2658 | return NVPTXISD::TexCubeFloatFloat; |
2659 | case Intrinsic::nvvm_tex_cube_level_v4f32_f32: |
2660 | return NVPTXISD::TexCubeFloatFloatLevel; |
2661 | case Intrinsic::nvvm_tex_cube_v4s32_f32: |
2662 | return NVPTXISD::TexCubeS32Float; |
2663 | case Intrinsic::nvvm_tex_cube_level_v4s32_f32: |
2664 | return NVPTXISD::TexCubeS32FloatLevel; |
2665 | case Intrinsic::nvvm_tex_cube_v4u32_f32: |
2666 | return NVPTXISD::TexCubeU32Float; |
2667 | case Intrinsic::nvvm_tex_cube_level_v4u32_f32: |
2668 | return NVPTXISD::TexCubeU32FloatLevel; |
2669 | |
2670 | case Intrinsic::nvvm_tex_cube_array_v4f32_f32: |
2671 | return NVPTXISD::TexCubeArrayFloatFloat; |
2672 | case Intrinsic::nvvm_tex_cube_array_level_v4f32_f32: |
2673 | return NVPTXISD::TexCubeArrayFloatFloatLevel; |
2674 | case Intrinsic::nvvm_tex_cube_array_v4s32_f32: |
2675 | return NVPTXISD::TexCubeArrayS32Float; |
2676 | case Intrinsic::nvvm_tex_cube_array_level_v4s32_f32: |
2677 | return NVPTXISD::TexCubeArrayS32FloatLevel; |
2678 | case Intrinsic::nvvm_tex_cube_array_v4u32_f32: |
2679 | return NVPTXISD::TexCubeArrayU32Float; |
2680 | case Intrinsic::nvvm_tex_cube_array_level_v4u32_f32: |
2681 | return NVPTXISD::TexCubeArrayU32FloatLevel; |
2682 | |
2683 | case Intrinsic::nvvm_tld4_r_2d_v4f32_f32: |
2684 | return NVPTXISD::Tld4R2DFloatFloat; |
2685 | case Intrinsic::nvvm_tld4_g_2d_v4f32_f32: |
2686 | return NVPTXISD::Tld4G2DFloatFloat; |
2687 | case Intrinsic::nvvm_tld4_b_2d_v4f32_f32: |
2688 | return NVPTXISD::Tld4B2DFloatFloat; |
2689 | case Intrinsic::nvvm_tld4_a_2d_v4f32_f32: |
2690 | return NVPTXISD::Tld4A2DFloatFloat; |
2691 | case Intrinsic::nvvm_tld4_r_2d_v4s32_f32: |
2692 | return NVPTXISD::Tld4R2DS64Float; |
2693 | case Intrinsic::nvvm_tld4_g_2d_v4s32_f32: |
2694 | return NVPTXISD::Tld4G2DS64Float; |
2695 | case Intrinsic::nvvm_tld4_b_2d_v4s32_f32: |
2696 | return NVPTXISD::Tld4B2DS64Float; |
2697 | case Intrinsic::nvvm_tld4_a_2d_v4s32_f32: |
2698 | return NVPTXISD::Tld4A2DS64Float; |
2699 | case Intrinsic::nvvm_tld4_r_2d_v4u32_f32: |
2700 | return NVPTXISD::Tld4R2DU64Float; |
2701 | case Intrinsic::nvvm_tld4_g_2d_v4u32_f32: |
2702 | return NVPTXISD::Tld4G2DU64Float; |
2703 | case Intrinsic::nvvm_tld4_b_2d_v4u32_f32: |
2704 | return NVPTXISD::Tld4B2DU64Float; |
2705 | case Intrinsic::nvvm_tld4_a_2d_v4u32_f32: |
2706 | return NVPTXISD::Tld4A2DU64Float; |
2707 | |
2708 | case Intrinsic::nvvm_tex_unified_1d_v4f32_s32: |
2709 | return NVPTXISD::TexUnified1DFloatS32; |
2710 | case Intrinsic::nvvm_tex_unified_1d_v4f32_f32: |
2711 | return NVPTXISD::TexUnified1DFloatFloat; |
2712 | case Intrinsic::nvvm_tex_unified_1d_level_v4f32_f32: |
2713 | return NVPTXISD::TexUnified1DFloatFloatLevel; |
2714 | case Intrinsic::nvvm_tex_unified_1d_grad_v4f32_f32: |
2715 | return NVPTXISD::TexUnified1DFloatFloatGrad; |
2716 | case Intrinsic::nvvm_tex_unified_1d_v4s32_s32: |
2717 | return NVPTXISD::TexUnified1DS32S32; |
2718 | case Intrinsic::nvvm_tex_unified_1d_v4s32_f32: |
2719 | return NVPTXISD::TexUnified1DS32Float; |
2720 | case Intrinsic::nvvm_tex_unified_1d_level_v4s32_f32: |
2721 | return NVPTXISD::TexUnified1DS32FloatLevel; |
2722 | case Intrinsic::nvvm_tex_unified_1d_grad_v4s32_f32: |
2723 | return NVPTXISD::TexUnified1DS32FloatGrad; |
2724 | case Intrinsic::nvvm_tex_unified_1d_v4u32_s32: |
2725 | return NVPTXISD::TexUnified1DU32S32; |
2726 | case Intrinsic::nvvm_tex_unified_1d_v4u32_f32: |
2727 | return NVPTXISD::TexUnified1DU32Float; |
2728 | case Intrinsic::nvvm_tex_unified_1d_level_v4u32_f32: |
2729 | return NVPTXISD::TexUnified1DU32FloatLevel; |
2730 | case Intrinsic::nvvm_tex_unified_1d_grad_v4u32_f32: |
2731 | return NVPTXISD::TexUnified1DU32FloatGrad; |
2732 | |
2733 | case Intrinsic::nvvm_tex_unified_1d_array_v4f32_s32: |
2734 | return NVPTXISD::TexUnified1DArrayFloatS32; |
2735 | case Intrinsic::nvvm_tex_unified_1d_array_v4f32_f32: |
2736 | return NVPTXISD::TexUnified1DArrayFloatFloat; |
2737 | case Intrinsic::nvvm_tex_unified_1d_array_level_v4f32_f32: |
2738 | return NVPTXISD::TexUnified1DArrayFloatFloatLevel; |
2739 | case Intrinsic::nvvm_tex_unified_1d_array_grad_v4f32_f32: |
2740 | return NVPTXISD::TexUnified1DArrayFloatFloatGrad; |
2741 | case Intrinsic::nvvm_tex_unified_1d_array_v4s32_s32: |
2742 | return NVPTXISD::TexUnified1DArrayS32S32; |
2743 | case Intrinsic::nvvm_tex_unified_1d_array_v4s32_f32: |
2744 | return NVPTXISD::TexUnified1DArrayS32Float; |
2745 | case Intrinsic::nvvm_tex_unified_1d_array_level_v4s32_f32: |
2746 | return NVPTXISD::TexUnified1DArrayS32FloatLevel; |
2747 | case Intrinsic::nvvm_tex_unified_1d_array_grad_v4s32_f32: |
2748 | return NVPTXISD::TexUnified1DArrayS32FloatGrad; |
2749 | case Intrinsic::nvvm_tex_unified_1d_array_v4u32_s32: |
2750 | return NVPTXISD::TexUnified1DArrayU32S32; |
2751 | case Intrinsic::nvvm_tex_unified_1d_array_v4u32_f32: |
2752 | return NVPTXISD::TexUnified1DArrayU32Float; |
2753 | case Intrinsic::nvvm_tex_unified_1d_array_level_v4u32_f32: |
2754 | return NVPTXISD::TexUnified1DArrayU32FloatLevel; |
2755 | case Intrinsic::nvvm_tex_unified_1d_array_grad_v4u32_f32: |
2756 | return NVPTXISD::TexUnified1DArrayU32FloatGrad; |
2757 | |
2758 | case Intrinsic::nvvm_tex_unified_2d_v4f32_s32: |
2759 | return NVPTXISD::TexUnified2DFloatS32; |
2760 | case Intrinsic::nvvm_tex_unified_2d_v4f32_f32: |
2761 | return NVPTXISD::TexUnified2DFloatFloat; |
2762 | case Intrinsic::nvvm_tex_unified_2d_level_v4f32_f32: |
2763 | return NVPTXISD::TexUnified2DFloatFloatLevel; |
2764 | case Intrinsic::nvvm_tex_unified_2d_grad_v4f32_f32: |
2765 | return NVPTXISD::TexUnified2DFloatFloatGrad; |
2766 | case Intrinsic::nvvm_tex_unified_2d_v4s32_s32: |
2767 | return NVPTXISD::TexUnified2DS32S32; |
2768 | case Intrinsic::nvvm_tex_unified_2d_v4s32_f32: |
2769 | return NVPTXISD::TexUnified2DS32Float; |
2770 | case Intrinsic::nvvm_tex_unified_2d_level_v4s32_f32: |
2771 | return NVPTXISD::TexUnified2DS32FloatLevel; |
2772 | case Intrinsic::nvvm_tex_unified_2d_grad_v4s32_f32: |
2773 | return NVPTXISD::TexUnified2DS32FloatGrad; |
2774 | case Intrinsic::nvvm_tex_unified_2d_v4u32_s32: |
2775 | return NVPTXISD::TexUnified2DU32S32; |
2776 | case Intrinsic::nvvm_tex_unified_2d_v4u32_f32: |
2777 | return NVPTXISD::TexUnified2DU32Float; |
2778 | case Intrinsic::nvvm_tex_unified_2d_level_v4u32_f32: |
2779 | return NVPTXISD::TexUnified2DU32FloatLevel; |
2780 | case Intrinsic::nvvm_tex_unified_2d_grad_v4u32_f32: |
2781 | return NVPTXISD::TexUnified2DU32FloatGrad; |
2782 | |
2783 | case Intrinsic::nvvm_tex_unified_2d_array_v4f32_s32: |
2784 | return NVPTXISD::TexUnified2DArrayFloatS32; |
2785 | case Intrinsic::nvvm_tex_unified_2d_array_v4f32_f32: |
2786 | return NVPTXISD::TexUnified2DArrayFloatFloat; |
2787 | case Intrinsic::nvvm_tex_unified_2d_array_level_v4f32_f32: |
2788 | return NVPTXISD::TexUnified2DArrayFloatFloatLevel; |
2789 | case Intrinsic::nvvm_tex_unified_2d_array_grad_v4f32_f32: |
2790 | return NVPTXISD::TexUnified2DArrayFloatFloatGrad; |
2791 | case Intrinsic::nvvm_tex_unified_2d_array_v4s32_s32: |
2792 | return NVPTXISD::TexUnified2DArrayS32S32; |
2793 | case Intrinsic::nvvm_tex_unified_2d_array_v4s32_f32: |
2794 | return NVPTXISD::TexUnified2DArrayS32Float; |
2795 | case Intrinsic::nvvm_tex_unified_2d_array_level_v4s32_f32: |
2796 | return NVPTXISD::TexUnified2DArrayS32FloatLevel; |
2797 | case Intrinsic::nvvm_tex_unified_2d_array_grad_v4s32_f32: |
2798 | return NVPTXISD::TexUnified2DArrayS32FloatGrad; |
2799 | case Intrinsic::nvvm_tex_unified_2d_array_v4u32_s32: |
2800 | return NVPTXISD::TexUnified2DArrayU32S32; |
2801 | case Intrinsic::nvvm_tex_unified_2d_array_v4u32_f32: |
2802 | return NVPTXISD::TexUnified2DArrayU32Float; |
2803 | case Intrinsic::nvvm_tex_unified_2d_array_level_v4u32_f32: |
2804 | return NVPTXISD::TexUnified2DArrayU32FloatLevel; |
2805 | case Intrinsic::nvvm_tex_unified_2d_array_grad_v4u32_f32: |
2806 | return NVPTXISD::TexUnified2DArrayU32FloatGrad; |
2807 | |
2808 | case Intrinsic::nvvm_tex_unified_3d_v4f32_s32: |
2809 | return NVPTXISD::TexUnified3DFloatS32; |
2810 | case Intrinsic::nvvm_tex_unified_3d_v4f32_f32: |
2811 | return NVPTXISD::TexUnified3DFloatFloat; |
2812 | case Intrinsic::nvvm_tex_unified_3d_level_v4f32_f32: |
2813 | return NVPTXISD::TexUnified3DFloatFloatLevel; |
2814 | case Intrinsic::nvvm_tex_unified_3d_grad_v4f32_f32: |
2815 | return NVPTXISD::TexUnified3DFloatFloatGrad; |
2816 | case Intrinsic::nvvm_tex_unified_3d_v4s32_s32: |
2817 | return NVPTXISD::TexUnified3DS32S32; |
2818 | case Intrinsic::nvvm_tex_unified_3d_v4s32_f32: |
2819 | return NVPTXISD::TexUnified3DS32Float; |
2820 | case Intrinsic::nvvm_tex_unified_3d_level_v4s32_f32: |
2821 | return NVPTXISD::TexUnified3DS32FloatLevel; |
2822 | case Intrinsic::nvvm_tex_unified_3d_grad_v4s32_f32: |
2823 | return NVPTXISD::TexUnified3DS32FloatGrad; |
2824 | case Intrinsic::nvvm_tex_unified_3d_v4u32_s32: |
2825 | return NVPTXISD::TexUnified3DU32S32; |
2826 | case Intrinsic::nvvm_tex_unified_3d_v4u32_f32: |
2827 | return NVPTXISD::TexUnified3DU32Float; |
2828 | case Intrinsic::nvvm_tex_unified_3d_level_v4u32_f32: |
2829 | return NVPTXISD::TexUnified3DU32FloatLevel; |
2830 | case Intrinsic::nvvm_tex_unified_3d_grad_v4u32_f32: |
2831 | return NVPTXISD::TexUnified3DU32FloatGrad; |
2832 | |
2833 | case Intrinsic::nvvm_tex_unified_cube_v4f32_f32: |
2834 | return NVPTXISD::TexUnifiedCubeFloatFloat; |
2835 | case Intrinsic::nvvm_tex_unified_cube_level_v4f32_f32: |
2836 | return NVPTXISD::TexUnifiedCubeFloatFloatLevel; |
2837 | case Intrinsic::nvvm_tex_unified_cube_v4s32_f32: |
2838 | return NVPTXISD::TexUnifiedCubeS32Float; |
2839 | case Intrinsic::nvvm_tex_unified_cube_level_v4s32_f32: |
2840 | return NVPTXISD::TexUnifiedCubeS32FloatLevel; |
2841 | case Intrinsic::nvvm_tex_unified_cube_v4u32_f32: |
2842 | return NVPTXISD::TexUnifiedCubeU32Float; |
2843 | case Intrinsic::nvvm_tex_unified_cube_level_v4u32_f32: |
2844 | return NVPTXISD::TexUnifiedCubeU32FloatLevel; |
2845 | |
2846 | case Intrinsic::nvvm_tex_unified_cube_array_v4f32_f32: |
2847 | return NVPTXISD::TexUnifiedCubeArrayFloatFloat; |
2848 | case Intrinsic::nvvm_tex_unified_cube_array_level_v4f32_f32: |
2849 | return NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel; |
2850 | case Intrinsic::nvvm_tex_unified_cube_array_v4s32_f32: |
2851 | return NVPTXISD::TexUnifiedCubeArrayS32Float; |
2852 | case Intrinsic::nvvm_tex_unified_cube_array_level_v4s32_f32: |
2853 | return NVPTXISD::TexUnifiedCubeArrayS32FloatLevel; |
2854 | case Intrinsic::nvvm_tex_unified_cube_array_v4u32_f32: |
2855 | return NVPTXISD::TexUnifiedCubeArrayU32Float; |
2856 | case Intrinsic::nvvm_tex_unified_cube_array_level_v4u32_f32: |
2857 | return NVPTXISD::TexUnifiedCubeArrayU32FloatLevel; |
2858 | |
2859 | case Intrinsic::nvvm_tld4_unified_r_2d_v4f32_f32: |
2860 | return NVPTXISD::Tld4UnifiedR2DFloatFloat; |
2861 | case Intrinsic::nvvm_tld4_unified_g_2d_v4f32_f32: |
2862 | return NVPTXISD::Tld4UnifiedG2DFloatFloat; |
2863 | case Intrinsic::nvvm_tld4_unified_b_2d_v4f32_f32: |
2864 | return NVPTXISD::Tld4UnifiedB2DFloatFloat; |
2865 | case Intrinsic::nvvm_tld4_unified_a_2d_v4f32_f32: |
2866 | return NVPTXISD::Tld4UnifiedA2DFloatFloat; |
2867 | case Intrinsic::nvvm_tld4_unified_r_2d_v4s32_f32: |
2868 | return NVPTXISD::Tld4UnifiedR2DS64Float; |
2869 | case Intrinsic::nvvm_tld4_unified_g_2d_v4s32_f32: |
2870 | return NVPTXISD::Tld4UnifiedG2DS64Float; |
2871 | case Intrinsic::nvvm_tld4_unified_b_2d_v4s32_f32: |
2872 | return NVPTXISD::Tld4UnifiedB2DS64Float; |
2873 | case Intrinsic::nvvm_tld4_unified_a_2d_v4s32_f32: |
2874 | return NVPTXISD::Tld4UnifiedA2DS64Float; |
2875 | case Intrinsic::nvvm_tld4_unified_r_2d_v4u32_f32: |
2876 | return NVPTXISD::Tld4UnifiedR2DU64Float; |
2877 | case Intrinsic::nvvm_tld4_unified_g_2d_v4u32_f32: |
2878 | return NVPTXISD::Tld4UnifiedG2DU64Float; |
2879 | case Intrinsic::nvvm_tld4_unified_b_2d_v4u32_f32: |
2880 | return NVPTXISD::Tld4UnifiedB2DU64Float; |
2881 | case Intrinsic::nvvm_tld4_unified_a_2d_v4u32_f32: |
2882 | return NVPTXISD::Tld4UnifiedA2DU64Float; |
2883 | } |
2884 | } |
2885 | |
2886 | static unsigned getOpcForSurfaceInstr(unsigned Intrinsic) { |
2887 | switch (Intrinsic) { |
2888 | default: |
2889 | return 0; |
2890 | case Intrinsic::nvvm_suld_1d_i8_clamp: |
2891 | return NVPTXISD::Suld1DI8Clamp; |
2892 | case Intrinsic::nvvm_suld_1d_i16_clamp: |
2893 | return NVPTXISD::Suld1DI16Clamp; |
2894 | case Intrinsic::nvvm_suld_1d_i32_clamp: |
2895 | return NVPTXISD::Suld1DI32Clamp; |
2896 | case Intrinsic::nvvm_suld_1d_i64_clamp: |
2897 | return NVPTXISD::Suld1DI64Clamp; |
2898 | case Intrinsic::nvvm_suld_1d_v2i8_clamp: |
2899 | return NVPTXISD::Suld1DV2I8Clamp; |
2900 | case Intrinsic::nvvm_suld_1d_v2i16_clamp: |
2901 | return NVPTXISD::Suld1DV2I16Clamp; |
2902 | case Intrinsic::nvvm_suld_1d_v2i32_clamp: |
2903 | return NVPTXISD::Suld1DV2I32Clamp; |
2904 | case Intrinsic::nvvm_suld_1d_v2i64_clamp: |
2905 | return NVPTXISD::Suld1DV2I64Clamp; |
2906 | case Intrinsic::nvvm_suld_1d_v4i8_clamp: |
2907 | return NVPTXISD::Suld1DV4I8Clamp; |
2908 | case Intrinsic::nvvm_suld_1d_v4i16_clamp: |
2909 | return NVPTXISD::Suld1DV4I16Clamp; |
2910 | case Intrinsic::nvvm_suld_1d_v4i32_clamp: |
2911 | return NVPTXISD::Suld1DV4I32Clamp; |
2912 | case Intrinsic::nvvm_suld_1d_array_i8_clamp: |
2913 | return NVPTXISD::Suld1DArrayI8Clamp; |
2914 | case Intrinsic::nvvm_suld_1d_array_i16_clamp: |
2915 | return NVPTXISD::Suld1DArrayI16Clamp; |
2916 | case Intrinsic::nvvm_suld_1d_array_i32_clamp: |
2917 | return NVPTXISD::Suld1DArrayI32Clamp; |
2918 | case Intrinsic::nvvm_suld_1d_array_i64_clamp: |
2919 | return NVPTXISD::Suld1DArrayI64Clamp; |
2920 | case Intrinsic::nvvm_suld_1d_array_v2i8_clamp: |
2921 | return NVPTXISD::Suld1DArrayV2I8Clamp; |
2922 | case Intrinsic::nvvm_suld_1d_array_v2i16_clamp: |
2923 | return NVPTXISD::Suld1DArrayV2I16Clamp; |
2924 | case Intrinsic::nvvm_suld_1d_array_v2i32_clamp: |
2925 | return NVPTXISD::Suld1DArrayV2I32Clamp; |
2926 | case Intrinsic::nvvm_suld_1d_array_v2i64_clamp: |
2927 | return NVPTXISD::Suld1DArrayV2I64Clamp; |
2928 | case Intrinsic::nvvm_suld_1d_array_v4i8_clamp: |
2929 | return NVPTXISD::Suld1DArrayV4I8Clamp; |
2930 | case Intrinsic::nvvm_suld_1d_array_v4i16_clamp: |
2931 | return NVPTXISD::Suld1DArrayV4I16Clamp; |
2932 | case Intrinsic::nvvm_suld_1d_array_v4i32_clamp: |
2933 | return NVPTXISD::Suld1DArrayV4I32Clamp; |
2934 | case Intrinsic::nvvm_suld_2d_i8_clamp: |
2935 | return NVPTXISD::Suld2DI8Clamp; |
2936 | case Intrinsic::nvvm_suld_2d_i16_clamp: |
2937 | return NVPTXISD::Suld2DI16Clamp; |
2938 | case Intrinsic::nvvm_suld_2d_i32_clamp: |
2939 | return NVPTXISD::Suld2DI32Clamp; |
2940 | case Intrinsic::nvvm_suld_2d_i64_clamp: |
2941 | return NVPTXISD::Suld2DI64Clamp; |
2942 | case Intrinsic::nvvm_suld_2d_v2i8_clamp: |
2943 | return NVPTXISD::Suld2DV2I8Clamp; |
2944 | case Intrinsic::nvvm_suld_2d_v2i16_clamp: |
2945 | return NVPTXISD::Suld2DV2I16Clamp; |
2946 | case Intrinsic::nvvm_suld_2d_v2i32_clamp: |
2947 | return NVPTXISD::Suld2DV2I32Clamp; |
2948 | case Intrinsic::nvvm_suld_2d_v2i64_clamp: |
2949 | return NVPTXISD::Suld2DV2I64Clamp; |
2950 | case Intrinsic::nvvm_suld_2d_v4i8_clamp: |
2951 | return NVPTXISD::Suld2DV4I8Clamp; |
2952 | case Intrinsic::nvvm_suld_2d_v4i16_clamp: |
2953 | return NVPTXISD::Suld2DV4I16Clamp; |
2954 | case Intrinsic::nvvm_suld_2d_v4i32_clamp: |
2955 | return NVPTXISD::Suld2DV4I32Clamp; |
2956 | case Intrinsic::nvvm_suld_2d_array_i8_clamp: |
2957 | return NVPTXISD::Suld2DArrayI8Clamp; |
2958 | case Intrinsic::nvvm_suld_2d_array_i16_clamp: |
2959 | return NVPTXISD::Suld2DArrayI16Clamp; |
2960 | case Intrinsic::nvvm_suld_2d_array_i32_clamp: |
2961 | return NVPTXISD::Suld2DArrayI32Clamp; |
2962 | case Intrinsic::nvvm_suld_2d_array_i64_clamp: |
2963 | return NVPTXISD::Suld2DArrayI64Clamp; |
2964 | case Intrinsic::nvvm_suld_2d_array_v2i8_clamp: |
2965 | return NVPTXISD::Suld2DArrayV2I8Clamp; |
2966 | case Intrinsic::nvvm_suld_2d_array_v2i16_clamp: |
2967 | return NVPTXISD::Suld2DArrayV2I16Clamp; |
2968 | case Intrinsic::nvvm_suld_2d_array_v2i32_clamp: |
2969 | return NVPTXISD::Suld2DArrayV2I32Clamp; |
2970 | case Intrinsic::nvvm_suld_2d_array_v2i64_clamp: |
2971 | return NVPTXISD::Suld2DArrayV2I64Clamp; |
2972 | case Intrinsic::nvvm_suld_2d_array_v4i8_clamp: |
2973 | return NVPTXISD::Suld2DArrayV4I8Clamp; |
2974 | case Intrinsic::nvvm_suld_2d_array_v4i16_clamp: |
2975 | return NVPTXISD::Suld2DArrayV4I16Clamp; |
2976 | case Intrinsic::nvvm_suld_2d_array_v4i32_clamp: |
2977 | return NVPTXISD::Suld2DArrayV4I32Clamp; |
2978 | case Intrinsic::nvvm_suld_3d_i8_clamp: |
2979 | return NVPTXISD::Suld3DI8Clamp; |
2980 | case Intrinsic::nvvm_suld_3d_i16_clamp: |
2981 | return NVPTXISD::Suld3DI16Clamp; |
2982 | case Intrinsic::nvvm_suld_3d_i32_clamp: |
2983 | return NVPTXISD::Suld3DI32Clamp; |
2984 | case Intrinsic::nvvm_suld_3d_i64_clamp: |
2985 | return NVPTXISD::Suld3DI64Clamp; |
2986 | case Intrinsic::nvvm_suld_3d_v2i8_clamp: |
2987 | return NVPTXISD::Suld3DV2I8Clamp; |
2988 | case Intrinsic::nvvm_suld_3d_v2i16_clamp: |
2989 | return NVPTXISD::Suld3DV2I16Clamp; |
2990 | case Intrinsic::nvvm_suld_3d_v2i32_clamp: |
2991 | return NVPTXISD::Suld3DV2I32Clamp; |
2992 | case Intrinsic::nvvm_suld_3d_v2i64_clamp: |
2993 | return NVPTXISD::Suld3DV2I64Clamp; |
2994 | case Intrinsic::nvvm_suld_3d_v4i8_clamp: |
2995 | return NVPTXISD::Suld3DV4I8Clamp; |
2996 | case Intrinsic::nvvm_suld_3d_v4i16_clamp: |
2997 | return NVPTXISD::Suld3DV4I16Clamp; |
2998 | case Intrinsic::nvvm_suld_3d_v4i32_clamp: |
2999 | return NVPTXISD::Suld3DV4I32Clamp; |
3000 | case Intrinsic::nvvm_suld_1d_i8_trap: |
3001 | return NVPTXISD::Suld1DI8Trap; |
3002 | case Intrinsic::nvvm_suld_1d_i16_trap: |
3003 | return NVPTXISD::Suld1DI16Trap; |
3004 | case Intrinsic::nvvm_suld_1d_i32_trap: |
3005 | return NVPTXISD::Suld1DI32Trap; |
3006 | case Intrinsic::nvvm_suld_1d_i64_trap: |
3007 | return NVPTXISD::Suld1DI64Trap; |
3008 | case Intrinsic::nvvm_suld_1d_v2i8_trap: |
3009 | return NVPTXISD::Suld1DV2I8Trap; |
3010 | case Intrinsic::nvvm_suld_1d_v2i16_trap: |
3011 | return NVPTXISD::Suld1DV2I16Trap; |
3012 | case Intrinsic::nvvm_suld_1d_v2i32_trap: |
3013 | return NVPTXISD::Suld1DV2I32Trap; |
3014 | case Intrinsic::nvvm_suld_1d_v2i64_trap: |
3015 | return NVPTXISD::Suld1DV2I64Trap; |
3016 | case Intrinsic::nvvm_suld_1d_v4i8_trap: |
3017 | return NVPTXISD::Suld1DV4I8Trap; |
3018 | case Intrinsic::nvvm_suld_1d_v4i16_trap: |
3019 | return NVPTXISD::Suld1DV4I16Trap; |
3020 | case Intrinsic::nvvm_suld_1d_v4i32_trap: |
3021 | return NVPTXISD::Suld1DV4I32Trap; |
3022 | case Intrinsic::nvvm_suld_1d_array_i8_trap: |
3023 | return NVPTXISD::Suld1DArrayI8Trap; |
3024 | case Intrinsic::nvvm_suld_1d_array_i16_trap: |
3025 | return NVPTXISD::Suld1DArrayI16Trap; |
3026 | case Intrinsic::nvvm_suld_1d_array_i32_trap: |
3027 | return NVPTXISD::Suld1DArrayI32Trap; |
3028 | case Intrinsic::nvvm_suld_1d_array_i64_trap: |
3029 | return NVPTXISD::Suld1DArrayI64Trap; |
3030 | case Intrinsic::nvvm_suld_1d_array_v2i8_trap: |
3031 | return NVPTXISD::Suld1DArrayV2I8Trap; |
3032 | case Intrinsic::nvvm_suld_1d_array_v2i16_trap: |
3033 | return NVPTXISD::Suld1DArrayV2I16Trap; |
3034 | case Intrinsic::nvvm_suld_1d_array_v2i32_trap: |
3035 | return NVPTXISD::Suld1DArrayV2I32Trap; |
3036 | case Intrinsic::nvvm_suld_1d_array_v2i64_trap: |
3037 | return NVPTXISD::Suld1DArrayV2I64Trap; |
3038 | case Intrinsic::nvvm_suld_1d_array_v4i8_trap: |
3039 | return NVPTXISD::Suld1DArrayV4I8Trap; |
3040 | case Intrinsic::nvvm_suld_1d_array_v4i16_trap: |
3041 | return NVPTXISD::Suld1DArrayV4I16Trap; |
3042 | case Intrinsic::nvvm_suld_1d_array_v4i32_trap: |
3043 | return NVPTXISD::Suld1DArrayV4I32Trap; |
3044 | case Intrinsic::nvvm_suld_2d_i8_trap: |
3045 | return NVPTXISD::Suld2DI8Trap; |
3046 | case Intrinsic::nvvm_suld_2d_i16_trap: |
3047 | return NVPTXISD::Suld2DI16Trap; |
3048 | case Intrinsic::nvvm_suld_2d_i32_trap: |
3049 | return NVPTXISD::Suld2DI32Trap; |
3050 | case Intrinsic::nvvm_suld_2d_i64_trap: |
3051 | return NVPTXISD::Suld2DI64Trap; |
3052 | case Intrinsic::nvvm_suld_2d_v2i8_trap: |
3053 | return NVPTXISD::Suld2DV2I8Trap; |
3054 | case Intrinsic::nvvm_suld_2d_v2i16_trap: |
3055 | return NVPTXISD::Suld2DV2I16Trap; |
3056 | case Intrinsic::nvvm_suld_2d_v2i32_trap: |
3057 | return NVPTXISD::Suld2DV2I32Trap; |
3058 | case Intrinsic::nvvm_suld_2d_v2i64_trap: |
3059 | return NVPTXISD::Suld2DV2I64Trap; |
3060 | case Intrinsic::nvvm_suld_2d_v4i8_trap: |
3061 | return NVPTXISD::Suld2DV4I8Trap; |
3062 | case Intrinsic::nvvm_suld_2d_v4i16_trap: |
3063 | return NVPTXISD::Suld2DV4I16Trap; |
3064 | case Intrinsic::nvvm_suld_2d_v4i32_trap: |
3065 | return NVPTXISD::Suld2DV4I32Trap; |
3066 | case Intrinsic::nvvm_suld_2d_array_i8_trap: |
3067 | return NVPTXISD::Suld2DArrayI8Trap; |
3068 | case Intrinsic::nvvm_suld_2d_array_i16_trap: |
3069 | return NVPTXISD::Suld2DArrayI16Trap; |
3070 | case Intrinsic::nvvm_suld_2d_array_i32_trap: |
3071 | return NVPTXISD::Suld2DArrayI32Trap; |
3072 | case Intrinsic::nvvm_suld_2d_array_i64_trap: |
3073 | return NVPTXISD::Suld2DArrayI64Trap; |
3074 | case Intrinsic::nvvm_suld_2d_array_v2i8_trap: |
3075 | return NVPTXISD::Suld2DArrayV2I8Trap; |
3076 | case Intrinsic::nvvm_suld_2d_array_v2i16_trap: |
3077 | return NVPTXISD::Suld2DArrayV2I16Trap; |
3078 | case Intrinsic::nvvm_suld_2d_array_v2i32_trap: |
3079 | return NVPTXISD::Suld2DArrayV2I32Trap; |
3080 | case Intrinsic::nvvm_suld_2d_array_v2i64_trap: |
3081 | return NVPTXISD::Suld2DArrayV2I64Trap; |
3082 | case Intrinsic::nvvm_suld_2d_array_v4i8_trap: |
3083 | return NVPTXISD::Suld2DArrayV4I8Trap; |
3084 | case Intrinsic::nvvm_suld_2d_array_v4i16_trap: |
3085 | return NVPTXISD::Suld2DArrayV4I16Trap; |
3086 | case Intrinsic::nvvm_suld_2d_array_v4i32_trap: |
3087 | return NVPTXISD::Suld2DArrayV4I32Trap; |
3088 | case Intrinsic::nvvm_suld_3d_i8_trap: |
3089 | return NVPTXISD::Suld3DI8Trap; |
3090 | case Intrinsic::nvvm_suld_3d_i16_trap: |
3091 | return NVPTXISD::Suld3DI16Trap; |
3092 | case Intrinsic::nvvm_suld_3d_i32_trap: |
3093 | return NVPTXISD::Suld3DI32Trap; |
3094 | case Intrinsic::nvvm_suld_3d_i64_trap: |
3095 | return NVPTXISD::Suld3DI64Trap; |
3096 | case Intrinsic::nvvm_suld_3d_v2i8_trap: |
3097 | return NVPTXISD::Suld3DV2I8Trap; |
3098 | case Intrinsic::nvvm_suld_3d_v2i16_trap: |
3099 | return NVPTXISD::Suld3DV2I16Trap; |
3100 | case Intrinsic::nvvm_suld_3d_v2i32_trap: |
3101 | return NVPTXISD::Suld3DV2I32Trap; |
3102 | case Intrinsic::nvvm_suld_3d_v2i64_trap: |
3103 | return NVPTXISD::Suld3DV2I64Trap; |
3104 | case Intrinsic::nvvm_suld_3d_v4i8_trap: |
3105 | return NVPTXISD::Suld3DV4I8Trap; |
3106 | case Intrinsic::nvvm_suld_3d_v4i16_trap: |
3107 | return NVPTXISD::Suld3DV4I16Trap; |
3108 | case Intrinsic::nvvm_suld_3d_v4i32_trap: |
3109 | return NVPTXISD::Suld3DV4I32Trap; |
3110 | case Intrinsic::nvvm_suld_1d_i8_zero: |
3111 | return NVPTXISD::Suld1DI8Zero; |
3112 | case Intrinsic::nvvm_suld_1d_i16_zero: |
3113 | return NVPTXISD::Suld1DI16Zero; |
3114 | case Intrinsic::nvvm_suld_1d_i32_zero: |
3115 | return NVPTXISD::Suld1DI32Zero; |
3116 | case Intrinsic::nvvm_suld_1d_i64_zero: |
3117 | return NVPTXISD::Suld1DI64Zero; |
3118 | case Intrinsic::nvvm_suld_1d_v2i8_zero: |
3119 | return NVPTXISD::Suld1DV2I8Zero; |
3120 | case Intrinsic::nvvm_suld_1d_v2i16_zero: |
3121 | return NVPTXISD::Suld1DV2I16Zero; |
3122 | case Intrinsic::nvvm_suld_1d_v2i32_zero: |
3123 | return NVPTXISD::Suld1DV2I32Zero; |
3124 | case Intrinsic::nvvm_suld_1d_v2i64_zero: |
3125 | return NVPTXISD::Suld1DV2I64Zero; |
3126 | case Intrinsic::nvvm_suld_1d_v4i8_zero: |
3127 | return NVPTXISD::Suld1DV4I8Zero; |
3128 | case Intrinsic::nvvm_suld_1d_v4i16_zero: |
3129 | return NVPTXISD::Suld1DV4I16Zero; |
3130 | case Intrinsic::nvvm_suld_1d_v4i32_zero: |
3131 | return NVPTXISD::Suld1DV4I32Zero; |
3132 | case Intrinsic::nvvm_suld_1d_array_i8_zero: |
3133 | return NVPTXISD::Suld1DArrayI8Zero; |
3134 | case Intrinsic::nvvm_suld_1d_array_i16_zero: |
3135 | return NVPTXISD::Suld1DArrayI16Zero; |
3136 | case Intrinsic::nvvm_suld_1d_array_i32_zero: |
3137 | return NVPTXISD::Suld1DArrayI32Zero; |
3138 | case Intrinsic::nvvm_suld_1d_array_i64_zero: |
3139 | return NVPTXISD::Suld1DArrayI64Zero; |
3140 | case Intrinsic::nvvm_suld_1d_array_v2i8_zero: |
3141 | return NVPTXISD::Suld1DArrayV2I8Zero; |
3142 | case Intrinsic::nvvm_suld_1d_array_v2i16_zero: |
3143 | return NVPTXISD::Suld1DArrayV2I16Zero; |
3144 | case Intrinsic::nvvm_suld_1d_array_v2i32_zero: |
3145 | return NVPTXISD::Suld1DArrayV2I32Zero; |
3146 | case Intrinsic::nvvm_suld_1d_array_v2i64_zero: |
3147 | return NVPTXISD::Suld1DArrayV2I64Zero; |
3148 | case Intrinsic::nvvm_suld_1d_array_v4i8_zero: |
3149 | return NVPTXISD::Suld1DArrayV4I8Zero; |
3150 | case Intrinsic::nvvm_suld_1d_array_v4i16_zero: |
3151 | return NVPTXISD::Suld1DArrayV4I16Zero; |
3152 | case Intrinsic::nvvm_suld_1d_array_v4i32_zero: |
3153 | return NVPTXISD::Suld1DArrayV4I32Zero; |
3154 | case Intrinsic::nvvm_suld_2d_i8_zero: |
3155 | return NVPTXISD::Suld2DI8Zero; |
3156 | case Intrinsic::nvvm_suld_2d_i16_zero: |
3157 | return NVPTXISD::Suld2DI16Zero; |
3158 | case Intrinsic::nvvm_suld_2d_i32_zero: |
3159 | return NVPTXISD::Suld2DI32Zero; |
3160 | case Intrinsic::nvvm_suld_2d_i64_zero: |
3161 | return NVPTXISD::Suld2DI64Zero; |
3162 | case Intrinsic::nvvm_suld_2d_v2i8_zero: |
3163 | return NVPTXISD::Suld2DV2I8Zero; |
3164 | case Intrinsic::nvvm_suld_2d_v2i16_zero: |
3165 | return NVPTXISD::Suld2DV2I16Zero; |
3166 | case Intrinsic::nvvm_suld_2d_v2i32_zero: |
3167 | return NVPTXISD::Suld2DV2I32Zero; |
3168 | case Intrinsic::nvvm_suld_2d_v2i64_zero: |
3169 | return NVPTXISD::Suld2DV2I64Zero; |
3170 | case Intrinsic::nvvm_suld_2d_v4i8_zero: |
3171 | return NVPTXISD::Suld2DV4I8Zero; |
3172 | case Intrinsic::nvvm_suld_2d_v4i16_zero: |
3173 | return NVPTXISD::Suld2DV4I16Zero; |
3174 | case Intrinsic::nvvm_suld_2d_v4i32_zero: |
3175 | return NVPTXISD::Suld2DV4I32Zero; |
3176 | case Intrinsic::nvvm_suld_2d_array_i8_zero: |
3177 | return NVPTXISD::Suld2DArrayI8Zero; |
3178 | case Intrinsic::nvvm_suld_2d_array_i16_zero: |
3179 | return NVPTXISD::Suld2DArrayI16Zero; |
3180 | case Intrinsic::nvvm_suld_2d_array_i32_zero: |
3181 | return NVPTXISD::Suld2DArrayI32Zero; |
3182 | case Intrinsic::nvvm_suld_2d_array_i64_zero: |
3183 | return NVPTXISD::Suld2DArrayI64Zero; |
3184 | case Intrinsic::nvvm_suld_2d_array_v2i8_zero: |
3185 | return NVPTXISD::Suld2DArrayV2I8Zero; |
3186 | case Intrinsic::nvvm_suld_2d_array_v2i16_zero: |
3187 | return NVPTXISD::Suld2DArrayV2I16Zero; |
3188 | case Intrinsic::nvvm_suld_2d_array_v2i32_zero: |
3189 | return NVPTXISD::Suld2DArrayV2I32Zero; |
3190 | case Intrinsic::nvvm_suld_2d_array_v2i64_zero: |
3191 | return NVPTXISD::Suld2DArrayV2I64Zero; |
3192 | case Intrinsic::nvvm_suld_2d_array_v4i8_zero: |
3193 | return NVPTXISD::Suld2DArrayV4I8Zero; |
3194 | case Intrinsic::nvvm_suld_2d_array_v4i16_zero: |
3195 | return NVPTXISD::Suld2DArrayV4I16Zero; |
3196 | case Intrinsic::nvvm_suld_2d_array_v4i32_zero: |
3197 | return NVPTXISD::Suld2DArrayV4I32Zero; |
3198 | case Intrinsic::nvvm_suld_3d_i8_zero: |
3199 | return NVPTXISD::Suld3DI8Zero; |
3200 | case Intrinsic::nvvm_suld_3d_i16_zero: |
3201 | return NVPTXISD::Suld3DI16Zero; |
3202 | case Intrinsic::nvvm_suld_3d_i32_zero: |
3203 | return NVPTXISD::Suld3DI32Zero; |
3204 | case Intrinsic::nvvm_suld_3d_i64_zero: |
3205 | return NVPTXISD::Suld3DI64Zero; |
3206 | case Intrinsic::nvvm_suld_3d_v2i8_zero: |
3207 | return NVPTXISD::Suld3DV2I8Zero; |
3208 | case Intrinsic::nvvm_suld_3d_v2i16_zero: |
3209 | return NVPTXISD::Suld3DV2I16Zero; |
3210 | case Intrinsic::nvvm_suld_3d_v2i32_zero: |
3211 | return NVPTXISD::Suld3DV2I32Zero; |
3212 | case Intrinsic::nvvm_suld_3d_v2i64_zero: |
3213 | return NVPTXISD::Suld3DV2I64Zero; |
3214 | case Intrinsic::nvvm_suld_3d_v4i8_zero: |
3215 | return NVPTXISD::Suld3DV4I8Zero; |
3216 | case Intrinsic::nvvm_suld_3d_v4i16_zero: |
3217 | return NVPTXISD::Suld3DV4I16Zero; |
3218 | case Intrinsic::nvvm_suld_3d_v4i32_zero: |
3219 | return NVPTXISD::Suld3DV4I32Zero; |
3220 | } |
3221 | } |
3222 | |
3223 | // llvm.ptx.memcpy.const and llvm.ptx.memmove.const need to be modeled as |
3224 | // TgtMemIntrinsic |
3225 | // because we need the information that is only available in the "Value" type |
3226 | // of destination |
3227 | // pointer. In particular, the address space information. |
3228 | bool NVPTXTargetLowering::getTgtMemIntrinsic( |
3229 | IntrinsicInfo &Info, const CallInst &I, unsigned Intrinsic) const { |
3230 | switch (Intrinsic) { |
3231 | default: |
3232 | return false; |
3233 | |
3234 | case Intrinsic::nvvm_atomic_load_add_f32: |
3235 | Info.opc = ISD::INTRINSIC_W_CHAIN; |
3236 | Info.memVT = MVT::f32; |
3237 | Info.ptrVal = I.getArgOperand(0); |
3238 | Info.offset = 0; |
3239 | Info.vol = 0; |
3240 | Info.readMem = true; |
3241 | Info.writeMem = true; |
3242 | Info.align = 0; |
3243 | return true; |
3244 | |
3245 | case Intrinsic::nvvm_atomic_load_inc_32: |
3246 | case Intrinsic::nvvm_atomic_load_dec_32: |
3247 | Info.opc = ISD::INTRINSIC_W_CHAIN; |
3248 | Info.memVT = MVT::i32; |
3249 | Info.ptrVal = I.getArgOperand(0); |
3250 | Info.offset = 0; |
3251 | Info.vol = 0; |
3252 | Info.readMem = true; |
3253 | Info.writeMem = true; |
3254 | Info.align = 0; |
3255 | return true; |
3256 | |
3257 | case Intrinsic::nvvm_ldu_global_i: |
3258 | case Intrinsic::nvvm_ldu_global_f: |
3259 | case Intrinsic::nvvm_ldu_global_p: { |
3260 | |
3261 | Info.opc = ISD::INTRINSIC_W_CHAIN; |
3262 | if (Intrinsic == Intrinsic::nvvm_ldu_global_i) |
3263 | Info.memVT = getValueType(I.getType()); |
3264 | else if(Intrinsic == Intrinsic::nvvm_ldu_global_p) |
3265 | Info.memVT = getPointerTy(); |
3266 | else |
3267 | Info.memVT = getValueType(I.getType()); |
3268 | Info.ptrVal = I.getArgOperand(0); |
3269 | Info.offset = 0; |
3270 | Info.vol = 0; |
3271 | Info.readMem = true; |
3272 | Info.writeMem = false; |
3273 | Info.align = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue(); |
3274 | |
3275 | return true; |
3276 | } |
3277 | case Intrinsic::nvvm_ldg_global_i: |
3278 | case Intrinsic::nvvm_ldg_global_f: |
3279 | case Intrinsic::nvvm_ldg_global_p: { |
3280 | |
3281 | Info.opc = ISD::INTRINSIC_W_CHAIN; |
3282 | if (Intrinsic == Intrinsic::nvvm_ldg_global_i) |
3283 | Info.memVT = getValueType(I.getType()); |
3284 | else if(Intrinsic == Intrinsic::nvvm_ldg_global_p) |
3285 | Info.memVT = getPointerTy(); |
3286 | else |
3287 | Info.memVT = getValueType(I.getType()); |
3288 | Info.ptrVal = I.getArgOperand(0); |
3289 | Info.offset = 0; |
3290 | Info.vol = 0; |
3291 | Info.readMem = true; |
3292 | Info.writeMem = false; |
3293 | Info.align = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue(); |
3294 | |
3295 | return true; |
3296 | } |
3297 | |
3298 | case Intrinsic::nvvm_tex_1d_v4f32_s32: |
3299 | case Intrinsic::nvvm_tex_1d_v4f32_f32: |
3300 | case Intrinsic::nvvm_tex_1d_level_v4f32_f32: |
3301 | case Intrinsic::nvvm_tex_1d_grad_v4f32_f32: |
3302 | case Intrinsic::nvvm_tex_1d_array_v4f32_s32: |
3303 | case Intrinsic::nvvm_tex_1d_array_v4f32_f32: |
3304 | case Intrinsic::nvvm_tex_1d_array_level_v4f32_f32: |
3305 | case Intrinsic::nvvm_tex_1d_array_grad_v4f32_f32: |
3306 | case Intrinsic::nvvm_tex_2d_v4f32_s32: |
3307 | case Intrinsic::nvvm_tex_2d_v4f32_f32: |
3308 | case Intrinsic::nvvm_tex_2d_level_v4f32_f32: |
3309 | case Intrinsic::nvvm_tex_2d_grad_v4f32_f32: |
3310 | case Intrinsic::nvvm_tex_2d_array_v4f32_s32: |
3311 | case Intrinsic::nvvm_tex_2d_array_v4f32_f32: |
3312 | case Intrinsic::nvvm_tex_2d_array_level_v4f32_f32: |
3313 | case Intrinsic::nvvm_tex_2d_array_grad_v4f32_f32: |
3314 | case Intrinsic::nvvm_tex_3d_v4f32_s32: |
3315 | case Intrinsic::nvvm_tex_3d_v4f32_f32: |
3316 | case Intrinsic::nvvm_tex_3d_level_v4f32_f32: |
3317 | case Intrinsic::nvvm_tex_3d_grad_v4f32_f32: |
3318 | case Intrinsic::nvvm_tex_cube_v4f32_f32: |
3319 | case Intrinsic::nvvm_tex_cube_level_v4f32_f32: |
3320 | case Intrinsic::nvvm_tex_cube_array_v4f32_f32: |
3321 | case Intrinsic::nvvm_tex_cube_array_level_v4f32_f32: |
3322 | case Intrinsic::nvvm_tld4_r_2d_v4f32_f32: |
3323 | case Intrinsic::nvvm_tld4_g_2d_v4f32_f32: |
3324 | case Intrinsic::nvvm_tld4_b_2d_v4f32_f32: |
3325 | case Intrinsic::nvvm_tld4_a_2d_v4f32_f32: |
3326 | case Intrinsic::nvvm_tex_unified_1d_v4f32_s32: |
3327 | case Intrinsic::nvvm_tex_unified_1d_v4f32_f32: |
3328 | case Intrinsic::nvvm_tex_unified_1d_level_v4f32_f32: |
3329 | case Intrinsic::nvvm_tex_unified_1d_grad_v4f32_f32: |
3330 | case Intrinsic::nvvm_tex_unified_1d_array_v4f32_s32: |
3331 | case Intrinsic::nvvm_tex_unified_1d_array_v4f32_f32: |
3332 | case Intrinsic::nvvm_tex_unified_1d_array_level_v4f32_f32: |
3333 | case Intrinsic::nvvm_tex_unified_1d_array_grad_v4f32_f32: |
3334 | case Intrinsic::nvvm_tex_unified_2d_v4f32_s32: |
3335 | case Intrinsic::nvvm_tex_unified_2d_v4f32_f32: |
3336 | case Intrinsic::nvvm_tex_unified_2d_level_v4f32_f32: |
3337 | case Intrinsic::nvvm_tex_unified_2d_grad_v4f32_f32: |
3338 | case Intrinsic::nvvm_tex_unified_2d_array_v4f32_s32: |
3339 | case Intrinsic::nvvm_tex_unified_2d_array_v4f32_f32: |
3340 | case Intrinsic::nvvm_tex_unified_2d_array_level_v4f32_f32: |
3341 | case Intrinsic::nvvm_tex_unified_2d_array_grad_v4f32_f32: |
3342 | case Intrinsic::nvvm_tex_unified_3d_v4f32_s32: |
3343 | case Intrinsic::nvvm_tex_unified_3d_v4f32_f32: |
3344 | case Intrinsic::nvvm_tex_unified_3d_level_v4f32_f32: |
3345 | case Intrinsic::nvvm_tex_unified_3d_grad_v4f32_f32: |
3346 | case Intrinsic::nvvm_tex_unified_cube_v4f32_f32: |
3347 | case Intrinsic::nvvm_tex_unified_cube_level_v4f32_f32: |
3348 | case Intrinsic::nvvm_tex_unified_cube_array_v4f32_f32: |
3349 | case Intrinsic::nvvm_tex_unified_cube_array_level_v4f32_f32: |
3350 | case Intrinsic::nvvm_tld4_unified_r_2d_v4f32_f32: |
3351 | case Intrinsic::nvvm_tld4_unified_g_2d_v4f32_f32: |
3352 | case Intrinsic::nvvm_tld4_unified_b_2d_v4f32_f32: |
3353 | case Intrinsic::nvvm_tld4_unified_a_2d_v4f32_f32: { |
3354 | Info.opc = getOpcForTextureInstr(Intrinsic); |
3355 | Info.memVT = MVT::v4f32; |
3356 | Info.ptrVal = nullptr; |
3357 | Info.offset = 0; |
3358 | Info.vol = 0; |
3359 | Info.readMem = true; |
3360 | Info.writeMem = false; |
3361 | Info.align = 16; |
3362 | return true; |
3363 | } |
3364 | case Intrinsic::nvvm_tex_1d_v4s32_s32: |
3365 | case Intrinsic::nvvm_tex_1d_v4s32_f32: |
3366 | case Intrinsic::nvvm_tex_1d_level_v4s32_f32: |
3367 | case Intrinsic::nvvm_tex_1d_grad_v4s32_f32: |
3368 | case Intrinsic::nvvm_tex_1d_array_v4s32_s32: |
3369 | case Intrinsic::nvvm_tex_1d_array_v4s32_f32: |
3370 | case Intrinsic::nvvm_tex_1d_array_level_v4s32_f32: |
3371 | case Intrinsic::nvvm_tex_1d_array_grad_v4s32_f32: |
3372 | case Intrinsic::nvvm_tex_2d_v4s32_s32: |
3373 | case Intrinsic::nvvm_tex_2d_v4s32_f32: |
3374 | case Intrinsic::nvvm_tex_2d_level_v4s32_f32: |
3375 | case Intrinsic::nvvm_tex_2d_grad_v4s32_f32: |
3376 | case Intrinsic::nvvm_tex_2d_array_v4s32_s32: |
3377 | case Intrinsic::nvvm_tex_2d_array_v4s32_f32: |
3378 | case Intrinsic::nvvm_tex_2d_array_level_v4s32_f32: |
3379 | case Intrinsic::nvvm_tex_2d_array_grad_v4s32_f32: |
3380 | case Intrinsic::nvvm_tex_3d_v4s32_s32: |
3381 | case Intrinsic::nvvm_tex_3d_v4s32_f32: |
3382 | case Intrinsic::nvvm_tex_3d_level_v4s32_f32: |
3383 | case Intrinsic::nvvm_tex_3d_grad_v4s32_f32: |
3384 | case Intrinsic::nvvm_tex_cube_v4s32_f32: |
3385 | case Intrinsic::nvvm_tex_cube_level_v4s32_f32: |
3386 | case Intrinsic::nvvm_tex_cube_array_v4s32_f32: |
3387 | case Intrinsic::nvvm_tex_cube_array_level_v4s32_f32: |
3388 | case Intrinsic::nvvm_tex_cube_v4u32_f32: |
3389 | case Intrinsic::nvvm_tex_cube_level_v4u32_f32: |
3390 | case Intrinsic::nvvm_tex_cube_array_v4u32_f32: |
3391 | case Intrinsic::nvvm_tex_cube_array_level_v4u32_f32: |
3392 | case Intrinsic::nvvm_tex_1d_v4u32_s32: |
3393 | case Intrinsic::nvvm_tex_1d_v4u32_f32: |
3394 | case Intrinsic::nvvm_tex_1d_level_v4u32_f32: |
3395 | case Intrinsic::nvvm_tex_1d_grad_v4u32_f32: |
3396 | case Intrinsic::nvvm_tex_1d_array_v4u32_s32: |
3397 | case Intrinsic::nvvm_tex_1d_array_v4u32_f32: |
3398 | case Intrinsic::nvvm_tex_1d_array_level_v4u32_f32: |
3399 | case Intrinsic::nvvm_tex_1d_array_grad_v4u32_f32: |
3400 | case Intrinsic::nvvm_tex_2d_v4u32_s32: |
3401 | case Intrinsic::nvvm_tex_2d_v4u32_f32: |
3402 | case Intrinsic::nvvm_tex_2d_level_v4u32_f32: |
3403 | case Intrinsic::nvvm_tex_2d_grad_v4u32_f32: |
3404 | case Intrinsic::nvvm_tex_2d_array_v4u32_s32: |
3405 | case Intrinsic::nvvm_tex_2d_array_v4u32_f32: |
3406 | case Intrinsic::nvvm_tex_2d_array_level_v4u32_f32: |
3407 | case Intrinsic::nvvm_tex_2d_array_grad_v4u32_f32: |
3408 | case Intrinsic::nvvm_tex_3d_v4u32_s32: |
3409 | case Intrinsic::nvvm_tex_3d_v4u32_f32: |
3410 | case Intrinsic::nvvm_tex_3d_level_v4u32_f32: |
3411 | case Intrinsic::nvvm_tex_3d_grad_v4u32_f32: |
3412 | case Intrinsic::nvvm_tld4_r_2d_v4s32_f32: |
3413 | case Intrinsic::nvvm_tld4_g_2d_v4s32_f32: |
3414 | case Intrinsic::nvvm_tld4_b_2d_v4s32_f32: |
3415 | case Intrinsic::nvvm_tld4_a_2d_v4s32_f32: |
3416 | case Intrinsic::nvvm_tld4_r_2d_v4u32_f32: |
3417 | case Intrinsic::nvvm_tld4_g_2d_v4u32_f32: |
3418 | case Intrinsic::nvvm_tld4_b_2d_v4u32_f32: |
3419 | case Intrinsic::nvvm_tld4_a_2d_v4u32_f32: |
3420 | case Intrinsic::nvvm_tex_unified_1d_v4s32_s32: |
3421 | case Intrinsic::nvvm_tex_unified_1d_v4s32_f32: |
3422 | case Intrinsic::nvvm_tex_unified_1d_level_v4s32_f32: |
3423 | case Intrinsic::nvvm_tex_unified_1d_grad_v4s32_f32: |
3424 | case Intrinsic::nvvm_tex_unified_1d_array_v4s32_s32: |
3425 | case Intrinsic::nvvm_tex_unified_1d_array_v4s32_f32: |
3426 | case Intrinsic::nvvm_tex_unified_1d_array_level_v4s32_f32: |
3427 | case Intrinsic::nvvm_tex_unified_1d_array_grad_v4s32_f32: |
3428 | case Intrinsic::nvvm_tex_unified_2d_v4s32_s32: |
3429 | case Intrinsic::nvvm_tex_unified_2d_v4s32_f32: |
3430 | case Intrinsic::nvvm_tex_unified_2d_level_v4s32_f32: |
3431 | case Intrinsic::nvvm_tex_unified_2d_grad_v4s32_f32: |
3432 | case Intrinsic::nvvm_tex_unified_2d_array_v4s32_s32: |
3433 | case Intrinsic::nvvm_tex_unified_2d_array_v4s32_f32: |
3434 | case Intrinsic::nvvm_tex_unified_2d_array_level_v4s32_f32: |
3435 | case Intrinsic::nvvm_tex_unified_2d_array_grad_v4s32_f32: |
3436 | case Intrinsic::nvvm_tex_unified_3d_v4s32_s32: |
3437 | case Intrinsic::nvvm_tex_unified_3d_v4s32_f32: |
3438 | case Intrinsic::nvvm_tex_unified_3d_level_v4s32_f32: |
3439 | case Intrinsic::nvvm_tex_unified_3d_grad_v4s32_f32: |
3440 | case Intrinsic::nvvm_tex_unified_1d_v4u32_s32: |
3441 | case Intrinsic::nvvm_tex_unified_1d_v4u32_f32: |
3442 | case Intrinsic::nvvm_tex_unified_1d_level_v4u32_f32: |
3443 | case Intrinsic::nvvm_tex_unified_1d_grad_v4u32_f32: |
3444 | case Intrinsic::nvvm_tex_unified_1d_array_v4u32_s32: |
3445 | case Intrinsic::nvvm_tex_unified_1d_array_v4u32_f32: |
3446 | case Intrinsic::nvvm_tex_unified_1d_array_level_v4u32_f32: |
3447 | case Intrinsic::nvvm_tex_unified_1d_array_grad_v4u32_f32: |
3448 | case Intrinsic::nvvm_tex_unified_2d_v4u32_s32: |
3449 | case Intrinsic::nvvm_tex_unified_2d_v4u32_f32: |
3450 | case Intrinsic::nvvm_tex_unified_2d_level_v4u32_f32: |
3451 | case Intrinsic::nvvm_tex_unified_2d_grad_v4u32_f32: |
3452 | case Intrinsic::nvvm_tex_unified_2d_array_v4u32_s32: |
3453 | case Intrinsic::nvvm_tex_unified_2d_array_v4u32_f32: |
3454 | case Intrinsic::nvvm_tex_unified_2d_array_level_v4u32_f32: |
3455 | case Intrinsic::nvvm_tex_unified_2d_array_grad_v4u32_f32: |
3456 | case Intrinsic::nvvm_tex_unified_3d_v4u32_s32: |
3457 | case Intrinsic::nvvm_tex_unified_3d_v4u32_f32: |
3458 | case Intrinsic::nvvm_tex_unified_3d_level_v4u32_f32: |
3459 | case Intrinsic::nvvm_tex_unified_3d_grad_v4u32_f32: |
3460 | case Intrinsic::nvvm_tex_unified_cube_v4s32_f32: |
3461 | case Intrinsic::nvvm_tex_unified_cube_level_v4s32_f32: |
3462 | case Intrinsic::nvvm_tex_unified_cube_array_v4s32_f32: |
3463 | case Intrinsic::nvvm_tex_unified_cube_array_level_v4s32_f32: |
3464 | case Intrinsic::nvvm_tex_unified_cube_v4u32_f32: |
3465 | case Intrinsic::nvvm_tex_unified_cube_level_v4u32_f32: |
3466 | case Intrinsic::nvvm_tex_unified_cube_array_v4u32_f32: |
3467 | case Intrinsic::nvvm_tex_unified_cube_array_level_v4u32_f32: |
3468 | case Intrinsic::nvvm_tld4_unified_r_2d_v4s32_f32: |
3469 | case Intrinsic::nvvm_tld4_unified_g_2d_v4s32_f32: |
3470 | case Intrinsic::nvvm_tld4_unified_b_2d_v4s32_f32: |
3471 | case Intrinsic::nvvm_tld4_unified_a_2d_v4s32_f32: |
3472 | case Intrinsic::nvvm_tld4_unified_r_2d_v4u32_f32: |
3473 | case Intrinsic::nvvm_tld4_unified_g_2d_v4u32_f32: |
3474 | case Intrinsic::nvvm_tld4_unified_b_2d_v4u32_f32: |
3475 | case Intrinsic::nvvm_tld4_unified_a_2d_v4u32_f32: { |
3476 | Info.opc = getOpcForTextureInstr(Intrinsic); |
3477 | Info.memVT = MVT::v4i32; |
3478 | Info.ptrVal = nullptr; |
3479 | Info.offset = 0; |
3480 | Info.vol = 0; |
3481 | Info.readMem = true; |
3482 | Info.writeMem = false; |
3483 | Info.align = 16; |
3484 | return true; |
3485 | } |
3486 | case Intrinsic::nvvm_suld_1d_i8_clamp: |
3487 | case Intrinsic::nvvm_suld_1d_v2i8_clamp: |
3488 | case Intrinsic::nvvm_suld_1d_v4i8_clamp: |
3489 | case Intrinsic::nvvm_suld_1d_array_i8_clamp: |
3490 | case Intrinsic::nvvm_suld_1d_array_v2i8_clamp: |
3491 | case Intrinsic::nvvm_suld_1d_array_v4i8_clamp: |
3492 | case Intrinsic::nvvm_suld_2d_i8_clamp: |
3493 | case Intrinsic::nvvm_suld_2d_v2i8_clamp: |
3494 | case Intrinsic::nvvm_suld_2d_v4i8_clamp: |
3495 | case Intrinsic::nvvm_suld_2d_array_i8_clamp: |
3496 | case Intrinsic::nvvm_suld_2d_array_v2i8_clamp: |
3497 | case Intrinsic::nvvm_suld_2d_array_v4i8_clamp: |
3498 | case Intrinsic::nvvm_suld_3d_i8_clamp: |
3499 | case Intrinsic::nvvm_suld_3d_v2i8_clamp: |
3500 | case Intrinsic::nvvm_suld_3d_v4i8_clamp: |
3501 | case Intrinsic::nvvm_suld_1d_i8_trap: |
3502 | case Intrinsic::nvvm_suld_1d_v2i8_trap: |
3503 | case Intrinsic::nvvm_suld_1d_v4i8_trap: |
3504 | case Intrinsic::nvvm_suld_1d_array_i8_trap: |
3505 | case Intrinsic::nvvm_suld_1d_array_v2i8_trap: |
3506 | case Intrinsic::nvvm_suld_1d_array_v4i8_trap: |
3507 | case Intrinsic::nvvm_suld_2d_i8_trap: |
3508 | case Intrinsic::nvvm_suld_2d_v2i8_trap: |
3509 | case Intrinsic::nvvm_suld_2d_v4i8_trap: |
3510 | case Intrinsic::nvvm_suld_2d_array_i8_trap: |
3511 | case Intrinsic::nvvm_suld_2d_array_v2i8_trap: |
3512 | case Intrinsic::nvvm_suld_2d_array_v4i8_trap: |
3513 | case Intrinsic::nvvm_suld_3d_i8_trap: |
3514 | case Intrinsic::nvvm_suld_3d_v2i8_trap: |
3515 | case Intrinsic::nvvm_suld_3d_v4i8_trap: |
3516 | case Intrinsic::nvvm_suld_1d_i8_zero: |
3517 | case Intrinsic::nvvm_suld_1d_v2i8_zero: |
3518 | case Intrinsic::nvvm_suld_1d_v4i8_zero: |
3519 | case Intrinsic::nvvm_suld_1d_array_i8_zero: |
3520 | case Intrinsic::nvvm_suld_1d_array_v2i8_zero: |
3521 | case Intrinsic::nvvm_suld_1d_array_v4i8_zero: |
3522 | case Intrinsic::nvvm_suld_2d_i8_zero: |
3523 | case Intrinsic::nvvm_suld_2d_v2i8_zero: |
3524 | case Intrinsic::nvvm_suld_2d_v4i8_zero: |
3525 | case Intrinsic::nvvm_suld_2d_array_i8_zero: |
3526 | case Intrinsic::nvvm_suld_2d_array_v2i8_zero: |
3527 | case Intrinsic::nvvm_suld_2d_array_v4i8_zero: |
3528 | case Intrinsic::nvvm_suld_3d_i8_zero: |
3529 | case Intrinsic::nvvm_suld_3d_v2i8_zero: |
3530 | case Intrinsic::nvvm_suld_3d_v4i8_zero: { |
3531 | Info.opc = getOpcForSurfaceInstr(Intrinsic); |
3532 | Info.memVT = MVT::i8; |
3533 | Info.ptrVal = nullptr; |
3534 | Info.offset = 0; |
3535 | Info.vol = 0; |
3536 | Info.readMem = true; |
3537 | Info.writeMem = false; |
3538 | Info.align = 16; |
3539 | return true; |
3540 | } |
3541 | case Intrinsic::nvvm_suld_1d_i16_clamp: |
3542 | case Intrinsic::nvvm_suld_1d_v2i16_clamp: |
3543 | case Intrinsic::nvvm_suld_1d_v4i16_clamp: |
3544 | case Intrinsic::nvvm_suld_1d_array_i16_clamp: |
3545 | case Intrinsic::nvvm_suld_1d_array_v2i16_clamp: |
3546 | case Intrinsic::nvvm_suld_1d_array_v4i16_clamp: |
3547 | case Intrinsic::nvvm_suld_2d_i16_clamp: |
3548 | case Intrinsic::nvvm_suld_2d_v2i16_clamp: |
3549 | case Intrinsic::nvvm_suld_2d_v4i16_clamp: |
3550 | case Intrinsic::nvvm_suld_2d_array_i16_clamp: |
3551 | case Intrinsic::nvvm_suld_2d_array_v2i16_clamp: |
3552 | case Intrinsic::nvvm_suld_2d_array_v4i16_clamp: |
3553 | case Intrinsic::nvvm_suld_3d_i16_clamp: |
3554 | case Intrinsic::nvvm_suld_3d_v2i16_clamp: |
3555 | case Intrinsic::nvvm_suld_3d_v4i16_clamp: |
3556 | case Intrinsic::nvvm_suld_1d_i16_trap: |
3557 | case Intrinsic::nvvm_suld_1d_v2i16_trap: |
3558 | case Intrinsic::nvvm_suld_1d_v4i16_trap: |
3559 | case Intrinsic::nvvm_suld_1d_array_i16_trap: |
3560 | case Intrinsic::nvvm_suld_1d_array_v2i16_trap: |
3561 | case Intrinsic::nvvm_suld_1d_array_v4i16_trap: |
3562 | case Intrinsic::nvvm_suld_2d_i16_trap: |
3563 | case Intrinsic::nvvm_suld_2d_v2i16_trap: |
3564 | case Intrinsic::nvvm_suld_2d_v4i16_trap: |
3565 | case Intrinsic::nvvm_suld_2d_array_i16_trap: |
3566 | case Intrinsic::nvvm_suld_2d_array_v2i16_trap: |
3567 | case Intrinsic::nvvm_suld_2d_array_v4i16_trap: |
3568 | case Intrinsic::nvvm_suld_3d_i16_trap: |
3569 | case Intrinsic::nvvm_suld_3d_v2i16_trap: |
3570 | case Intrinsic::nvvm_suld_3d_v4i16_trap: |
3571 | case Intrinsic::nvvm_suld_1d_i16_zero: |
3572 | case Intrinsic::nvvm_suld_1d_v2i16_zero: |
3573 | case Intrinsic::nvvm_suld_1d_v4i16_zero: |
3574 | case Intrinsic::nvvm_suld_1d_array_i16_zero: |
3575 | case Intrinsic::nvvm_suld_1d_array_v2i16_zero: |
3576 | case Intrinsic::nvvm_suld_1d_array_v4i16_zero: |
3577 | case Intrinsic::nvvm_suld_2d_i16_zero: |
3578 | case Intrinsic::nvvm_suld_2d_v2i16_zero: |
3579 | case Intrinsic::nvvm_suld_2d_v4i16_zero: |
3580 | case Intrinsic::nvvm_suld_2d_array_i16_zero: |
3581 | case Intrinsic::nvvm_suld_2d_array_v2i16_zero: |
3582 | case Intrinsic::nvvm_suld_2d_array_v4i16_zero: |
3583 | case Intrinsic::nvvm_suld_3d_i16_zero: |
3584 | case Intrinsic::nvvm_suld_3d_v2i16_zero: |
3585 | case Intrinsic::nvvm_suld_3d_v4i16_zero: { |
3586 | Info.opc = getOpcForSurfaceInstr(Intrinsic); |
3587 | Info.memVT = MVT::i16; |
3588 | Info.ptrVal = nullptr; |
3589 | Info.offset = 0; |
3590 | Info.vol = 0; |
3591 | Info.readMem = true; |
3592 | Info.writeMem = false; |
3593 | Info.align = 16; |
3594 | return true; |
3595 | } |
3596 | case Intrinsic::nvvm_suld_1d_i32_clamp: |
3597 | case Intrinsic::nvvm_suld_1d_v2i32_clamp: |
3598 | case Intrinsic::nvvm_suld_1d_v4i32_clamp: |
3599 | case Intrinsic::nvvm_suld_1d_array_i32_clamp: |
3600 | case Intrinsic::nvvm_suld_1d_array_v2i32_clamp: |
3601 | case Intrinsic::nvvm_suld_1d_array_v4i32_clamp: |
3602 | case Intrinsic::nvvm_suld_2d_i32_clamp: |
3603 | case Intrinsic::nvvm_suld_2d_v2i32_clamp: |
3604 | case Intrinsic::nvvm_suld_2d_v4i32_clamp: |
3605 | case Intrinsic::nvvm_suld_2d_array_i32_clamp: |
3606 | case Intrinsic::nvvm_suld_2d_array_v2i32_clamp: |
3607 | case Intrinsic::nvvm_suld_2d_array_v4i32_clamp: |
3608 | case Intrinsic::nvvm_suld_3d_i32_clamp: |
3609 | case Intrinsic::nvvm_suld_3d_v2i32_clamp: |
3610 | case Intrinsic::nvvm_suld_3d_v4i32_clamp: |
3611 | case Intrinsic::nvvm_suld_1d_i32_trap: |
3612 | case Intrinsic::nvvm_suld_1d_v2i32_trap: |
3613 | case Intrinsic::nvvm_suld_1d_v4i32_trap: |
3614 | case Intrinsic::nvvm_suld_1d_array_i32_trap: |
3615 | case Intrinsic::nvvm_suld_1d_array_v2i32_trap: |
3616 | case Intrinsic::nvvm_suld_1d_array_v4i32_trap: |
3617 | case Intrinsic::nvvm_suld_2d_i32_trap: |
3618 | case Intrinsic::nvvm_suld_2d_v2i32_trap: |
3619 | case Intrinsic::nvvm_suld_2d_v4i32_trap: |
3620 | case Intrinsic::nvvm_suld_2d_array_i32_trap: |
3621 | case Intrinsic::nvvm_suld_2d_array_v2i32_trap: |
3622 | case Intrinsic::nvvm_suld_2d_array_v4i32_trap: |
3623 | case Intrinsic::nvvm_suld_3d_i32_trap: |
3624 | case Intrinsic::nvvm_suld_3d_v2i32_trap: |
3625 | case Intrinsic::nvvm_suld_3d_v4i32_trap: |
3626 | case Intrinsic::nvvm_suld_1d_i32_zero: |
3627 | case Intrinsic::nvvm_suld_1d_v2i32_zero: |
3628 | case Intrinsic::nvvm_suld_1d_v4i32_zero: |
3629 | case Intrinsic::nvvm_suld_1d_array_i32_zero: |
3630 | case Intrinsic::nvvm_suld_1d_array_v2i32_zero: |
3631 | case Intrinsic::nvvm_suld_1d_array_v4i32_zero: |
3632 | case Intrinsic::nvvm_suld_2d_i32_zero: |
3633 | case Intrinsic::nvvm_suld_2d_v2i32_zero: |
3634 | case Intrinsic::nvvm_suld_2d_v4i32_zero: |
3635 | case Intrinsic::nvvm_suld_2d_array_i32_zero: |
3636 | case Intrinsic::nvvm_suld_2d_array_v2i32_zero: |
3637 | case Intrinsic::nvvm_suld_2d_array_v4i32_zero: |
3638 | case Intrinsic::nvvm_suld_3d_i32_zero: |
3639 | case Intrinsic::nvvm_suld_3d_v2i32_zero: |
3640 | case Intrinsic::nvvm_suld_3d_v4i32_zero: { |
3641 | Info.opc = getOpcForSurfaceInstr(Intrinsic); |
3642 | Info.memVT = MVT::i32; |
3643 | Info.ptrVal = nullptr; |
3644 | Info.offset = 0; |
3645 | Info.vol = 0; |
3646 | Info.readMem = true; |
3647 | Info.writeMem = false; |
3648 | Info.align = 16; |
3649 | return true; |
3650 | } |
3651 | case Intrinsic::nvvm_suld_1d_i64_clamp: |
3652 | case Intrinsic::nvvm_suld_1d_v2i64_clamp: |
3653 | case Intrinsic::nvvm_suld_1d_array_i64_clamp: |
3654 | case Intrinsic::nvvm_suld_1d_array_v2i64_clamp: |
3655 | case Intrinsic::nvvm_suld_2d_i64_clamp: |
3656 | case Intrinsic::nvvm_suld_2d_v2i64_clamp: |
3657 | case Intrinsic::nvvm_suld_2d_array_i64_clamp: |
3658 | case Intrinsic::nvvm_suld_2d_array_v2i64_clamp: |
3659 | case Intrinsic::nvvm_suld_3d_i64_clamp: |
3660 | case Intrinsic::nvvm_suld_3d_v2i64_clamp: |
3661 | case Intrinsic::nvvm_suld_1d_i64_trap: |
3662 | case Intrinsic::nvvm_suld_1d_v2i64_trap: |
3663 | case Intrinsic::nvvm_suld_1d_array_i64_trap: |
3664 | case Intrinsic::nvvm_suld_1d_array_v2i64_trap: |
3665 | case Intrinsic::nvvm_suld_2d_i64_trap: |
3666 | case Intrinsic::nvvm_suld_2d_v2i64_trap: |
3667 | case Intrinsic::nvvm_suld_2d_array_i64_trap: |
3668 | case Intrinsic::nvvm_suld_2d_array_v2i64_trap: |
3669 | case Intrinsic::nvvm_suld_3d_i64_trap: |
3670 | case Intrinsic::nvvm_suld_3d_v2i64_trap: |
3671 | case Intrinsic::nvvm_suld_1d_i64_zero: |
3672 | case Intrinsic::nvvm_suld_1d_v2i64_zero: |
3673 | case Intrinsic::nvvm_suld_1d_array_i64_zero: |
3674 | case Intrinsic::nvvm_suld_1d_array_v2i64_zero: |
3675 | case Intrinsic::nvvm_suld_2d_i64_zero: |
3676 | case Intrinsic::nvvm_suld_2d_v2i64_zero: |
3677 | case Intrinsic::nvvm_suld_2d_array_i64_zero: |
3678 | case Intrinsic::nvvm_suld_2d_array_v2i64_zero: |
3679 | case Intrinsic::nvvm_suld_3d_i64_zero: |
3680 | case Intrinsic::nvvm_suld_3d_v2i64_zero: { |
3681 | Info.opc = getOpcForSurfaceInstr(Intrinsic); |
3682 | Info.memVT = MVT::i64; |
3683 | Info.ptrVal = nullptr; |
3684 | Info.offset = 0; |
3685 | Info.vol = 0; |
3686 | Info.readMem = true; |
3687 | Info.writeMem = false; |
3688 | Info.align = 16; |
3689 | return true; |
3690 | } |
3691 | } |
3692 | return false; |
3693 | } |
3694 | |
3695 | /// isLegalAddressingMode - Return true if the addressing mode represented |
3696 | /// by AM is legal for this target, for a load/store of the specified type. |
3697 | /// Used to guide target specific optimizations, like loop strength reduction |
3698 | /// (LoopStrengthReduce.cpp) and memory optimization for address mode |
3699 | /// (CodeGenPrepare.cpp) |
3700 | bool NVPTXTargetLowering::isLegalAddressingMode(const AddrMode &AM, |
3701 | Type *Ty) const { |
3702 | |
3703 | // AddrMode - This represents an addressing mode of: |
3704 | // BaseGV + BaseOffs + BaseReg + Scale*ScaleReg |
3705 | // |
3706 | // The legal address modes are |
3707 | // - [avar] |
3708 | // - [areg] |
3709 | // - [areg+immoff] |
3710 | // - [immAddr] |
3711 | |
3712 | if (AM.BaseGV) { |
3713 | if (AM.BaseOffs || AM.HasBaseReg || AM.Scale) |
3714 | return false; |
3715 | return true; |
3716 | } |
3717 | |
3718 | switch (AM.Scale) { |
3719 | case 0: // "r", "r+i" or "i" is allowed |
3720 | break; |
3721 | case 1: |
3722 | if (AM.HasBaseReg) // "r+r+i" or "r+r" is not allowed. |
3723 | return false; |
3724 | // Otherwise we have r+i. |
3725 | break; |
3726 | default: |
3727 | // No scale > 1 is allowed |
3728 | return false; |
3729 | } |
3730 | return true; |
3731 | } |
3732 | |
3733 | //===----------------------------------------------------------------------===// |
3734 | // NVPTX Inline Assembly Support |
3735 | //===----------------------------------------------------------------------===// |
3736 | |
3737 | /// getConstraintType - Given a constraint letter, return the type of |
3738 | /// constraint it is for this target. |
3739 | NVPTXTargetLowering::ConstraintType |
3740 | NVPTXTargetLowering::getConstraintType(const std::string &Constraint) const { |
3741 | if (Constraint.size() == 1) { |
3742 | switch (Constraint[0]) { |
3743 | default: |
3744 | break; |
3745 | case 'b': |
3746 | case 'r': |
3747 | case 'h': |
3748 | case 'c': |
3749 | case 'l': |
3750 | case 'f': |
3751 | case 'd': |
3752 | case '0': |
3753 | case 'N': |
3754 | return C_RegisterClass; |
3755 | } |
3756 | } |
3757 | return TargetLowering::getConstraintType(Constraint); |
3758 | } |
3759 | |
3760 | std::pair<unsigned, const TargetRegisterClass *> |
3761 | NVPTXTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, |
3762 | MVT VT) const { |
3763 | if (Constraint.size() == 1) { |
3764 | switch (Constraint[0]) { |
3765 | case 'b': |
3766 | return std::make_pair(0U, &NVPTX::Int1RegsRegClass); |
3767 | case 'c': |
3768 | return std::make_pair(0U, &NVPTX::Int16RegsRegClass); |
3769 | case 'h': |
3770 | return std::make_pair(0U, &NVPTX::Int16RegsRegClass); |
3771 | case 'r': |
3772 | return std::make_pair(0U, &NVPTX::Int32RegsRegClass); |
3773 | case 'l': |
3774 | case 'N': |
3775 | return std::make_pair(0U, &NVPTX::Int64RegsRegClass); |
3776 | case 'f': |
3777 | return std::make_pair(0U, &NVPTX::Float32RegsRegClass); |
3778 | case 'd': |
3779 | return std::make_pair(0U, &NVPTX::Float64RegsRegClass); |
3780 | } |
3781 | } |
3782 | return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); |
3783 | } |
3784 | |
3785 | /// getFunctionAlignment - Return the Log2 alignment of this function. |
3786 | unsigned NVPTXTargetLowering::getFunctionAlignment(const Function *) const { |
3787 | return 4; |
3788 | } |
3789 | |
3790 | //===----------------------------------------------------------------------===// |
3791 | // NVPTX DAG Combining |
3792 | //===----------------------------------------------------------------------===// |
3793 | |
3794 | bool NVPTXTargetLowering::allowFMA(MachineFunction &MF, |
3795 | CodeGenOpt::Level OptLevel) const { |
3796 | const Function *F = MF.getFunction(); |
3797 | const TargetOptions &TO = MF.getTarget().Options; |
3798 | |
3799 | // Always honor command-line argument |
3800 | if (FMAContractLevelOpt.getNumOccurrences() > 0) { |
3801 | return FMAContractLevelOpt > 0; |
3802 | } else if (OptLevel == 0) { |
3803 | // Do not contract if we're not optimizing the code |
3804 | return false; |
3805 | } else if (TO.AllowFPOpFusion == FPOpFusion::Fast || TO.UnsafeFPMath) { |
3806 | // Honor TargetOptions flags that explicitly say fusion is okay |
3807 | return true; |
3808 | } else if (F->hasFnAttribute("unsafe-fp-math")) { |
3809 | // Check for unsafe-fp-math=true coming from Clang |
3810 | Attribute Attr = F->getFnAttribute("unsafe-fp-math"); |
3811 | StringRef Val = Attr.getValueAsString(); |
3812 | if (Val == "true") |
3813 | return true; |
3814 | } |
3815 | |
3816 | // We did not have a clear indication that fusion is allowed, so assume not |
3817 | return false; |
3818 | } |
3819 | |
3820 | /// PerformADDCombineWithOperands - Try DAG combinations for an ADD with |
3821 | /// operands N0 and N1. This is a helper for PerformADDCombine that is |
3822 | /// called with the default operands, and if that fails, with commuted |
3823 | /// operands. |
3824 | static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1, |
3825 | TargetLowering::DAGCombinerInfo &DCI, |
3826 | const NVPTXSubtarget &Subtarget, |
3827 | CodeGenOpt::Level OptLevel) { |
3828 | SelectionDAG &DAG = DCI.DAG; |
3829 | // Skip non-integer, non-scalar case |
3830 | EVT VT=N0.getValueType(); |
3831 | if (VT.isVector()) |
3832 | return SDValue(); |
3833 | |
3834 | // fold (add (mul a, b), c) -> (mad a, b, c) |
3835 | // |
3836 | if (N0.getOpcode() == ISD::MUL) { |
3837 | assert (VT.isInteger())((VT.isInteger()) ? static_cast<void> (0) : __assert_fail ("VT.isInteger()", "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn216889/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 3837, __PRETTY_FUNCTION__)); |
3838 | // For integer: |
3839 | // Since integer multiply-add costs the same as integer multiply |
3840 | // but is more costly than integer add, do the fusion only when |
3841 | // the mul is only used in the add. |
3842 | if (OptLevel==CodeGenOpt::None || VT != MVT::i32 || |
3843 | !N0.getNode()->hasOneUse()) |
3844 | return SDValue(); |
3845 | |
3846 | // Do the folding |
3847 | return DAG.getNode(NVPTXISD::IMAD, SDLoc(N), VT, |
3848 | N0.getOperand(0), N0.getOperand(1), N1); |
3849 | } |
3850 | else if (N0.getOpcode() == ISD::FMUL) { |
3851 | if (VT == MVT::f32 || VT == MVT::f64) { |
3852 | const auto *TLI = static_cast<const NVPTXTargetLowering *>( |
3853 | &DAG.getTargetLoweringInfo()); |
3854 | if (!TLI->allowFMA(DAG.getMachineFunction(), OptLevel)) |
3855 | return SDValue(); |
3856 | |
3857 | // For floating point: |
3858 | // Do the fusion only when the mul has less than 5 uses and all |
3859 | // are add. |
3860 | // The heuristic is that if a use is not an add, then that use |
3861 | // cannot be fused into fma, therefore mul is still needed anyway. |
3862 | // If there are more than 4 uses, even if they are all add, fusing |
3863 | // them will increase register pressue. |
3864 | // |
3865 | int numUses = 0; |
3866 | int nonAddCount = 0; |
3867 | for (SDNode::use_iterator UI = N0.getNode()->use_begin(), |
3868 | UE = N0.getNode()->use_end(); |
3869 | UI != UE; ++UI) { |
3870 | numUses++; |
3871 | SDNode *User = *UI; |
3872 | if (User->getOpcode() != ISD::FADD) |
3873 | ++nonAddCount; |
3874 | } |
3875 | if (numUses >= 5) |
3876 | return SDValue(); |
3877 | if (nonAddCount) { |
3878 | int orderNo = N->getIROrder(); |
3879 | int orderNo2 = N0.getNode()->getIROrder(); |
3880 | // simple heuristics here for considering potential register |
3881 | // pressure, the logics here is that the differnce are used |
3882 | // to measure the distance between def and use, the longer distance |
3883 | // more likely cause register pressure. |
3884 | if (orderNo - orderNo2 < 500) |
3885 | return SDValue(); |
3886 | |
3887 | // Now, check if at least one of the FMUL's operands is live beyond the node N, |
3888 | // which guarantees that the FMA will not increase register pressure at node N. |
3889 | bool opIsLive = false; |
3890 | const SDNode *left = N0.getOperand(0).getNode(); |
3891 | const SDNode *right = N0.getOperand(1).getNode(); |
3892 | |
3893 | if (dyn_cast<ConstantSDNode>(left) || dyn_cast<ConstantSDNode>(right)) |
3894 | opIsLive = true; |
3895 | |
3896 | if (!opIsLive) |
3897 | for (SDNode::use_iterator UI = left->use_begin(), UE = left->use_end(); UI != UE; ++UI) { |
3898 | SDNode *User = *UI; |
3899 | int orderNo3 = User->getIROrder(); |
3900 | if (orderNo3 > orderNo) { |
3901 | opIsLive = true; |
3902 | break; |
3903 | } |
3904 | } |
3905 | |
3906 | if (!opIsLive) |
3907 | for (SDNode::use_iterator UI = right->use_begin(), UE = right->use_end(); UI != UE; ++UI) { |
3908 | SDNode *User = *UI; |
3909 | int orderNo3 = User->getIROrder(); |
3910 | if (orderNo3 > orderNo) { |
3911 | opIsLive = true; |
3912 | break; |
3913 | } |
3914 | } |
3915 | |
3916 | if (!opIsLive) |
3917 | return SDValue(); |
3918 | } |
3919 | |
3920 | return DAG.getNode(ISD::FMA, SDLoc(N), VT, |
3921 | N0.getOperand(0), N0.getOperand(1), N1); |
3922 | } |
3923 | } |
3924 | |
3925 | return SDValue(); |
3926 | } |
3927 | |
3928 | /// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD. |
3929 | /// |
3930 | static SDValue PerformADDCombine(SDNode *N, |
3931 | TargetLowering::DAGCombinerInfo &DCI, |
3932 | const NVPTXSubtarget &Subtarget, |
3933 | CodeGenOpt::Level OptLevel) { |
3934 | SDValue N0 = N->getOperand(0); |
3935 | SDValue N1 = N->getOperand(1); |
3936 | |
3937 | // First try with the default operand order. |
3938 | SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI, Subtarget, |
3939 | OptLevel); |
3940 | if (Result.getNode()) |
3941 | return Result; |
3942 | |
3943 | // If that didn't work, try again with the operands commuted. |
3944 | return PerformADDCombineWithOperands(N, N1, N0, DCI, Subtarget, OptLevel); |
3945 | } |
3946 | |
3947 | static SDValue PerformANDCombine(SDNode *N, |
3948 | TargetLowering::DAGCombinerInfo &DCI) { |
3949 | // The type legalizer turns a vector load of i8 values into a zextload to i16 |
3950 | // registers, optionally ANY_EXTENDs it (if target type is integer), |
3951 | // and ANDs off the high 8 bits. Since we turn this load into a |
3952 | // target-specific DAG node, the DAG combiner fails to eliminate these AND |
3953 | // nodes. Do that here. |
3954 | SDValue Val = N->getOperand(0); |
3955 | SDValue Mask = N->getOperand(1); |
3956 | |
3957 | if (isa<ConstantSDNode>(Val)) { |
3958 | std::swap(Val, Mask); |
3959 | } |
3960 | |
3961 | SDValue AExt; |
3962 | // Generally, we will see zextload -> IMOV16rr -> ANY_EXTEND -> and |
3963 | if (Val.getOpcode() == ISD::ANY_EXTEND) { |
3964 | AExt = Val; |
3965 | Val = Val->getOperand(0); |
3966 | } |
3967 | |
3968 | if (Val->isMachineOpcode() && Val->getMachineOpcode() == NVPTX::IMOV16rr) { |
3969 | Val = Val->getOperand(0); |
3970 | } |
3971 | |
3972 | if (Val->getOpcode() == NVPTXISD::LoadV2 || |
3973 | Val->getOpcode() == NVPTXISD::LoadV4) { |
3974 | ConstantSDNode *MaskCnst = dyn_cast<ConstantSDNode>(Mask); |
3975 | if (!MaskCnst) { |
3976 | // Not an AND with a constant |
3977 | return SDValue(); |
3978 | } |
3979 | |
3980 | uint64_t MaskVal = MaskCnst->getZExtValue(); |
3981 | if (MaskVal != 0xff) { |
3982 | // Not an AND that chops off top 8 bits |
3983 | return SDValue(); |
3984 | } |
3985 | |
3986 | MemSDNode *Mem = dyn_cast<MemSDNode>(Val); |
3987 | if (!Mem) { |
3988 | // Not a MemSDNode?!? |
3989 | return SDValue(); |
3990 | } |
3991 | |
3992 | EVT MemVT = Mem->getMemoryVT(); |
3993 | if (MemVT != MVT::v2i8 && MemVT != MVT::v4i8) { |
3994 | // We only handle the i8 case |
3995 | return SDValue(); |
3996 | } |
3997 | |
3998 | unsigned ExtType = |
3999 | cast<ConstantSDNode>(Val->getOperand(Val->getNumOperands()-1))-> |
4000 | getZExtValue(); |
4001 | if (ExtType == ISD::SEXTLOAD) { |
4002 | // If for some reason the load is a sextload, the and is needed to zero |
4003 | // out the high 8 bits |
4004 | return SDValue(); |
4005 | } |
4006 | |
4007 | bool AddTo = false; |
4008 | if (AExt.getNode() != 0) { |
4009 | // Re-insert the ext as a zext. |
4010 | Val = DCI.DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), |
4011 | AExt.getValueType(), Val); |
4012 | AddTo = true; |
4013 | } |
4014 | |
4015 | // If we get here, the AND is unnecessary. Just replace it with the load |
4016 | DCI.CombineTo(N, Val, AddTo); |
4017 | } |
4018 | |
4019 | return SDValue(); |
4020 | } |
4021 | |
/// Signedness of an operand as reported by IsMulWideOperandDemotable.
enum OperandSignedness {
  Signed = 0,   // operand comes from a sign extension
  Unsigned = 1, // operand comes from a zero extension
  Unknown = 2   // signedness could not be determined
};
4027 | |
4028 | /// IsMulWideOperandDemotable - Checks if the provided DAG node is an operand |
4029 | /// that can be demoted to \p OptSize bits without loss of information. The |
4030 | /// signedness of the operand, if determinable, is placed in \p S. |
4031 | static bool IsMulWideOperandDemotable(SDValue Op, |
4032 | unsigned OptSize, |
4033 | OperandSignedness &S) { |
4034 | S = Unknown; |
4035 | |
4036 | if (Op.getOpcode() == ISD::SIGN_EXTEND || |
4037 | Op.getOpcode() == ISD::SIGN_EXTEND_INREG) { |
4038 | EVT OrigVT = Op.getOperand(0).getValueType(); |
4039 | if (OrigVT.getSizeInBits() <= OptSize) { |
4040 | S = Signed; |
4041 | return true; |
4042 | } |
4043 | } else if (Op.getOpcode() == ISD::ZERO_EXTEND) { |
4044 | EVT OrigVT = Op.getOperand(0).getValueType(); |
4045 | if (OrigVT.getSizeInBits() <= OptSize) { |
4046 | S = Unsigned; |
4047 | return true; |
4048 | } |
4049 | } |
4050 | |
4051 | return false; |
4052 | } |
4053 | |
4054 | /// AreMulWideOperandsDemotable - Checks if the given LHS and RHS operands can |
4055 | /// be demoted to \p OptSize bits without loss of information. If the operands |
4056 | /// contain a constant, it should appear as the RHS operand. The signedness of |
4057 | /// the operands is placed in \p IsSigned. |
4058 | static bool AreMulWideOperandsDemotable(SDValue LHS, SDValue RHS, |
4059 | unsigned OptSize, |
4060 | bool &IsSigned) { |
4061 | |
4062 | OperandSignedness LHSSign; |
4063 | |
4064 | // The LHS operand must be a demotable op |
4065 | if (!IsMulWideOperandDemotable(LHS, OptSize, LHSSign)) |
4066 | return false; |
4067 | |
4068 | // We should have been able to determine the signedness from the LHS |
4069 | if (LHSSign == Unknown) |
4070 | return false; |
4071 | |
4072 | IsSigned = (LHSSign == Signed); |
4073 | |
4074 | // The RHS can be a demotable op or a constant |
4075 | if (ConstantSDNode *CI = dyn_cast<ConstantSDNode>(RHS)) { |
4076 | APInt Val = CI->getAPIntValue(); |
4077 | if (LHSSign == Unsigned) { |
4078 | if (Val.isIntN(OptSize)) { |
4079 | return true; |
4080 | } |
4081 | return false; |
4082 | } else { |
4083 | if (Val.isSignedIntN(OptSize)) { |
4084 | return true; |
4085 | } |
4086 | return false; |
4087 | } |
4088 | } else { |
4089 | OperandSignedness RHSSign; |
4090 | if (!IsMulWideOperandDemotable(RHS, OptSize, RHSSign)) |
4091 | return false; |
4092 | |
4093 | if (LHSSign != RHSSign) |
4094 | return false; |
4095 | |
4096 | return true; |
4097 | } |
4098 | } |
4099 | |
4100 | /// TryMULWIDECombine - Attempt to replace a multiply of M bits with a multiply |
4101 | /// of M/2 bits that produces an M-bit result (i.e. mul.wide). This transform |
4102 | /// works on both multiply DAG nodes and SHL DAG nodes with a constant shift |
4103 | /// amount. |
4104 | static SDValue TryMULWIDECombine(SDNode *N, |
4105 | TargetLowering::DAGCombinerInfo &DCI) { |
4106 | EVT MulType = N->getValueType(0); |
4107 | if (MulType != MVT::i32 && MulType != MVT::i64) { |
4108 | return SDValue(); |
4109 | } |
4110 | |
4111 | unsigned OptSize = MulType.getSizeInBits() >> 1; |
4112 | SDValue LHS = N->getOperand(0); |
4113 | SDValue RHS = N->getOperand(1); |
4114 | |
4115 | // Canonicalize the multiply so the constant (if any) is on the right |
4116 | if (N->getOpcode() == ISD::MUL) { |
4117 | if (isa<ConstantSDNode>(LHS)) { |
4118 | std::swap(LHS, RHS); |
4119 | } |
4120 | } |
4121 | |
4122 | // If we have a SHL, determine the actual multiply amount |
4123 | if (N->getOpcode() == ISD::SHL) { |
4124 | ConstantSDNode *ShlRHS = dyn_cast<ConstantSDNode>(RHS); |
4125 | if (!ShlRHS) { |
4126 | return SDValue(); |
4127 | } |
4128 | |
4129 | APInt ShiftAmt = ShlRHS->getAPIntValue(); |
4130 | unsigned BitWidth = MulType.getSizeInBits(); |
4131 | if (ShiftAmt.sge(0) && ShiftAmt.slt(BitWidth)) { |
4132 | APInt MulVal = APInt(BitWidth, 1) << ShiftAmt; |
4133 | RHS = DCI.DAG.getConstant(MulVal, MulType); |
4134 | } else { |
4135 | return SDValue(); |
4136 | } |
4137 | } |
4138 | |
4139 | bool Signed; |
4140 | // Verify that our operands are demotable |
4141 | if (!AreMulWideOperandsDemotable(LHS, RHS, OptSize, Signed)) { |
4142 | return SDValue(); |
4143 | } |
4144 | |
4145 | EVT DemotedVT; |
4146 | if (MulType == MVT::i32) { |
4147 | DemotedVT = MVT::i16; |
4148 | } else { |
4149 | DemotedVT = MVT::i32; |
4150 | } |
4151 | |
4152 | // Truncate the operands to the correct size. Note that these are just for |
4153 | // type consistency and will (likely) be eliminated in later phases. |
4154 | SDValue TruncLHS = |
4155 | DCI.DAG.getNode(ISD::TRUNCATE, SDLoc(N), DemotedVT, LHS); |
4156 | SDValue TruncRHS = |
4157 | DCI.DAG.getNode(ISD::TRUNCATE, SDLoc(N), DemotedVT, RHS); |
4158 | |
4159 | unsigned Opc; |
4160 | if (Signed) { |
4161 | Opc = NVPTXISD::MUL_WIDE_SIGNED; |
4162 | } else { |
4163 | Opc = NVPTXISD::MUL_WIDE_UNSIGNED; |
4164 | } |
4165 | |
4166 | return DCI.DAG.getNode(Opc, SDLoc(N), MulType, TruncLHS, TruncRHS); |
4167 | } |
4168 | |
4169 | /// PerformMULCombine - Runs PTX-specific DAG combine patterns on MUL nodes. |
4170 | static SDValue PerformMULCombine(SDNode *N, |
4171 | TargetLowering::DAGCombinerInfo &DCI, |
4172 | CodeGenOpt::Level OptLevel) { |
4173 | if (OptLevel > 0) { |
4174 | // Try mul.wide combining at OptLevel > 0 |
4175 | SDValue Ret = TryMULWIDECombine(N, DCI); |
4176 | if (Ret.getNode()) |
4177 | return Ret; |
4178 | } |
4179 | |
4180 | return SDValue(); |
4181 | } |
4182 | |
4183 | /// PerformSHLCombine - Runs PTX-specific DAG combine patterns on SHL nodes. |
4184 | static SDValue PerformSHLCombine(SDNode *N, |
4185 | TargetLowering::DAGCombinerInfo &DCI, |
4186 | CodeGenOpt::Level OptLevel) { |
4187 | if (OptLevel > 0) { |
4188 | // Try mul.wide combining at OptLevel > 0 |
4189 | SDValue Ret = TryMULWIDECombine(N, DCI); |
4190 | if (Ret.getNode()) |
4191 | return Ret; |
4192 | } |
4193 | |
4194 | return SDValue(); |
4195 | } |
4196 | |
4197 | SDValue NVPTXTargetLowering::PerformDAGCombine(SDNode *N, |
4198 | DAGCombinerInfo &DCI) const { |
4199 | CodeGenOpt::Level OptLevel = getTargetMachine().getOptLevel(); |
4200 | switch (N->getOpcode()) { |
4201 | default: break; |
4202 | case ISD::ADD: |
4203 | case ISD::FADD: |
4204 | return PerformADDCombine(N, DCI, nvptxSubtarget, OptLevel); |
4205 | case ISD::MUL: |
4206 | return PerformMULCombine(N, DCI, OptLevel); |
4207 | case ISD::SHL: |
4208 | return PerformSHLCombine(N, DCI, OptLevel); |
4209 | case ISD::AND: |
4210 | return PerformANDCombine(N, DCI); |
4211 | } |
4212 | return SDValue(); |
4213 | } |
4214 | |
4215 | /// ReplaceVectorLoad - Convert vector loads into multi-output scalar loads. |
4216 | static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG, |
4217 | const DataLayout *TD, |
4218 | SmallVectorImpl<SDValue> &Results) { |
4219 | EVT ResVT = N->getValueType(0); |
4220 | SDLoc DL(N); |
4221 | |
4222 | assert(ResVT.isVector() && "Vector load must have vector type")((ResVT.isVector() && "Vector load must have vector type" ) ? static_cast<void> (0) : __assert_fail ("ResVT.isVector() && \"Vector load must have vector type\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn216889/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 4222, __PRETTY_FUNCTION__)); |
4223 | |
4224 | // We only handle "native" vector sizes for now, e.g. <4 x double> is not |
4225 | // legal. We can (and should) split that into 2 loads of <2 x double> here |
4226 | // but I'm leaving that as a TODO for now. |
4227 | assert(ResVT.isSimple() && "Can only handle simple types")((ResVT.isSimple() && "Can only handle simple types") ? static_cast<void> (0) : __assert_fail ("ResVT.isSimple() && \"Can only handle simple types\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn216889/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 4227, __PRETTY_FUNCTION__)); |
4228 | switch (ResVT.getSimpleVT().SimpleTy) { |
4229 | default: |
4230 | return; |
4231 | case MVT::v2i8: |
4232 | case MVT::v2i16: |
4233 | case MVT::v2i32: |
4234 | case MVT::v2i64: |
4235 | case MVT::v2f32: |
4236 | case MVT::v2f64: |
4237 | case MVT::v4i8: |
4238 | case MVT::v4i16: |
4239 | case MVT::v4i32: |
4240 | case MVT::v4f32: |
4241 | // This is a "native" vector type |
4242 | break; |
4243 | } |
4244 | |
4245 | LoadSDNode *LD = cast<LoadSDNode>(N); |
4246 | |
4247 | unsigned Align = LD->getAlignment(); |
4248 | unsigned PrefAlign = |
4249 | TD->getPrefTypeAlignment(ResVT.getTypeForEVT(*DAG.getContext())); |
4250 | if (Align < PrefAlign) { |
4251 | // This load is not sufficiently aligned, so bail out and let this vector |
4252 | // load be scalarized. Note that we may still be able to emit smaller |
4253 | // vector loads. For example, if we are loading a <4 x float> with an |
4254 | // alignment of 8, this check will fail but the legalizer will try again |
4255 | // with 2 x <2 x float>, which will succeed with an alignment of 8. |
4256 | return; |
4257 | } |
4258 | |
4259 | EVT EltVT = ResVT.getVectorElementType(); |
4260 | unsigned NumElts = ResVT.getVectorNumElements(); |
4261 | |
4262 | // Since LoadV2 is a target node, we cannot rely on DAG type legalization. |
4263 | // Therefore, we must ensure the type is legal. For i1 and i8, we set the |
4264 | // loaded type to i16 and propagate the "real" type as the memory type. |
4265 | bool NeedTrunc = false; |
4266 | if (EltVT.getSizeInBits() < 16) { |
4267 | EltVT = MVT::i16; |
4268 | NeedTrunc = true; |
4269 | } |
4270 | |
4271 | unsigned Opcode = 0; |
4272 | SDVTList LdResVTs; |
4273 | |
4274 | switch (NumElts) { |
4275 | default: |
4276 | return; |
4277 | case 2: |
4278 | Opcode = NVPTXISD::LoadV2; |
4279 | LdResVTs = DAG.getVTList(EltVT, EltVT, MVT::Other); |
4280 | break; |
4281 | case 4: { |
4282 | Opcode = NVPTXISD::LoadV4; |
4283 | EVT ListVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other }; |
4284 | LdResVTs = DAG.getVTList(ListVTs); |
4285 | break; |
4286 | } |
4287 | } |
4288 | |
4289 | SmallVector<SDValue, 8> OtherOps; |
4290 | |
4291 | // Copy regular operands |
4292 | for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) |
4293 | OtherOps.push_back(N->getOperand(i)); |
4294 | |
4295 | // The select routine does not have access to the LoadSDNode instance, so |
4296 | // pass along the extension information |
4297 | OtherOps.push_back(DAG.getIntPtrConstant(LD->getExtensionType())); |
4298 | |
4299 | SDValue NewLD = DAG.getMemIntrinsicNode(Opcode, DL, LdResVTs, OtherOps, |
4300 | LD->getMemoryVT(), |
4301 | LD->getMemOperand()); |
4302 | |
4303 | SmallVector<SDValue, 4> ScalarRes; |
4304 | |
4305 | for (unsigned i = 0; i < NumElts; ++i) { |
4306 | SDValue Res = NewLD.getValue(i); |
4307 | if (NeedTrunc) |
4308 | Res = DAG.getNode(ISD::TRUNCATE, DL, ResVT.getVectorElementType(), Res); |
4309 | ScalarRes.push_back(Res); |
4310 | } |
4311 | |
4312 | SDValue LoadChain = NewLD.getValue(NumElts); |
4313 | |
4314 | SDValue BuildVec = DAG.getNode(ISD::BUILD_VECTOR, DL, ResVT, ScalarRes); |
4315 | |
4316 | Results.push_back(BuildVec); |
4317 | Results.push_back(LoadChain); |
4318 | } |
4319 | |
4320 | static void ReplaceINTRINSIC_W_CHAIN(SDNode *N, SelectionDAG &DAG, |
4321 | SmallVectorImpl<SDValue> &Results) { |
4322 | SDValue Chain = N->getOperand(0); |
4323 | SDValue Intrin = N->getOperand(1); |
4324 | SDLoc DL(N); |
4325 | |
4326 | // Get the intrinsic ID |
4327 | unsigned IntrinNo = cast<ConstantSDNode>(Intrin.getNode())->getZExtValue(); |
4328 | switch (IntrinNo) { |
4329 | default: |
4330 | return; |
4331 | case Intrinsic::nvvm_ldg_global_i: |
4332 | case Intrinsic::nvvm_ldg_global_f: |
4333 | case Intrinsic::nvvm_ldg_global_p: |
4334 | case Intrinsic::nvvm_ldu_global_i: |
4335 | case Intrinsic::nvvm_ldu_global_f: |
4336 | case Intrinsic::nvvm_ldu_global_p: { |
4337 | EVT ResVT = N->getValueType(0); |
4338 | |
4339 | if (ResVT.isVector()) { |
4340 | // Vector LDG/LDU |
4341 | |
4342 | unsigned NumElts = ResVT.getVectorNumElements(); |
4343 | EVT EltVT = ResVT.getVectorElementType(); |
4344 | |
4345 | // Since LDU/LDG are target nodes, we cannot rely on DAG type |
4346 | // legalization. |
4347 | // Therefore, we must ensure the type is legal. For i1 and i8, we set the |
4348 | // loaded type to i16 and propagate the "real" type as the memory type. |
4349 | bool NeedTrunc = false; |
4350 | if (EltVT.getSizeInBits() < 16) { |
4351 | EltVT = MVT::i16; |
4352 | NeedTrunc = true; |
4353 | } |
4354 | |
4355 | unsigned Opcode = 0; |
4356 | SDVTList LdResVTs; |
4357 | |
4358 | switch (NumElts) { |
4359 | default: |
4360 | return; |
4361 | case 2: |
4362 | switch (IntrinNo) { |
4363 | default: |
4364 | return; |
4365 | case Intrinsic::nvvm_ldg_global_i: |
4366 | case Intrinsic::nvvm_ldg_global_f: |
4367 | case Intrinsic::nvvm_ldg_global_p: |
4368 | Opcode = NVPTXISD::LDGV2; |
4369 | break; |
4370 | case Intrinsic::nvvm_ldu_global_i: |
4371 | case Intrinsic::nvvm_ldu_global_f: |
4372 | case Intrinsic::nvvm_ldu_global_p: |
4373 | Opcode = NVPTXISD::LDUV2; |
4374 | break; |
4375 | } |
4376 | LdResVTs = DAG.getVTList(EltVT, EltVT, MVT::Other); |
4377 | break; |
4378 | case 4: { |
4379 | switch (IntrinNo) { |
4380 | default: |
4381 | return; |
4382 | case Intrinsic::nvvm_ldg_global_i: |
4383 | case Intrinsic::nvvm_ldg_global_f: |
4384 | case Intrinsic::nvvm_ldg_global_p: |
4385 | Opcode = NVPTXISD::LDGV4; |
4386 | break; |
4387 | case Intrinsic::nvvm_ldu_global_i: |
4388 | case Intrinsic::nvvm_ldu_global_f: |
4389 | case Intrinsic::nvvm_ldu_global_p: |
4390 | Opcode = NVPTXISD::LDUV4; |
4391 | break; |
4392 | } |
4393 | EVT ListVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other }; |
4394 | LdResVTs = DAG.getVTList(ListVTs); |
4395 | break; |
4396 | } |
4397 | } |
4398 | |
4399 | SmallVector<SDValue, 8> OtherOps; |
4400 | |
4401 | // Copy regular operands |
4402 | |
4403 | OtherOps.push_back(Chain); // Chain |
4404 | // Skip operand 1 (intrinsic ID) |
4405 | // Others |
4406 | for (unsigned i = 2, e = N->getNumOperands(); i != e; ++i) |
4407 | OtherOps.push_back(N->getOperand(i)); |
4408 | |
4409 | MemIntrinsicSDNode *MemSD = cast<MemIntrinsicSDNode>(N); |
4410 | |
4411 | SDValue NewLD = DAG.getMemIntrinsicNode(Opcode, DL, LdResVTs, OtherOps, |
4412 | MemSD->getMemoryVT(), |
4413 | MemSD->getMemOperand()); |
4414 | |
4415 | SmallVector<SDValue, 4> ScalarRes; |
4416 | |
4417 | for (unsigned i = 0; i < NumElts; ++i) { |
4418 | SDValue Res = NewLD.getValue(i); |
4419 | if (NeedTrunc) |
4420 | Res = |
4421 | DAG.getNode(ISD::TRUNCATE, DL, ResVT.getVectorElementType(), Res); |
4422 | ScalarRes.push_back(Res); |
4423 | } |
4424 | |
4425 | SDValue LoadChain = NewLD.getValue(NumElts); |
4426 | |
4427 | SDValue BuildVec = |
4428 | DAG.getNode(ISD::BUILD_VECTOR, DL, ResVT, ScalarRes); |
4429 | |
4430 | Results.push_back(BuildVec); |
4431 | Results.push_back(LoadChain); |
4432 | } else { |
4433 | // i8 LDG/LDU |
4434 | assert(ResVT.isSimple() && ResVT.getSimpleVT().SimpleTy == MVT::i8 &&((ResVT.isSimple() && ResVT.getSimpleVT().SimpleTy == MVT::i8 && "Custom handling of non-i8 ldu/ldg?") ? static_cast <void> (0) : __assert_fail ("ResVT.isSimple() && ResVT.getSimpleVT().SimpleTy == MVT::i8 && \"Custom handling of non-i8 ldu/ldg?\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn216889/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 4435, __PRETTY_FUNCTION__)) |
4435 | "Custom handling of non-i8 ldu/ldg?")((ResVT.isSimple() && ResVT.getSimpleVT().SimpleTy == MVT::i8 && "Custom handling of non-i8 ldu/ldg?") ? static_cast <void> (0) : __assert_fail ("ResVT.isSimple() && ResVT.getSimpleVT().SimpleTy == MVT::i8 && \"Custom handling of non-i8 ldu/ldg?\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn216889/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 4435, __PRETTY_FUNCTION__)); |
4436 | |
4437 | // Just copy all operands as-is |
4438 | SmallVector<SDValue, 4> Ops; |
4439 | for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) |
4440 | Ops.push_back(N->getOperand(i)); |
4441 | |
4442 | // Force output to i16 |
4443 | SDVTList LdResVTs = DAG.getVTList(MVT::i16, MVT::Other); |
4444 | |
4445 | MemIntrinsicSDNode *MemSD = cast<MemIntrinsicSDNode>(N); |
4446 | |
4447 | // We make sure the memory type is i8, which will be used during isel |
4448 | // to select the proper instruction. |
4449 | SDValue NewLD = |
4450 | DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, LdResVTs, Ops, |
4451 | MVT::i8, MemSD->getMemOperand()); |
4452 | |
4453 | Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, |
4454 | NewLD.getValue(0))); |
4455 | Results.push_back(NewLD.getValue(1)); |
4456 | } |
4457 | } |
4458 | } |
4459 | } |
4460 | |
4461 | void NVPTXTargetLowering::ReplaceNodeResults( |
4462 | SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const { |
4463 | switch (N->getOpcode()) { |
4464 | default: |
4465 | report_fatal_error("Unhandled custom legalization"); |
4466 | case ISD::LOAD: |
4467 | ReplaceLoadVector(N, DAG, getDataLayout(), Results); |
4468 | return; |
4469 | case ISD::INTRINSIC_W_CHAIN: |
4470 | ReplaceINTRINSIC_W_CHAIN(N, DAG, Results); |
4471 | return; |
4472 | } |
4473 | } |
4474 | |
4475 | // Pin NVPTXSection's and NVPTXTargetObjectFile's vtables to this file. |
4476 | void NVPTXSection::anchor() {} |
4477 | |
4478 | NVPTXTargetObjectFile::~NVPTXTargetObjectFile() { |
4479 | delete TextSection; |
4480 | delete DataSection; |
4481 | delete BSSSection; |
4482 | delete ReadOnlySection; |
4483 | |
4484 | delete StaticCtorSection; |
4485 | delete StaticDtorSection; |
4486 | delete LSDASection; |
4487 | delete EHFrameSection; |
4488 | delete DwarfAbbrevSection; |
4489 | delete DwarfInfoSection; |
4490 | delete DwarfLineSection; |
4491 | delete DwarfFrameSection; |
4492 | delete DwarfPubTypesSection; |
4493 | delete DwarfDebugInlineSection; |
4494 | delete DwarfStrSection; |
4495 | delete DwarfLocSection; |
4496 | delete DwarfARangesSection; |
4497 | delete DwarfRangesSection; |
4498 | delete DwarfMacroInfoSection; |
4499 | } |