File: | lib/Target/NVPTX/NVPTXISelLowering.cpp |
Location: | line 1591, column 13 |
Description: | Called C++ object pointer is null |
1 | // | |||
2 | // The LLVM Compiler Infrastructure | |||
3 | // | |||
4 | // This file is distributed under the University of Illinois Open Source | |||
5 | // License. See LICENSE.TXT for details. | |||
6 | // | |||
7 | //===----------------------------------------------------------------------===// | |||
8 | // | |||
9 | // This file defines the interfaces that NVPTX uses to lower LLVM code into a | |||
10 | // selection DAG. | |||
11 | // | |||
12 | //===----------------------------------------------------------------------===// | |||
13 | ||||
14 | #include "NVPTXISelLowering.h" | |||
15 | #include "NVPTX.h" | |||
16 | #include "NVPTXTargetMachine.h" | |||
17 | #include "NVPTXTargetObjectFile.h" | |||
18 | #include "NVPTXUtilities.h" | |||
19 | #include "llvm/CodeGen/Analysis.h" | |||
20 | #include "llvm/CodeGen/MachineFrameInfo.h" | |||
21 | #include "llvm/CodeGen/MachineFunction.h" | |||
22 | #include "llvm/CodeGen/MachineInstrBuilder.h" | |||
23 | #include "llvm/CodeGen/MachineRegisterInfo.h" | |||
24 | #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" | |||
25 | #include "llvm/IR/CallSite.h" | |||
26 | #include "llvm/IR/DerivedTypes.h" | |||
27 | #include "llvm/IR/Function.h" | |||
28 | #include "llvm/IR/GlobalValue.h" | |||
29 | #include "llvm/IR/IntrinsicInst.h" | |||
30 | #include "llvm/IR/Intrinsics.h" | |||
31 | #include "llvm/IR/Module.h" | |||
32 | #include "llvm/MC/MCSectionELF.h" | |||
33 | #include "llvm/Support/CommandLine.h" | |||
34 | #include "llvm/Support/Debug.h" | |||
35 | #include "llvm/Support/ErrorHandling.h" | |||
36 | #include "llvm/Support/MathExtras.h" | |||
37 | #include "llvm/Support/raw_ostream.h" | |||
38 | #include <sstream> | |||
39 | ||||
40 | #undef DEBUG_TYPE"nvptx-lower" | |||
41 | #define DEBUG_TYPE"nvptx-lower" "nvptx-lower" | |||
42 | ||||
43 | using namespace llvm; | |||
44 | ||||
45 | static unsigned int uniqueCallSite = 0; | |||
46 | ||||
47 | static cl::opt<bool> sched4reg( | |||
48 | "nvptx-sched4reg", | |||
49 | cl::desc("NVPTX Specific: schedule for register pressue"), cl::init(false)); | |||
50 | ||||
51 | static cl::opt<unsigned> | |||
52 | FMAContractLevelOpt("nvptx-fma-level", cl::ZeroOrMore, cl::Hidden, | |||
53 | cl::desc("NVPTX Specific: FMA contraction (0: don't do it" | |||
54 | " 1: do it 2: do it aggressively"), | |||
55 | cl::init(2)); | |||
56 | ||||
57 | static bool IsPTXVectorType(MVT VT) { | |||
58 | switch (VT.SimpleTy) { | |||
59 | default: | |||
60 | return false; | |||
61 | case MVT::v2i1: | |||
62 | case MVT::v4i1: | |||
63 | case MVT::v2i8: | |||
64 | case MVT::v4i8: | |||
65 | case MVT::v2i16: | |||
66 | case MVT::v4i16: | |||
67 | case MVT::v2i32: | |||
68 | case MVT::v4i32: | |||
69 | case MVT::v2i64: | |||
70 | case MVT::v2f32: | |||
71 | case MVT::v4f32: | |||
72 | case MVT::v2f64: | |||
73 | return true; | |||
74 | } | |||
75 | } | |||
76 | ||||
77 | /// ComputePTXValueVTs - For the given Type \p Ty, returns the set of primitive | |||
78 | /// EVTs that compose it. Unlike ComputeValueVTs, this will break apart vectors | |||
79 | /// into their primitive components. | |||
80 | /// NOTE: This is a band-aid for code that expects ComputeValueVTs to return the | |||
81 | /// same number of types as the Ins/Outs arrays in LowerFormalArguments, | |||
82 | /// LowerCall, and LowerReturn. | |||
83 | static void ComputePTXValueVTs(const TargetLowering &TLI, Type *Ty, | |||
84 | SmallVectorImpl<EVT> &ValueVTs, | |||
85 | SmallVectorImpl<uint64_t> *Offsets = nullptr, | |||
86 | uint64_t StartingOffset = 0) { | |||
87 | SmallVector<EVT, 16> TempVTs; | |||
88 | SmallVector<uint64_t, 16> TempOffsets; | |||
89 | ||||
90 | ComputeValueVTs(TLI, Ty, TempVTs, &TempOffsets, StartingOffset); | |||
91 | for (unsigned i = 0, e = TempVTs.size(); i != e; ++i) { | |||
92 | EVT VT = TempVTs[i]; | |||
93 | uint64_t Off = TempOffsets[i]; | |||
94 | if (VT.isVector()) | |||
95 | for (unsigned j = 0, je = VT.getVectorNumElements(); j != je; ++j) { | |||
96 | ValueVTs.push_back(VT.getVectorElementType()); | |||
97 | if (Offsets) | |||
98 | Offsets->push_back(Off+j*VT.getVectorElementType().getStoreSize()); | |||
99 | } | |||
100 | else { | |||
101 | ValueVTs.push_back(VT); | |||
102 | if (Offsets) | |||
103 | Offsets->push_back(Off); | |||
104 | } | |||
105 | } | |||
106 | } | |||
107 | ||||
108 | // NVPTXTargetLowering Constructor. | |||
109 | NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM) | |||
110 | : TargetLowering(TM, new NVPTXTargetObjectFile()), nvTM(&TM), | |||
111 | nvptxSubtarget(TM.getSubtarget<NVPTXSubtarget>()) { | |||
112 | ||||
113 | // always lower memset, memcpy, and memmove intrinsics to load/store | |||
114 | // instructions, rather | |||
115 | // then generating calls to memset, mempcy or memmove. | |||
116 | MaxStoresPerMemset = (unsigned) 0xFFFFFFFF; | |||
117 | MaxStoresPerMemcpy = (unsigned) 0xFFFFFFFF; | |||
118 | MaxStoresPerMemmove = (unsigned) 0xFFFFFFFF; | |||
119 | ||||
120 | setBooleanContents(ZeroOrNegativeOneBooleanContent); | |||
121 | setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); | |||
122 | ||||
123 | // Jump is Expensive. Don't create extra control flow for 'and', 'or' | |||
124 | // condition branches. | |||
125 | setJumpIsExpensive(true); | |||
126 | ||||
127 | // By default, use the Source scheduling | |||
128 | if (sched4reg) | |||
129 | setSchedulingPreference(Sched::RegPressure); | |||
130 | else | |||
131 | setSchedulingPreference(Sched::Source); | |||
132 | ||||
133 | addRegisterClass(MVT::i1, &NVPTX::Int1RegsRegClass); | |||
134 | addRegisterClass(MVT::i16, &NVPTX::Int16RegsRegClass); | |||
135 | addRegisterClass(MVT::i32, &NVPTX::Int32RegsRegClass); | |||
136 | addRegisterClass(MVT::i64, &NVPTX::Int64RegsRegClass); | |||
137 | addRegisterClass(MVT::f32, &NVPTX::Float32RegsRegClass); | |||
138 | addRegisterClass(MVT::f64, &NVPTX::Float64RegsRegClass); | |||
139 | ||||
140 | // Operations not directly supported by NVPTX. | |||
141 | setOperationAction(ISD::SELECT_CC, MVT::f32, Expand); | |||
142 | setOperationAction(ISD::SELECT_CC, MVT::f64, Expand); | |||
143 | setOperationAction(ISD::SELECT_CC, MVT::i1, Expand); | |||
144 | setOperationAction(ISD::SELECT_CC, MVT::i8, Expand); | |||
145 | setOperationAction(ISD::SELECT_CC, MVT::i16, Expand); | |||
146 | setOperationAction(ISD::SELECT_CC, MVT::i32, Expand); | |||
147 | setOperationAction(ISD::SELECT_CC, MVT::i64, Expand); | |||
148 | setOperationAction(ISD::BR_CC, MVT::f32, Expand); | |||
149 | setOperationAction(ISD::BR_CC, MVT::f64, Expand); | |||
150 | setOperationAction(ISD::BR_CC, MVT::i1, Expand); | |||
151 | setOperationAction(ISD::BR_CC, MVT::i8, Expand); | |||
152 | setOperationAction(ISD::BR_CC, MVT::i16, Expand); | |||
153 | setOperationAction(ISD::BR_CC, MVT::i32, Expand); | |||
154 | setOperationAction(ISD::BR_CC, MVT::i64, Expand); | |||
155 | // Some SIGN_EXTEND_INREG can be done using cvt instruction. | |||
156 | // For others we will expand to a SHL/SRA pair. | |||
157 | setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i64, Legal); | |||
158 | setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal); | |||
159 | setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Legal); | |||
160 | setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal); | |||
161 | setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); | |||
162 | ||||
163 | setOperationAction(ISD::SHL_PARTS, MVT::i32 , Custom); | |||
164 | setOperationAction(ISD::SRA_PARTS, MVT::i32 , Custom); | |||
165 | setOperationAction(ISD::SRL_PARTS, MVT::i32 , Custom); | |||
166 | setOperationAction(ISD::SHL_PARTS, MVT::i64 , Custom); | |||
167 | setOperationAction(ISD::SRA_PARTS, MVT::i64 , Custom); | |||
168 | setOperationAction(ISD::SRL_PARTS, MVT::i64 , Custom); | |||
169 | ||||
170 | if (nvptxSubtarget.hasROT64()) { | |||
171 | setOperationAction(ISD::ROTL, MVT::i64, Legal); | |||
172 | setOperationAction(ISD::ROTR, MVT::i64, Legal); | |||
173 | } else { | |||
174 | setOperationAction(ISD::ROTL, MVT::i64, Expand); | |||
175 | setOperationAction(ISD::ROTR, MVT::i64, Expand); | |||
176 | } | |||
177 | if (nvptxSubtarget.hasROT32()) { | |||
178 | setOperationAction(ISD::ROTL, MVT::i32, Legal); | |||
179 | setOperationAction(ISD::ROTR, MVT::i32, Legal); | |||
180 | } else { | |||
181 | setOperationAction(ISD::ROTL, MVT::i32, Expand); | |||
182 | setOperationAction(ISD::ROTR, MVT::i32, Expand); | |||
183 | } | |||
184 | ||||
185 | setOperationAction(ISD::ROTL, MVT::i16, Expand); | |||
186 | setOperationAction(ISD::ROTR, MVT::i16, Expand); | |||
187 | setOperationAction(ISD::ROTL, MVT::i8, Expand); | |||
188 | setOperationAction(ISD::ROTR, MVT::i8, Expand); | |||
189 | setOperationAction(ISD::BSWAP, MVT::i16, Expand); | |||
190 | setOperationAction(ISD::BSWAP, MVT::i32, Expand); | |||
191 | setOperationAction(ISD::BSWAP, MVT::i64, Expand); | |||
192 | ||||
193 | // Indirect branch is not supported. | |||
194 | // This also disables Jump Table creation. | |||
195 | setOperationAction(ISD::BR_JT, MVT::Other, Expand); | |||
196 | setOperationAction(ISD::BRIND, MVT::Other, Expand); | |||
197 | ||||
198 | setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); | |||
199 | setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); | |||
200 | ||||
201 | // We want to legalize constant related memmove and memcopy | |||
202 | // intrinsics. | |||
203 | setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); | |||
204 | ||||
205 | // Turn FP extload into load/fextend | |||
206 | setLoadExtAction(ISD::EXTLOAD, MVT::f16, Expand); | |||
207 | setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand); | |||
208 | // Turn FP truncstore into trunc + store. | |||
209 | setTruncStoreAction(MVT::f32, MVT::f16, Expand); | |||
210 | setTruncStoreAction(MVT::f64, MVT::f16, Expand); | |||
211 | setTruncStoreAction(MVT::f64, MVT::f32, Expand); | |||
212 | ||||
213 | // PTX does not support load / store predicate registers | |||
214 | setOperationAction(ISD::LOAD, MVT::i1, Custom); | |||
215 | setOperationAction(ISD::STORE, MVT::i1, Custom); | |||
216 | ||||
217 | setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote); | |||
218 | setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote); | |||
219 | setTruncStoreAction(MVT::i64, MVT::i1, Expand); | |||
220 | setTruncStoreAction(MVT::i32, MVT::i1, Expand); | |||
221 | setTruncStoreAction(MVT::i16, MVT::i1, Expand); | |||
222 | setTruncStoreAction(MVT::i8, MVT::i1, Expand); | |||
223 | ||||
224 | // This is legal in NVPTX | |||
225 | setOperationAction(ISD::ConstantFP, MVT::f64, Legal); | |||
226 | setOperationAction(ISD::ConstantFP, MVT::f32, Legal); | |||
227 | ||||
228 | // TRAP can be lowered to PTX trap | |||
229 | setOperationAction(ISD::TRAP, MVT::Other, Legal); | |||
230 | ||||
231 | setOperationAction(ISD::ADDC, MVT::i64, Expand); | |||
232 | setOperationAction(ISD::ADDE, MVT::i64, Expand); | |||
233 | ||||
234 | // Register custom handling for vector loads/stores | |||
235 | for (int i = MVT::FIRST_VECTOR_VALUETYPE; i <= MVT::LAST_VECTOR_VALUETYPE; | |||
236 | ++i) { | |||
237 | MVT VT = (MVT::SimpleValueType) i; | |||
238 | if (IsPTXVectorType(VT)) { | |||
239 | setOperationAction(ISD::LOAD, VT, Custom); | |||
240 | setOperationAction(ISD::STORE, VT, Custom); | |||
241 | setOperationAction(ISD::INTRINSIC_W_CHAIN, VT, Custom); | |||
242 | } | |||
243 | } | |||
244 | ||||
245 | // Custom handling for i8 intrinsics | |||
246 | setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom); | |||
247 | ||||
248 | setOperationAction(ISD::CTLZ, MVT::i16, Legal); | |||
249 | setOperationAction(ISD::CTLZ, MVT::i32, Legal); | |||
250 | setOperationAction(ISD::CTLZ, MVT::i64, Legal); | |||
251 | setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16, Legal); | |||
252 | setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Legal); | |||
253 | setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Legal); | |||
254 | setOperationAction(ISD::CTTZ, MVT::i16, Expand); | |||
255 | setOperationAction(ISD::CTTZ, MVT::i32, Expand); | |||
256 | setOperationAction(ISD::CTTZ, MVT::i64, Expand); | |||
257 | setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16, Expand); | |||
258 | setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand); | |||
259 | setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand); | |||
260 | setOperationAction(ISD::CTPOP, MVT::i16, Legal); | |||
261 | setOperationAction(ISD::CTPOP, MVT::i32, Legal); | |||
262 | setOperationAction(ISD::CTPOP, MVT::i64, Legal); | |||
263 | ||||
264 | // We have some custom DAG combine patterns for these nodes | |||
265 | setTargetDAGCombine(ISD::ADD); | |||
266 | setTargetDAGCombine(ISD::AND); | |||
267 | setTargetDAGCombine(ISD::FADD); | |||
268 | setTargetDAGCombine(ISD::MUL); | |||
269 | setTargetDAGCombine(ISD::SHL); | |||
270 | ||||
271 | // Now deduce the information based on the above mentioned | |||
272 | // actions | |||
273 | computeRegisterProperties(); | |||
274 | } | |||
275 | ||||
276 | const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const { | |||
277 | switch (Opcode) { | |||
278 | default: | |||
279 | return nullptr; | |||
280 | case NVPTXISD::CALL: | |||
281 | return "NVPTXISD::CALL"; | |||
282 | case NVPTXISD::RET_FLAG: | |||
283 | return "NVPTXISD::RET_FLAG"; | |||
284 | case NVPTXISD::Wrapper: | |||
285 | return "NVPTXISD::Wrapper"; | |||
286 | case NVPTXISD::DeclareParam: | |||
287 | return "NVPTXISD::DeclareParam"; | |||
288 | case NVPTXISD::DeclareScalarParam: | |||
289 | return "NVPTXISD::DeclareScalarParam"; | |||
290 | case NVPTXISD::DeclareRet: | |||
291 | return "NVPTXISD::DeclareRet"; | |||
292 | case NVPTXISD::DeclareRetParam: | |||
293 | return "NVPTXISD::DeclareRetParam"; | |||
294 | case NVPTXISD::PrintCall: | |||
295 | return "NVPTXISD::PrintCall"; | |||
296 | case NVPTXISD::LoadParam: | |||
297 | return "NVPTXISD::LoadParam"; | |||
298 | case NVPTXISD::LoadParamV2: | |||
299 | return "NVPTXISD::LoadParamV2"; | |||
300 | case NVPTXISD::LoadParamV4: | |||
301 | return "NVPTXISD::LoadParamV4"; | |||
302 | case NVPTXISD::StoreParam: | |||
303 | return "NVPTXISD::StoreParam"; | |||
304 | case NVPTXISD::StoreParamV2: | |||
305 | return "NVPTXISD::StoreParamV2"; | |||
306 | case NVPTXISD::StoreParamV4: | |||
307 | return "NVPTXISD::StoreParamV4"; | |||
308 | case NVPTXISD::StoreParamS32: | |||
309 | return "NVPTXISD::StoreParamS32"; | |||
310 | case NVPTXISD::StoreParamU32: | |||
311 | return "NVPTXISD::StoreParamU32"; | |||
312 | case NVPTXISD::CallArgBegin: | |||
313 | return "NVPTXISD::CallArgBegin"; | |||
314 | case NVPTXISD::CallArg: | |||
315 | return "NVPTXISD::CallArg"; | |||
316 | case NVPTXISD::LastCallArg: | |||
317 | return "NVPTXISD::LastCallArg"; | |||
318 | case NVPTXISD::CallArgEnd: | |||
319 | return "NVPTXISD::CallArgEnd"; | |||
320 | case NVPTXISD::CallVoid: | |||
321 | return "NVPTXISD::CallVoid"; | |||
322 | case NVPTXISD::CallVal: | |||
323 | return "NVPTXISD::CallVal"; | |||
324 | case NVPTXISD::CallSymbol: | |||
325 | return "NVPTXISD::CallSymbol"; | |||
326 | case NVPTXISD::Prototype: | |||
327 | return "NVPTXISD::Prototype"; | |||
328 | case NVPTXISD::MoveParam: | |||
329 | return "NVPTXISD::MoveParam"; | |||
330 | case NVPTXISD::StoreRetval: | |||
331 | return "NVPTXISD::StoreRetval"; | |||
332 | case NVPTXISD::StoreRetvalV2: | |||
333 | return "NVPTXISD::StoreRetvalV2"; | |||
334 | case NVPTXISD::StoreRetvalV4: | |||
335 | return "NVPTXISD::StoreRetvalV4"; | |||
336 | case NVPTXISD::PseudoUseParam: | |||
337 | return "NVPTXISD::PseudoUseParam"; | |||
338 | case NVPTXISD::RETURN: | |||
339 | return "NVPTXISD::RETURN"; | |||
340 | case NVPTXISD::CallSeqBegin: | |||
341 | return "NVPTXISD::CallSeqBegin"; | |||
342 | case NVPTXISD::CallSeqEnd: | |||
343 | return "NVPTXISD::CallSeqEnd"; | |||
344 | case NVPTXISD::CallPrototype: | |||
345 | return "NVPTXISD::CallPrototype"; | |||
346 | case NVPTXISD::LoadV2: | |||
347 | return "NVPTXISD::LoadV2"; | |||
348 | case NVPTXISD::LoadV4: | |||
349 | return "NVPTXISD::LoadV4"; | |||
350 | case NVPTXISD::LDGV2: | |||
351 | return "NVPTXISD::LDGV2"; | |||
352 | case NVPTXISD::LDGV4: | |||
353 | return "NVPTXISD::LDGV4"; | |||
354 | case NVPTXISD::LDUV2: | |||
355 | return "NVPTXISD::LDUV2"; | |||
356 | case NVPTXISD::LDUV4: | |||
357 | return "NVPTXISD::LDUV4"; | |||
358 | case NVPTXISD::StoreV2: | |||
359 | return "NVPTXISD::StoreV2"; | |||
360 | case NVPTXISD::StoreV4: | |||
361 | return "NVPTXISD::StoreV4"; | |||
362 | case NVPTXISD::FUN_SHFL_CLAMP: | |||
363 | return "NVPTXISD::FUN_SHFL_CLAMP"; | |||
364 | case NVPTXISD::FUN_SHFR_CLAMP: | |||
365 | return "NVPTXISD::FUN_SHFR_CLAMP"; | |||
366 | case NVPTXISD::IMAD: | |||
367 | return "NVPTXISD::IMAD"; | |||
368 | case NVPTXISD::MUL_WIDE_SIGNED: | |||
369 | return "NVPTXISD::MUL_WIDE_SIGNED"; | |||
370 | case NVPTXISD::MUL_WIDE_UNSIGNED: | |||
371 | return "NVPTXISD::MUL_WIDE_UNSIGNED"; | |||
372 | case NVPTXISD::Tex1DFloatS32: return "NVPTXISD::Tex1DFloatS32"; | |||
373 | case NVPTXISD::Tex1DFloatFloat: return "NVPTXISD::Tex1DFloatFloat"; | |||
374 | case NVPTXISD::Tex1DFloatFloatLevel: | |||
375 | return "NVPTXISD::Tex1DFloatFloatLevel"; | |||
376 | case NVPTXISD::Tex1DFloatFloatGrad: | |||
377 | return "NVPTXISD::Tex1DFloatFloatGrad"; | |||
378 | case NVPTXISD::Tex1DS32S32: return "NVPTXISD::Tex1DS32S32"; | |||
379 | case NVPTXISD::Tex1DS32Float: return "NVPTXISD::Tex1DS32Float"; | |||
380 | case NVPTXISD::Tex1DS32FloatLevel: | |||
381 | return "NVPTXISD::Tex1DS32FloatLevel"; | |||
382 | case NVPTXISD::Tex1DS32FloatGrad: | |||
383 | return "NVPTXISD::Tex1DS32FloatGrad"; | |||
384 | case NVPTXISD::Tex1DU32S32: return "NVPTXISD::Tex1DU32S32"; | |||
385 | case NVPTXISD::Tex1DU32Float: return "NVPTXISD::Tex1DU32Float"; | |||
386 | case NVPTXISD::Tex1DU32FloatLevel: | |||
387 | return "NVPTXISD::Tex1DU32FloatLevel"; | |||
388 | case NVPTXISD::Tex1DU32FloatGrad: | |||
389 | return "NVPTXISD::Tex1DU32FloatGrad"; | |||
390 | case NVPTXISD::Tex1DArrayFloatS32: return "NVPTXISD::Tex1DArrayFloatS32"; | |||
391 | case NVPTXISD::Tex1DArrayFloatFloat: return "NVPTXISD::Tex1DArrayFloatFloat"; | |||
392 | case NVPTXISD::Tex1DArrayFloatFloatLevel: | |||
393 | return "NVPTXISD::Tex1DArrayFloatFloatLevel"; | |||
394 | case NVPTXISD::Tex1DArrayFloatFloatGrad: | |||
395 | return "NVPTXISD::Tex1DArrayFloatFloatGrad"; | |||
396 | case NVPTXISD::Tex1DArrayS32S32: return "NVPTXISD::Tex1DArrayS32S32"; | |||
397 | case NVPTXISD::Tex1DArrayS32Float: return "NVPTXISD::Tex1DArrayS32Float"; | |||
398 | case NVPTXISD::Tex1DArrayS32FloatLevel: | |||
399 | return "NVPTXISD::Tex1DArrayS32FloatLevel"; | |||
400 | case NVPTXISD::Tex1DArrayS32FloatGrad: | |||
401 | return "NVPTXISD::Tex1DArrayS32FloatGrad"; | |||
402 | case NVPTXISD::Tex1DArrayU32S32: return "NVPTXISD::Tex1DArrayU32S32"; | |||
403 | case NVPTXISD::Tex1DArrayU32Float: return "NVPTXISD::Tex1DArrayU32Float"; | |||
404 | case NVPTXISD::Tex1DArrayU32FloatLevel: | |||
405 | return "NVPTXISD::Tex1DArrayU32FloatLevel"; | |||
406 | case NVPTXISD::Tex1DArrayU32FloatGrad: | |||
407 | return "NVPTXISD::Tex1DArrayU32FloatGrad"; | |||
408 | case NVPTXISD::Tex2DFloatS32: return "NVPTXISD::Tex2DFloatS32"; | |||
409 | case NVPTXISD::Tex2DFloatFloat: return "NVPTXISD::Tex2DFloatFloat"; | |||
410 | case NVPTXISD::Tex2DFloatFloatLevel: | |||
411 | return "NVPTXISD::Tex2DFloatFloatLevel"; | |||
412 | case NVPTXISD::Tex2DFloatFloatGrad: | |||
413 | return "NVPTXISD::Tex2DFloatFloatGrad"; | |||
414 | case NVPTXISD::Tex2DS32S32: return "NVPTXISD::Tex2DS32S32"; | |||
415 | case NVPTXISD::Tex2DS32Float: return "NVPTXISD::Tex2DS32Float"; | |||
416 | case NVPTXISD::Tex2DS32FloatLevel: | |||
417 | return "NVPTXISD::Tex2DS32FloatLevel"; | |||
418 | case NVPTXISD::Tex2DS32FloatGrad: | |||
419 | return "NVPTXISD::Tex2DS32FloatGrad"; | |||
420 | case NVPTXISD::Tex2DU32S32: return "NVPTXISD::Tex2DU32S32"; | |||
421 | case NVPTXISD::Tex2DU32Float: return "NVPTXISD::Tex2DU32Float"; | |||
422 | case NVPTXISD::Tex2DU32FloatLevel: | |||
423 | return "NVPTXISD::Tex2DU32FloatLevel"; | |||
424 | case NVPTXISD::Tex2DU32FloatGrad: | |||
425 | return "NVPTXISD::Tex2DU32FloatGrad"; | |||
426 | case NVPTXISD::Tex2DArrayFloatS32: return "NVPTXISD::Tex2DArrayFloatS32"; | |||
427 | case NVPTXISD::Tex2DArrayFloatFloat: return "NVPTXISD::Tex2DArrayFloatFloat"; | |||
428 | case NVPTXISD::Tex2DArrayFloatFloatLevel: | |||
429 | return "NVPTXISD::Tex2DArrayFloatFloatLevel"; | |||
430 | case NVPTXISD::Tex2DArrayFloatFloatGrad: | |||
431 | return "NVPTXISD::Tex2DArrayFloatFloatGrad"; | |||
432 | case NVPTXISD::Tex2DArrayS32S32: return "NVPTXISD::Tex2DArrayS32S32"; | |||
433 | case NVPTXISD::Tex2DArrayS32Float: return "NVPTXISD::Tex2DArrayS32Float"; | |||
434 | case NVPTXISD::Tex2DArrayS32FloatLevel: | |||
435 | return "NVPTXISD::Tex2DArrayS32FloatLevel"; | |||
436 | case NVPTXISD::Tex2DArrayS32FloatGrad: | |||
437 | return "NVPTXISD::Tex2DArrayS32FloatGrad"; | |||
438 | case NVPTXISD::Tex2DArrayU32S32: return "NVPTXISD::Tex2DArrayU32S32"; | |||
439 | case NVPTXISD::Tex2DArrayU32Float: return "NVPTXISD::Tex2DArrayU32Float"; | |||
440 | case NVPTXISD::Tex2DArrayU32FloatLevel: | |||
441 | return "NVPTXISD::Tex2DArrayU32FloatLevel"; | |||
442 | case NVPTXISD::Tex2DArrayU32FloatGrad: | |||
443 | return "NVPTXISD::Tex2DArrayU32FloatGrad"; | |||
444 | case NVPTXISD::Tex3DFloatS32: return "NVPTXISD::Tex3DFloatS32"; | |||
445 | case NVPTXISD::Tex3DFloatFloat: return "NVPTXISD::Tex3DFloatFloat"; | |||
446 | case NVPTXISD::Tex3DFloatFloatLevel: | |||
447 | return "NVPTXISD::Tex3DFloatFloatLevel"; | |||
448 | case NVPTXISD::Tex3DFloatFloatGrad: | |||
449 | return "NVPTXISD::Tex3DFloatFloatGrad"; | |||
450 | case NVPTXISD::Tex3DS32S32: return "NVPTXISD::Tex3DS32S32"; | |||
451 | case NVPTXISD::Tex3DS32Float: return "NVPTXISD::Tex3DS32Float"; | |||
452 | case NVPTXISD::Tex3DS32FloatLevel: | |||
453 | return "NVPTXISD::Tex3DS32FloatLevel"; | |||
454 | case NVPTXISD::Tex3DS32FloatGrad: | |||
455 | return "NVPTXISD::Tex3DS32FloatGrad"; | |||
456 | case NVPTXISD::Tex3DU32S32: return "NVPTXISD::Tex3DU32S32"; | |||
457 | case NVPTXISD::Tex3DU32Float: return "NVPTXISD::Tex3DU32Float"; | |||
458 | case NVPTXISD::Tex3DU32FloatLevel: | |||
459 | return "NVPTXISD::Tex3DU32FloatLevel"; | |||
460 | case NVPTXISD::Tex3DU32FloatGrad: | |||
461 | return "NVPTXISD::Tex3DU32FloatGrad"; | |||
462 | case NVPTXISD::TexCubeFloatFloat: return "NVPTXISD::TexCubeFloatFloat"; | |||
463 | case NVPTXISD::TexCubeFloatFloatLevel: | |||
464 | return "NVPTXISD::TexCubeFloatFloatLevel"; | |||
465 | case NVPTXISD::TexCubeS32Float: return "NVPTXISD::TexCubeS32Float"; | |||
466 | case NVPTXISD::TexCubeS32FloatLevel: | |||
467 | return "NVPTXISD::TexCubeS32FloatLevel"; | |||
468 | case NVPTXISD::TexCubeU32Float: return "NVPTXISD::TexCubeU32Float"; | |||
469 | case NVPTXISD::TexCubeU32FloatLevel: | |||
470 | return "NVPTXISD::TexCubeU32FloatLevel"; | |||
471 | case NVPTXISD::TexCubeArrayFloatFloat: | |||
472 | return "NVPTXISD::TexCubeArrayFloatFloat"; | |||
473 | case NVPTXISD::TexCubeArrayFloatFloatLevel: | |||
474 | return "NVPTXISD::TexCubeArrayFloatFloatLevel"; | |||
475 | case NVPTXISD::TexCubeArrayS32Float: | |||
476 | return "NVPTXISD::TexCubeArrayS32Float"; | |||
477 | case NVPTXISD::TexCubeArrayS32FloatLevel: | |||
478 | return "NVPTXISD::TexCubeArrayS32FloatLevel"; | |||
479 | case NVPTXISD::TexCubeArrayU32Float: | |||
480 | return "NVPTXISD::TexCubeArrayU32Float"; | |||
481 | case NVPTXISD::TexCubeArrayU32FloatLevel: | |||
482 | return "NVPTXISD::TexCubeArrayU32FloatLevel"; | |||
483 | case NVPTXISD::Tld4R2DFloatFloat: | |||
484 | return "NVPTXISD::Tld4R2DFloatFloat"; | |||
485 | case NVPTXISD::Tld4G2DFloatFloat: | |||
486 | return "NVPTXISD::Tld4G2DFloatFloat"; | |||
487 | case NVPTXISD::Tld4B2DFloatFloat: | |||
488 | return "NVPTXISD::Tld4B2DFloatFloat"; | |||
489 | case NVPTXISD::Tld4A2DFloatFloat: | |||
490 | return "NVPTXISD::Tld4A2DFloatFloat"; | |||
491 | case NVPTXISD::Tld4R2DS64Float: | |||
492 | return "NVPTXISD::Tld4R2DS64Float"; | |||
493 | case NVPTXISD::Tld4G2DS64Float: | |||
494 | return "NVPTXISD::Tld4G2DS64Float"; | |||
495 | case NVPTXISD::Tld4B2DS64Float: | |||
496 | return "NVPTXISD::Tld4B2DS64Float"; | |||
497 | case NVPTXISD::Tld4A2DS64Float: | |||
498 | return "NVPTXISD::Tld4A2DS64Float"; | |||
499 | case NVPTXISD::Tld4R2DU64Float: | |||
500 | return "NVPTXISD::Tld4R2DU64Float"; | |||
501 | case NVPTXISD::Tld4G2DU64Float: | |||
502 | return "NVPTXISD::Tld4G2DU64Float"; | |||
503 | case NVPTXISD::Tld4B2DU64Float: | |||
504 | return "NVPTXISD::Tld4B2DU64Float"; | |||
505 | case NVPTXISD::Tld4A2DU64Float: | |||
506 | return "NVPTXISD::Tld4A2DU64Float"; | |||
507 | ||||
508 | case NVPTXISD::TexUnified1DFloatS32: | |||
509 | return "NVPTXISD::TexUnified1DFloatS32"; | |||
510 | case NVPTXISD::TexUnified1DFloatFloat: | |||
511 | return "NVPTXISD::TexUnified1DFloatFloat"; | |||
512 | case NVPTXISD::TexUnified1DFloatFloatLevel: | |||
513 | return "NVPTXISD::TexUnified1DFloatFloatLevel"; | |||
514 | case NVPTXISD::TexUnified1DFloatFloatGrad: | |||
515 | return "NVPTXISD::TexUnified1DFloatFloatGrad"; | |||
516 | case NVPTXISD::TexUnified1DS32S32: | |||
517 | return "NVPTXISD::TexUnified1DS32S32"; | |||
518 | case NVPTXISD::TexUnified1DS32Float: | |||
519 | return "NVPTXISD::TexUnified1DS32Float"; | |||
520 | case NVPTXISD::TexUnified1DS32FloatLevel: | |||
521 | return "NVPTXISD::TexUnified1DS32FloatLevel"; | |||
522 | case NVPTXISD::TexUnified1DS32FloatGrad: | |||
523 | return "NVPTXISD::TexUnified1DS32FloatGrad"; | |||
524 | case NVPTXISD::TexUnified1DU32S32: | |||
525 | return "NVPTXISD::TexUnified1DU32S32"; | |||
526 | case NVPTXISD::TexUnified1DU32Float: | |||
527 | return "NVPTXISD::TexUnified1DU32Float"; | |||
528 | case NVPTXISD::TexUnified1DU32FloatLevel: | |||
529 | return "NVPTXISD::TexUnified1DU32FloatLevel"; | |||
530 | case NVPTXISD::TexUnified1DU32FloatGrad: | |||
531 | return "NVPTXISD::TexUnified1DU32FloatGrad"; | |||
532 | case NVPTXISD::TexUnified1DArrayFloatS32: | |||
533 | return "NVPTXISD::TexUnified1DArrayFloatS32"; | |||
534 | case NVPTXISD::TexUnified1DArrayFloatFloat: | |||
535 | return "NVPTXISD::TexUnified1DArrayFloatFloat"; | |||
536 | case NVPTXISD::TexUnified1DArrayFloatFloatLevel: | |||
537 | return "NVPTXISD::TexUnified1DArrayFloatFloatLevel"; | |||
538 | case NVPTXISD::TexUnified1DArrayFloatFloatGrad: | |||
539 | return "NVPTXISD::TexUnified1DArrayFloatFloatGrad"; | |||
540 | case NVPTXISD::TexUnified1DArrayS32S32: | |||
541 | return "NVPTXISD::TexUnified1DArrayS32S32"; | |||
542 | case NVPTXISD::TexUnified1DArrayS32Float: | |||
543 | return "NVPTXISD::TexUnified1DArrayS32Float"; | |||
544 | case NVPTXISD::TexUnified1DArrayS32FloatLevel: | |||
545 | return "NVPTXISD::TexUnified1DArrayS32FloatLevel"; | |||
546 | case NVPTXISD::TexUnified1DArrayS32FloatGrad: | |||
547 | return "NVPTXISD::TexUnified1DArrayS32FloatGrad"; | |||
548 | case NVPTXISD::TexUnified1DArrayU32S32: | |||
549 | return "NVPTXISD::TexUnified1DArrayU32S32"; | |||
550 | case NVPTXISD::TexUnified1DArrayU32Float: | |||
551 | return "NVPTXISD::TexUnified1DArrayU32Float"; | |||
552 | case NVPTXISD::TexUnified1DArrayU32FloatLevel: | |||
553 | return "NVPTXISD::TexUnified1DArrayU32FloatLevel"; | |||
554 | case NVPTXISD::TexUnified1DArrayU32FloatGrad: | |||
555 | return "NVPTXISD::TexUnified1DArrayU32FloatGrad"; | |||
556 | case NVPTXISD::TexUnified2DFloatS32: | |||
557 | return "NVPTXISD::TexUnified2DFloatS32"; | |||
558 | case NVPTXISD::TexUnified2DFloatFloat: | |||
559 | return "NVPTXISD::TexUnified2DFloatFloat"; | |||
560 | case NVPTXISD::TexUnified2DFloatFloatLevel: | |||
561 | return "NVPTXISD::TexUnified2DFloatFloatLevel"; | |||
562 | case NVPTXISD::TexUnified2DFloatFloatGrad: | |||
563 | return "NVPTXISD::TexUnified2DFloatFloatGrad"; | |||
564 | case NVPTXISD::TexUnified2DS32S32: | |||
565 | return "NVPTXISD::TexUnified2DS32S32"; | |||
566 | case NVPTXISD::TexUnified2DS32Float: | |||
567 | return "NVPTXISD::TexUnified2DS32Float"; | |||
568 | case NVPTXISD::TexUnified2DS32FloatLevel: | |||
569 | return "NVPTXISD::TexUnified2DS32FloatLevel"; | |||
570 | case NVPTXISD::TexUnified2DS32FloatGrad: | |||
571 | return "NVPTXISD::TexUnified2DS32FloatGrad"; | |||
572 | case NVPTXISD::TexUnified2DU32S32: | |||
573 | return "NVPTXISD::TexUnified2DU32S32"; | |||
574 | case NVPTXISD::TexUnified2DU32Float: | |||
575 | return "NVPTXISD::TexUnified2DU32Float"; | |||
576 | case NVPTXISD::TexUnified2DU32FloatLevel: | |||
577 | return "NVPTXISD::TexUnified2DU32FloatLevel"; | |||
578 | case NVPTXISD::TexUnified2DU32FloatGrad: | |||
579 | return "NVPTXISD::TexUnified2DU32FloatGrad"; | |||
580 | case NVPTXISD::TexUnified2DArrayFloatS32: | |||
581 | return "NVPTXISD::TexUnified2DArrayFloatS32"; | |||
582 | case NVPTXISD::TexUnified2DArrayFloatFloat: | |||
583 | return "NVPTXISD::TexUnified2DArrayFloatFloat"; | |||
584 | case NVPTXISD::TexUnified2DArrayFloatFloatLevel: | |||
585 | return "NVPTXISD::TexUnified2DArrayFloatFloatLevel"; | |||
586 | case NVPTXISD::TexUnified2DArrayFloatFloatGrad: | |||
587 | return "NVPTXISD::TexUnified2DArrayFloatFloatGrad"; | |||
588 | case NVPTXISD::TexUnified2DArrayS32S32: | |||
589 | return "NVPTXISD::TexUnified2DArrayS32S32"; | |||
590 | case NVPTXISD::TexUnified2DArrayS32Float: | |||
591 | return "NVPTXISD::TexUnified2DArrayS32Float"; | |||
592 | case NVPTXISD::TexUnified2DArrayS32FloatLevel: | |||
593 | return "NVPTXISD::TexUnified2DArrayS32FloatLevel"; | |||
594 | case NVPTXISD::TexUnified2DArrayS32FloatGrad: | |||
595 | return "NVPTXISD::TexUnified2DArrayS32FloatGrad"; | |||
596 | case NVPTXISD::TexUnified2DArrayU32S32: | |||
597 | return "NVPTXISD::TexUnified2DArrayU32S32"; | |||
598 | case NVPTXISD::TexUnified2DArrayU32Float: | |||
599 | return "NVPTXISD::TexUnified2DArrayU32Float"; | |||
600 | case NVPTXISD::TexUnified2DArrayU32FloatLevel: | |||
601 | return "NVPTXISD::TexUnified2DArrayU32FloatLevel"; | |||
602 | case NVPTXISD::TexUnified2DArrayU32FloatGrad: | |||
603 | return "NVPTXISD::TexUnified2DArrayU32FloatGrad"; | |||
604 | case NVPTXISD::TexUnified3DFloatS32: | |||
605 | return "NVPTXISD::TexUnified3DFloatS32"; | |||
606 | case NVPTXISD::TexUnified3DFloatFloat: | |||
607 | return "NVPTXISD::TexUnified3DFloatFloat"; | |||
608 | case NVPTXISD::TexUnified3DFloatFloatLevel: | |||
609 | return "NVPTXISD::TexUnified3DFloatFloatLevel"; | |||
610 | case NVPTXISD::TexUnified3DFloatFloatGrad: | |||
611 | return "NVPTXISD::TexUnified3DFloatFloatGrad"; | |||
612 | case NVPTXISD::TexUnified3DS32S32: | |||
613 | return "NVPTXISD::TexUnified3DS32S32"; | |||
614 | case NVPTXISD::TexUnified3DS32Float: | |||
615 | return "NVPTXISD::TexUnified3DS32Float"; | |||
616 | case NVPTXISD::TexUnified3DS32FloatLevel: | |||
617 | return "NVPTXISD::TexUnified3DS32FloatLevel"; | |||
618 | case NVPTXISD::TexUnified3DS32FloatGrad: | |||
619 | return "NVPTXISD::TexUnified3DS32FloatGrad"; | |||
620 | case NVPTXISD::TexUnified3DU32S32: | |||
621 | return "NVPTXISD::TexUnified3DU32S32"; | |||
622 | case NVPTXISD::TexUnified3DU32Float: | |||
623 | return "NVPTXISD::TexUnified3DU32Float"; | |||
624 | case NVPTXISD::TexUnified3DU32FloatLevel: | |||
625 | return "NVPTXISD::TexUnified3DU32FloatLevel"; | |||
626 | case NVPTXISD::TexUnified3DU32FloatGrad: | |||
627 | return "NVPTXISD::TexUnified3DU32FloatGrad"; | |||
628 | case NVPTXISD::TexUnifiedCubeFloatFloat: | |||
629 | return "NVPTXISD::TexUnifiedCubeFloatFloat"; | |||
630 | case NVPTXISD::TexUnifiedCubeFloatFloatLevel: | |||
631 | return "NVPTXISD::TexUnifiedCubeFloatFloatLevel"; | |||
632 | case NVPTXISD::TexUnifiedCubeS32Float: | |||
633 | return "NVPTXISD::TexUnifiedCubeS32Float"; | |||
634 | case NVPTXISD::TexUnifiedCubeS32FloatLevel: | |||
635 | return "NVPTXISD::TexUnifiedCubeS32FloatLevel"; | |||
636 | case NVPTXISD::TexUnifiedCubeU32Float: | |||
637 | return "NVPTXISD::TexUnifiedCubeU32Float"; | |||
638 | case NVPTXISD::TexUnifiedCubeU32FloatLevel: | |||
639 | return "NVPTXISD::TexUnifiedCubeU32FloatLevel"; | |||
640 | case NVPTXISD::TexUnifiedCubeArrayFloatFloat: | |||
641 | return "NVPTXISD::TexUnifiedCubeArrayFloatFloat"; | |||
642 | case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel: | |||
643 | return "NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel"; | |||
644 | case NVPTXISD::TexUnifiedCubeArrayS32Float: | |||
645 | return "NVPTXISD::TexUnifiedCubeArrayS32Float"; | |||
646 | case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel: | |||
647 | return "NVPTXISD::TexUnifiedCubeArrayS32FloatLevel"; | |||
648 | case NVPTXISD::TexUnifiedCubeArrayU32Float: | |||
649 | return "NVPTXISD::TexUnifiedCubeArrayU32Float"; | |||
650 | case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel: | |||
651 | return "NVPTXISD::TexUnifiedCubeArrayU32FloatLevel"; | |||
652 | case NVPTXISD::Tld4UnifiedR2DFloatFloat: | |||
653 | return "NVPTXISD::Tld4UnifiedR2DFloatFloat"; | |||
654 | case NVPTXISD::Tld4UnifiedG2DFloatFloat: | |||
655 | return "NVPTXISD::Tld4UnifiedG2DFloatFloat"; | |||
656 | case NVPTXISD::Tld4UnifiedB2DFloatFloat: | |||
657 | return "NVPTXISD::Tld4UnifiedB2DFloatFloat"; | |||
658 | case NVPTXISD::Tld4UnifiedA2DFloatFloat: | |||
659 | return "NVPTXISD::Tld4UnifiedA2DFloatFloat"; | |||
660 | case NVPTXISD::Tld4UnifiedR2DS64Float: | |||
661 | return "NVPTXISD::Tld4UnifiedR2DS64Float"; | |||
662 | case NVPTXISD::Tld4UnifiedG2DS64Float: | |||
663 | return "NVPTXISD::Tld4UnifiedG2DS64Float"; | |||
664 | case NVPTXISD::Tld4UnifiedB2DS64Float: | |||
665 | return "NVPTXISD::Tld4UnifiedB2DS64Float"; | |||
666 | case NVPTXISD::Tld4UnifiedA2DS64Float: | |||
667 | return "NVPTXISD::Tld4UnifiedA2DS64Float"; | |||
668 | case NVPTXISD::Tld4UnifiedR2DU64Float: | |||
669 | return "NVPTXISD::Tld4UnifiedR2DU64Float"; | |||
670 | case NVPTXISD::Tld4UnifiedG2DU64Float: | |||
671 | return "NVPTXISD::Tld4UnifiedG2DU64Float"; | |||
672 | case NVPTXISD::Tld4UnifiedB2DU64Float: | |||
673 | return "NVPTXISD::Tld4UnifiedB2DU64Float"; | |||
674 | case NVPTXISD::Tld4UnifiedA2DU64Float: | |||
675 | return "NVPTXISD::Tld4UnifiedA2DU64Float"; | |||
676 | ||||
677 | case NVPTXISD::Suld1DI8Clamp: return "NVPTXISD::Suld1DI8Clamp"; | |||
678 | case NVPTXISD::Suld1DI16Clamp: return "NVPTXISD::Suld1DI16Clamp"; | |||
679 | case NVPTXISD::Suld1DI32Clamp: return "NVPTXISD::Suld1DI32Clamp"; | |||
680 | case NVPTXISD::Suld1DI64Clamp: return "NVPTXISD::Suld1DI64Clamp"; | |||
681 | case NVPTXISD::Suld1DV2I8Clamp: return "NVPTXISD::Suld1DV2I8Clamp"; | |||
682 | case NVPTXISD::Suld1DV2I16Clamp: return "NVPTXISD::Suld1DV2I16Clamp"; | |||
683 | case NVPTXISD::Suld1DV2I32Clamp: return "NVPTXISD::Suld1DV2I32Clamp"; | |||
684 | case NVPTXISD::Suld1DV2I64Clamp: return "NVPTXISD::Suld1DV2I64Clamp"; | |||
685 | case NVPTXISD::Suld1DV4I8Clamp: return "NVPTXISD::Suld1DV4I8Clamp"; | |||
686 | case NVPTXISD::Suld1DV4I16Clamp: return "NVPTXISD::Suld1DV4I16Clamp"; | |||
687 | case NVPTXISD::Suld1DV4I32Clamp: return "NVPTXISD::Suld1DV4I32Clamp"; | |||
688 | ||||
689 | case NVPTXISD::Suld1DArrayI8Clamp: return "NVPTXISD::Suld1DArrayI8Clamp"; | |||
690 | case NVPTXISD::Suld1DArrayI16Clamp: return "NVPTXISD::Suld1DArrayI16Clamp"; | |||
691 | case NVPTXISD::Suld1DArrayI32Clamp: return "NVPTXISD::Suld1DArrayI32Clamp"; | |||
692 | case NVPTXISD::Suld1DArrayI64Clamp: return "NVPTXISD::Suld1DArrayI64Clamp"; | |||
693 | case NVPTXISD::Suld1DArrayV2I8Clamp: return "NVPTXISD::Suld1DArrayV2I8Clamp"; | |||
694 | case NVPTXISD::Suld1DArrayV2I16Clamp:return "NVPTXISD::Suld1DArrayV2I16Clamp"; | |||
695 | case NVPTXISD::Suld1DArrayV2I32Clamp:return "NVPTXISD::Suld1DArrayV2I32Clamp"; | |||
696 | case NVPTXISD::Suld1DArrayV2I64Clamp:return "NVPTXISD::Suld1DArrayV2I64Clamp"; | |||
697 | case NVPTXISD::Suld1DArrayV4I8Clamp: return "NVPTXISD::Suld1DArrayV4I8Clamp"; | |||
698 | case NVPTXISD::Suld1DArrayV4I16Clamp:return "NVPTXISD::Suld1DArrayV4I16Clamp"; | |||
699 | case NVPTXISD::Suld1DArrayV4I32Clamp:return "NVPTXISD::Suld1DArrayV4I32Clamp"; | |||
700 | ||||
701 | case NVPTXISD::Suld2DI8Clamp: return "NVPTXISD::Suld2DI8Clamp"; | |||
702 | case NVPTXISD::Suld2DI16Clamp: return "NVPTXISD::Suld2DI16Clamp"; | |||
703 | case NVPTXISD::Suld2DI32Clamp: return "NVPTXISD::Suld2DI32Clamp"; | |||
704 | case NVPTXISD::Suld2DI64Clamp: return "NVPTXISD::Suld2DI64Clamp"; | |||
705 | case NVPTXISD::Suld2DV2I8Clamp: return "NVPTXISD::Suld2DV2I8Clamp"; | |||
706 | case NVPTXISD::Suld2DV2I16Clamp: return "NVPTXISD::Suld2DV2I16Clamp"; | |||
707 | case NVPTXISD::Suld2DV2I32Clamp: return "NVPTXISD::Suld2DV2I32Clamp"; | |||
708 | case NVPTXISD::Suld2DV2I64Clamp: return "NVPTXISD::Suld2DV2I64Clamp"; | |||
709 | case NVPTXISD::Suld2DV4I8Clamp: return "NVPTXISD::Suld2DV4I8Clamp"; | |||
710 | case NVPTXISD::Suld2DV4I16Clamp: return "NVPTXISD::Suld2DV4I16Clamp"; | |||
711 | case NVPTXISD::Suld2DV4I32Clamp: return "NVPTXISD::Suld2DV4I32Clamp"; | |||
712 | ||||
713 | case NVPTXISD::Suld2DArrayI8Clamp: return "NVPTXISD::Suld2DArrayI8Clamp"; | |||
714 | case NVPTXISD::Suld2DArrayI16Clamp: return "NVPTXISD::Suld2DArrayI16Clamp"; | |||
715 | case NVPTXISD::Suld2DArrayI32Clamp: return "NVPTXISD::Suld2DArrayI32Clamp"; | |||
716 | case NVPTXISD::Suld2DArrayI64Clamp: return "NVPTXISD::Suld2DArrayI64Clamp"; | |||
717 | case NVPTXISD::Suld2DArrayV2I8Clamp: return "NVPTXISD::Suld2DArrayV2I8Clamp"; | |||
718 | case NVPTXISD::Suld2DArrayV2I16Clamp:return "NVPTXISD::Suld2DArrayV2I16Clamp"; | |||
719 | case NVPTXISD::Suld2DArrayV2I32Clamp:return "NVPTXISD::Suld2DArrayV2I32Clamp"; | |||
720 | case NVPTXISD::Suld2DArrayV2I64Clamp:return "NVPTXISD::Suld2DArrayV2I64Clamp"; | |||
721 | case NVPTXISD::Suld2DArrayV4I8Clamp: return "NVPTXISD::Suld2DArrayV4I8Clamp"; | |||
722 | case NVPTXISD::Suld2DArrayV4I16Clamp:return "NVPTXISD::Suld2DArrayV4I16Clamp"; | |||
723 | case NVPTXISD::Suld2DArrayV4I32Clamp:return "NVPTXISD::Suld2DArrayV4I32Clamp"; | |||
724 | ||||
725 | case NVPTXISD::Suld3DI8Clamp: return "NVPTXISD::Suld3DI8Clamp"; | |||
726 | case NVPTXISD::Suld3DI16Clamp: return "NVPTXISD::Suld3DI16Clamp"; | |||
727 | case NVPTXISD::Suld3DI32Clamp: return "NVPTXISD::Suld3DI32Clamp"; | |||
728 | case NVPTXISD::Suld3DI64Clamp: return "NVPTXISD::Suld3DI64Clamp"; | |||
729 | case NVPTXISD::Suld3DV2I8Clamp: return "NVPTXISD::Suld3DV2I8Clamp"; | |||
730 | case NVPTXISD::Suld3DV2I16Clamp: return "NVPTXISD::Suld3DV2I16Clamp"; | |||
731 | case NVPTXISD::Suld3DV2I32Clamp: return "NVPTXISD::Suld3DV2I32Clamp"; | |||
732 | case NVPTXISD::Suld3DV2I64Clamp: return "NVPTXISD::Suld3DV2I64Clamp"; | |||
733 | case NVPTXISD::Suld3DV4I8Clamp: return "NVPTXISD::Suld3DV4I8Clamp"; | |||
734 | case NVPTXISD::Suld3DV4I16Clamp: return "NVPTXISD::Suld3DV4I16Clamp"; | |||
735 | case NVPTXISD::Suld3DV4I32Clamp: return "NVPTXISD::Suld3DV4I32Clamp"; | |||
736 | ||||
737 | case NVPTXISD::Suld1DI8Trap: return "NVPTXISD::Suld1DI8Trap"; | |||
738 | case NVPTXISD::Suld1DI16Trap: return "NVPTXISD::Suld1DI16Trap"; | |||
739 | case NVPTXISD::Suld1DI32Trap: return "NVPTXISD::Suld1DI32Trap"; | |||
740 | case NVPTXISD::Suld1DI64Trap: return "NVPTXISD::Suld1DI64Trap"; | |||
741 | case NVPTXISD::Suld1DV2I8Trap: return "NVPTXISD::Suld1DV2I8Trap"; | |||
742 | case NVPTXISD::Suld1DV2I16Trap: return "NVPTXISD::Suld1DV2I16Trap"; | |||
743 | case NVPTXISD::Suld1DV2I32Trap: return "NVPTXISD::Suld1DV2I32Trap"; | |||
744 | case NVPTXISD::Suld1DV2I64Trap: return "NVPTXISD::Suld1DV2I64Trap"; | |||
745 | case NVPTXISD::Suld1DV4I8Trap: return "NVPTXISD::Suld1DV4I8Trap"; | |||
746 | case NVPTXISD::Suld1DV4I16Trap: return "NVPTXISD::Suld1DV4I16Trap"; | |||
747 | case NVPTXISD::Suld1DV4I32Trap: return "NVPTXISD::Suld1DV4I32Trap"; | |||
748 | ||||
749 | case NVPTXISD::Suld1DArrayI8Trap: return "NVPTXISD::Suld1DArrayI8Trap"; | |||
750 | case NVPTXISD::Suld1DArrayI16Trap: return "NVPTXISD::Suld1DArrayI16Trap"; | |||
751 | case NVPTXISD::Suld1DArrayI32Trap: return "NVPTXISD::Suld1DArrayI32Trap"; | |||
752 | case NVPTXISD::Suld1DArrayI64Trap: return "NVPTXISD::Suld1DArrayI64Trap"; | |||
753 | case NVPTXISD::Suld1DArrayV2I8Trap: return "NVPTXISD::Suld1DArrayV2I8Trap"; | |||
754 | case NVPTXISD::Suld1DArrayV2I16Trap: return "NVPTXISD::Suld1DArrayV2I16Trap"; | |||
755 | case NVPTXISD::Suld1DArrayV2I32Trap: return "NVPTXISD::Suld1DArrayV2I32Trap"; | |||
756 | case NVPTXISD::Suld1DArrayV2I64Trap: return "NVPTXISD::Suld1DArrayV2I64Trap"; | |||
757 | case NVPTXISD::Suld1DArrayV4I8Trap: return "NVPTXISD::Suld1DArrayV4I8Trap"; | |||
758 | case NVPTXISD::Suld1DArrayV4I16Trap: return "NVPTXISD::Suld1DArrayV4I16Trap"; | |||
759 | case NVPTXISD::Suld1DArrayV4I32Trap: return "NVPTXISD::Suld1DArrayV4I32Trap"; | |||
760 | ||||
761 | case NVPTXISD::Suld2DI8Trap: return "NVPTXISD::Suld2DI8Trap"; | |||
762 | case NVPTXISD::Suld2DI16Trap: return "NVPTXISD::Suld2DI16Trap"; | |||
763 | case NVPTXISD::Suld2DI32Trap: return "NVPTXISD::Suld2DI32Trap"; | |||
764 | case NVPTXISD::Suld2DI64Trap: return "NVPTXISD::Suld2DI64Trap"; | |||
765 | case NVPTXISD::Suld2DV2I8Trap: return "NVPTXISD::Suld2DV2I8Trap"; | |||
766 | case NVPTXISD::Suld2DV2I16Trap: return "NVPTXISD::Suld2DV2I16Trap"; | |||
767 | case NVPTXISD::Suld2DV2I32Trap: return "NVPTXISD::Suld2DV2I32Trap"; | |||
768 | case NVPTXISD::Suld2DV2I64Trap: return "NVPTXISD::Suld2DV2I64Trap"; | |||
769 | case NVPTXISD::Suld2DV4I8Trap: return "NVPTXISD::Suld2DV4I8Trap"; | |||
770 | case NVPTXISD::Suld2DV4I16Trap: return "NVPTXISD::Suld2DV4I16Trap"; | |||
771 | case NVPTXISD::Suld2DV4I32Trap: return "NVPTXISD::Suld2DV4I32Trap"; | |||
772 | ||||
773 | case NVPTXISD::Suld2DArrayI8Trap: return "NVPTXISD::Suld2DArrayI8Trap"; | |||
774 | case NVPTXISD::Suld2DArrayI16Trap: return "NVPTXISD::Suld2DArrayI16Trap"; | |||
775 | case NVPTXISD::Suld2DArrayI32Trap: return "NVPTXISD::Suld2DArrayI32Trap"; | |||
776 | case NVPTXISD::Suld2DArrayI64Trap: return "NVPTXISD::Suld2DArrayI64Trap"; | |||
777 | case NVPTXISD::Suld2DArrayV2I8Trap: return "NVPTXISD::Suld2DArrayV2I8Trap"; | |||
778 | case NVPTXISD::Suld2DArrayV2I16Trap: return "NVPTXISD::Suld2DArrayV2I16Trap"; | |||
779 | case NVPTXISD::Suld2DArrayV2I32Trap: return "NVPTXISD::Suld2DArrayV2I32Trap"; | |||
780 | case NVPTXISD::Suld2DArrayV2I64Trap: return "NVPTXISD::Suld2DArrayV2I64Trap"; | |||
781 | case NVPTXISD::Suld2DArrayV4I8Trap: return "NVPTXISD::Suld2DArrayV4I8Trap"; | |||
782 | case NVPTXISD::Suld2DArrayV4I16Trap: return "NVPTXISD::Suld2DArrayV4I16Trap"; | |||
783 | case NVPTXISD::Suld2DArrayV4I32Trap: return "NVPTXISD::Suld2DArrayV4I32Trap"; | |||
784 | ||||
785 | case NVPTXISD::Suld3DI8Trap: return "NVPTXISD::Suld3DI8Trap"; | |||
786 | case NVPTXISD::Suld3DI16Trap: return "NVPTXISD::Suld3DI16Trap"; | |||
787 | case NVPTXISD::Suld3DI32Trap: return "NVPTXISD::Suld3DI32Trap"; | |||
788 | case NVPTXISD::Suld3DI64Trap: return "NVPTXISD::Suld3DI64Trap"; | |||
789 | case NVPTXISD::Suld3DV2I8Trap: return "NVPTXISD::Suld3DV2I8Trap"; | |||
790 | case NVPTXISD::Suld3DV2I16Trap: return "NVPTXISD::Suld3DV2I16Trap"; | |||
791 | case NVPTXISD::Suld3DV2I32Trap: return "NVPTXISD::Suld3DV2I32Trap"; | |||
792 | case NVPTXISD::Suld3DV2I64Trap: return "NVPTXISD::Suld3DV2I64Trap"; | |||
793 | case NVPTXISD::Suld3DV4I8Trap: return "NVPTXISD::Suld3DV4I8Trap"; | |||
794 | case NVPTXISD::Suld3DV4I16Trap: return "NVPTXISD::Suld3DV4I16Trap"; | |||
795 | case NVPTXISD::Suld3DV4I32Trap: return "NVPTXISD::Suld3DV4I32Trap"; | |||
796 | ||||
797 | case NVPTXISD::Suld1DI8Zero: return "NVPTXISD::Suld1DI8Zero"; | |||
798 | case NVPTXISD::Suld1DI16Zero: return "NVPTXISD::Suld1DI16Zero"; | |||
799 | case NVPTXISD::Suld1DI32Zero: return "NVPTXISD::Suld1DI32Zero"; | |||
800 | case NVPTXISD::Suld1DI64Zero: return "NVPTXISD::Suld1DI64Zero"; | |||
801 | case NVPTXISD::Suld1DV2I8Zero: return "NVPTXISD::Suld1DV2I8Zero"; | |||
802 | case NVPTXISD::Suld1DV2I16Zero: return "NVPTXISD::Suld1DV2I16Zero"; | |||
803 | case NVPTXISD::Suld1DV2I32Zero: return "NVPTXISD::Suld1DV2I32Zero"; | |||
804 | case NVPTXISD::Suld1DV2I64Zero: return "NVPTXISD::Suld1DV2I64Zero"; | |||
805 | case NVPTXISD::Suld1DV4I8Zero: return "NVPTXISD::Suld1DV4I8Zero"; | |||
806 | case NVPTXISD::Suld1DV4I16Zero: return "NVPTXISD::Suld1DV4I16Zero"; | |||
807 | case NVPTXISD::Suld1DV4I32Zero: return "NVPTXISD::Suld1DV4I32Zero"; | |||
808 | ||||
809 | case NVPTXISD::Suld1DArrayI8Zero: return "NVPTXISD::Suld1DArrayI8Zero"; | |||
810 | case NVPTXISD::Suld1DArrayI16Zero: return "NVPTXISD::Suld1DArrayI16Zero"; | |||
811 | case NVPTXISD::Suld1DArrayI32Zero: return "NVPTXISD::Suld1DArrayI32Zero"; | |||
812 | case NVPTXISD::Suld1DArrayI64Zero: return "NVPTXISD::Suld1DArrayI64Zero"; | |||
813 | case NVPTXISD::Suld1DArrayV2I8Zero: return "NVPTXISD::Suld1DArrayV2I8Zero"; | |||
814 | case NVPTXISD::Suld1DArrayV2I16Zero: return "NVPTXISD::Suld1DArrayV2I16Zero"; | |||
815 | case NVPTXISD::Suld1DArrayV2I32Zero: return "NVPTXISD::Suld1DArrayV2I32Zero"; | |||
816 | case NVPTXISD::Suld1DArrayV2I64Zero: return "NVPTXISD::Suld1DArrayV2I64Zero"; | |||
817 | case NVPTXISD::Suld1DArrayV4I8Zero: return "NVPTXISD::Suld1DArrayV4I8Zero"; | |||
818 | case NVPTXISD::Suld1DArrayV4I16Zero: return "NVPTXISD::Suld1DArrayV4I16Zero"; | |||
819 | case NVPTXISD::Suld1DArrayV4I32Zero: return "NVPTXISD::Suld1DArrayV4I32Zero"; | |||
820 | ||||
821 | case NVPTXISD::Suld2DI8Zero: return "NVPTXISD::Suld2DI8Zero"; | |||
822 | case NVPTXISD::Suld2DI16Zero: return "NVPTXISD::Suld2DI16Zero"; | |||
823 | case NVPTXISD::Suld2DI32Zero: return "NVPTXISD::Suld2DI32Zero"; | |||
824 | case NVPTXISD::Suld2DI64Zero: return "NVPTXISD::Suld2DI64Zero"; | |||
825 | case NVPTXISD::Suld2DV2I8Zero: return "NVPTXISD::Suld2DV2I8Zero"; | |||
826 | case NVPTXISD::Suld2DV2I16Zero: return "NVPTXISD::Suld2DV2I16Zero"; | |||
827 | case NVPTXISD::Suld2DV2I32Zero: return "NVPTXISD::Suld2DV2I32Zero"; | |||
828 | case NVPTXISD::Suld2DV2I64Zero: return "NVPTXISD::Suld2DV2I64Zero"; | |||
829 | case NVPTXISD::Suld2DV4I8Zero: return "NVPTXISD::Suld2DV4I8Zero"; | |||
830 | case NVPTXISD::Suld2DV4I16Zero: return "NVPTXISD::Suld2DV4I16Zero"; | |||
831 | case NVPTXISD::Suld2DV4I32Zero: return "NVPTXISD::Suld2DV4I32Zero"; | |||
832 | ||||
833 | case NVPTXISD::Suld2DArrayI8Zero: return "NVPTXISD::Suld2DArrayI8Zero"; | |||
834 | case NVPTXISD::Suld2DArrayI16Zero: return "NVPTXISD::Suld2DArrayI16Zero"; | |||
835 | case NVPTXISD::Suld2DArrayI32Zero: return "NVPTXISD::Suld2DArrayI32Zero"; | |||
836 | case NVPTXISD::Suld2DArrayI64Zero: return "NVPTXISD::Suld2DArrayI64Zero"; | |||
837 | case NVPTXISD::Suld2DArrayV2I8Zero: return "NVPTXISD::Suld2DArrayV2I8Zero"; | |||
838 | case NVPTXISD::Suld2DArrayV2I16Zero: return "NVPTXISD::Suld2DArrayV2I16Zero"; | |||
839 | case NVPTXISD::Suld2DArrayV2I32Zero: return "NVPTXISD::Suld2DArrayV2I32Zero"; | |||
840 | case NVPTXISD::Suld2DArrayV2I64Zero: return "NVPTXISD::Suld2DArrayV2I64Zero"; | |||
841 | case NVPTXISD::Suld2DArrayV4I8Zero: return "NVPTXISD::Suld2DArrayV4I8Zero"; | |||
842 | case NVPTXISD::Suld2DArrayV4I16Zero: return "NVPTXISD::Suld2DArrayV4I16Zero"; | |||
843 | case NVPTXISD::Suld2DArrayV4I32Zero: return "NVPTXISD::Suld2DArrayV4I32Zero"; | |||
844 | ||||
845 | case NVPTXISD::Suld3DI8Zero: return "NVPTXISD::Suld3DI8Zero"; | |||
846 | case NVPTXISD::Suld3DI16Zero: return "NVPTXISD::Suld3DI16Zero"; | |||
847 | case NVPTXISD::Suld3DI32Zero: return "NVPTXISD::Suld3DI32Zero"; | |||
848 | case NVPTXISD::Suld3DI64Zero: return "NVPTXISD::Suld3DI64Zero"; | |||
849 | case NVPTXISD::Suld3DV2I8Zero: return "NVPTXISD::Suld3DV2I8Zero"; | |||
850 | case NVPTXISD::Suld3DV2I16Zero: return "NVPTXISD::Suld3DV2I16Zero"; | |||
851 | case NVPTXISD::Suld3DV2I32Zero: return "NVPTXISD::Suld3DV2I32Zero"; | |||
852 | case NVPTXISD::Suld3DV2I64Zero: return "NVPTXISD::Suld3DV2I64Zero"; | |||
853 | case NVPTXISD::Suld3DV4I8Zero: return "NVPTXISD::Suld3DV4I8Zero"; | |||
854 | case NVPTXISD::Suld3DV4I16Zero: return "NVPTXISD::Suld3DV4I16Zero"; | |||
855 | case NVPTXISD::Suld3DV4I32Zero: return "NVPTXISD::Suld3DV4I32Zero"; | |||
856 | } | |||
857 | } | |||
858 | ||||
859 | TargetLoweringBase::LegalizeTypeAction | |||
860 | NVPTXTargetLowering::getPreferredVectorAction(EVT VT) const { | |||
861 | if (VT.getVectorNumElements() != 1 && VT.getScalarType() == MVT::i1) | |||
862 | return TypeSplitVector; | |||
863 | ||||
864 | return TargetLoweringBase::getPreferredVectorAction(VT); | |||
865 | } | |||
866 | ||||
867 | SDValue | |||
868 | NVPTXTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { | |||
869 | SDLoc dl(Op); | |||
870 | const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); | |||
871 | Op = DAG.getTargetGlobalAddress(GV, dl, getPointerTy()); | |||
872 | return DAG.getNode(NVPTXISD::Wrapper, dl, getPointerTy(), Op); | |||
873 | } | |||
874 | ||||
875 | std::string | |||
876 | NVPTXTargetLowering::getPrototype(Type *retTy, const ArgListTy &Args, | |||
877 | const SmallVectorImpl<ISD::OutputArg> &Outs, | |||
878 | unsigned retAlignment, | |||
879 | const ImmutableCallSite *CS) const { | |||
880 | ||||
881 | bool isABI = (nvptxSubtarget.getSmVersion() >= 20); | |||
882 | assert(isABI && "Non-ABI compilation is not supported")((isABI && "Non-ABI compilation is not supported") ? static_cast <void> (0) : __assert_fail ("isABI && \"Non-ABI compilation is not supported\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn220585/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 882, __PRETTY_FUNCTION__)); | |||
883 | if (!isABI) | |||
884 | return ""; | |||
885 | ||||
886 | std::stringstream O; | |||
887 | O << "prototype_" << uniqueCallSite << " : .callprototype "; | |||
888 | ||||
889 | if (retTy->getTypeID() == Type::VoidTyID) { | |||
890 | O << "()"; | |||
891 | } else { | |||
892 | O << "("; | |||
893 | if (retTy->isFloatingPointTy() || retTy->isIntegerTy()) { | |||
894 | unsigned size = 0; | |||
895 | if (const IntegerType *ITy = dyn_cast<IntegerType>(retTy)) { | |||
896 | size = ITy->getBitWidth(); | |||
897 | if (size < 32) | |||
898 | size = 32; | |||
899 | } else { | |||
900 | assert(retTy->isFloatingPointTy() &&((retTy->isFloatingPointTy() && "Floating point type expected here" ) ? static_cast<void> (0) : __assert_fail ("retTy->isFloatingPointTy() && \"Floating point type expected here\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn220585/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 901, __PRETTY_FUNCTION__)) | |||
901 | "Floating point type expected here")((retTy->isFloatingPointTy() && "Floating point type expected here" ) ? static_cast<void> (0) : __assert_fail ("retTy->isFloatingPointTy() && \"Floating point type expected here\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn220585/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 901, __PRETTY_FUNCTION__)); | |||
902 | size = retTy->getPrimitiveSizeInBits(); | |||
903 | } | |||
904 | ||||
905 | O << ".param .b" << size << " _"; | |||
906 | } else if (isa<PointerType>(retTy)) { | |||
907 | O << ".param .b" << getPointerTy().getSizeInBits() << " _"; | |||
908 | } else { | |||
909 | if((retTy->getTypeID() == Type::StructTyID) || | |||
910 | isa<VectorType>(retTy)) { | |||
911 | O << ".param .align " | |||
912 | << retAlignment | |||
913 | << " .b8 _[" | |||
914 | << getDataLayout()->getTypeAllocSize(retTy) << "]"; | |||
915 | } else { | |||
916 | assert(false && "Unknown return type")((false && "Unknown return type") ? static_cast<void > (0) : __assert_fail ("false && \"Unknown return type\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn220585/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 916, __PRETTY_FUNCTION__)); | |||
917 | } | |||
918 | } | |||
919 | O << ") "; | |||
920 | } | |||
921 | O << "_ ("; | |||
922 | ||||
923 | bool first = true; | |||
924 | MVT thePointerTy = getPointerTy(); | |||
925 | ||||
926 | unsigned OIdx = 0; | |||
927 | for (unsigned i = 0, e = Args.size(); i != e; ++i, ++OIdx) { | |||
928 | Type *Ty = Args[i].Ty; | |||
929 | if (!first) { | |||
930 | O << ", "; | |||
931 | } | |||
932 | first = false; | |||
933 | ||||
934 | if (Outs[OIdx].Flags.isByVal() == false) { | |||
935 | if (Ty->isAggregateType() || Ty->isVectorTy()) { | |||
936 | unsigned align = 0; | |||
937 | const CallInst *CallI = cast<CallInst>(CS->getInstruction()); | |||
938 | const DataLayout *TD = getDataLayout(); | |||
939 | // +1 because index 0 is reserved for return type alignment | |||
940 | if (!llvm::getAlign(*CallI, i + 1, align)) | |||
941 | align = TD->getABITypeAlignment(Ty); | |||
942 | unsigned sz = TD->getTypeAllocSize(Ty); | |||
943 | O << ".param .align " << align << " .b8 "; | |||
944 | O << "_"; | |||
945 | O << "[" << sz << "]"; | |||
946 | // update the index for Outs | |||
947 | SmallVector<EVT, 16> vtparts; | |||
948 | ComputeValueVTs(*this, Ty, vtparts); | |||
949 | if (unsigned len = vtparts.size()) | |||
950 | OIdx += len - 1; | |||
951 | continue; | |||
952 | } | |||
953 | // i8 types in IR will be i16 types in SDAG | |||
954 | assert((getValueType(Ty) == Outs[OIdx].VT ||(((getValueType(Ty) == Outs[OIdx].VT || (getValueType(Ty) == MVT ::i8 && Outs[OIdx].VT == MVT::i16)) && "type mismatch between callee prototype and arguments" ) ? static_cast<void> (0) : __assert_fail ("(getValueType(Ty) == Outs[OIdx].VT || (getValueType(Ty) == MVT::i8 && Outs[OIdx].VT == MVT::i16)) && \"type mismatch between callee prototype and arguments\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn220585/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 956, __PRETTY_FUNCTION__)) | |||
955 | (getValueType(Ty) == MVT::i8 && Outs[OIdx].VT == MVT::i16)) &&(((getValueType(Ty) == Outs[OIdx].VT || (getValueType(Ty) == MVT ::i8 && Outs[OIdx].VT == MVT::i16)) && "type mismatch between callee prototype and arguments" ) ? static_cast<void> (0) : __assert_fail ("(getValueType(Ty) == Outs[OIdx].VT || (getValueType(Ty) == MVT::i8 && Outs[OIdx].VT == MVT::i16)) && \"type mismatch between callee prototype and arguments\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn220585/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 956, __PRETTY_FUNCTION__)) | |||
956 | "type mismatch between callee prototype and arguments")(((getValueType(Ty) == Outs[OIdx].VT || (getValueType(Ty) == MVT ::i8 && Outs[OIdx].VT == MVT::i16)) && "type mismatch between callee prototype and arguments" ) ? static_cast<void> (0) : __assert_fail ("(getValueType(Ty) == Outs[OIdx].VT || (getValueType(Ty) == MVT::i8 && Outs[OIdx].VT == MVT::i16)) && \"type mismatch between callee prototype and arguments\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn220585/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 956, __PRETTY_FUNCTION__)); | |||
957 | // scalar type | |||
958 | unsigned sz = 0; | |||
959 | if (isa<IntegerType>(Ty)) { | |||
960 | sz = cast<IntegerType>(Ty)->getBitWidth(); | |||
961 | if (sz < 32) | |||
962 | sz = 32; | |||
963 | } else if (isa<PointerType>(Ty)) | |||
964 | sz = thePointerTy.getSizeInBits(); | |||
965 | else | |||
966 | sz = Ty->getPrimitiveSizeInBits(); | |||
967 | O << ".param .b" << sz << " "; | |||
968 | O << "_"; | |||
969 | continue; | |||
970 | } | |||
971 | const PointerType *PTy = dyn_cast<PointerType>(Ty); | |||
972 | assert(PTy && "Param with byval attribute should be a pointer type")((PTy && "Param with byval attribute should be a pointer type" ) ? static_cast<void> (0) : __assert_fail ("PTy && \"Param with byval attribute should be a pointer type\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn220585/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 972, __PRETTY_FUNCTION__)); | |||
973 | Type *ETy = PTy->getElementType(); | |||
974 | ||||
975 | unsigned align = Outs[OIdx].Flags.getByValAlign(); | |||
976 | unsigned sz = getDataLayout()->getTypeAllocSize(ETy); | |||
977 | O << ".param .align " << align << " .b8 "; | |||
978 | O << "_"; | |||
979 | O << "[" << sz << "]"; | |||
980 | } | |||
981 | O << ");"; | |||
982 | return O.str(); | |||
983 | } | |||
984 | ||||
985 | unsigned | |||
986 | NVPTXTargetLowering::getArgumentAlignment(SDValue Callee, | |||
987 | const ImmutableCallSite *CS, | |||
988 | Type *Ty, | |||
989 | unsigned Idx) const { | |||
990 | const DataLayout *TD = getDataLayout(); | |||
991 | unsigned Align = 0; | |||
992 | const Value *DirectCallee = CS->getCalledFunction(); | |||
993 | ||||
994 | if (!DirectCallee) { | |||
995 | // We don't have a direct function symbol, but that may be because of | |||
996 | // constant cast instructions in the call. | |||
997 | const Instruction *CalleeI = CS->getInstruction(); | |||
998 | assert(CalleeI && "Call target is not a function or derived value?")((CalleeI && "Call target is not a function or derived value?" ) ? static_cast<void> (0) : __assert_fail ("CalleeI && \"Call target is not a function or derived value?\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn220585/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 998, __PRETTY_FUNCTION__)); | |||
999 | ||||
1000 | // With bitcast'd call targets, the instruction will be the call | |||
1001 | if (isa<CallInst>(CalleeI)) { | |||
1002 | // Check if we have call alignment metadata | |||
1003 | if (llvm::getAlign(*cast<CallInst>(CalleeI), Idx, Align)) | |||
1004 | return Align; | |||
1005 | ||||
1006 | const Value *CalleeV = cast<CallInst>(CalleeI)->getCalledValue(); | |||
1007 | // Ignore any bitcast instructions | |||
1008 | while(isa<ConstantExpr>(CalleeV)) { | |||
1009 | const ConstantExpr *CE = cast<ConstantExpr>(CalleeV); | |||
1010 | if (!CE->isCast()) | |||
1011 | break; | |||
1012 | // Look through the bitcast | |||
1013 | CalleeV = cast<ConstantExpr>(CalleeV)->getOperand(0); | |||
1014 | } | |||
1015 | ||||
1016 | // We have now looked past all of the bitcasts. Do we finally have a | |||
1017 | // Function? | |||
1018 | if (isa<Function>(CalleeV)) | |||
1019 | DirectCallee = CalleeV; | |||
1020 | } | |||
1021 | } | |||
1022 | ||||
1023 | // Check for function alignment information if we found that the | |||
1024 | // ultimate target is a Function | |||
1025 | if (DirectCallee) | |||
1026 | if (llvm::getAlign(*cast<Function>(DirectCallee), Idx, Align)) | |||
1027 | return Align; | |||
1028 | ||||
1029 | // Call is indirect or alignment information is not available, fall back to | |||
1030 | // the ABI type alignment | |||
1031 | return TD->getABITypeAlignment(Ty); | |||
1032 | } | |||
1033 | ||||
1034 | SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, | |||
1035 | SmallVectorImpl<SDValue> &InVals) const { | |||
1036 | SelectionDAG &DAG = CLI.DAG; | |||
1037 | SDLoc dl = CLI.DL; | |||
1038 | SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; | |||
1039 | SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; | |||
1040 | SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; | |||
1041 | SDValue Chain = CLI.Chain; | |||
1042 | SDValue Callee = CLI.Callee; | |||
1043 | bool &isTailCall = CLI.IsTailCall; | |||
1044 | ArgListTy &Args = CLI.getArgs(); | |||
1045 | Type *retTy = CLI.RetTy; | |||
| ||||
1046 | ImmutableCallSite *CS = CLI.CS; | |||
1047 | ||||
1048 | bool isABI = (nvptxSubtarget.getSmVersion() >= 20); | |||
1049 | assert(isABI && "Non-ABI compilation is not supported")((isABI && "Non-ABI compilation is not supported") ? static_cast <void> (0) : __assert_fail ("isABI && \"Non-ABI compilation is not supported\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn220585/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 1049, __PRETTY_FUNCTION__)); | |||
1050 | if (!isABI) | |||
1051 | return Chain; | |||
1052 | const DataLayout *TD = getDataLayout(); | |||
1053 | MachineFunction &MF = DAG.getMachineFunction(); | |||
1054 | const Function *F = MF.getFunction(); | |||
1055 | ||||
1056 | SDValue tempChain = Chain; | |||
1057 | Chain = | |||
1058 | DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(uniqueCallSite, true), | |||
1059 | dl); | |||
1060 | SDValue InFlag = Chain.getValue(1); | |||
1061 | ||||
1062 | unsigned paramCount = 0; | |||
1063 | // Args.size() and Outs.size() need not match. | |||
1064 | // Outs.size() will be larger | |||
1065 | // * if there is an aggregate argument with multiple fields (each field | |||
1066 | // showing up separately in Outs) | |||
1067 | // * if there is a vector argument with more than typical vector-length | |||
1068 | // elements (generally if more than 4) where each vector element is | |||
1069 | // individually present in Outs. | |||
1070 | // So a different index should be used for indexing into Outs/OutVals. | |||
1071 | // See similar issue in LowerFormalArguments. | |||
1072 | unsigned OIdx = 0; | |||
1073 | // Declare the .params or .reg need to pass values | |||
1074 | // to the function | |||
1075 | for (unsigned i = 0, e = Args.size(); i != e; ++i, ++OIdx) { | |||
1076 | EVT VT = Outs[OIdx].VT; | |||
1077 | Type *Ty = Args[i].Ty; | |||
1078 | ||||
1079 | if (Outs[OIdx].Flags.isByVal() == false) { | |||
1080 | if (Ty->isAggregateType()) { | |||
1081 | // aggregate | |||
1082 | SmallVector<EVT, 16> vtparts; | |||
1083 | SmallVector<uint64_t, 16> Offsets; | |||
1084 | ComputePTXValueVTs(*this, Ty, vtparts, &Offsets, 0); | |||
1085 | ||||
1086 | unsigned align = getArgumentAlignment(Callee, CS, Ty, paramCount + 1); | |||
1087 | // declare .param .align <align> .b8 .param<n>[<size>]; | |||
1088 | unsigned sz = TD->getTypeAllocSize(Ty); | |||
1089 | SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); | |||
1090 | SDValue DeclareParamOps[] = { Chain, DAG.getConstant(align, MVT::i32), | |||
1091 | DAG.getConstant(paramCount, MVT::i32), | |||
1092 | DAG.getConstant(sz, MVT::i32), InFlag }; | |||
1093 | Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs, | |||
1094 | DeclareParamOps); | |||
1095 | InFlag = Chain.getValue(1); | |||
1096 | for (unsigned j = 0, je = vtparts.size(); j != je; ++j) { | |||
1097 | EVT elemtype = vtparts[j]; | |||
1098 | unsigned ArgAlign = GreatestCommonDivisor64(align, Offsets[j]); | |||
1099 | if (elemtype.isInteger() && (sz < 8)) | |||
1100 | sz = 8; | |||
1101 | SDValue StVal = OutVals[OIdx]; | |||
1102 | if (elemtype.getSizeInBits() < 16) { | |||
1103 | StVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, StVal); | |||
1104 | } | |||
1105 | SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); | |||
1106 | SDValue CopyParamOps[] = { Chain, | |||
1107 | DAG.getConstant(paramCount, MVT::i32), | |||
1108 | DAG.getConstant(Offsets[j], MVT::i32), | |||
1109 | StVal, InFlag }; | |||
1110 | Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl, | |||
1111 | CopyParamVTs, CopyParamOps, | |||
1112 | elemtype, MachinePointerInfo(), | |||
1113 | ArgAlign); | |||
1114 | InFlag = Chain.getValue(1); | |||
1115 | ++OIdx; | |||
1116 | } | |||
1117 | if (vtparts.size() > 0) | |||
1118 | --OIdx; | |||
1119 | ++paramCount; | |||
1120 | continue; | |||
1121 | } | |||
1122 | if (Ty->isVectorTy()) { | |||
1123 | EVT ObjectVT = getValueType(Ty); | |||
1124 | unsigned align = getArgumentAlignment(Callee, CS, Ty, paramCount + 1); | |||
1125 | // declare .param .align <align> .b8 .param<n>[<size>]; | |||
1126 | unsigned sz = TD->getTypeAllocSize(Ty); | |||
1127 | SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); | |||
1128 | SDValue DeclareParamOps[] = { Chain, DAG.getConstant(align, MVT::i32), | |||
1129 | DAG.getConstant(paramCount, MVT::i32), | |||
1130 | DAG.getConstant(sz, MVT::i32), InFlag }; | |||
1131 | Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs, | |||
1132 | DeclareParamOps); | |||
1133 | InFlag = Chain.getValue(1); | |||
1134 | unsigned NumElts = ObjectVT.getVectorNumElements(); | |||
1135 | EVT EltVT = ObjectVT.getVectorElementType(); | |||
1136 | EVT MemVT = EltVT; | |||
1137 | bool NeedExtend = false; | |||
1138 | if (EltVT.getSizeInBits() < 16) { | |||
1139 | NeedExtend = true; | |||
1140 | EltVT = MVT::i16; | |||
1141 | } | |||
1142 | ||||
1143 | // V1 store | |||
1144 | if (NumElts == 1) { | |||
1145 | SDValue Elt = OutVals[OIdx++]; | |||
1146 | if (NeedExtend) | |||
1147 | Elt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Elt); | |||
1148 | ||||
1149 | SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); | |||
1150 | SDValue CopyParamOps[] = { Chain, | |||
1151 | DAG.getConstant(paramCount, MVT::i32), | |||
1152 | DAG.getConstant(0, MVT::i32), Elt, | |||
1153 | InFlag }; | |||
1154 | Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl, | |||
1155 | CopyParamVTs, CopyParamOps, | |||
1156 | MemVT, MachinePointerInfo()); | |||
1157 | InFlag = Chain.getValue(1); | |||
1158 | } else if (NumElts == 2) { | |||
1159 | SDValue Elt0 = OutVals[OIdx++]; | |||
1160 | SDValue Elt1 = OutVals[OIdx++]; | |||
1161 | if (NeedExtend) { | |||
1162 | Elt0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Elt0); | |||
1163 | Elt1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Elt1); | |||
1164 | } | |||
1165 | ||||
1166 | SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); | |||
1167 | SDValue CopyParamOps[] = { Chain, | |||
1168 | DAG.getConstant(paramCount, MVT::i32), | |||
1169 | DAG.getConstant(0, MVT::i32), Elt0, Elt1, | |||
1170 | InFlag }; | |||
1171 | Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParamV2, dl, | |||
1172 | CopyParamVTs, CopyParamOps, | |||
1173 | MemVT, MachinePointerInfo()); | |||
1174 | InFlag = Chain.getValue(1); | |||
1175 | } else { | |||
1176 | unsigned curOffset = 0; | |||
1177 | // V4 stores | |||
1178 | // We have at least 4 elements (<3 x Ty> expands to 4 elements) and | |||
1179 | // the | |||
1180 | // vector will be expanded to a power of 2 elements, so we know we can | |||
1181 | // always round up to the next multiple of 4 when creating the vector | |||
1182 | // stores. | |||
1183 | // e.g. 4 elem => 1 st.v4 | |||
1184 | // 6 elem => 2 st.v4 | |||
1185 | // 8 elem => 2 st.v4 | |||
1186 | // 11 elem => 3 st.v4 | |||
1187 | unsigned VecSize = 4; | |||
1188 | if (EltVT.getSizeInBits() == 64) | |||
1189 | VecSize = 2; | |||
1190 | ||||
1191 | // This is potentially only part of a vector, so assume all elements | |||
1192 | // are packed together. | |||
1193 | unsigned PerStoreOffset = MemVT.getStoreSizeInBits() / 8 * VecSize; | |||
1194 | ||||
1195 | for (unsigned i = 0; i < NumElts; i += VecSize) { | |||
1196 | // Get values | |||
1197 | SDValue StoreVal; | |||
1198 | SmallVector<SDValue, 8> Ops; | |||
1199 | Ops.push_back(Chain); | |||
1200 | Ops.push_back(DAG.getConstant(paramCount, MVT::i32)); | |||
1201 | Ops.push_back(DAG.getConstant(curOffset, MVT::i32)); | |||
1202 | ||||
1203 | unsigned Opc = NVPTXISD::StoreParamV2; | |||
1204 | ||||
1205 | StoreVal = OutVals[OIdx++]; | |||
1206 | if (NeedExtend) | |||
1207 | StoreVal = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal); | |||
1208 | Ops.push_back(StoreVal); | |||
1209 | ||||
1210 | if (i + 1 < NumElts) { | |||
1211 | StoreVal = OutVals[OIdx++]; | |||
1212 | if (NeedExtend) | |||
1213 | StoreVal = | |||
1214 | DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal); | |||
1215 | } else { | |||
1216 | StoreVal = DAG.getUNDEF(EltVT); | |||
1217 | } | |||
1218 | Ops.push_back(StoreVal); | |||
1219 | ||||
1220 | if (VecSize == 4) { | |||
1221 | Opc = NVPTXISD::StoreParamV4; | |||
1222 | if (i + 2 < NumElts) { | |||
1223 | StoreVal = OutVals[OIdx++]; | |||
1224 | if (NeedExtend) | |||
1225 | StoreVal = | |||
1226 | DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal); | |||
1227 | } else { | |||
1228 | StoreVal = DAG.getUNDEF(EltVT); | |||
1229 | } | |||
1230 | Ops.push_back(StoreVal); | |||
1231 | ||||
1232 | if (i + 3 < NumElts) { | |||
1233 | StoreVal = OutVals[OIdx++]; | |||
1234 | if (NeedExtend) | |||
1235 | StoreVal = | |||
1236 | DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal); | |||
1237 | } else { | |||
1238 | StoreVal = DAG.getUNDEF(EltVT); | |||
1239 | } | |||
1240 | Ops.push_back(StoreVal); | |||
1241 | } | |||
1242 | ||||
1243 | Ops.push_back(InFlag); | |||
1244 | ||||
1245 | SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); | |||
1246 | Chain = DAG.getMemIntrinsicNode(Opc, dl, CopyParamVTs, Ops, | |||
1247 | MemVT, MachinePointerInfo()); | |||
1248 | InFlag = Chain.getValue(1); | |||
1249 | curOffset += PerStoreOffset; | |||
1250 | } | |||
1251 | } | |||
1252 | ++paramCount; | |||
1253 | --OIdx; | |||
1254 | continue; | |||
1255 | } | |||
1256 | // Plain scalar | |||
1257 | // for ABI, declare .param .b<size> .param<n>; | |||
1258 | unsigned sz = VT.getSizeInBits(); | |||
1259 | bool needExtend = false; | |||
1260 | if (VT.isInteger()) { | |||
1261 | if (sz < 16) | |||
1262 | needExtend = true; | |||
1263 | if (sz < 32) | |||
1264 | sz = 32; | |||
1265 | } | |||
1266 | SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); | |||
1267 | SDValue DeclareParamOps[] = { Chain, | |||
1268 | DAG.getConstant(paramCount, MVT::i32), | |||
1269 | DAG.getConstant(sz, MVT::i32), | |||
1270 | DAG.getConstant(0, MVT::i32), InFlag }; | |||
1271 | Chain = DAG.getNode(NVPTXISD::DeclareScalarParam, dl, DeclareParamVTs, | |||
1272 | DeclareParamOps); | |||
1273 | InFlag = Chain.getValue(1); | |||
1274 | SDValue OutV = OutVals[OIdx]; | |||
1275 | if (needExtend) { | |||
1276 | // zext/sext i1 to i16 | |||
1277 | unsigned opc = ISD::ZERO_EXTEND; | |||
1278 | if (Outs[OIdx].Flags.isSExt()) | |||
1279 | opc = ISD::SIGN_EXTEND; | |||
1280 | OutV = DAG.getNode(opc, dl, MVT::i16, OutV); | |||
1281 | } | |||
1282 | SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); | |||
1283 | SDValue CopyParamOps[] = { Chain, DAG.getConstant(paramCount, MVT::i32), | |||
1284 | DAG.getConstant(0, MVT::i32), OutV, InFlag }; | |||
1285 | ||||
1286 | unsigned opcode = NVPTXISD::StoreParam; | |||
1287 | if (Outs[OIdx].Flags.isZExt()) | |||
1288 | opcode = NVPTXISD::StoreParamU32; | |||
1289 | else if (Outs[OIdx].Flags.isSExt()) | |||
1290 | opcode = NVPTXISD::StoreParamS32; | |||
1291 | Chain = DAG.getMemIntrinsicNode(opcode, dl, CopyParamVTs, CopyParamOps, | |||
1292 | VT, MachinePointerInfo()); | |||
1293 | ||||
1294 | InFlag = Chain.getValue(1); | |||
1295 | ++paramCount; | |||
1296 | continue; | |||
1297 | } | |||
1298 | // struct or vector | |||
1299 | SmallVector<EVT, 16> vtparts; | |||
1300 | SmallVector<uint64_t, 16> Offsets; | |||
1301 | const PointerType *PTy = dyn_cast<PointerType>(Args[i].Ty); | |||
1302 | assert(PTy && "Type of a byval parameter should be pointer")((PTy && "Type of a byval parameter should be pointer" ) ? static_cast<void> (0) : __assert_fail ("PTy && \"Type of a byval parameter should be pointer\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn220585/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 1302, __PRETTY_FUNCTION__)); | |||
1303 | ComputePTXValueVTs(*this, PTy->getElementType(), vtparts, &Offsets, 0); | |||
1304 | ||||
1305 | // declare .param .align <align> .b8 .param<n>[<size>]; | |||
1306 | unsigned sz = Outs[OIdx].Flags.getByValSize(); | |||
1307 | SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); | |||
1308 | unsigned ArgAlign = Outs[OIdx].Flags.getByValAlign(); | |||
1309 | // The ByValAlign in the Outs[OIdx].Flags is alway set at this point, | |||
1310 | // so we don't need to worry about natural alignment or not. | |||
1311 | // See TargetLowering::LowerCallTo(). | |||
1312 | SDValue DeclareParamOps[] = { | |||
1313 | Chain, DAG.getConstant(Outs[OIdx].Flags.getByValAlign(), MVT::i32), | |||
1314 | DAG.getConstant(paramCount, MVT::i32), DAG.getConstant(sz, MVT::i32), | |||
1315 | InFlag | |||
1316 | }; | |||
1317 | Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs, | |||
1318 | DeclareParamOps); | |||
1319 | InFlag = Chain.getValue(1); | |||
1320 | for (unsigned j = 0, je = vtparts.size(); j != je; ++j) { | |||
1321 | EVT elemtype = vtparts[j]; | |||
1322 | int curOffset = Offsets[j]; | |||
1323 | unsigned PartAlign = GreatestCommonDivisor64(ArgAlign, curOffset); | |||
1324 | SDValue srcAddr = | |||
1325 | DAG.getNode(ISD::ADD, dl, getPointerTy(), OutVals[OIdx], | |||
1326 | DAG.getConstant(curOffset, getPointerTy())); | |||
1327 | SDValue theVal = DAG.getLoad(elemtype, dl, tempChain, srcAddr, | |||
1328 | MachinePointerInfo(), false, false, false, | |||
1329 | PartAlign); | |||
1330 | if (elemtype.getSizeInBits() < 16) { | |||
1331 | theVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, theVal); | |||
1332 | } | |||
1333 | SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); | |||
1334 | SDValue CopyParamOps[] = { Chain, DAG.getConstant(paramCount, MVT::i32), | |||
1335 | DAG.getConstant(curOffset, MVT::i32), theVal, | |||
1336 | InFlag }; | |||
1337 | Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl, CopyParamVTs, | |||
1338 | CopyParamOps, elemtype, | |||
1339 | MachinePointerInfo()); | |||
1340 | ||||
1341 | InFlag = Chain.getValue(1); | |||
1342 | } | |||
1343 | ++paramCount; | |||
1344 | } | |||
1345 | ||||
1346 | GlobalAddressSDNode *Func = dyn_cast<GlobalAddressSDNode>(Callee.getNode()); | |||
1347 | unsigned retAlignment = 0; | |||
1348 | ||||
1349 | // Handle Result | |||
1350 | if (Ins.size() > 0) { | |||
1351 | SmallVector<EVT, 16> resvtparts; | |||
1352 | ComputeValueVTs(*this, retTy, resvtparts); | |||
1353 | ||||
1354 | // Declare | |||
1355 | // .param .align 16 .b8 retval0[<size-in-bytes>], or | |||
1356 | // .param .b<size-in-bits> retval0 | |||
1357 | unsigned resultsz = TD->getTypeAllocSizeInBits(retTy); | |||
1358 | if (retTy->isSingleValueType()) { | |||
1359 | // Scalar needs to be at least 32bit wide | |||
1360 | if (resultsz < 32) | |||
1361 | resultsz = 32; | |||
1362 | SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue); | |||
1363 | SDValue DeclareRetOps[] = { Chain, DAG.getConstant(1, MVT::i32), | |||
1364 | DAG.getConstant(resultsz, MVT::i32), | |||
1365 | DAG.getConstant(0, MVT::i32), InFlag }; | |||
1366 | Chain = DAG.getNode(NVPTXISD::DeclareRet, dl, DeclareRetVTs, | |||
1367 | DeclareRetOps); | |||
1368 | InFlag = Chain.getValue(1); | |||
1369 | } else { | |||
1370 | retAlignment = getArgumentAlignment(Callee, CS, retTy, 0); | |||
1371 | SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue); | |||
1372 | SDValue DeclareRetOps[] = { Chain, | |||
1373 | DAG.getConstant(retAlignment, MVT::i32), | |||
1374 | DAG.getConstant(resultsz / 8, MVT::i32), | |||
1375 | DAG.getConstant(0, MVT::i32), InFlag }; | |||
1376 | Chain = DAG.getNode(NVPTXISD::DeclareRetParam, dl, DeclareRetVTs, | |||
1377 | DeclareRetOps); | |||
1378 | InFlag = Chain.getValue(1); | |||
1379 | } | |||
1380 | } | |||
1381 | ||||
1382 | if (!Func) { | |||
1383 | // This is indirect function call case : PTX requires a prototype of the | |||
1384 | // form | |||
1385 | // proto_0 : .callprototype(.param .b32 _) _ (.param .b32 _); | |||
1386 | // to be emitted, and the label has to used as the last arg of call | |||
1387 | // instruction. | |||
1388 | // The prototype is embedded in a string and put as the operand for a | |||
1389 | // CallPrototype SDNode which will print out to the value of the string. | |||
1390 | SDVTList ProtoVTs = DAG.getVTList(MVT::Other, MVT::Glue); | |||
1391 | std::string Proto = getPrototype(retTy, Args, Outs, retAlignment, CS); | |||
1392 | const char *ProtoStr = | |||
1393 | nvTM->getManagedStrPool()->getManagedString(Proto.c_str())->c_str(); | |||
1394 | SDValue ProtoOps[] = { | |||
1395 | Chain, DAG.getTargetExternalSymbol(ProtoStr, MVT::i32), InFlag, | |||
1396 | }; | |||
1397 | Chain = DAG.getNode(NVPTXISD::CallPrototype, dl, ProtoVTs, ProtoOps); | |||
1398 | InFlag = Chain.getValue(1); | |||
1399 | } | |||
1400 | // Op to just print "call" | |||
1401 | SDVTList PrintCallVTs = DAG.getVTList(MVT::Other, MVT::Glue); | |||
1402 | SDValue PrintCallOps[] = { | |||
1403 | Chain, DAG.getConstant((Ins.size() == 0) ? 0 : 1, MVT::i32), InFlag | |||
1404 | }; | |||
1405 | Chain = DAG.getNode(Func ? (NVPTXISD::PrintCallUni) : (NVPTXISD::PrintCall), | |||
1406 | dl, PrintCallVTs, PrintCallOps); | |||
1407 | InFlag = Chain.getValue(1); | |||
1408 | ||||
1409 | // Ops to print out the function name | |||
1410 | SDVTList CallVoidVTs = DAG.getVTList(MVT::Other, MVT::Glue); | |||
1411 | SDValue CallVoidOps[] = { Chain, Callee, InFlag }; | |||
1412 | Chain = DAG.getNode(NVPTXISD::CallVoid, dl, CallVoidVTs, CallVoidOps); | |||
1413 | InFlag = Chain.getValue(1); | |||
1414 | ||||
1415 | // Ops to print out the param list | |||
1416 | SDVTList CallArgBeginVTs = DAG.getVTList(MVT::Other, MVT::Glue); | |||
1417 | SDValue CallArgBeginOps[] = { Chain, InFlag }; | |||
1418 | Chain = DAG.getNode(NVPTXISD::CallArgBegin, dl, CallArgBeginVTs, | |||
1419 | CallArgBeginOps); | |||
1420 | InFlag = Chain.getValue(1); | |||
1421 | ||||
1422 | for (unsigned i = 0, e = paramCount; i != e; ++i) { | |||
1423 | unsigned opcode; | |||
1424 | if (i == (e - 1)) | |||
1425 | opcode = NVPTXISD::LastCallArg; | |||
1426 | else | |||
1427 | opcode = NVPTXISD::CallArg; | |||
1428 | SDVTList CallArgVTs = DAG.getVTList(MVT::Other, MVT::Glue); | |||
1429 | SDValue CallArgOps[] = { Chain, DAG.getConstant(1, MVT::i32), | |||
1430 | DAG.getConstant(i, MVT::i32), InFlag }; | |||
1431 | Chain = DAG.getNode(opcode, dl, CallArgVTs, CallArgOps); | |||
1432 | InFlag = Chain.getValue(1); | |||
1433 | } | |||
1434 | SDVTList CallArgEndVTs = DAG.getVTList(MVT::Other, MVT::Glue); | |||
1435 | SDValue CallArgEndOps[] = { Chain, DAG.getConstant(Func ? 1 : 0, MVT::i32), | |||
1436 | InFlag }; | |||
1437 | Chain = DAG.getNode(NVPTXISD::CallArgEnd, dl, CallArgEndVTs, CallArgEndOps); | |||
1438 | InFlag = Chain.getValue(1); | |||
1439 | ||||
1440 | if (!Func) { | |||
1441 | SDVTList PrototypeVTs = DAG.getVTList(MVT::Other, MVT::Glue); | |||
1442 | SDValue PrototypeOps[] = { Chain, DAG.getConstant(uniqueCallSite, MVT::i32), | |||
1443 | InFlag }; | |||
1444 | Chain = DAG.getNode(NVPTXISD::Prototype, dl, PrototypeVTs, PrototypeOps); | |||
1445 | InFlag = Chain.getValue(1); | |||
1446 | } | |||
1447 | ||||
1448 | // Generate loads from param memory/moves from registers for result | |||
1449 | if (Ins.size() > 0) { | |||
1450 | if (retTy && retTy->isVectorTy()) { | |||
1451 | EVT ObjectVT = getValueType(retTy); | |||
1452 | unsigned NumElts = ObjectVT.getVectorNumElements(); | |||
1453 | EVT EltVT = ObjectVT.getVectorElementType(); | |||
1454 | assert(nvTM->getSubtargetImpl()->getTargetLowering()->getNumRegisters(((nvTM->getSubtargetImpl()->getTargetLowering()->getNumRegisters ( F->getContext(), ObjectVT) == NumElts && "Vector was not scalarized" ) ? static_cast<void> (0) : __assert_fail ("nvTM->getSubtargetImpl()->getTargetLowering()->getNumRegisters( F->getContext(), ObjectVT) == NumElts && \"Vector was not scalarized\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn220585/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 1456, __PRETTY_FUNCTION__)) | |||
1455 | F->getContext(), ObjectVT) == NumElts &&((nvTM->getSubtargetImpl()->getTargetLowering()->getNumRegisters ( F->getContext(), ObjectVT) == NumElts && "Vector was not scalarized" ) ? static_cast<void> (0) : __assert_fail ("nvTM->getSubtargetImpl()->getTargetLowering()->getNumRegisters( F->getContext(), ObjectVT) == NumElts && \"Vector was not scalarized\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn220585/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 1456, __PRETTY_FUNCTION__)) | |||
1456 | "Vector was not scalarized")((nvTM->getSubtargetImpl()->getTargetLowering()->getNumRegisters ( F->getContext(), ObjectVT) == NumElts && "Vector was not scalarized" ) ? static_cast<void> (0) : __assert_fail ("nvTM->getSubtargetImpl()->getTargetLowering()->getNumRegisters( F->getContext(), ObjectVT) == NumElts && \"Vector was not scalarized\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn220585/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 1456, __PRETTY_FUNCTION__)); | |||
1457 | unsigned sz = EltVT.getSizeInBits(); | |||
1458 | bool needTruncate = sz < 8 ? true : false; | |||
1459 | ||||
1460 | if (NumElts == 1) { | |||
1461 | // Just a simple load | |||
1462 | SmallVector<EVT, 4> LoadRetVTs; | |||
1463 | if (EltVT == MVT::i1 || EltVT == MVT::i8) { | |||
1464 | // If loading i1/i8 result, generate | |||
1465 | // load.b8 i16 | |||
1466 | // if i1 | |||
1467 | // trunc i16 to i1 | |||
1468 | LoadRetVTs.push_back(MVT::i16); | |||
1469 | } else | |||
1470 | LoadRetVTs.push_back(EltVT); | |||
1471 | LoadRetVTs.push_back(MVT::Other); | |||
1472 | LoadRetVTs.push_back(MVT::Glue); | |||
1473 | SmallVector<SDValue, 4> LoadRetOps; | |||
1474 | LoadRetOps.push_back(Chain); | |||
1475 | LoadRetOps.push_back(DAG.getConstant(1, MVT::i32)); | |||
1476 | LoadRetOps.push_back(DAG.getConstant(0, MVT::i32)); | |||
1477 | LoadRetOps.push_back(InFlag); | |||
1478 | SDValue retval = DAG.getMemIntrinsicNode( | |||
1479 | NVPTXISD::LoadParam, dl, | |||
1480 | DAG.getVTList(LoadRetVTs), LoadRetOps, EltVT, MachinePointerInfo()); | |||
1481 | Chain = retval.getValue(1); | |||
1482 | InFlag = retval.getValue(2); | |||
1483 | SDValue Ret0 = retval; | |||
1484 | if (needTruncate) | |||
1485 | Ret0 = DAG.getNode(ISD::TRUNCATE, dl, EltVT, Ret0); | |||
1486 | InVals.push_back(Ret0); | |||
1487 | } else if (NumElts == 2) { | |||
1488 | // LoadV2 | |||
1489 | SmallVector<EVT, 4> LoadRetVTs; | |||
1490 | if (EltVT == MVT::i1 || EltVT == MVT::i8) { | |||
1491 | // If loading i1/i8 result, generate | |||
1492 | // load.b8 i16 | |||
1493 | // if i1 | |||
1494 | // trunc i16 to i1 | |||
1495 | LoadRetVTs.push_back(MVT::i16); | |||
1496 | LoadRetVTs.push_back(MVT::i16); | |||
1497 | } else { | |||
1498 | LoadRetVTs.push_back(EltVT); | |||
1499 | LoadRetVTs.push_back(EltVT); | |||
1500 | } | |||
1501 | LoadRetVTs.push_back(MVT::Other); | |||
1502 | LoadRetVTs.push_back(MVT::Glue); | |||
1503 | SmallVector<SDValue, 4> LoadRetOps; | |||
1504 | LoadRetOps.push_back(Chain); | |||
1505 | LoadRetOps.push_back(DAG.getConstant(1, MVT::i32)); | |||
1506 | LoadRetOps.push_back(DAG.getConstant(0, MVT::i32)); | |||
1507 | LoadRetOps.push_back(InFlag); | |||
1508 | SDValue retval = DAG.getMemIntrinsicNode( | |||
1509 | NVPTXISD::LoadParamV2, dl, | |||
1510 | DAG.getVTList(LoadRetVTs), LoadRetOps, EltVT, MachinePointerInfo()); | |||
1511 | Chain = retval.getValue(2); | |||
1512 | InFlag = retval.getValue(3); | |||
1513 | SDValue Ret0 = retval.getValue(0); | |||
1514 | SDValue Ret1 = retval.getValue(1); | |||
1515 | if (needTruncate) { | |||
1516 | Ret0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ret0); | |||
1517 | InVals.push_back(Ret0); | |||
1518 | Ret1 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ret1); | |||
1519 | InVals.push_back(Ret1); | |||
1520 | } else { | |||
1521 | InVals.push_back(Ret0); | |||
1522 | InVals.push_back(Ret1); | |||
1523 | } | |||
1524 | } else { | |||
1525 | // Split into N LoadV4 | |||
1526 | unsigned Ofst = 0; | |||
1527 | unsigned VecSize = 4; | |||
1528 | unsigned Opc = NVPTXISD::LoadParamV4; | |||
1529 | if (EltVT.getSizeInBits() == 64) { | |||
1530 | VecSize = 2; | |||
1531 | Opc = NVPTXISD::LoadParamV2; | |||
1532 | } | |||
1533 | EVT VecVT = EVT::getVectorVT(F->getContext(), EltVT, VecSize); | |||
1534 | for (unsigned i = 0; i < NumElts; i += VecSize) { | |||
1535 | SmallVector<EVT, 8> LoadRetVTs; | |||
1536 | if (EltVT == MVT::i1 || EltVT == MVT::i8) { | |||
1537 | // If loading i1/i8 result, generate | |||
1538 | // load.b8 i16 | |||
1539 | // if i1 | |||
1540 | // trunc i16 to i1 | |||
1541 | for (unsigned j = 0; j < VecSize; ++j) | |||
1542 | LoadRetVTs.push_back(MVT::i16); | |||
1543 | } else { | |||
1544 | for (unsigned j = 0; j < VecSize; ++j) | |||
1545 | LoadRetVTs.push_back(EltVT); | |||
1546 | } | |||
1547 | LoadRetVTs.push_back(MVT::Other); | |||
1548 | LoadRetVTs.push_back(MVT::Glue); | |||
1549 | SmallVector<SDValue, 4> LoadRetOps; | |||
1550 | LoadRetOps.push_back(Chain); | |||
1551 | LoadRetOps.push_back(DAG.getConstant(1, MVT::i32)); | |||
1552 | LoadRetOps.push_back(DAG.getConstant(Ofst, MVT::i32)); | |||
1553 | LoadRetOps.push_back(InFlag); | |||
1554 | SDValue retval = DAG.getMemIntrinsicNode( | |||
1555 | Opc, dl, DAG.getVTList(LoadRetVTs), | |||
1556 | LoadRetOps, EltVT, MachinePointerInfo()); | |||
1557 | if (VecSize == 2) { | |||
1558 | Chain = retval.getValue(2); | |||
1559 | InFlag = retval.getValue(3); | |||
1560 | } else { | |||
1561 | Chain = retval.getValue(4); | |||
1562 | InFlag = retval.getValue(5); | |||
1563 | } | |||
1564 | ||||
1565 | for (unsigned j = 0; j < VecSize; ++j) { | |||
1566 | if (i + j >= NumElts) | |||
1567 | break; | |||
1568 | SDValue Elt = retval.getValue(j); | |||
1569 | if (needTruncate) | |||
1570 | Elt = DAG.getNode(ISD::TRUNCATE, dl, EltVT, Elt); | |||
1571 | InVals.push_back(Elt); | |||
1572 | } | |||
1573 | Ofst += TD->getTypeAllocSize(VecVT.getTypeForEVT(F->getContext())); | |||
1574 | } | |||
1575 | } | |||
1576 | } else { | |||
1577 | SmallVector<EVT, 16> VTs; | |||
1578 | SmallVector<uint64_t, 16> Offsets; | |||
1579 | ComputePTXValueVTs(*this, retTy, VTs, &Offsets, 0); | |||
1580 | assert(VTs.size() == Ins.size() && "Bad value decomposition")((VTs.size() == Ins.size() && "Bad value decomposition" ) ? static_cast<void> (0) : __assert_fail ("VTs.size() == Ins.size() && \"Bad value decomposition\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn220585/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 1580, __PRETTY_FUNCTION__)); | |||
1581 | unsigned RetAlign = getArgumentAlignment(Callee, CS, retTy, 0); | |||
1582 | for (unsigned i = 0, e = Ins.size(); i != e; ++i) { | |||
1583 | unsigned sz = VTs[i].getSizeInBits(); | |||
1584 | unsigned AlignI = GreatestCommonDivisor64(RetAlign, Offsets[i]); | |||
1585 | bool needTruncate = sz < 8 ? true : false; | |||
1586 | if (VTs[i].isInteger() && (sz < 8)) | |||
1587 | sz = 8; | |||
1588 | ||||
1589 | SmallVector<EVT, 4> LoadRetVTs; | |||
1590 | EVT TheLoadType = VTs[i]; | |||
1591 | if (retTy->isIntegerTy() && | |||
| ||||
1592 | TD->getTypeAllocSizeInBits(retTy) < 32) { | |||
1593 | // This is for integer types only, and specifically not for | |||
1594 | // aggregates. | |||
1595 | LoadRetVTs.push_back(MVT::i32); | |||
1596 | TheLoadType = MVT::i32; | |||
1597 | } else if (sz < 16) { | |||
1598 | // If loading i1/i8 result, generate | |||
1599 | // load i8 (-> i16) | |||
1600 | // trunc i16 to i1/i8 | |||
1601 | LoadRetVTs.push_back(MVT::i16); | |||
1602 | } else | |||
1603 | LoadRetVTs.push_back(Ins[i].VT); | |||
1604 | LoadRetVTs.push_back(MVT::Other); | |||
1605 | LoadRetVTs.push_back(MVT::Glue); | |||
1606 | ||||
1607 | SmallVector<SDValue, 4> LoadRetOps; | |||
1608 | LoadRetOps.push_back(Chain); | |||
1609 | LoadRetOps.push_back(DAG.getConstant(1, MVT::i32)); | |||
1610 | LoadRetOps.push_back(DAG.getConstant(Offsets[i], MVT::i32)); | |||
1611 | LoadRetOps.push_back(InFlag); | |||
1612 | SDValue retval = DAG.getMemIntrinsicNode( | |||
1613 | NVPTXISD::LoadParam, dl, | |||
1614 | DAG.getVTList(LoadRetVTs), LoadRetOps, | |||
1615 | TheLoadType, MachinePointerInfo(), AlignI); | |||
1616 | Chain = retval.getValue(1); | |||
1617 | InFlag = retval.getValue(2); | |||
1618 | SDValue Ret0 = retval.getValue(0); | |||
1619 | if (needTruncate) | |||
1620 | Ret0 = DAG.getNode(ISD::TRUNCATE, dl, Ins[i].VT, Ret0); | |||
1621 | InVals.push_back(Ret0); | |||
1622 | } | |||
1623 | } | |||
1624 | } | |||
1625 | ||||
1626 | Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(uniqueCallSite, true), | |||
1627 | DAG.getIntPtrConstant(uniqueCallSite + 1, true), | |||
1628 | InFlag, dl); | |||
1629 | uniqueCallSite++; | |||
1630 | ||||
1631 | // set isTailCall to false for now, until we figure out how to express | |||
1632 | // tail call optimization in PTX | |||
1633 | isTailCall = false; | |||
1634 | return Chain; | |||
1635 | } | |||
1636 | ||||
1637 | // By default CONCAT_VECTORS is lowered by ExpandVectorBuildThroughStack() | |||
1638 | // (see LegalizeDAG.cpp). This is slow and uses local memory. | |||
1639 | // We use extract/insert/build vector just as what LegalizeOp() does in llvm 2.5 | |||
1640 | SDValue | |||
1641 | NVPTXTargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const { | |||
1642 | SDNode *Node = Op.getNode(); | |||
1643 | SDLoc dl(Node); | |||
1644 | SmallVector<SDValue, 8> Ops; | |||
1645 | unsigned NumOperands = Node->getNumOperands(); | |||
1646 | for (unsigned i = 0; i < NumOperands; ++i) { | |||
1647 | SDValue SubOp = Node->getOperand(i); | |||
1648 | EVT VVT = SubOp.getNode()->getValueType(0); | |||
1649 | EVT EltVT = VVT.getVectorElementType(); | |||
1650 | unsigned NumSubElem = VVT.getVectorNumElements(); | |||
1651 | for (unsigned j = 0; j < NumSubElem; ++j) { | |||
1652 | Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, SubOp, | |||
1653 | DAG.getIntPtrConstant(j))); | |||
1654 | } | |||
1655 | } | |||
1656 | return DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0), Ops); | |||
1657 | } | |||
1658 | ||||
1659 | /// LowerShiftRightParts - Lower SRL_PARTS, SRA_PARTS, which | |||
1660 | /// 1) returns two i32 values and take a 2 x i32 value to shift plus a shift | |||
1661 | /// amount, or | |||
1662 | /// 2) returns two i64 values and take a 2 x i64 value to shift plus a shift | |||
1663 | /// amount. | |||
1664 | SDValue NVPTXTargetLowering::LowerShiftRightParts(SDValue Op, | |||
1665 | SelectionDAG &DAG) const { | |||
1666 | assert(Op.getNumOperands() == 3 && "Not a double-shift!")((Op.getNumOperands() == 3 && "Not a double-shift!") ? static_cast<void> (0) : __assert_fail ("Op.getNumOperands() == 3 && \"Not a double-shift!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn220585/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 1666, __PRETTY_FUNCTION__)); | |||
1667 | assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS)((Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD:: SRL_PARTS) ? static_cast<void> (0) : __assert_fail ("Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS" , "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn220585/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 1667, __PRETTY_FUNCTION__)); | |||
1668 | ||||
1669 | EVT VT = Op.getValueType(); | |||
1670 | unsigned VTBits = VT.getSizeInBits(); | |||
1671 | SDLoc dl(Op); | |||
1672 | SDValue ShOpLo = Op.getOperand(0); | |||
1673 | SDValue ShOpHi = Op.getOperand(1); | |||
1674 | SDValue ShAmt = Op.getOperand(2); | |||
1675 | unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL; | |||
1676 | ||||
1677 | if (VTBits == 32 && nvptxSubtarget.getSmVersion() >= 35) { | |||
1678 | ||||
1679 | // For 32bit and sm35, we can use the funnel shift 'shf' instruction. | |||
1680 | // {dHi, dLo} = {aHi, aLo} >> Amt | |||
1681 | // dHi = aHi >> Amt | |||
1682 | // dLo = shf.r.clamp aLo, aHi, Amt | |||
1683 | ||||
1684 | SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt); | |||
1685 | SDValue Lo = DAG.getNode(NVPTXISD::FUN_SHFR_CLAMP, dl, VT, ShOpLo, ShOpHi, | |||
1686 | ShAmt); | |||
1687 | ||||
1688 | SDValue Ops[2] = { Lo, Hi }; | |||
1689 | return DAG.getMergeValues(Ops, dl); | |||
1690 | } | |||
1691 | else { | |||
1692 | ||||
1693 | // {dHi, dLo} = {aHi, aLo} >> Amt | |||
1694 | // - if (Amt>=size) then | |||
1695 | // dLo = aHi >> (Amt-size) | |||
1696 | // dHi = aHi >> Amt (this is either all 0 or all 1) | |||
1697 | // else | |||
1698 | // dLo = (aLo >>logic Amt) | (aHi << (size-Amt)) | |||
1699 | // dHi = aHi >> Amt | |||
1700 | ||||
1701 | SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, | |||
1702 | DAG.getConstant(VTBits, MVT::i32), ShAmt); | |||
1703 | SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt); | |||
1704 | SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt, | |||
1705 | DAG.getConstant(VTBits, MVT::i32)); | |||
1706 | SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt); | |||
1707 | SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); | |||
1708 | SDValue TrueVal = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt); | |||
1709 | ||||
1710 | SDValue Cmp = DAG.getSetCC(dl, MVT::i1, ShAmt, | |||
1711 | DAG.getConstant(VTBits, MVT::i32), ISD::SETGE); | |||
1712 | SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt); | |||
1713 | SDValue Lo = DAG.getNode(ISD::SELECT, dl, VT, Cmp, TrueVal, FalseVal); | |||
1714 | ||||
1715 | SDValue Ops[2] = { Lo, Hi }; | |||
1716 | return DAG.getMergeValues(Ops, dl); | |||
1717 | } | |||
1718 | } | |||
1719 | ||||
1720 | /// LowerShiftLeftParts - Lower SHL_PARTS, which | |||
1721 | /// 1) returns two i32 values and take a 2 x i32 value to shift plus a shift | |||
1722 | /// amount, or | |||
1723 | /// 2) returns two i64 values and take a 2 x i64 value to shift plus a shift | |||
1724 | /// amount. | |||
1725 | SDValue NVPTXTargetLowering::LowerShiftLeftParts(SDValue Op, | |||
1726 | SelectionDAG &DAG) const { | |||
1727 | assert(Op.getNumOperands() == 3 && "Not a double-shift!")((Op.getNumOperands() == 3 && "Not a double-shift!") ? static_cast<void> (0) : __assert_fail ("Op.getNumOperands() == 3 && \"Not a double-shift!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn220585/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 1727, __PRETTY_FUNCTION__)); | |||
1728 | assert(Op.getOpcode() == ISD::SHL_PARTS)((Op.getOpcode() == ISD::SHL_PARTS) ? static_cast<void> (0) : __assert_fail ("Op.getOpcode() == ISD::SHL_PARTS", "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn220585/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 1728, __PRETTY_FUNCTION__)); | |||
1729 | ||||
1730 | EVT VT = Op.getValueType(); | |||
1731 | unsigned VTBits = VT.getSizeInBits(); | |||
1732 | SDLoc dl(Op); | |||
1733 | SDValue ShOpLo = Op.getOperand(0); | |||
1734 | SDValue ShOpHi = Op.getOperand(1); | |||
1735 | SDValue ShAmt = Op.getOperand(2); | |||
1736 | ||||
1737 | if (VTBits == 32 && nvptxSubtarget.getSmVersion() >= 35) { | |||
1738 | ||||
1739 | // For 32bit and sm35, we can use the funnel shift 'shf' instruction. | |||
1740 | // {dHi, dLo} = {aHi, aLo} << Amt | |||
1741 | // dHi = shf.l.clamp aLo, aHi, Amt | |||
1742 | // dLo = aLo << Amt | |||
1743 | ||||
1744 | SDValue Hi = DAG.getNode(NVPTXISD::FUN_SHFL_CLAMP, dl, VT, ShOpLo, ShOpHi, | |||
1745 | ShAmt); | |||
1746 | SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt); | |||
1747 | ||||
1748 | SDValue Ops[2] = { Lo, Hi }; | |||
1749 | return DAG.getMergeValues(Ops, dl); | |||
1750 | } | |||
1751 | else { | |||
1752 | ||||
1753 | // {dHi, dLo} = {aHi, aLo} << Amt | |||
1754 | // - if (Amt>=size) then | |||
1755 | // dLo = aLo << Amt (all 0) | |||
1756 | // dLo = aLo << (Amt-size) | |||
1757 | // else | |||
1758 | // dLo = aLo << Amt | |||
1759 | // dHi = (aHi << Amt) | (aLo >> (size-Amt)) | |||
1760 | ||||
1761 | SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, | |||
1762 | DAG.getConstant(VTBits, MVT::i32), ShAmt); | |||
1763 | SDValue Tmp1 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt); | |||
1764 | SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt, | |||
1765 | DAG.getConstant(VTBits, MVT::i32)); | |||
1766 | SDValue Tmp2 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt); | |||
1767 | SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); | |||
1768 | SDValue TrueVal = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt); | |||
1769 | ||||
1770 | SDValue Cmp = DAG.getSetCC(dl, MVT::i1, ShAmt, | |||
1771 | DAG.getConstant(VTBits, MVT::i32), ISD::SETGE); | |||
1772 | SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt); | |||
1773 | SDValue Hi = DAG.getNode(ISD::SELECT, dl, VT, Cmp, TrueVal, FalseVal); | |||
1774 | ||||
1775 | SDValue Ops[2] = { Lo, Hi }; | |||
1776 | return DAG.getMergeValues(Ops, dl); | |||
1777 | } | |||
1778 | } | |||
1779 | ||||
1780 | SDValue | |||
1781 | NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { | |||
1782 | switch (Op.getOpcode()) { | |||
1783 | case ISD::RETURNADDR: | |||
1784 | return SDValue(); | |||
1785 | case ISD::FRAMEADDR: | |||
1786 | return SDValue(); | |||
1787 | case ISD::GlobalAddress: | |||
1788 | return LowerGlobalAddress(Op, DAG); | |||
1789 | case ISD::INTRINSIC_W_CHAIN: | |||
1790 | return Op; | |||
1791 | case ISD::BUILD_VECTOR: | |||
1792 | case ISD::EXTRACT_SUBVECTOR: | |||
1793 | return Op; | |||
1794 | case ISD::CONCAT_VECTORS: | |||
1795 | return LowerCONCAT_VECTORS(Op, DAG); | |||
1796 | case ISD::STORE: | |||
1797 | return LowerSTORE(Op, DAG); | |||
1798 | case ISD::LOAD: | |||
1799 | return LowerLOAD(Op, DAG); | |||
1800 | case ISD::SHL_PARTS: | |||
1801 | return LowerShiftLeftParts(Op, DAG); | |||
1802 | case ISD::SRA_PARTS: | |||
1803 | case ISD::SRL_PARTS: | |||
1804 | return LowerShiftRightParts(Op, DAG); | |||
1805 | default: | |||
1806 | llvm_unreachable("Custom lowering not defined for operation")::llvm::llvm_unreachable_internal("Custom lowering not defined for operation" , "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn220585/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 1806); | |||
1807 | } | |||
1808 | } | |||
1809 | ||||
1810 | SDValue NVPTXTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { | |||
1811 | if (Op.getValueType() == MVT::i1) | |||
1812 | return LowerLOADi1(Op, DAG); | |||
1813 | else | |||
1814 | return SDValue(); | |||
1815 | } | |||
1816 | ||||
1817 | // v = ld i1* addr | |||
1818 | // => | |||
1819 | // v1 = ld i8* addr (-> i16) | |||
1820 | // v = trunc i16 to i1 | |||
1821 | SDValue NVPTXTargetLowering::LowerLOADi1(SDValue Op, SelectionDAG &DAG) const { | |||
1822 | SDNode *Node = Op.getNode(); | |||
1823 | LoadSDNode *LD = cast<LoadSDNode>(Node); | |||
1824 | SDLoc dl(Node); | |||
1825 | assert(LD->getExtensionType() == ISD::NON_EXTLOAD)((LD->getExtensionType() == ISD::NON_EXTLOAD) ? static_cast <void> (0) : __assert_fail ("LD->getExtensionType() == ISD::NON_EXTLOAD" , "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn220585/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 1825, __PRETTY_FUNCTION__)); | |||
1826 | assert(Node->getValueType(0) == MVT::i1 &&((Node->getValueType(0) == MVT::i1 && "Custom lowering for i1 load only" ) ? static_cast<void> (0) : __assert_fail ("Node->getValueType(0) == MVT::i1 && \"Custom lowering for i1 load only\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn220585/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 1827, __PRETTY_FUNCTION__)) | |||
1827 | "Custom lowering for i1 load only")((Node->getValueType(0) == MVT::i1 && "Custom lowering for i1 load only" ) ? static_cast<void> (0) : __assert_fail ("Node->getValueType(0) == MVT::i1 && \"Custom lowering for i1 load only\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn220585/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 1827, __PRETTY_FUNCTION__)); | |||
1828 | SDValue newLD = | |||
1829 | DAG.getLoad(MVT::i16, dl, LD->getChain(), LD->getBasePtr(), | |||
1830 | LD->getPointerInfo(), LD->isVolatile(), LD->isNonTemporal(), | |||
1831 | LD->isInvariant(), LD->getAlignment()); | |||
1832 | SDValue result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, newLD); | |||
1833 | // The legalizer (the caller) is expecting two values from the legalized | |||
1834 | // load, so we build a MergeValues node for it. See ExpandUnalignedLoad() | |||
1835 | // in LegalizeDAG.cpp which also uses MergeValues. | |||
1836 | SDValue Ops[] = { result, LD->getChain() }; | |||
1837 | return DAG.getMergeValues(Ops, dl); | |||
1838 | } | |||
1839 | ||||
1840 | SDValue NVPTXTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { | |||
1841 | EVT ValVT = Op.getOperand(1).getValueType(); | |||
1842 | if (ValVT == MVT::i1) | |||
1843 | return LowerSTOREi1(Op, DAG); | |||
1844 | else if (ValVT.isVector()) | |||
1845 | return LowerSTOREVector(Op, DAG); | |||
1846 | else | |||
1847 | return SDValue(); | |||
1848 | } | |||
1849 | ||||
1850 | SDValue | |||
1851 | NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const { | |||
1852 | SDNode *N = Op.getNode(); | |||
1853 | SDValue Val = N->getOperand(1); | |||
1854 | SDLoc DL(N); | |||
1855 | EVT ValVT = Val.getValueType(); | |||
1856 | ||||
1857 | if (ValVT.isVector()) { | |||
1858 | // We only handle "native" vector sizes for now, e.g. <4 x double> is not | |||
1859 | // legal. We can (and should) split that into 2 stores of <2 x double> here | |||
1860 | // but I'm leaving that as a TODO for now. | |||
1861 | if (!ValVT.isSimple()) | |||
1862 | return SDValue(); | |||
1863 | switch (ValVT.getSimpleVT().SimpleTy) { | |||
1864 | default: | |||
1865 | return SDValue(); | |||
1866 | case MVT::v2i8: | |||
1867 | case MVT::v2i16: | |||
1868 | case MVT::v2i32: | |||
1869 | case MVT::v2i64: | |||
1870 | case MVT::v2f32: | |||
1871 | case MVT::v2f64: | |||
1872 | case MVT::v4i8: | |||
1873 | case MVT::v4i16: | |||
1874 | case MVT::v4i32: | |||
1875 | case MVT::v4f32: | |||
1876 | // This is a "native" vector type | |||
1877 | break; | |||
1878 | } | |||
1879 | ||||
1880 | MemSDNode *MemSD = cast<MemSDNode>(N); | |||
1881 | const DataLayout *TD = getDataLayout(); | |||
1882 | ||||
1883 | unsigned Align = MemSD->getAlignment(); | |||
1884 | unsigned PrefAlign = | |||
1885 | TD->getPrefTypeAlignment(ValVT.getTypeForEVT(*DAG.getContext())); | |||
1886 | if (Align < PrefAlign) { | |||
1887 | // This store is not sufficiently aligned, so bail out and let this vector | |||
1888 | // store be scalarized. Note that we may still be able to emit smaller | |||
1889 | // vector stores. For example, if we are storing a <4 x float> with an | |||
1890 | // alignment of 8, this check will fail but the legalizer will try again | |||
1891 | // with 2 x <2 x float>, which will succeed with an alignment of 8. | |||
1892 | return SDValue(); | |||
1893 | } | |||
1894 | ||||
1895 | unsigned Opcode = 0; | |||
1896 | EVT EltVT = ValVT.getVectorElementType(); | |||
1897 | unsigned NumElts = ValVT.getVectorNumElements(); | |||
1898 | ||||
1899 | // Since StoreV2 is a target node, we cannot rely on DAG type legalization. | |||
1900 | // Therefore, we must ensure the type is legal. For i1 and i8, we set the | |||
1901 | // stored type to i16 and propagate the "real" type as the memory type. | |||
1902 | bool NeedExt = false; | |||
1903 | if (EltVT.getSizeInBits() < 16) | |||
1904 | NeedExt = true; | |||
1905 | ||||
1906 | switch (NumElts) { | |||
1907 | default: | |||
1908 | return SDValue(); | |||
1909 | case 2: | |||
1910 | Opcode = NVPTXISD::StoreV2; | |||
1911 | break; | |||
1912 | case 4: { | |||
1913 | Opcode = NVPTXISD::StoreV4; | |||
1914 | break; | |||
1915 | } | |||
1916 | } | |||
1917 | ||||
1918 | SmallVector<SDValue, 8> Ops; | |||
1919 | ||||
1920 | // First is the chain | |||
1921 | Ops.push_back(N->getOperand(0)); | |||
1922 | ||||
1923 | // Then the split values | |||
1924 | for (unsigned i = 0; i < NumElts; ++i) { | |||
1925 | SDValue ExtVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Val, | |||
1926 | DAG.getIntPtrConstant(i)); | |||
1927 | if (NeedExt) | |||
1928 | ExtVal = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i16, ExtVal); | |||
1929 | Ops.push_back(ExtVal); | |||
1930 | } | |||
1931 | ||||
1932 | // Then any remaining arguments | |||
1933 | for (unsigned i = 2, e = N->getNumOperands(); i != e; ++i) { | |||
1934 | Ops.push_back(N->getOperand(i)); | |||
1935 | } | |||
1936 | ||||
1937 | SDValue NewSt = DAG.getMemIntrinsicNode( | |||
1938 | Opcode, DL, DAG.getVTList(MVT::Other), Ops, | |||
1939 | MemSD->getMemoryVT(), MemSD->getMemOperand()); | |||
1940 | ||||
1941 | //return DCI.CombineTo(N, NewSt, true); | |||
1942 | return NewSt; | |||
1943 | } | |||
1944 | ||||
1945 | return SDValue(); | |||
1946 | } | |||
1947 | ||||
1948 | // st i1 v, addr | |||
1949 | // => | |||
1950 | // v1 = zxt v to i16 | |||
1951 | // st.u8 i16, addr | |||
1952 | SDValue NVPTXTargetLowering::LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const { | |||
1953 | SDNode *Node = Op.getNode(); | |||
1954 | SDLoc dl(Node); | |||
1955 | StoreSDNode *ST = cast<StoreSDNode>(Node); | |||
1956 | SDValue Tmp1 = ST->getChain(); | |||
1957 | SDValue Tmp2 = ST->getBasePtr(); | |||
1958 | SDValue Tmp3 = ST->getValue(); | |||
1959 | assert(Tmp3.getValueType() == MVT::i1 && "Custom lowering for i1 store only")((Tmp3.getValueType() == MVT::i1 && "Custom lowering for i1 store only" ) ? static_cast<void> (0) : __assert_fail ("Tmp3.getValueType() == MVT::i1 && \"Custom lowering for i1 store only\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn220585/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 1959, __PRETTY_FUNCTION__)); | |||
1960 | unsigned Alignment = ST->getAlignment(); | |||
1961 | bool isVolatile = ST->isVolatile(); | |||
1962 | bool isNonTemporal = ST->isNonTemporal(); | |||
1963 | Tmp3 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Tmp3); | |||
1964 | SDValue Result = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, | |||
1965 | ST->getPointerInfo(), MVT::i8, isNonTemporal, | |||
1966 | isVolatile, Alignment); | |||
1967 | return Result; | |||
1968 | } | |||
1969 | ||||
1970 | SDValue NVPTXTargetLowering::getExtSymb(SelectionDAG &DAG, const char *inname, | |||
1971 | int idx, EVT v) const { | |||
1972 | std::string *name = nvTM->getManagedStrPool()->getManagedString(inname); | |||
1973 | std::stringstream suffix; | |||
1974 | suffix << idx; | |||
1975 | *name += suffix.str(); | |||
1976 | return DAG.getTargetExternalSymbol(name->c_str(), v); | |||
1977 | } | |||
1978 | ||||
1979 | SDValue | |||
1980 | NVPTXTargetLowering::getParamSymbol(SelectionDAG &DAG, int idx, EVT v) const { | |||
1981 | std::string ParamSym; | |||
1982 | raw_string_ostream ParamStr(ParamSym); | |||
1983 | ||||
1984 | ParamStr << DAG.getMachineFunction().getName() << "_param_" << idx; | |||
1985 | ParamStr.flush(); | |||
1986 | ||||
1987 | std::string *SavedStr = | |||
1988 | nvTM->getManagedStrPool()->getManagedString(ParamSym.c_str()); | |||
1989 | return DAG.getTargetExternalSymbol(SavedStr->c_str(), v); | |||
1990 | } | |||
1991 | ||||
1992 | SDValue NVPTXTargetLowering::getParamHelpSymbol(SelectionDAG &DAG, int idx) { | |||
1993 | return getExtSymb(DAG, ".HLPPARAM", idx); | |||
1994 | } | |||
1995 | ||||
1996 | // Check to see if the kernel argument is image*_t or sampler_t | |||
1997 | ||||
1998 | bool llvm::isImageOrSamplerVal(const Value *arg, const Module *context) { | |||
1999 | static const char *const specialTypes[] = { "struct._image2d_t", | |||
2000 | "struct._image3d_t", | |||
2001 | "struct._sampler_t" }; | |||
2002 | ||||
2003 | const Type *Ty = arg->getType(); | |||
2004 | const PointerType *PTy = dyn_cast<PointerType>(Ty); | |||
2005 | ||||
2006 | if (!PTy) | |||
2007 | return false; | |||
2008 | ||||
2009 | if (!context) | |||
2010 | return false; | |||
2011 | ||||
2012 | const StructType *STy = dyn_cast<StructType>(PTy->getElementType()); | |||
2013 | const std::string TypeName = STy && !STy->isLiteral() ? STy->getName() : ""; | |||
2014 | ||||
2015 | for (int i = 0, e = array_lengthof(specialTypes); i != e; ++i) | |||
2016 | if (TypeName == specialTypes[i]) | |||
2017 | return true; | |||
2018 | ||||
2019 | return false; | |||
2020 | } | |||
2021 | ||||
2022 | SDValue NVPTXTargetLowering::LowerFormalArguments( | |||
2023 | SDValue Chain, CallingConv::ID CallConv, bool isVarArg, | |||
2024 | const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc dl, SelectionDAG &DAG, | |||
2025 | SmallVectorImpl<SDValue> &InVals) const { | |||
2026 | MachineFunction &MF = DAG.getMachineFunction(); | |||
2027 | const DataLayout *TD = getDataLayout(); | |||
2028 | ||||
2029 | const Function *F = MF.getFunction(); | |||
2030 | const AttributeSet &PAL = F->getAttributes(); | |||
2031 | const TargetLowering *TLI = DAG.getSubtarget().getTargetLowering(); | |||
2032 | ||||
2033 | SDValue Root = DAG.getRoot(); | |||
2034 | std::vector<SDValue> OutChains; | |||
2035 | ||||
2036 | bool isKernel = llvm::isKernelFunction(*F); | |||
2037 | bool isABI = (nvptxSubtarget.getSmVersion() >= 20); | |||
2038 | assert(isABI && "Non-ABI compilation is not supported")((isABI && "Non-ABI compilation is not supported") ? static_cast <void> (0) : __assert_fail ("isABI && \"Non-ABI compilation is not supported\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn220585/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 2038, __PRETTY_FUNCTION__)); | |||
2039 | if (!isABI) | |||
2040 | return Chain; | |||
2041 | ||||
2042 | std::vector<Type *> argTypes; | |||
2043 | std::vector<const Argument *> theArgs; | |||
2044 | for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end(); | |||
2045 | I != E; ++I) { | |||
2046 | theArgs.push_back(I); | |||
2047 | argTypes.push_back(I->getType()); | |||
2048 | } | |||
2049 | // argTypes.size() (or theArgs.size()) and Ins.size() need not match. | |||
2050 | // Ins.size() will be larger | |||
2051 | // * if there is an aggregate argument with multiple fields (each field | |||
2052 | // showing up separately in Ins) | |||
2053 | // * if there is a vector argument with more than typical vector-length | |||
2054 | // elements (generally if more than 4) where each vector element is | |||
2055 | // individually present in Ins. | |||
2056 | // So a different index should be used for indexing into Ins. | |||
2057 | // See similar issue in LowerCall. | |||
2058 | unsigned InsIdx = 0; | |||
2059 | ||||
2060 | int idx = 0; | |||
2061 | for (unsigned i = 0, e = theArgs.size(); i != e; ++i, ++idx, ++InsIdx) { | |||
2062 | Type *Ty = argTypes[i]; | |||
2063 | ||||
2064 | // If the kernel argument is image*_t or sampler_t, convert it to | |||
2065 | // a i32 constant holding the parameter position. This can later | |||
2066 | // matched in the AsmPrinter to output the correct mangled name. | |||
2067 | if (isImageOrSamplerVal( | |||
2068 | theArgs[i], | |||
2069 | (theArgs[i]->getParent() ? theArgs[i]->getParent()->getParent() | |||
2070 | : nullptr))) { | |||
2071 | assert(isKernel && "Only kernels can have image/sampler params")((isKernel && "Only kernels can have image/sampler params" ) ? static_cast<void> (0) : __assert_fail ("isKernel && \"Only kernels can have image/sampler params\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn220585/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 2071, __PRETTY_FUNCTION__)); | |||
2072 | InVals.push_back(DAG.getConstant(i + 1, MVT::i32)); | |||
2073 | continue; | |||
2074 | } | |||
2075 | ||||
2076 | if (theArgs[i]->use_empty()) { | |||
2077 | // argument is dead | |||
2078 | if (Ty->isAggregateType()) { | |||
2079 | SmallVector<EVT, 16> vtparts; | |||
2080 | ||||
2081 | ComputePTXValueVTs(*this, Ty, vtparts); | |||
2082 | assert(vtparts.size() > 0 && "empty aggregate type not expected")((vtparts.size() > 0 && "empty aggregate type not expected" ) ? static_cast<void> (0) : __assert_fail ("vtparts.size() > 0 && \"empty aggregate type not expected\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn220585/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 2082, __PRETTY_FUNCTION__)); | |||
2083 | for (unsigned parti = 0, parte = vtparts.size(); parti != parte; | |||
2084 | ++parti) { | |||
2085 | InVals.push_back(DAG.getNode(ISD::UNDEF, dl, Ins[InsIdx].VT)); | |||
2086 | ++InsIdx; | |||
2087 | } | |||
2088 | if (vtparts.size() > 0) | |||
2089 | --InsIdx; | |||
2090 | continue; | |||
2091 | } | |||
2092 | if (Ty->isVectorTy()) { | |||
2093 | EVT ObjectVT = getValueType(Ty); | |||
2094 | unsigned NumRegs = TLI->getNumRegisters(F->getContext(), ObjectVT); | |||
2095 | for (unsigned parti = 0; parti < NumRegs; ++parti) { | |||
2096 | InVals.push_back(DAG.getNode(ISD::UNDEF, dl, Ins[InsIdx].VT)); | |||
2097 | ++InsIdx; | |||
2098 | } | |||
2099 | if (NumRegs > 0) | |||
2100 | --InsIdx; | |||
2101 | continue; | |||
2102 | } | |||
2103 | InVals.push_back(DAG.getNode(ISD::UNDEF, dl, Ins[InsIdx].VT)); | |||
2104 | continue; | |||
2105 | } | |||
2106 | ||||
2107 | // In the following cases, assign a node order of "idx+1" | |||
2108 | // to newly created nodes. The SDNodes for params have to | |||
2109 | // appear in the same order as their order of appearance | |||
2110 | // in the original function. "idx+1" holds that order. | |||
2111 | if (PAL.hasAttribute(i + 1, Attribute::ByVal) == false) { | |||
2112 | if (Ty->isAggregateType()) { | |||
2113 | SmallVector<EVT, 16> vtparts; | |||
2114 | SmallVector<uint64_t, 16> offsets; | |||
2115 | ||||
2116 | // NOTE: Here, we lose the ability to issue vector loads for vectors | |||
2117 | // that are a part of a struct. This should be investigated in the | |||
2118 | // future. | |||
2119 | ComputePTXValueVTs(*this, Ty, vtparts, &offsets, 0); | |||
2120 | assert(vtparts.size() > 0 && "empty aggregate type not expected")((vtparts.size() > 0 && "empty aggregate type not expected" ) ? static_cast<void> (0) : __assert_fail ("vtparts.size() > 0 && \"empty aggregate type not expected\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn220585/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 2120, __PRETTY_FUNCTION__)); | |||
2121 | bool aggregateIsPacked = false; | |||
2122 | if (StructType *STy = llvm::dyn_cast<StructType>(Ty)) | |||
2123 | aggregateIsPacked = STy->isPacked(); | |||
2124 | ||||
2125 | SDValue Arg = getParamSymbol(DAG, idx, getPointerTy()); | |||
2126 | for (unsigned parti = 0, parte = vtparts.size(); parti != parte; | |||
2127 | ++parti) { | |||
2128 | EVT partVT = vtparts[parti]; | |||
2129 | Value *srcValue = Constant::getNullValue( | |||
2130 | PointerType::get(partVT.getTypeForEVT(F->getContext()), | |||
2131 | llvm::ADDRESS_SPACE_PARAM)); | |||
2132 | SDValue srcAddr = | |||
2133 | DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, | |||
2134 | DAG.getConstant(offsets[parti], getPointerTy())); | |||
2135 | unsigned partAlign = | |||
2136 | aggregateIsPacked ? 1 | |||
2137 | : TD->getABITypeAlignment( | |||
2138 | partVT.getTypeForEVT(F->getContext())); | |||
2139 | SDValue p; | |||
2140 | if (Ins[InsIdx].VT.getSizeInBits() > partVT.getSizeInBits()) { | |||
2141 | ISD::LoadExtType ExtOp = Ins[InsIdx].Flags.isSExt() ? | |||
2142 | ISD::SEXTLOAD : ISD::ZEXTLOAD; | |||
2143 | p = DAG.getExtLoad(ExtOp, dl, Ins[InsIdx].VT, Root, srcAddr, | |||
2144 | MachinePointerInfo(srcValue), partVT, false, | |||
2145 | false, false, partAlign); | |||
2146 | } else { | |||
2147 | p = DAG.getLoad(partVT, dl, Root, srcAddr, | |||
2148 | MachinePointerInfo(srcValue), false, false, false, | |||
2149 | partAlign); | |||
2150 | } | |||
2151 | if (p.getNode()) | |||
2152 | p.getNode()->setIROrder(idx + 1); | |||
2153 | InVals.push_back(p); | |||
2154 | ++InsIdx; | |||
2155 | } | |||
2156 | if (vtparts.size() > 0) | |||
2157 | --InsIdx; | |||
2158 | continue; | |||
2159 | } | |||
2160 | if (Ty->isVectorTy()) { | |||
2161 | EVT ObjectVT = getValueType(Ty); | |||
2162 | SDValue Arg = getParamSymbol(DAG, idx, getPointerTy()); | |||
2163 | unsigned NumElts = ObjectVT.getVectorNumElements(); | |||
2164 | assert(TLI->getNumRegisters(F->getContext(), ObjectVT) == NumElts &&((TLI->getNumRegisters(F->getContext(), ObjectVT) == NumElts && "Vector was not scalarized") ? static_cast<void > (0) : __assert_fail ("TLI->getNumRegisters(F->getContext(), ObjectVT) == NumElts && \"Vector was not scalarized\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn220585/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 2165, __PRETTY_FUNCTION__)) | |||
2165 | "Vector was not scalarized")((TLI->getNumRegisters(F->getContext(), ObjectVT) == NumElts && "Vector was not scalarized") ? static_cast<void > (0) : __assert_fail ("TLI->getNumRegisters(F->getContext(), ObjectVT) == NumElts && \"Vector was not scalarized\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn220585/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 2165, __PRETTY_FUNCTION__)); | |||
2166 | EVT EltVT = ObjectVT.getVectorElementType(); | |||
2167 | ||||
2168 | // V1 load | |||
2169 | // f32 = load ... | |||
2170 | if (NumElts == 1) { | |||
2171 | // We only have one element, so just directly load it | |||
2172 | Value *SrcValue = Constant::getNullValue(PointerType::get( | |||
2173 | EltVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM)); | |||
2174 | SDValue P = DAG.getLoad( | |||
2175 | EltVT, dl, Root, Arg, MachinePointerInfo(SrcValue), false, | |||
2176 | false, true, | |||
2177 | TD->getABITypeAlignment(EltVT.getTypeForEVT(F->getContext()))); | |||
2178 | if (P.getNode()) | |||
2179 | P.getNode()->setIROrder(idx + 1); | |||
2180 | ||||
2181 | if (Ins[InsIdx].VT.getSizeInBits() > EltVT.getSizeInBits()) | |||
2182 | P = DAG.getNode(ISD::ANY_EXTEND, dl, Ins[InsIdx].VT, P); | |||
2183 | InVals.push_back(P); | |||
2184 | ++InsIdx; | |||
2185 | } else if (NumElts == 2) { | |||
2186 | // V2 load | |||
2187 | // f32,f32 = load ... | |||
2188 | EVT VecVT = EVT::getVectorVT(F->getContext(), EltVT, 2); | |||
2189 | Value *SrcValue = Constant::getNullValue(PointerType::get( | |||
2190 | VecVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM)); | |||
2191 | SDValue P = DAG.getLoad( | |||
2192 | VecVT, dl, Root, Arg, MachinePointerInfo(SrcValue), false, | |||
2193 | false, true, | |||
2194 | TD->getABITypeAlignment(VecVT.getTypeForEVT(F->getContext()))); | |||
2195 | if (P.getNode()) | |||
2196 | P.getNode()->setIROrder(idx + 1); | |||
2197 | ||||
2198 | SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, P, | |||
2199 | DAG.getIntPtrConstant(0)); | |||
2200 | SDValue Elt1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, P, | |||
2201 | DAG.getIntPtrConstant(1)); | |||
2202 | ||||
2203 | if (Ins[InsIdx].VT.getSizeInBits() > EltVT.getSizeInBits()) { | |||
2204 | Elt0 = DAG.getNode(ISD::ANY_EXTEND, dl, Ins[InsIdx].VT, Elt0); | |||
2205 | Elt1 = DAG.getNode(ISD::ANY_EXTEND, dl, Ins[InsIdx].VT, Elt1); | |||
2206 | } | |||
2207 | ||||
2208 | InVals.push_back(Elt0); | |||
2209 | InVals.push_back(Elt1); | |||
2210 | InsIdx += 2; | |||
2211 | } else { | |||
2212 | // V4 loads | |||
2213 | // We have at least 4 elements (<3 x Ty> expands to 4 elements) and | |||
2214 | // the | |||
2215 | // vector will be expanded to a power of 2 elements, so we know we can | |||
2216 | // always round up to the next multiple of 4 when creating the vector | |||
2217 | // loads. | |||
2218 | // e.g. 4 elem => 1 ld.v4 | |||
2219 | // 6 elem => 2 ld.v4 | |||
2220 | // 8 elem => 2 ld.v4 | |||
2221 | // 11 elem => 3 ld.v4 | |||
2222 | unsigned VecSize = 4; | |||
2223 | if (EltVT.getSizeInBits() == 64) { | |||
2224 | VecSize = 2; | |||
2225 | } | |||
2226 | EVT VecVT = EVT::getVectorVT(F->getContext(), EltVT, VecSize); | |||
2227 | unsigned Ofst = 0; | |||
2228 | for (unsigned i = 0; i < NumElts; i += VecSize) { | |||
2229 | Value *SrcValue = Constant::getNullValue( | |||
2230 | PointerType::get(VecVT.getTypeForEVT(F->getContext()), | |||
2231 | llvm::ADDRESS_SPACE_PARAM)); | |||
2232 | SDValue SrcAddr = | |||
2233 | DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, | |||
2234 | DAG.getConstant(Ofst, getPointerTy())); | |||
2235 | SDValue P = DAG.getLoad( | |||
2236 | VecVT, dl, Root, SrcAddr, MachinePointerInfo(SrcValue), false, | |||
2237 | false, true, | |||
2238 | TD->getABITypeAlignment(VecVT.getTypeForEVT(F->getContext()))); | |||
2239 | if (P.getNode()) | |||
2240 | P.getNode()->setIROrder(idx + 1); | |||
2241 | ||||
2242 | for (unsigned j = 0; j < VecSize; ++j) { | |||
2243 | if (i + j >= NumElts) | |||
2244 | break; | |||
2245 | SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, P, | |||
2246 | DAG.getIntPtrConstant(j)); | |||
2247 | if (Ins[InsIdx].VT.getSizeInBits() > EltVT.getSizeInBits()) | |||
2248 | Elt = DAG.getNode(ISD::ANY_EXTEND, dl, Ins[InsIdx].VT, Elt); | |||
2249 | InVals.push_back(Elt); | |||
2250 | } | |||
2251 | Ofst += TD->getTypeAllocSize(VecVT.getTypeForEVT(F->getContext())); | |||
2252 | } | |||
2253 | InsIdx += NumElts; | |||
2254 | } | |||
2255 | ||||
2256 | if (NumElts > 0) | |||
2257 | --InsIdx; | |||
2258 | continue; | |||
2259 | } | |||
2260 | // A plain scalar. | |||
2261 | EVT ObjectVT = getValueType(Ty); | |||
2262 | // If ABI, load from the param symbol | |||
2263 | SDValue Arg = getParamSymbol(DAG, idx, getPointerTy()); | |||
2264 | Value *srcValue = Constant::getNullValue(PointerType::get( | |||
2265 | ObjectVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM)); | |||
2266 | SDValue p; | |||
2267 | if (ObjectVT.getSizeInBits() < Ins[InsIdx].VT.getSizeInBits()) { | |||
2268 | ISD::LoadExtType ExtOp = Ins[InsIdx].Flags.isSExt() ? | |||
2269 | ISD::SEXTLOAD : ISD::ZEXTLOAD; | |||
2270 | p = DAG.getExtLoad(ExtOp, dl, Ins[InsIdx].VT, Root, Arg, | |||
2271 | MachinePointerInfo(srcValue), ObjectVT, false, false, | |||
2272 | false, | |||
2273 | TD->getABITypeAlignment(ObjectVT.getTypeForEVT(F->getContext()))); | |||
2274 | } else { | |||
2275 | p = DAG.getLoad(Ins[InsIdx].VT, dl, Root, Arg, | |||
2276 | MachinePointerInfo(srcValue), false, false, false, | |||
2277 | TD->getABITypeAlignment(ObjectVT.getTypeForEVT(F->getContext()))); | |||
2278 | } | |||
2279 | if (p.getNode()) | |||
2280 | p.getNode()->setIROrder(idx + 1); | |||
2281 | InVals.push_back(p); | |||
2282 | continue; | |||
2283 | } | |||
2284 | ||||
2285 | // Param has ByVal attribute | |||
2286 | // Return MoveParam(param symbol). | |||
2287 | // Ideally, the param symbol can be returned directly, | |||
2288 | // but when SDNode builder decides to use it in a CopyToReg(), | |||
2289 | // machine instruction fails because TargetExternalSymbol | |||
2290 | // (not lowered) is target dependent, and CopyToReg assumes | |||
2291 | // the source is lowered. | |||
2292 | EVT ObjectVT = getValueType(Ty); | |||
2293 | assert(ObjectVT == Ins[InsIdx].VT &&((ObjectVT == Ins[InsIdx].VT && "Ins type did not match function type" ) ? static_cast<void> (0) : __assert_fail ("ObjectVT == Ins[InsIdx].VT && \"Ins type did not match function type\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn220585/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 2294, __PRETTY_FUNCTION__)) | |||
2294 | "Ins type did not match function type")((ObjectVT == Ins[InsIdx].VT && "Ins type did not match function type" ) ? static_cast<void> (0) : __assert_fail ("ObjectVT == Ins[InsIdx].VT && \"Ins type did not match function type\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn220585/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 2294, __PRETTY_FUNCTION__)); | |||
2295 | SDValue Arg = getParamSymbol(DAG, idx, getPointerTy()); | |||
2296 | SDValue p = DAG.getNode(NVPTXISD::MoveParam, dl, ObjectVT, Arg); | |||
2297 | if (p.getNode()) | |||
2298 | p.getNode()->setIROrder(idx + 1); | |||
2299 | if (isKernel) | |||
2300 | InVals.push_back(p); | |||
2301 | else { | |||
2302 | SDValue p2 = DAG.getNode( | |||
2303 | ISD::INTRINSIC_WO_CHAIN, dl, ObjectVT, | |||
2304 | DAG.getConstant(Intrinsic::nvvm_ptr_local_to_gen, MVT::i32), p); | |||
2305 | InVals.push_back(p2); | |||
2306 | } | |||
2307 | } | |||
2308 | ||||
2309 | // Clang will check explicit VarArg and issue error if any. However, Clang | |||
2310 | // will let code with | |||
2311 | // implicit var arg like f() pass. See bug 617733. | |||
2312 | // We treat this case as if the arg list is empty. | |||
2313 | // if (F.isVarArg()) { | |||
2314 | // assert(0 && "VarArg not supported yet!"); | |||
2315 | //} | |||
2316 | ||||
2317 | if (!OutChains.empty()) | |||
2318 | DAG.setRoot(DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains)); | |||
2319 | ||||
2320 | return Chain; | |||
2321 | } | |||
2322 | ||||
2323 | ||||
2324 | SDValue | |||
2325 | NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, | |||
2326 | bool isVarArg, | |||
2327 | const SmallVectorImpl<ISD::OutputArg> &Outs, | |||
2328 | const SmallVectorImpl<SDValue> &OutVals, | |||
2329 | SDLoc dl, SelectionDAG &DAG) const { | |||
2330 | MachineFunction &MF = DAG.getMachineFunction(); | |||
2331 | const Function *F = MF.getFunction(); | |||
2332 | Type *RetTy = F->getReturnType(); | |||
2333 | const DataLayout *TD = getDataLayout(); | |||
2334 | ||||
2335 | bool isABI = (nvptxSubtarget.getSmVersion() >= 20); | |||
2336 | assert(isABI && "Non-ABI compilation is not supported")((isABI && "Non-ABI compilation is not supported") ? static_cast <void> (0) : __assert_fail ("isABI && \"Non-ABI compilation is not supported\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn220585/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 2336, __PRETTY_FUNCTION__)); | |||
2337 | if (!isABI) | |||
2338 | return Chain; | |||
2339 | ||||
2340 | if (VectorType *VTy = dyn_cast<VectorType>(RetTy)) { | |||
2341 | // If we have a vector type, the OutVals array will be the scalarized | |||
2342 | // components and we have combine them into 1 or more vector stores. | |||
2343 | unsigned NumElts = VTy->getNumElements(); | |||
2344 | assert(NumElts == Outs.size() && "Bad scalarization of return value")((NumElts == Outs.size() && "Bad scalarization of return value" ) ? static_cast<void> (0) : __assert_fail ("NumElts == Outs.size() && \"Bad scalarization of return value\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn220585/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 2344, __PRETTY_FUNCTION__)); | |||
2345 | ||||
2346 | // const_cast can be removed in later LLVM versions | |||
2347 | EVT EltVT = getValueType(RetTy).getVectorElementType(); | |||
2348 | bool NeedExtend = false; | |||
2349 | if (EltVT.getSizeInBits() < 16) | |||
2350 | NeedExtend = true; | |||
2351 | ||||
2352 | // V1 store | |||
2353 | if (NumElts == 1) { | |||
2354 | SDValue StoreVal = OutVals[0]; | |||
2355 | // We only have one element, so just directly store it | |||
2356 | if (NeedExtend) | |||
2357 | StoreVal = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal); | |||
2358 | SDValue Ops[] = { Chain, DAG.getConstant(0, MVT::i32), StoreVal }; | |||
2359 | Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreRetval, dl, | |||
2360 | DAG.getVTList(MVT::Other), Ops, | |||
2361 | EltVT, MachinePointerInfo()); | |||
2362 | ||||
2363 | } else if (NumElts == 2) { | |||
2364 | // V2 store | |||
2365 | SDValue StoreVal0 = OutVals[0]; | |||
2366 | SDValue StoreVal1 = OutVals[1]; | |||
2367 | ||||
2368 | if (NeedExtend) { | |||
2369 | StoreVal0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal0); | |||
2370 | StoreVal1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal1); | |||
2371 | } | |||
2372 | ||||
2373 | SDValue Ops[] = { Chain, DAG.getConstant(0, MVT::i32), StoreVal0, | |||
2374 | StoreVal1 }; | |||
2375 | Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreRetvalV2, dl, | |||
2376 | DAG.getVTList(MVT::Other), Ops, | |||
2377 | EltVT, MachinePointerInfo()); | |||
2378 | } else { | |||
2379 | // V4 stores | |||
2380 | // We have at least 4 elements (<3 x Ty> expands to 4 elements) and the | |||
2381 | // vector will be expanded to a power of 2 elements, so we know we can | |||
2382 | // always round up to the next multiple of 4 when creating the vector | |||
2383 | // stores. | |||
2384 | // e.g. 4 elem => 1 st.v4 | |||
2385 | // 6 elem => 2 st.v4 | |||
2386 | // 8 elem => 2 st.v4 | |||
2387 | // 11 elem => 3 st.v4 | |||
2388 | ||||
2389 | unsigned VecSize = 4; | |||
2390 | if (OutVals[0].getValueType().getSizeInBits() == 64) | |||
2391 | VecSize = 2; | |||
2392 | ||||
2393 | unsigned Offset = 0; | |||
2394 | ||||
2395 | EVT VecVT = | |||
2396 | EVT::getVectorVT(F->getContext(), EltVT, VecSize); | |||
2397 | unsigned PerStoreOffset = | |||
2398 | TD->getTypeAllocSize(VecVT.getTypeForEVT(F->getContext())); | |||
2399 | ||||
2400 | for (unsigned i = 0; i < NumElts; i += VecSize) { | |||
2401 | // Get values | |||
2402 | SDValue StoreVal; | |||
2403 | SmallVector<SDValue, 8> Ops; | |||
2404 | Ops.push_back(Chain); | |||
2405 | Ops.push_back(DAG.getConstant(Offset, MVT::i32)); | |||
2406 | unsigned Opc = NVPTXISD::StoreRetvalV2; | |||
2407 | EVT ExtendedVT = (NeedExtend) ? MVT::i16 : OutVals[0].getValueType(); | |||
2408 | ||||
2409 | StoreVal = OutVals[i]; | |||
2410 | if (NeedExtend) | |||
2411 | StoreVal = DAG.getNode(ISD::ZERO_EXTEND, dl, ExtendedVT, StoreVal); | |||
2412 | Ops.push_back(StoreVal); | |||
2413 | ||||
2414 | if (i + 1 < NumElts) { | |||
2415 | StoreVal = OutVals[i + 1]; | |||
2416 | if (NeedExtend) | |||
2417 | StoreVal = DAG.getNode(ISD::ZERO_EXTEND, dl, ExtendedVT, StoreVal); | |||
2418 | } else { | |||
2419 | StoreVal = DAG.getUNDEF(ExtendedVT); | |||
2420 | } | |||
2421 | Ops.push_back(StoreVal); | |||
2422 | ||||
2423 | if (VecSize == 4) { | |||
2424 | Opc = NVPTXISD::StoreRetvalV4; | |||
2425 | if (i + 2 < NumElts) { | |||
2426 | StoreVal = OutVals[i + 2]; | |||
2427 | if (NeedExtend) | |||
2428 | StoreVal = | |||
2429 | DAG.getNode(ISD::ZERO_EXTEND, dl, ExtendedVT, StoreVal); | |||
2430 | } else { | |||
2431 | StoreVal = DAG.getUNDEF(ExtendedVT); | |||
2432 | } | |||
2433 | Ops.push_back(StoreVal); | |||
2434 | ||||
2435 | if (i + 3 < NumElts) { | |||
2436 | StoreVal = OutVals[i + 3]; | |||
2437 | if (NeedExtend) | |||
2438 | StoreVal = | |||
2439 | DAG.getNode(ISD::ZERO_EXTEND, dl, ExtendedVT, StoreVal); | |||
2440 | } else { | |||
2441 | StoreVal = DAG.getUNDEF(ExtendedVT); | |||
2442 | } | |||
2443 | Ops.push_back(StoreVal); | |||
2444 | } | |||
2445 | ||||
2446 | // Chain = DAG.getNode(Opc, dl, MVT::Other, &Ops[0], Ops.size()); | |||
2447 | Chain = | |||
2448 | DAG.getMemIntrinsicNode(Opc, dl, DAG.getVTList(MVT::Other), Ops, | |||
2449 | EltVT, MachinePointerInfo()); | |||
2450 | Offset += PerStoreOffset; | |||
2451 | } | |||
2452 | } | |||
2453 | } else { | |||
2454 | SmallVector<EVT, 16> ValVTs; | |||
2455 | SmallVector<uint64_t, 16> Offsets; | |||
2456 | ComputePTXValueVTs(*this, RetTy, ValVTs, &Offsets, 0); | |||
2457 | assert(ValVTs.size() == OutVals.size() && "Bad return value decomposition")((ValVTs.size() == OutVals.size() && "Bad return value decomposition" ) ? static_cast<void> (0) : __assert_fail ("ValVTs.size() == OutVals.size() && \"Bad return value decomposition\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn220585/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 2457, __PRETTY_FUNCTION__)); | |||
2458 | ||||
2459 | for (unsigned i = 0, e = Outs.size(); i != e; ++i) { | |||
2460 | SDValue theVal = OutVals[i]; | |||
2461 | EVT TheValType = theVal.getValueType(); | |||
2462 | unsigned numElems = 1; | |||
2463 | if (TheValType.isVector()) | |||
2464 | numElems = TheValType.getVectorNumElements(); | |||
2465 | for (unsigned j = 0, je = numElems; j != je; ++j) { | |||
2466 | SDValue TmpVal = theVal; | |||
2467 | if (TheValType.isVector()) | |||
2468 | TmpVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, | |||
2469 | TheValType.getVectorElementType(), TmpVal, | |||
2470 | DAG.getIntPtrConstant(j)); | |||
2471 | EVT TheStoreType = ValVTs[i]; | |||
2472 | if (RetTy->isIntegerTy() && | |||
2473 | TD->getTypeAllocSizeInBits(RetTy) < 32) { | |||
2474 | // The following zero-extension is for integer types only, and | |||
2475 | // specifically not for aggregates. | |||
2476 | TmpVal = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, TmpVal); | |||
2477 | TheStoreType = MVT::i32; | |||
2478 | } | |||
2479 | else if (TmpVal.getValueType().getSizeInBits() < 16) | |||
2480 | TmpVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, TmpVal); | |||
2481 | ||||
2482 | SDValue Ops[] = { | |||
2483 | Chain, | |||
2484 | DAG.getConstant(Offsets[i], MVT::i32), | |||
2485 | TmpVal }; | |||
2486 | Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreRetval, dl, | |||
2487 | DAG.getVTList(MVT::Other), Ops, | |||
2488 | TheStoreType, | |||
2489 | MachinePointerInfo()); | |||
2490 | } | |||
2491 | } | |||
2492 | } | |||
2493 | ||||
2494 | return DAG.getNode(NVPTXISD::RET_FLAG, dl, MVT::Other, Chain); | |||
2495 | } | |||
2496 | ||||
2497 | ||||
2498 | void NVPTXTargetLowering::LowerAsmOperandForConstraint( | |||
2499 | SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops, | |||
2500 | SelectionDAG &DAG) const { | |||
2501 | if (Constraint.length() > 1) | |||
2502 | return; | |||
2503 | else | |||
2504 | TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); | |||
2505 | } | |||
2506 | ||||
2507 | // NVPTX suuport vector of legal types of any length in Intrinsics because the | |||
2508 | // NVPTX specific type legalizer | |||
2509 | // will legalize them to the PTX supported length. | |||
2510 | bool NVPTXTargetLowering::isTypeSupportedInIntrinsic(MVT VT) const { | |||
2511 | if (isTypeLegal(VT)) | |||
2512 | return true; | |||
2513 | if (VT.isVector()) { | |||
2514 | MVT eVT = VT.getVectorElementType(); | |||
2515 | if (isTypeLegal(eVT)) | |||
2516 | return true; | |||
2517 | } | |||
2518 | return false; | |||
2519 | } | |||
2520 | ||||
2521 | static unsigned getOpcForTextureInstr(unsigned Intrinsic) { | |||
2522 | switch (Intrinsic) { | |||
2523 | default: | |||
2524 | return 0; | |||
2525 | ||||
2526 | case Intrinsic::nvvm_tex_1d_v4f32_s32: | |||
2527 | return NVPTXISD::Tex1DFloatS32; | |||
2528 | case Intrinsic::nvvm_tex_1d_v4f32_f32: | |||
2529 | return NVPTXISD::Tex1DFloatFloat; | |||
2530 | case Intrinsic::nvvm_tex_1d_level_v4f32_f32: | |||
2531 | return NVPTXISD::Tex1DFloatFloatLevel; | |||
2532 | case Intrinsic::nvvm_tex_1d_grad_v4f32_f32: | |||
2533 | return NVPTXISD::Tex1DFloatFloatGrad; | |||
2534 | case Intrinsic::nvvm_tex_1d_v4s32_s32: | |||
2535 | return NVPTXISD::Tex1DS32S32; | |||
2536 | case Intrinsic::nvvm_tex_1d_v4s32_f32: | |||
2537 | return NVPTXISD::Tex1DS32Float; | |||
2538 | case Intrinsic::nvvm_tex_1d_level_v4s32_f32: | |||
2539 | return NVPTXISD::Tex1DS32FloatLevel; | |||
2540 | case Intrinsic::nvvm_tex_1d_grad_v4s32_f32: | |||
2541 | return NVPTXISD::Tex1DS32FloatGrad; | |||
2542 | case Intrinsic::nvvm_tex_1d_v4u32_s32: | |||
2543 | return NVPTXISD::Tex1DU32S32; | |||
2544 | case Intrinsic::nvvm_tex_1d_v4u32_f32: | |||
2545 | return NVPTXISD::Tex1DU32Float; | |||
2546 | case Intrinsic::nvvm_tex_1d_level_v4u32_f32: | |||
2547 | return NVPTXISD::Tex1DU32FloatLevel; | |||
2548 | case Intrinsic::nvvm_tex_1d_grad_v4u32_f32: | |||
2549 | return NVPTXISD::Tex1DU32FloatGrad; | |||
2550 | ||||
2551 | case Intrinsic::nvvm_tex_1d_array_v4f32_s32: | |||
2552 | return NVPTXISD::Tex1DArrayFloatS32; | |||
2553 | case Intrinsic::nvvm_tex_1d_array_v4f32_f32: | |||
2554 | return NVPTXISD::Tex1DArrayFloatFloat; | |||
2555 | case Intrinsic::nvvm_tex_1d_array_level_v4f32_f32: | |||
2556 | return NVPTXISD::Tex1DArrayFloatFloatLevel; | |||
2557 | case Intrinsic::nvvm_tex_1d_array_grad_v4f32_f32: | |||
2558 | return NVPTXISD::Tex1DArrayFloatFloatGrad; | |||
2559 | case Intrinsic::nvvm_tex_1d_array_v4s32_s32: | |||
2560 | return NVPTXISD::Tex1DArrayS32S32; | |||
2561 | case Intrinsic::nvvm_tex_1d_array_v4s32_f32: | |||
2562 | return NVPTXISD::Tex1DArrayS32Float; | |||
2563 | case Intrinsic::nvvm_tex_1d_array_level_v4s32_f32: | |||
2564 | return NVPTXISD::Tex1DArrayS32FloatLevel; | |||
2565 | case Intrinsic::nvvm_tex_1d_array_grad_v4s32_f32: | |||
2566 | return NVPTXISD::Tex1DArrayS32FloatGrad; | |||
2567 | case Intrinsic::nvvm_tex_1d_array_v4u32_s32: | |||
2568 | return NVPTXISD::Tex1DArrayU32S32; | |||
2569 | case Intrinsic::nvvm_tex_1d_array_v4u32_f32: | |||
2570 | return NVPTXISD::Tex1DArrayU32Float; | |||
2571 | case Intrinsic::nvvm_tex_1d_array_level_v4u32_f32: | |||
2572 | return NVPTXISD::Tex1DArrayU32FloatLevel; | |||
2573 | case Intrinsic::nvvm_tex_1d_array_grad_v4u32_f32: | |||
2574 | return NVPTXISD::Tex1DArrayU32FloatGrad; | |||
2575 | ||||
2576 | case Intrinsic::nvvm_tex_2d_v4f32_s32: | |||
2577 | return NVPTXISD::Tex2DFloatS32; | |||
2578 | case Intrinsic::nvvm_tex_2d_v4f32_f32: | |||
2579 | return NVPTXISD::Tex2DFloatFloat; | |||
2580 | case Intrinsic::nvvm_tex_2d_level_v4f32_f32: | |||
2581 | return NVPTXISD::Tex2DFloatFloatLevel; | |||
2582 | case Intrinsic::nvvm_tex_2d_grad_v4f32_f32: | |||
2583 | return NVPTXISD::Tex2DFloatFloatGrad; | |||
2584 | case Intrinsic::nvvm_tex_2d_v4s32_s32: | |||
2585 | return NVPTXISD::Tex2DS32S32; | |||
2586 | case Intrinsic::nvvm_tex_2d_v4s32_f32: | |||
2587 | return NVPTXISD::Tex2DS32Float; | |||
2588 | case Intrinsic::nvvm_tex_2d_level_v4s32_f32: | |||
2589 | return NVPTXISD::Tex2DS32FloatLevel; | |||
2590 | case Intrinsic::nvvm_tex_2d_grad_v4s32_f32: | |||
2591 | return NVPTXISD::Tex2DS32FloatGrad; | |||
2592 | case Intrinsic::nvvm_tex_2d_v4u32_s32: | |||
2593 | return NVPTXISD::Tex2DU32S32; | |||
2594 | case Intrinsic::nvvm_tex_2d_v4u32_f32: | |||
2595 | return NVPTXISD::Tex2DU32Float; | |||
2596 | case Intrinsic::nvvm_tex_2d_level_v4u32_f32: | |||
2597 | return NVPTXISD::Tex2DU32FloatLevel; | |||
2598 | case Intrinsic::nvvm_tex_2d_grad_v4u32_f32: | |||
2599 | return NVPTXISD::Tex2DU32FloatGrad; | |||
2600 | ||||
2601 | case Intrinsic::nvvm_tex_2d_array_v4f32_s32: | |||
2602 | return NVPTXISD::Tex2DArrayFloatS32; | |||
2603 | case Intrinsic::nvvm_tex_2d_array_v4f32_f32: | |||
2604 | return NVPTXISD::Tex2DArrayFloatFloat; | |||
2605 | case Intrinsic::nvvm_tex_2d_array_level_v4f32_f32: | |||
2606 | return NVPTXISD::Tex2DArrayFloatFloatLevel; | |||
2607 | case Intrinsic::nvvm_tex_2d_array_grad_v4f32_f32: | |||
2608 | return NVPTXISD::Tex2DArrayFloatFloatGrad; | |||
2609 | case Intrinsic::nvvm_tex_2d_array_v4s32_s32: | |||
2610 | return NVPTXISD::Tex2DArrayS32S32; | |||
2611 | case Intrinsic::nvvm_tex_2d_array_v4s32_f32: | |||
2612 | return NVPTXISD::Tex2DArrayS32Float; | |||
2613 | case Intrinsic::nvvm_tex_2d_array_level_v4s32_f32: | |||
2614 | return NVPTXISD::Tex2DArrayS32FloatLevel; | |||
2615 | case Intrinsic::nvvm_tex_2d_array_grad_v4s32_f32: | |||
2616 | return NVPTXISD::Tex2DArrayS32FloatGrad; | |||
2617 | case Intrinsic::nvvm_tex_2d_array_v4u32_s32: | |||
2618 | return NVPTXISD::Tex2DArrayU32S32; | |||
2619 | case Intrinsic::nvvm_tex_2d_array_v4u32_f32: | |||
2620 | return NVPTXISD::Tex2DArrayU32Float; | |||
2621 | case Intrinsic::nvvm_tex_2d_array_level_v4u32_f32: | |||
2622 | return NVPTXISD::Tex2DArrayU32FloatLevel; | |||
2623 | case Intrinsic::nvvm_tex_2d_array_grad_v4u32_f32: | |||
2624 | return NVPTXISD::Tex2DArrayU32FloatGrad; | |||
2625 | ||||
2626 | case Intrinsic::nvvm_tex_3d_v4f32_s32: | |||
2627 | return NVPTXISD::Tex3DFloatS32; | |||
2628 | case Intrinsic::nvvm_tex_3d_v4f32_f32: | |||
2629 | return NVPTXISD::Tex3DFloatFloat; | |||
2630 | case Intrinsic::nvvm_tex_3d_level_v4f32_f32: | |||
2631 | return NVPTXISD::Tex3DFloatFloatLevel; | |||
2632 | case Intrinsic::nvvm_tex_3d_grad_v4f32_f32: | |||
2633 | return NVPTXISD::Tex3DFloatFloatGrad; | |||
2634 | case Intrinsic::nvvm_tex_3d_v4s32_s32: | |||
2635 | return NVPTXISD::Tex3DS32S32; | |||
2636 | case Intrinsic::nvvm_tex_3d_v4s32_f32: | |||
2637 | return NVPTXISD::Tex3DS32Float; | |||
2638 | case Intrinsic::nvvm_tex_3d_level_v4s32_f32: | |||
2639 | return NVPTXISD::Tex3DS32FloatLevel; | |||
2640 | case Intrinsic::nvvm_tex_3d_grad_v4s32_f32: | |||
2641 | return NVPTXISD::Tex3DS32FloatGrad; | |||
2642 | case Intrinsic::nvvm_tex_3d_v4u32_s32: | |||
2643 | return NVPTXISD::Tex3DU32S32; | |||
2644 | case Intrinsic::nvvm_tex_3d_v4u32_f32: | |||
2645 | return NVPTXISD::Tex3DU32Float; | |||
2646 | case Intrinsic::nvvm_tex_3d_level_v4u32_f32: | |||
2647 | return NVPTXISD::Tex3DU32FloatLevel; | |||
2648 | case Intrinsic::nvvm_tex_3d_grad_v4u32_f32: | |||
2649 | return NVPTXISD::Tex3DU32FloatGrad; | |||
2650 | ||||
2651 | case Intrinsic::nvvm_tex_cube_v4f32_f32: | |||
2652 | return NVPTXISD::TexCubeFloatFloat; | |||
2653 | case Intrinsic::nvvm_tex_cube_level_v4f32_f32: | |||
2654 | return NVPTXISD::TexCubeFloatFloatLevel; | |||
2655 | case Intrinsic::nvvm_tex_cube_v4s32_f32: | |||
2656 | return NVPTXISD::TexCubeS32Float; | |||
2657 | case Intrinsic::nvvm_tex_cube_level_v4s32_f32: | |||
2658 | return NVPTXISD::TexCubeS32FloatLevel; | |||
2659 | case Intrinsic::nvvm_tex_cube_v4u32_f32: | |||
2660 | return NVPTXISD::TexCubeU32Float; | |||
2661 | case Intrinsic::nvvm_tex_cube_level_v4u32_f32: | |||
2662 | return NVPTXISD::TexCubeU32FloatLevel; | |||
2663 | ||||
2664 | case Intrinsic::nvvm_tex_cube_array_v4f32_f32: | |||
2665 | return NVPTXISD::TexCubeArrayFloatFloat; | |||
2666 | case Intrinsic::nvvm_tex_cube_array_level_v4f32_f32: | |||
2667 | return NVPTXISD::TexCubeArrayFloatFloatLevel; | |||
2668 | case Intrinsic::nvvm_tex_cube_array_v4s32_f32: | |||
2669 | return NVPTXISD::TexCubeArrayS32Float; | |||
2670 | case Intrinsic::nvvm_tex_cube_array_level_v4s32_f32: | |||
2671 | return NVPTXISD::TexCubeArrayS32FloatLevel; | |||
2672 | case Intrinsic::nvvm_tex_cube_array_v4u32_f32: | |||
2673 | return NVPTXISD::TexCubeArrayU32Float; | |||
2674 | case Intrinsic::nvvm_tex_cube_array_level_v4u32_f32: | |||
2675 | return NVPTXISD::TexCubeArrayU32FloatLevel; | |||
2676 | ||||
2677 | case Intrinsic::nvvm_tld4_r_2d_v4f32_f32: | |||
2678 | return NVPTXISD::Tld4R2DFloatFloat; | |||
2679 | case Intrinsic::nvvm_tld4_g_2d_v4f32_f32: | |||
2680 | return NVPTXISD::Tld4G2DFloatFloat; | |||
2681 | case Intrinsic::nvvm_tld4_b_2d_v4f32_f32: | |||
2682 | return NVPTXISD::Tld4B2DFloatFloat; | |||
2683 | case Intrinsic::nvvm_tld4_a_2d_v4f32_f32: | |||
2684 | return NVPTXISD::Tld4A2DFloatFloat; | |||
2685 | case Intrinsic::nvvm_tld4_r_2d_v4s32_f32: | |||
2686 | return NVPTXISD::Tld4R2DS64Float; | |||
2687 | case Intrinsic::nvvm_tld4_g_2d_v4s32_f32: | |||
2688 | return NVPTXISD::Tld4G2DS64Float; | |||
2689 | case Intrinsic::nvvm_tld4_b_2d_v4s32_f32: | |||
2690 | return NVPTXISD::Tld4B2DS64Float; | |||
2691 | case Intrinsic::nvvm_tld4_a_2d_v4s32_f32: | |||
2692 | return NVPTXISD::Tld4A2DS64Float; | |||
2693 | case Intrinsic::nvvm_tld4_r_2d_v4u32_f32: | |||
2694 | return NVPTXISD::Tld4R2DU64Float; | |||
2695 | case Intrinsic::nvvm_tld4_g_2d_v4u32_f32: | |||
2696 | return NVPTXISD::Tld4G2DU64Float; | |||
2697 | case Intrinsic::nvvm_tld4_b_2d_v4u32_f32: | |||
2698 | return NVPTXISD::Tld4B2DU64Float; | |||
2699 | case Intrinsic::nvvm_tld4_a_2d_v4u32_f32: | |||
2700 | return NVPTXISD::Tld4A2DU64Float; | |||
2701 | ||||
2702 | case Intrinsic::nvvm_tex_unified_1d_v4f32_s32: | |||
2703 | return NVPTXISD::TexUnified1DFloatS32; | |||
2704 | case Intrinsic::nvvm_tex_unified_1d_v4f32_f32: | |||
2705 | return NVPTXISD::TexUnified1DFloatFloat; | |||
2706 | case Intrinsic::nvvm_tex_unified_1d_level_v4f32_f32: | |||
2707 | return NVPTXISD::TexUnified1DFloatFloatLevel; | |||
2708 | case Intrinsic::nvvm_tex_unified_1d_grad_v4f32_f32: | |||
2709 | return NVPTXISD::TexUnified1DFloatFloatGrad; | |||
2710 | case Intrinsic::nvvm_tex_unified_1d_v4s32_s32: | |||
2711 | return NVPTXISD::TexUnified1DS32S32; | |||
2712 | case Intrinsic::nvvm_tex_unified_1d_v4s32_f32: | |||
2713 | return NVPTXISD::TexUnified1DS32Float; | |||
2714 | case Intrinsic::nvvm_tex_unified_1d_level_v4s32_f32: | |||
2715 | return NVPTXISD::TexUnified1DS32FloatLevel; | |||
2716 | case Intrinsic::nvvm_tex_unified_1d_grad_v4s32_f32: | |||
2717 | return NVPTXISD::TexUnified1DS32FloatGrad; | |||
2718 | case Intrinsic::nvvm_tex_unified_1d_v4u32_s32: | |||
2719 | return NVPTXISD::TexUnified1DU32S32; | |||
2720 | case Intrinsic::nvvm_tex_unified_1d_v4u32_f32: | |||
2721 | return NVPTXISD::TexUnified1DU32Float; | |||
2722 | case Intrinsic::nvvm_tex_unified_1d_level_v4u32_f32: | |||
2723 | return NVPTXISD::TexUnified1DU32FloatLevel; | |||
2724 | case Intrinsic::nvvm_tex_unified_1d_grad_v4u32_f32: | |||
2725 | return NVPTXISD::TexUnified1DU32FloatGrad; | |||
2726 | ||||
2727 | case Intrinsic::nvvm_tex_unified_1d_array_v4f32_s32: | |||
2728 | return NVPTXISD::TexUnified1DArrayFloatS32; | |||
2729 | case Intrinsic::nvvm_tex_unified_1d_array_v4f32_f32: | |||
2730 | return NVPTXISD::TexUnified1DArrayFloatFloat; | |||
2731 | case Intrinsic::nvvm_tex_unified_1d_array_level_v4f32_f32: | |||
2732 | return NVPTXISD::TexUnified1DArrayFloatFloatLevel; | |||
2733 | case Intrinsic::nvvm_tex_unified_1d_array_grad_v4f32_f32: | |||
2734 | return NVPTXISD::TexUnified1DArrayFloatFloatGrad; | |||
2735 | case Intrinsic::nvvm_tex_unified_1d_array_v4s32_s32: | |||
2736 | return NVPTXISD::TexUnified1DArrayS32S32; | |||
2737 | case Intrinsic::nvvm_tex_unified_1d_array_v4s32_f32: | |||
2738 | return NVPTXISD::TexUnified1DArrayS32Float; | |||
2739 | case Intrinsic::nvvm_tex_unified_1d_array_level_v4s32_f32: | |||
2740 | return NVPTXISD::TexUnified1DArrayS32FloatLevel; | |||
2741 | case Intrinsic::nvvm_tex_unified_1d_array_grad_v4s32_f32: | |||
2742 | return NVPTXISD::TexUnified1DArrayS32FloatGrad; | |||
2743 | case Intrinsic::nvvm_tex_unified_1d_array_v4u32_s32: | |||
2744 | return NVPTXISD::TexUnified1DArrayU32S32; | |||
2745 | case Intrinsic::nvvm_tex_unified_1d_array_v4u32_f32: | |||
2746 | return NVPTXISD::TexUnified1DArrayU32Float; | |||
2747 | case Intrinsic::nvvm_tex_unified_1d_array_level_v4u32_f32: | |||
2748 | return NVPTXISD::TexUnified1DArrayU32FloatLevel; | |||
2749 | case Intrinsic::nvvm_tex_unified_1d_array_grad_v4u32_f32: | |||
2750 | return NVPTXISD::TexUnified1DArrayU32FloatGrad; | |||
2751 | ||||
2752 | case Intrinsic::nvvm_tex_unified_2d_v4f32_s32: | |||
2753 | return NVPTXISD::TexUnified2DFloatS32; | |||
2754 | case Intrinsic::nvvm_tex_unified_2d_v4f32_f32: | |||
2755 | return NVPTXISD::TexUnified2DFloatFloat; | |||
2756 | case Intrinsic::nvvm_tex_unified_2d_level_v4f32_f32: | |||
2757 | return NVPTXISD::TexUnified2DFloatFloatLevel; | |||
2758 | case Intrinsic::nvvm_tex_unified_2d_grad_v4f32_f32: | |||
2759 | return NVPTXISD::TexUnified2DFloatFloatGrad; | |||
2760 | case Intrinsic::nvvm_tex_unified_2d_v4s32_s32: | |||
2761 | return NVPTXISD::TexUnified2DS32S32; | |||
2762 | case Intrinsic::nvvm_tex_unified_2d_v4s32_f32: | |||
2763 | return NVPTXISD::TexUnified2DS32Float; | |||
2764 | case Intrinsic::nvvm_tex_unified_2d_level_v4s32_f32: | |||
2765 | return NVPTXISD::TexUnified2DS32FloatLevel; | |||
2766 | case Intrinsic::nvvm_tex_unified_2d_grad_v4s32_f32: | |||
2767 | return NVPTXISD::TexUnified2DS32FloatGrad; | |||
2768 | case Intrinsic::nvvm_tex_unified_2d_v4u32_s32: | |||
2769 | return NVPTXISD::TexUnified2DU32S32; | |||
2770 | case Intrinsic::nvvm_tex_unified_2d_v4u32_f32: | |||
2771 | return NVPTXISD::TexUnified2DU32Float; | |||
2772 | case Intrinsic::nvvm_tex_unified_2d_level_v4u32_f32: | |||
2773 | return NVPTXISD::TexUnified2DU32FloatLevel; | |||
2774 | case Intrinsic::nvvm_tex_unified_2d_grad_v4u32_f32: | |||
2775 | return NVPTXISD::TexUnified2DU32FloatGrad; | |||
2776 | ||||
2777 | case Intrinsic::nvvm_tex_unified_2d_array_v4f32_s32: | |||
2778 | return NVPTXISD::TexUnified2DArrayFloatS32; | |||
2779 | case Intrinsic::nvvm_tex_unified_2d_array_v4f32_f32: | |||
2780 | return NVPTXISD::TexUnified2DArrayFloatFloat; | |||
2781 | case Intrinsic::nvvm_tex_unified_2d_array_level_v4f32_f32: | |||
2782 | return NVPTXISD::TexUnified2DArrayFloatFloatLevel; | |||
2783 | case Intrinsic::nvvm_tex_unified_2d_array_grad_v4f32_f32: | |||
2784 | return NVPTXISD::TexUnified2DArrayFloatFloatGrad; | |||
2785 | case Intrinsic::nvvm_tex_unified_2d_array_v4s32_s32: | |||
2786 | return NVPTXISD::TexUnified2DArrayS32S32; | |||
2787 | case Intrinsic::nvvm_tex_unified_2d_array_v4s32_f32: | |||
2788 | return NVPTXISD::TexUnified2DArrayS32Float; | |||
2789 | case Intrinsic::nvvm_tex_unified_2d_array_level_v4s32_f32: | |||
2790 | return NVPTXISD::TexUnified2DArrayS32FloatLevel; | |||
2791 | case Intrinsic::nvvm_tex_unified_2d_array_grad_v4s32_f32: | |||
2792 | return NVPTXISD::TexUnified2DArrayS32FloatGrad; | |||
2793 | case Intrinsic::nvvm_tex_unified_2d_array_v4u32_s32: | |||
2794 | return NVPTXISD::TexUnified2DArrayU32S32; | |||
2795 | case Intrinsic::nvvm_tex_unified_2d_array_v4u32_f32: | |||
2796 | return NVPTXISD::TexUnified2DArrayU32Float; | |||
2797 | case Intrinsic::nvvm_tex_unified_2d_array_level_v4u32_f32: | |||
2798 | return NVPTXISD::TexUnified2DArrayU32FloatLevel; | |||
2799 | case Intrinsic::nvvm_tex_unified_2d_array_grad_v4u32_f32: | |||
2800 | return NVPTXISD::TexUnified2DArrayU32FloatGrad; | |||
2801 | ||||
2802 | case Intrinsic::nvvm_tex_unified_3d_v4f32_s32: | |||
2803 | return NVPTXISD::TexUnified3DFloatS32; | |||
2804 | case Intrinsic::nvvm_tex_unified_3d_v4f32_f32: | |||
2805 | return NVPTXISD::TexUnified3DFloatFloat; | |||
2806 | case Intrinsic::nvvm_tex_unified_3d_level_v4f32_f32: | |||
2807 | return NVPTXISD::TexUnified3DFloatFloatLevel; | |||
2808 | case Intrinsic::nvvm_tex_unified_3d_grad_v4f32_f32: | |||
2809 | return NVPTXISD::TexUnified3DFloatFloatGrad; | |||
2810 | case Intrinsic::nvvm_tex_unified_3d_v4s32_s32: | |||
2811 | return NVPTXISD::TexUnified3DS32S32; | |||
2812 | case Intrinsic::nvvm_tex_unified_3d_v4s32_f32: | |||
2813 | return NVPTXISD::TexUnified3DS32Float; | |||
2814 | case Intrinsic::nvvm_tex_unified_3d_level_v4s32_f32: | |||
2815 | return NVPTXISD::TexUnified3DS32FloatLevel; | |||
2816 | case Intrinsic::nvvm_tex_unified_3d_grad_v4s32_f32: | |||
2817 | return NVPTXISD::TexUnified3DS32FloatGrad; | |||
2818 | case Intrinsic::nvvm_tex_unified_3d_v4u32_s32: | |||
2819 | return NVPTXISD::TexUnified3DU32S32; | |||
2820 | case Intrinsic::nvvm_tex_unified_3d_v4u32_f32: | |||
2821 | return NVPTXISD::TexUnified3DU32Float; | |||
2822 | case Intrinsic::nvvm_tex_unified_3d_level_v4u32_f32: | |||
2823 | return NVPTXISD::TexUnified3DU32FloatLevel; | |||
2824 | case Intrinsic::nvvm_tex_unified_3d_grad_v4u32_f32: | |||
2825 | return NVPTXISD::TexUnified3DU32FloatGrad; | |||
2826 | ||||
2827 | case Intrinsic::nvvm_tex_unified_cube_v4f32_f32: | |||
2828 | return NVPTXISD::TexUnifiedCubeFloatFloat; | |||
2829 | case Intrinsic::nvvm_tex_unified_cube_level_v4f32_f32: | |||
2830 | return NVPTXISD::TexUnifiedCubeFloatFloatLevel; | |||
2831 | case Intrinsic::nvvm_tex_unified_cube_v4s32_f32: | |||
2832 | return NVPTXISD::TexUnifiedCubeS32Float; | |||
2833 | case Intrinsic::nvvm_tex_unified_cube_level_v4s32_f32: | |||
2834 | return NVPTXISD::TexUnifiedCubeS32FloatLevel; | |||
2835 | case Intrinsic::nvvm_tex_unified_cube_v4u32_f32: | |||
2836 | return NVPTXISD::TexUnifiedCubeU32Float; | |||
2837 | case Intrinsic::nvvm_tex_unified_cube_level_v4u32_f32: | |||
2838 | return NVPTXISD::TexUnifiedCubeU32FloatLevel; | |||
2839 | ||||
2840 | case Intrinsic::nvvm_tex_unified_cube_array_v4f32_f32: | |||
2841 | return NVPTXISD::TexUnifiedCubeArrayFloatFloat; | |||
2842 | case Intrinsic::nvvm_tex_unified_cube_array_level_v4f32_f32: | |||
2843 | return NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel; | |||
2844 | case Intrinsic::nvvm_tex_unified_cube_array_v4s32_f32: | |||
2845 | return NVPTXISD::TexUnifiedCubeArrayS32Float; | |||
2846 | case Intrinsic::nvvm_tex_unified_cube_array_level_v4s32_f32: | |||
2847 | return NVPTXISD::TexUnifiedCubeArrayS32FloatLevel; | |||
2848 | case Intrinsic::nvvm_tex_unified_cube_array_v4u32_f32: | |||
2849 | return NVPTXISD::TexUnifiedCubeArrayU32Float; | |||
2850 | case Intrinsic::nvvm_tex_unified_cube_array_level_v4u32_f32: | |||
2851 | return NVPTXISD::TexUnifiedCubeArrayU32FloatLevel; | |||
2852 | ||||
2853 | case Intrinsic::nvvm_tld4_unified_r_2d_v4f32_f32: | |||
2854 | return NVPTXISD::Tld4UnifiedR2DFloatFloat; | |||
2855 | case Intrinsic::nvvm_tld4_unified_g_2d_v4f32_f32: | |||
2856 | return NVPTXISD::Tld4UnifiedG2DFloatFloat; | |||
2857 | case Intrinsic::nvvm_tld4_unified_b_2d_v4f32_f32: | |||
2858 | return NVPTXISD::Tld4UnifiedB2DFloatFloat; | |||
2859 | case Intrinsic::nvvm_tld4_unified_a_2d_v4f32_f32: | |||
2860 | return NVPTXISD::Tld4UnifiedA2DFloatFloat; | |||
2861 | case Intrinsic::nvvm_tld4_unified_r_2d_v4s32_f32: | |||
2862 | return NVPTXISD::Tld4UnifiedR2DS64Float; | |||
2863 | case Intrinsic::nvvm_tld4_unified_g_2d_v4s32_f32: | |||
2864 | return NVPTXISD::Tld4UnifiedG2DS64Float; | |||
2865 | case Intrinsic::nvvm_tld4_unified_b_2d_v4s32_f32: | |||
2866 | return NVPTXISD::Tld4UnifiedB2DS64Float; | |||
2867 | case Intrinsic::nvvm_tld4_unified_a_2d_v4s32_f32: | |||
2868 | return NVPTXISD::Tld4UnifiedA2DS64Float; | |||
2869 | case Intrinsic::nvvm_tld4_unified_r_2d_v4u32_f32: | |||
2870 | return NVPTXISD::Tld4UnifiedR2DU64Float; | |||
2871 | case Intrinsic::nvvm_tld4_unified_g_2d_v4u32_f32: | |||
2872 | return NVPTXISD::Tld4UnifiedG2DU64Float; | |||
2873 | case Intrinsic::nvvm_tld4_unified_b_2d_v4u32_f32: | |||
2874 | return NVPTXISD::Tld4UnifiedB2DU64Float; | |||
2875 | case Intrinsic::nvvm_tld4_unified_a_2d_v4u32_f32: | |||
2876 | return NVPTXISD::Tld4UnifiedA2DU64Float; | |||
2877 | } | |||
2878 | } | |||
2879 | ||||
2880 | static unsigned getOpcForSurfaceInstr(unsigned Intrinsic) { | |||
2881 | switch (Intrinsic) { | |||
2882 | default: | |||
2883 | return 0; | |||
2884 | case Intrinsic::nvvm_suld_1d_i8_clamp: | |||
2885 | return NVPTXISD::Suld1DI8Clamp; | |||
2886 | case Intrinsic::nvvm_suld_1d_i16_clamp: | |||
2887 | return NVPTXISD::Suld1DI16Clamp; | |||
2888 | case Intrinsic::nvvm_suld_1d_i32_clamp: | |||
2889 | return NVPTXISD::Suld1DI32Clamp; | |||
2890 | case Intrinsic::nvvm_suld_1d_i64_clamp: | |||
2891 | return NVPTXISD::Suld1DI64Clamp; | |||
2892 | case Intrinsic::nvvm_suld_1d_v2i8_clamp: | |||
2893 | return NVPTXISD::Suld1DV2I8Clamp; | |||
2894 | case Intrinsic::nvvm_suld_1d_v2i16_clamp: | |||
2895 | return NVPTXISD::Suld1DV2I16Clamp; | |||
2896 | case Intrinsic::nvvm_suld_1d_v2i32_clamp: | |||
2897 | return NVPTXISD::Suld1DV2I32Clamp; | |||
2898 | case Intrinsic::nvvm_suld_1d_v2i64_clamp: | |||
2899 | return NVPTXISD::Suld1DV2I64Clamp; | |||
2900 | case Intrinsic::nvvm_suld_1d_v4i8_clamp: | |||
2901 | return NVPTXISD::Suld1DV4I8Clamp; | |||
2902 | case Intrinsic::nvvm_suld_1d_v4i16_clamp: | |||
2903 | return NVPTXISD::Suld1DV4I16Clamp; | |||
2904 | case Intrinsic::nvvm_suld_1d_v4i32_clamp: | |||
2905 | return NVPTXISD::Suld1DV4I32Clamp; | |||
2906 | case Intrinsic::nvvm_suld_1d_array_i8_clamp: | |||
2907 | return NVPTXISD::Suld1DArrayI8Clamp; | |||
2908 | case Intrinsic::nvvm_suld_1d_array_i16_clamp: | |||
2909 | return NVPTXISD::Suld1DArrayI16Clamp; | |||
2910 | case Intrinsic::nvvm_suld_1d_array_i32_clamp: | |||
2911 | return NVPTXISD::Suld1DArrayI32Clamp; | |||
2912 | case Intrinsic::nvvm_suld_1d_array_i64_clamp: | |||
2913 | return NVPTXISD::Suld1DArrayI64Clamp; | |||
2914 | case Intrinsic::nvvm_suld_1d_array_v2i8_clamp: | |||
2915 | return NVPTXISD::Suld1DArrayV2I8Clamp; | |||
2916 | case Intrinsic::nvvm_suld_1d_array_v2i16_clamp: | |||
2917 | return NVPTXISD::Suld1DArrayV2I16Clamp; | |||
2918 | case Intrinsic::nvvm_suld_1d_array_v2i32_clamp: | |||
2919 | return NVPTXISD::Suld1DArrayV2I32Clamp; | |||
2920 | case Intrinsic::nvvm_suld_1d_array_v2i64_clamp: | |||
2921 | return NVPTXISD::Suld1DArrayV2I64Clamp; | |||
2922 | case Intrinsic::nvvm_suld_1d_array_v4i8_clamp: | |||
2923 | return NVPTXISD::Suld1DArrayV4I8Clamp; | |||
2924 | case Intrinsic::nvvm_suld_1d_array_v4i16_clamp: | |||
2925 | return NVPTXISD::Suld1DArrayV4I16Clamp; | |||
2926 | case Intrinsic::nvvm_suld_1d_array_v4i32_clamp: | |||
2927 | return NVPTXISD::Suld1DArrayV4I32Clamp; | |||
2928 | case Intrinsic::nvvm_suld_2d_i8_clamp: | |||
2929 | return NVPTXISD::Suld2DI8Clamp; | |||
2930 | case Intrinsic::nvvm_suld_2d_i16_clamp: | |||
2931 | return NVPTXISD::Suld2DI16Clamp; | |||
2932 | case Intrinsic::nvvm_suld_2d_i32_clamp: | |||
2933 | return NVPTXISD::Suld2DI32Clamp; | |||
2934 | case Intrinsic::nvvm_suld_2d_i64_clamp: | |||
2935 | return NVPTXISD::Suld2DI64Clamp; | |||
2936 | case Intrinsic::nvvm_suld_2d_v2i8_clamp: | |||
2937 | return NVPTXISD::Suld2DV2I8Clamp; | |||
2938 | case Intrinsic::nvvm_suld_2d_v2i16_clamp: | |||
2939 | return NVPTXISD::Suld2DV2I16Clamp; | |||
2940 | case Intrinsic::nvvm_suld_2d_v2i32_clamp: | |||
2941 | return NVPTXISD::Suld2DV2I32Clamp; | |||
2942 | case Intrinsic::nvvm_suld_2d_v2i64_clamp: | |||
2943 | return NVPTXISD::Suld2DV2I64Clamp; | |||
2944 | case Intrinsic::nvvm_suld_2d_v4i8_clamp: | |||
2945 | return NVPTXISD::Suld2DV4I8Clamp; | |||
2946 | case Intrinsic::nvvm_suld_2d_v4i16_clamp: | |||
2947 | return NVPTXISD::Suld2DV4I16Clamp; | |||
2948 | case Intrinsic::nvvm_suld_2d_v4i32_clamp: | |||
2949 | return NVPTXISD::Suld2DV4I32Clamp; | |||
2950 | case Intrinsic::nvvm_suld_2d_array_i8_clamp: | |||
2951 | return NVPTXISD::Suld2DArrayI8Clamp; | |||
2952 | case Intrinsic::nvvm_suld_2d_array_i16_clamp: | |||
2953 | return NVPTXISD::Suld2DArrayI16Clamp; | |||
2954 | case Intrinsic::nvvm_suld_2d_array_i32_clamp: | |||
2955 | return NVPTXISD::Suld2DArrayI32Clamp; | |||
2956 | case Intrinsic::nvvm_suld_2d_array_i64_clamp: | |||
2957 | return NVPTXISD::Suld2DArrayI64Clamp; | |||
2958 | case Intrinsic::nvvm_suld_2d_array_v2i8_clamp: | |||
2959 | return NVPTXISD::Suld2DArrayV2I8Clamp; | |||
2960 | case Intrinsic::nvvm_suld_2d_array_v2i16_clamp: | |||
2961 | return NVPTXISD::Suld2DArrayV2I16Clamp; | |||
2962 | case Intrinsic::nvvm_suld_2d_array_v2i32_clamp: | |||
2963 | return NVPTXISD::Suld2DArrayV2I32Clamp; | |||
2964 | case Intrinsic::nvvm_suld_2d_array_v2i64_clamp: | |||
2965 | return NVPTXISD::Suld2DArrayV2I64Clamp; | |||
2966 | case Intrinsic::nvvm_suld_2d_array_v4i8_clamp: | |||
2967 | return NVPTXISD::Suld2DArrayV4I8Clamp; | |||
2968 | case Intrinsic::nvvm_suld_2d_array_v4i16_clamp: | |||
2969 | return NVPTXISD::Suld2DArrayV4I16Clamp; | |||
2970 | case Intrinsic::nvvm_suld_2d_array_v4i32_clamp: | |||
2971 | return NVPTXISD::Suld2DArrayV4I32Clamp; | |||
2972 | case Intrinsic::nvvm_suld_3d_i8_clamp: | |||
2973 | return NVPTXISD::Suld3DI8Clamp; | |||
2974 | case Intrinsic::nvvm_suld_3d_i16_clamp: | |||
2975 | return NVPTXISD::Suld3DI16Clamp; | |||
2976 | case Intrinsic::nvvm_suld_3d_i32_clamp: | |||
2977 | return NVPTXISD::Suld3DI32Clamp; | |||
2978 | case Intrinsic::nvvm_suld_3d_i64_clamp: | |||
2979 | return NVPTXISD::Suld3DI64Clamp; | |||
2980 | case Intrinsic::nvvm_suld_3d_v2i8_clamp: | |||
2981 | return NVPTXISD::Suld3DV2I8Clamp; | |||
2982 | case Intrinsic::nvvm_suld_3d_v2i16_clamp: | |||
2983 | return NVPTXISD::Suld3DV2I16Clamp; | |||
2984 | case Intrinsic::nvvm_suld_3d_v2i32_clamp: | |||
2985 | return NVPTXISD::Suld3DV2I32Clamp; | |||
2986 | case Intrinsic::nvvm_suld_3d_v2i64_clamp: | |||
2987 | return NVPTXISD::Suld3DV2I64Clamp; | |||
2988 | case Intrinsic::nvvm_suld_3d_v4i8_clamp: | |||
2989 | return NVPTXISD::Suld3DV4I8Clamp; | |||
2990 | case Intrinsic::nvvm_suld_3d_v4i16_clamp: | |||
2991 | return NVPTXISD::Suld3DV4I16Clamp; | |||
2992 | case Intrinsic::nvvm_suld_3d_v4i32_clamp: | |||
2993 | return NVPTXISD::Suld3DV4I32Clamp; | |||
2994 | case Intrinsic::nvvm_suld_1d_i8_trap: | |||
2995 | return NVPTXISD::Suld1DI8Trap; | |||
2996 | case Intrinsic::nvvm_suld_1d_i16_trap: | |||
2997 | return NVPTXISD::Suld1DI16Trap; | |||
2998 | case Intrinsic::nvvm_suld_1d_i32_trap: | |||
2999 | return NVPTXISD::Suld1DI32Trap; | |||
3000 | case Intrinsic::nvvm_suld_1d_i64_trap: | |||
3001 | return NVPTXISD::Suld1DI64Trap; | |||
3002 | case Intrinsic::nvvm_suld_1d_v2i8_trap: | |||
3003 | return NVPTXISD::Suld1DV2I8Trap; | |||
3004 | case Intrinsic::nvvm_suld_1d_v2i16_trap: | |||
3005 | return NVPTXISD::Suld1DV2I16Trap; | |||
3006 | case Intrinsic::nvvm_suld_1d_v2i32_trap: | |||
3007 | return NVPTXISD::Suld1DV2I32Trap; | |||
3008 | case Intrinsic::nvvm_suld_1d_v2i64_trap: | |||
3009 | return NVPTXISD::Suld1DV2I64Trap; | |||
3010 | case Intrinsic::nvvm_suld_1d_v4i8_trap: | |||
3011 | return NVPTXISD::Suld1DV4I8Trap; | |||
3012 | case Intrinsic::nvvm_suld_1d_v4i16_trap: | |||
3013 | return NVPTXISD::Suld1DV4I16Trap; | |||
3014 | case Intrinsic::nvvm_suld_1d_v4i32_trap: | |||
3015 | return NVPTXISD::Suld1DV4I32Trap; | |||
3016 | case Intrinsic::nvvm_suld_1d_array_i8_trap: | |||
3017 | return NVPTXISD::Suld1DArrayI8Trap; | |||
3018 | case Intrinsic::nvvm_suld_1d_array_i16_trap: | |||
3019 | return NVPTXISD::Suld1DArrayI16Trap; | |||
3020 | case Intrinsic::nvvm_suld_1d_array_i32_trap: | |||
3021 | return NVPTXISD::Suld1DArrayI32Trap; | |||
3022 | case Intrinsic::nvvm_suld_1d_array_i64_trap: | |||
3023 | return NVPTXISD::Suld1DArrayI64Trap; | |||
3024 | case Intrinsic::nvvm_suld_1d_array_v2i8_trap: | |||
3025 | return NVPTXISD::Suld1DArrayV2I8Trap; | |||
3026 | case Intrinsic::nvvm_suld_1d_array_v2i16_trap: | |||
3027 | return NVPTXISD::Suld1DArrayV2I16Trap; | |||
3028 | case Intrinsic::nvvm_suld_1d_array_v2i32_trap: | |||
3029 | return NVPTXISD::Suld1DArrayV2I32Trap; | |||
3030 | case Intrinsic::nvvm_suld_1d_array_v2i64_trap: | |||
3031 | return NVPTXISD::Suld1DArrayV2I64Trap; | |||
3032 | case Intrinsic::nvvm_suld_1d_array_v4i8_trap: | |||
3033 | return NVPTXISD::Suld1DArrayV4I8Trap; | |||
3034 | case Intrinsic::nvvm_suld_1d_array_v4i16_trap: | |||
3035 | return NVPTXISD::Suld1DArrayV4I16Trap; | |||
3036 | case Intrinsic::nvvm_suld_1d_array_v4i32_trap: | |||
3037 | return NVPTXISD::Suld1DArrayV4I32Trap; | |||
3038 | case Intrinsic::nvvm_suld_2d_i8_trap: | |||
3039 | return NVPTXISD::Suld2DI8Trap; | |||
3040 | case Intrinsic::nvvm_suld_2d_i16_trap: | |||
3041 | return NVPTXISD::Suld2DI16Trap; | |||
3042 | case Intrinsic::nvvm_suld_2d_i32_trap: | |||
3043 | return NVPTXISD::Suld2DI32Trap; | |||
3044 | case Intrinsic::nvvm_suld_2d_i64_trap: | |||
3045 | return NVPTXISD::Suld2DI64Trap; | |||
3046 | case Intrinsic::nvvm_suld_2d_v2i8_trap: | |||
3047 | return NVPTXISD::Suld2DV2I8Trap; | |||
3048 | case Intrinsic::nvvm_suld_2d_v2i16_trap: | |||
3049 | return NVPTXISD::Suld2DV2I16Trap; | |||
3050 | case Intrinsic::nvvm_suld_2d_v2i32_trap: | |||
3051 | return NVPTXISD::Suld2DV2I32Trap; | |||
3052 | case Intrinsic::nvvm_suld_2d_v2i64_trap: | |||
3053 | return NVPTXISD::Suld2DV2I64Trap; | |||
3054 | case Intrinsic::nvvm_suld_2d_v4i8_trap: | |||
3055 | return NVPTXISD::Suld2DV4I8Trap; | |||
3056 | case Intrinsic::nvvm_suld_2d_v4i16_trap: | |||
3057 | return NVPTXISD::Suld2DV4I16Trap; | |||
3058 | case Intrinsic::nvvm_suld_2d_v4i32_trap: | |||
3059 | return NVPTXISD::Suld2DV4I32Trap; | |||
3060 | case Intrinsic::nvvm_suld_2d_array_i8_trap: | |||
3061 | return NVPTXISD::Suld2DArrayI8Trap; | |||
3062 | case Intrinsic::nvvm_suld_2d_array_i16_trap: | |||
3063 | return NVPTXISD::Suld2DArrayI16Trap; | |||
3064 | case Intrinsic::nvvm_suld_2d_array_i32_trap: | |||
3065 | return NVPTXISD::Suld2DArrayI32Trap; | |||
3066 | case Intrinsic::nvvm_suld_2d_array_i64_trap: | |||
3067 | return NVPTXISD::Suld2DArrayI64Trap; | |||
3068 | case Intrinsic::nvvm_suld_2d_array_v2i8_trap: | |||
3069 | return NVPTXISD::Suld2DArrayV2I8Trap; | |||
3070 | case Intrinsic::nvvm_suld_2d_array_v2i16_trap: | |||
3071 | return NVPTXISD::Suld2DArrayV2I16Trap; | |||
3072 | case Intrinsic::nvvm_suld_2d_array_v2i32_trap: | |||
3073 | return NVPTXISD::Suld2DArrayV2I32Trap; | |||
3074 | case Intrinsic::nvvm_suld_2d_array_v2i64_trap: | |||
3075 | return NVPTXISD::Suld2DArrayV2I64Trap; | |||
3076 | case Intrinsic::nvvm_suld_2d_array_v4i8_trap: | |||
3077 | return NVPTXISD::Suld2DArrayV4I8Trap; | |||
3078 | case Intrinsic::nvvm_suld_2d_array_v4i16_trap: | |||
3079 | return NVPTXISD::Suld2DArrayV4I16Trap; | |||
3080 | case Intrinsic::nvvm_suld_2d_array_v4i32_trap: | |||
3081 | return NVPTXISD::Suld2DArrayV4I32Trap; | |||
3082 | case Intrinsic::nvvm_suld_3d_i8_trap: | |||
3083 | return NVPTXISD::Suld3DI8Trap; | |||
3084 | case Intrinsic::nvvm_suld_3d_i16_trap: | |||
3085 | return NVPTXISD::Suld3DI16Trap; | |||
3086 | case Intrinsic::nvvm_suld_3d_i32_trap: | |||
3087 | return NVPTXISD::Suld3DI32Trap; | |||
3088 | case Intrinsic::nvvm_suld_3d_i64_trap: | |||
3089 | return NVPTXISD::Suld3DI64Trap; | |||
3090 | case Intrinsic::nvvm_suld_3d_v2i8_trap: | |||
3091 | return NVPTXISD::Suld3DV2I8Trap; | |||
3092 | case Intrinsic::nvvm_suld_3d_v2i16_trap: | |||
3093 | return NVPTXISD::Suld3DV2I16Trap; | |||
3094 | case Intrinsic::nvvm_suld_3d_v2i32_trap: | |||
3095 | return NVPTXISD::Suld3DV2I32Trap; | |||
3096 | case Intrinsic::nvvm_suld_3d_v2i64_trap: | |||
3097 | return NVPTXISD::Suld3DV2I64Trap; | |||
3098 | case Intrinsic::nvvm_suld_3d_v4i8_trap: | |||
3099 | return NVPTXISD::Suld3DV4I8Trap; | |||
3100 | case Intrinsic::nvvm_suld_3d_v4i16_trap: | |||
3101 | return NVPTXISD::Suld3DV4I16Trap; | |||
3102 | case Intrinsic::nvvm_suld_3d_v4i32_trap: | |||
3103 | return NVPTXISD::Suld3DV4I32Trap; | |||
3104 | case Intrinsic::nvvm_suld_1d_i8_zero: | |||
3105 | return NVPTXISD::Suld1DI8Zero; | |||
3106 | case Intrinsic::nvvm_suld_1d_i16_zero: | |||
3107 | return NVPTXISD::Suld1DI16Zero; | |||
3108 | case Intrinsic::nvvm_suld_1d_i32_zero: | |||
3109 | return NVPTXISD::Suld1DI32Zero; | |||
3110 | case Intrinsic::nvvm_suld_1d_i64_zero: | |||
3111 | return NVPTXISD::Suld1DI64Zero; | |||
3112 | case Intrinsic::nvvm_suld_1d_v2i8_zero: | |||
3113 | return NVPTXISD::Suld1DV2I8Zero; | |||
3114 | case Intrinsic::nvvm_suld_1d_v2i16_zero: | |||
3115 | return NVPTXISD::Suld1DV2I16Zero; | |||
3116 | case Intrinsic::nvvm_suld_1d_v2i32_zero: | |||
3117 | return NVPTXISD::Suld1DV2I32Zero; | |||
3118 | case Intrinsic::nvvm_suld_1d_v2i64_zero: | |||
3119 | return NVPTXISD::Suld1DV2I64Zero; | |||
3120 | case Intrinsic::nvvm_suld_1d_v4i8_zero: | |||
3121 | return NVPTXISD::Suld1DV4I8Zero; | |||
3122 | case Intrinsic::nvvm_suld_1d_v4i16_zero: | |||
3123 | return NVPTXISD::Suld1DV4I16Zero; | |||
3124 | case Intrinsic::nvvm_suld_1d_v4i32_zero: | |||
3125 | return NVPTXISD::Suld1DV4I32Zero; | |||
3126 | case Intrinsic::nvvm_suld_1d_array_i8_zero: | |||
3127 | return NVPTXISD::Suld1DArrayI8Zero; | |||
3128 | case Intrinsic::nvvm_suld_1d_array_i16_zero: | |||
3129 | return NVPTXISD::Suld1DArrayI16Zero; | |||
3130 | case Intrinsic::nvvm_suld_1d_array_i32_zero: | |||
3131 | return NVPTXISD::Suld1DArrayI32Zero; | |||
3132 | case Intrinsic::nvvm_suld_1d_array_i64_zero: | |||
3133 | return NVPTXISD::Suld1DArrayI64Zero; | |||
3134 | case Intrinsic::nvvm_suld_1d_array_v2i8_zero: | |||
3135 | return NVPTXISD::Suld1DArrayV2I8Zero; | |||
3136 | case Intrinsic::nvvm_suld_1d_array_v2i16_zero: | |||
3137 | return NVPTXISD::Suld1DArrayV2I16Zero; | |||
3138 | case Intrinsic::nvvm_suld_1d_array_v2i32_zero: | |||
3139 | return NVPTXISD::Suld1DArrayV2I32Zero; | |||
3140 | case Intrinsic::nvvm_suld_1d_array_v2i64_zero: | |||
3141 | return NVPTXISD::Suld1DArrayV2I64Zero; | |||
3142 | case Intrinsic::nvvm_suld_1d_array_v4i8_zero: | |||
3143 | return NVPTXISD::Suld1DArrayV4I8Zero; | |||
3144 | case Intrinsic::nvvm_suld_1d_array_v4i16_zero: | |||
3145 | return NVPTXISD::Suld1DArrayV4I16Zero; | |||
3146 | case Intrinsic::nvvm_suld_1d_array_v4i32_zero: | |||
3147 | return NVPTXISD::Suld1DArrayV4I32Zero; | |||
3148 | case Intrinsic::nvvm_suld_2d_i8_zero: | |||
3149 | return NVPTXISD::Suld2DI8Zero; | |||
3150 | case Intrinsic::nvvm_suld_2d_i16_zero: | |||
3151 | return NVPTXISD::Suld2DI16Zero; | |||
3152 | case Intrinsic::nvvm_suld_2d_i32_zero: | |||
3153 | return NVPTXISD::Suld2DI32Zero; | |||
3154 | case Intrinsic::nvvm_suld_2d_i64_zero: | |||
3155 | return NVPTXISD::Suld2DI64Zero; | |||
3156 | case Intrinsic::nvvm_suld_2d_v2i8_zero: | |||
3157 | return NVPTXISD::Suld2DV2I8Zero; | |||
3158 | case Intrinsic::nvvm_suld_2d_v2i16_zero: | |||
3159 | return NVPTXISD::Suld2DV2I16Zero; | |||
3160 | case Intrinsic::nvvm_suld_2d_v2i32_zero: | |||
3161 | return NVPTXISD::Suld2DV2I32Zero; | |||
3162 | case Intrinsic::nvvm_suld_2d_v2i64_zero: | |||
3163 | return NVPTXISD::Suld2DV2I64Zero; | |||
3164 | case Intrinsic::nvvm_suld_2d_v4i8_zero: | |||
3165 | return NVPTXISD::Suld2DV4I8Zero; | |||
3166 | case Intrinsic::nvvm_suld_2d_v4i16_zero: | |||
3167 | return NVPTXISD::Suld2DV4I16Zero; | |||
3168 | case Intrinsic::nvvm_suld_2d_v4i32_zero: | |||
3169 | return NVPTXISD::Suld2DV4I32Zero; | |||
3170 | case Intrinsic::nvvm_suld_2d_array_i8_zero: | |||
3171 | return NVPTXISD::Suld2DArrayI8Zero; | |||
3172 | case Intrinsic::nvvm_suld_2d_array_i16_zero: | |||
3173 | return NVPTXISD::Suld2DArrayI16Zero; | |||
3174 | case Intrinsic::nvvm_suld_2d_array_i32_zero: | |||
3175 | return NVPTXISD::Suld2DArrayI32Zero; | |||
3176 | case Intrinsic::nvvm_suld_2d_array_i64_zero: | |||
3177 | return NVPTXISD::Suld2DArrayI64Zero; | |||
3178 | case Intrinsic::nvvm_suld_2d_array_v2i8_zero: | |||
3179 | return NVPTXISD::Suld2DArrayV2I8Zero; | |||
3180 | case Intrinsic::nvvm_suld_2d_array_v2i16_zero: | |||
3181 | return NVPTXISD::Suld2DArrayV2I16Zero; | |||
3182 | case Intrinsic::nvvm_suld_2d_array_v2i32_zero: | |||
3183 | return NVPTXISD::Suld2DArrayV2I32Zero; | |||
3184 | case Intrinsic::nvvm_suld_2d_array_v2i64_zero: | |||
3185 | return NVPTXISD::Suld2DArrayV2I64Zero; | |||
3186 | case Intrinsic::nvvm_suld_2d_array_v4i8_zero: | |||
3187 | return NVPTXISD::Suld2DArrayV4I8Zero; | |||
3188 | case Intrinsic::nvvm_suld_2d_array_v4i16_zero: | |||
3189 | return NVPTXISD::Suld2DArrayV4I16Zero; | |||
3190 | case Intrinsic::nvvm_suld_2d_array_v4i32_zero: | |||
3191 | return NVPTXISD::Suld2DArrayV4I32Zero; | |||
3192 | case Intrinsic::nvvm_suld_3d_i8_zero: | |||
3193 | return NVPTXISD::Suld3DI8Zero; | |||
3194 | case Intrinsic::nvvm_suld_3d_i16_zero: | |||
3195 | return NVPTXISD::Suld3DI16Zero; | |||
3196 | case Intrinsic::nvvm_suld_3d_i32_zero: | |||
3197 | return NVPTXISD::Suld3DI32Zero; | |||
3198 | case Intrinsic::nvvm_suld_3d_i64_zero: | |||
3199 | return NVPTXISD::Suld3DI64Zero; | |||
3200 | case Intrinsic::nvvm_suld_3d_v2i8_zero: | |||
3201 | return NVPTXISD::Suld3DV2I8Zero; | |||
3202 | case Intrinsic::nvvm_suld_3d_v2i16_zero: | |||
3203 | return NVPTXISD::Suld3DV2I16Zero; | |||
3204 | case Intrinsic::nvvm_suld_3d_v2i32_zero: | |||
3205 | return NVPTXISD::Suld3DV2I32Zero; | |||
3206 | case Intrinsic::nvvm_suld_3d_v2i64_zero: | |||
3207 | return NVPTXISD::Suld3DV2I64Zero; | |||
3208 | case Intrinsic::nvvm_suld_3d_v4i8_zero: | |||
3209 | return NVPTXISD::Suld3DV4I8Zero; | |||
3210 | case Intrinsic::nvvm_suld_3d_v4i16_zero: | |||
3211 | return NVPTXISD::Suld3DV4I16Zero; | |||
3212 | case Intrinsic::nvvm_suld_3d_v4i32_zero: | |||
3213 | return NVPTXISD::Suld3DV4I32Zero; | |||
3214 | } | |||
3215 | } | |||
3216 | ||||
3217 | // llvm.ptx.memcpy.const and llvm.ptx.memmove.const need to be modeled as | |||
3218 | // TgtMemIntrinsic | |||
3219 | // because we need the information that is only available in the "Value" type | |||
3220 | // of destination | |||
3221 | // pointer. In particular, the address space information. | |||
3222 | bool NVPTXTargetLowering::getTgtMemIntrinsic( | |||
3223 | IntrinsicInfo &Info, const CallInst &I, unsigned Intrinsic) const { | |||
3224 | switch (Intrinsic) { | |||
3225 | default: | |||
3226 | return false; | |||
3227 | ||||
3228 | case Intrinsic::nvvm_atomic_load_add_f32: | |||
3229 | Info.opc = ISD::INTRINSIC_W_CHAIN; | |||
3230 | Info.memVT = MVT::f32; | |||
3231 | Info.ptrVal = I.getArgOperand(0); | |||
3232 | Info.offset = 0; | |||
3233 | Info.vol = 0; | |||
3234 | Info.readMem = true; | |||
3235 | Info.writeMem = true; | |||
3236 | Info.align = 0; | |||
3237 | return true; | |||
3238 | ||||
3239 | case Intrinsic::nvvm_atomic_load_inc_32: | |||
3240 | case Intrinsic::nvvm_atomic_load_dec_32: | |||
3241 | Info.opc = ISD::INTRINSIC_W_CHAIN; | |||
3242 | Info.memVT = MVT::i32; | |||
3243 | Info.ptrVal = I.getArgOperand(0); | |||
3244 | Info.offset = 0; | |||
3245 | Info.vol = 0; | |||
3246 | Info.readMem = true; | |||
3247 | Info.writeMem = true; | |||
3248 | Info.align = 0; | |||
3249 | return true; | |||
3250 | ||||
3251 | case Intrinsic::nvvm_ldu_global_i: | |||
3252 | case Intrinsic::nvvm_ldu_global_f: | |||
3253 | case Intrinsic::nvvm_ldu_global_p: { | |||
3254 | ||||
3255 | Info.opc = ISD::INTRINSIC_W_CHAIN; | |||
3256 | if (Intrinsic == Intrinsic::nvvm_ldu_global_i) | |||
3257 | Info.memVT = getValueType(I.getType()); | |||
3258 | else if(Intrinsic == Intrinsic::nvvm_ldu_global_p) | |||
3259 | Info.memVT = getPointerTy(); | |||
3260 | else | |||
3261 | Info.memVT = getValueType(I.getType()); | |||
3262 | Info.ptrVal = I.getArgOperand(0); | |||
3263 | Info.offset = 0; | |||
3264 | Info.vol = 0; | |||
3265 | Info.readMem = true; | |||
3266 | Info.writeMem = false; | |||
3267 | Info.align = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue(); | |||
3268 | ||||
3269 | return true; | |||
3270 | } | |||
3271 | case Intrinsic::nvvm_ldg_global_i: | |||
3272 | case Intrinsic::nvvm_ldg_global_f: | |||
3273 | case Intrinsic::nvvm_ldg_global_p: { | |||
3274 | ||||
3275 | Info.opc = ISD::INTRINSIC_W_CHAIN; | |||
3276 | if (Intrinsic == Intrinsic::nvvm_ldg_global_i) | |||
3277 | Info.memVT = getValueType(I.getType()); | |||
3278 | else if(Intrinsic == Intrinsic::nvvm_ldg_global_p) | |||
3279 | Info.memVT = getPointerTy(); | |||
3280 | else | |||
3281 | Info.memVT = getValueType(I.getType()); | |||
3282 | Info.ptrVal = I.getArgOperand(0); | |||
3283 | Info.offset = 0; | |||
3284 | Info.vol = 0; | |||
3285 | Info.readMem = true; | |||
3286 | Info.writeMem = false; | |||
3287 | Info.align = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue(); | |||
3288 | ||||
3289 | return true; | |||
3290 | } | |||
3291 | ||||
3292 | case Intrinsic::nvvm_tex_1d_v4f32_s32: | |||
3293 | case Intrinsic::nvvm_tex_1d_v4f32_f32: | |||
3294 | case Intrinsic::nvvm_tex_1d_level_v4f32_f32: | |||
3295 | case Intrinsic::nvvm_tex_1d_grad_v4f32_f32: | |||
3296 | case Intrinsic::nvvm_tex_1d_array_v4f32_s32: | |||
3297 | case Intrinsic::nvvm_tex_1d_array_v4f32_f32: | |||
3298 | case Intrinsic::nvvm_tex_1d_array_level_v4f32_f32: | |||
3299 | case Intrinsic::nvvm_tex_1d_array_grad_v4f32_f32: | |||
3300 | case Intrinsic::nvvm_tex_2d_v4f32_s32: | |||
3301 | case Intrinsic::nvvm_tex_2d_v4f32_f32: | |||
3302 | case Intrinsic::nvvm_tex_2d_level_v4f32_f32: | |||
3303 | case Intrinsic::nvvm_tex_2d_grad_v4f32_f32: | |||
3304 | case Intrinsic::nvvm_tex_2d_array_v4f32_s32: | |||
3305 | case Intrinsic::nvvm_tex_2d_array_v4f32_f32: | |||
3306 | case Intrinsic::nvvm_tex_2d_array_level_v4f32_f32: | |||
3307 | case Intrinsic::nvvm_tex_2d_array_grad_v4f32_f32: | |||
3308 | case Intrinsic::nvvm_tex_3d_v4f32_s32: | |||
3309 | case Intrinsic::nvvm_tex_3d_v4f32_f32: | |||
3310 | case Intrinsic::nvvm_tex_3d_level_v4f32_f32: | |||
3311 | case Intrinsic::nvvm_tex_3d_grad_v4f32_f32: | |||
3312 | case Intrinsic::nvvm_tex_cube_v4f32_f32: | |||
3313 | case Intrinsic::nvvm_tex_cube_level_v4f32_f32: | |||
3314 | case Intrinsic::nvvm_tex_cube_array_v4f32_f32: | |||
3315 | case Intrinsic::nvvm_tex_cube_array_level_v4f32_f32: | |||
3316 | case Intrinsic::nvvm_tld4_r_2d_v4f32_f32: | |||
3317 | case Intrinsic::nvvm_tld4_g_2d_v4f32_f32: | |||
3318 | case Intrinsic::nvvm_tld4_b_2d_v4f32_f32: | |||
3319 | case Intrinsic::nvvm_tld4_a_2d_v4f32_f32: | |||
3320 | case Intrinsic::nvvm_tex_unified_1d_v4f32_s32: | |||
3321 | case Intrinsic::nvvm_tex_unified_1d_v4f32_f32: | |||
3322 | case Intrinsic::nvvm_tex_unified_1d_level_v4f32_f32: | |||
3323 | case Intrinsic::nvvm_tex_unified_1d_grad_v4f32_f32: | |||
3324 | case Intrinsic::nvvm_tex_unified_1d_array_v4f32_s32: | |||
3325 | case Intrinsic::nvvm_tex_unified_1d_array_v4f32_f32: | |||
3326 | case Intrinsic::nvvm_tex_unified_1d_array_level_v4f32_f32: | |||
3327 | case Intrinsic::nvvm_tex_unified_1d_array_grad_v4f32_f32: | |||
3328 | case Intrinsic::nvvm_tex_unified_2d_v4f32_s32: | |||
3329 | case Intrinsic::nvvm_tex_unified_2d_v4f32_f32: | |||
3330 | case Intrinsic::nvvm_tex_unified_2d_level_v4f32_f32: | |||
3331 | case Intrinsic::nvvm_tex_unified_2d_grad_v4f32_f32: | |||
3332 | case Intrinsic::nvvm_tex_unified_2d_array_v4f32_s32: | |||
3333 | case Intrinsic::nvvm_tex_unified_2d_array_v4f32_f32: | |||
3334 | case Intrinsic::nvvm_tex_unified_2d_array_level_v4f32_f32: | |||
3335 | case Intrinsic::nvvm_tex_unified_2d_array_grad_v4f32_f32: | |||
3336 | case Intrinsic::nvvm_tex_unified_3d_v4f32_s32: | |||
3337 | case Intrinsic::nvvm_tex_unified_3d_v4f32_f32: | |||
3338 | case Intrinsic::nvvm_tex_unified_3d_level_v4f32_f32: | |||
3339 | case Intrinsic::nvvm_tex_unified_3d_grad_v4f32_f32: | |||
3340 | case Intrinsic::nvvm_tex_unified_cube_v4f32_f32: | |||
3341 | case Intrinsic::nvvm_tex_unified_cube_level_v4f32_f32: | |||
3342 | case Intrinsic::nvvm_tex_unified_cube_array_v4f32_f32: | |||
3343 | case Intrinsic::nvvm_tex_unified_cube_array_level_v4f32_f32: | |||
3344 | case Intrinsic::nvvm_tld4_unified_r_2d_v4f32_f32: | |||
3345 | case Intrinsic::nvvm_tld4_unified_g_2d_v4f32_f32: | |||
3346 | case Intrinsic::nvvm_tld4_unified_b_2d_v4f32_f32: | |||
3347 | case Intrinsic::nvvm_tld4_unified_a_2d_v4f32_f32: { | |||
3348 | Info.opc = getOpcForTextureInstr(Intrinsic); | |||
3349 | Info.memVT = MVT::v4f32; | |||
3350 | Info.ptrVal = nullptr; | |||
3351 | Info.offset = 0; | |||
3352 | Info.vol = 0; | |||
3353 | Info.readMem = true; | |||
3354 | Info.writeMem = false; | |||
3355 | Info.align = 16; | |||
3356 | return true; | |||
3357 | } | |||
3358 | case Intrinsic::nvvm_tex_1d_v4s32_s32: | |||
3359 | case Intrinsic::nvvm_tex_1d_v4s32_f32: | |||
3360 | case Intrinsic::nvvm_tex_1d_level_v4s32_f32: | |||
3361 | case Intrinsic::nvvm_tex_1d_grad_v4s32_f32: | |||
3362 | case Intrinsic::nvvm_tex_1d_array_v4s32_s32: | |||
3363 | case Intrinsic::nvvm_tex_1d_array_v4s32_f32: | |||
3364 | case Intrinsic::nvvm_tex_1d_array_level_v4s32_f32: | |||
3365 | case Intrinsic::nvvm_tex_1d_array_grad_v4s32_f32: | |||
3366 | case Intrinsic::nvvm_tex_2d_v4s32_s32: | |||
3367 | case Intrinsic::nvvm_tex_2d_v4s32_f32: | |||
3368 | case Intrinsic::nvvm_tex_2d_level_v4s32_f32: | |||
3369 | case Intrinsic::nvvm_tex_2d_grad_v4s32_f32: | |||
3370 | case Intrinsic::nvvm_tex_2d_array_v4s32_s32: | |||
3371 | case Intrinsic::nvvm_tex_2d_array_v4s32_f32: | |||
3372 | case Intrinsic::nvvm_tex_2d_array_level_v4s32_f32: | |||
3373 | case Intrinsic::nvvm_tex_2d_array_grad_v4s32_f32: | |||
3374 | case Intrinsic::nvvm_tex_3d_v4s32_s32: | |||
3375 | case Intrinsic::nvvm_tex_3d_v4s32_f32: | |||
3376 | case Intrinsic::nvvm_tex_3d_level_v4s32_f32: | |||
3377 | case Intrinsic::nvvm_tex_3d_grad_v4s32_f32: | |||
3378 | case Intrinsic::nvvm_tex_cube_v4s32_f32: | |||
3379 | case Intrinsic::nvvm_tex_cube_level_v4s32_f32: | |||
3380 | case Intrinsic::nvvm_tex_cube_array_v4s32_f32: | |||
3381 | case Intrinsic::nvvm_tex_cube_array_level_v4s32_f32: | |||
3382 | case Intrinsic::nvvm_tex_cube_v4u32_f32: | |||
3383 | case Intrinsic::nvvm_tex_cube_level_v4u32_f32: | |||
3384 | case Intrinsic::nvvm_tex_cube_array_v4u32_f32: | |||
3385 | case Intrinsic::nvvm_tex_cube_array_level_v4u32_f32: | |||
3386 | case Intrinsic::nvvm_tex_1d_v4u32_s32: | |||
3387 | case Intrinsic::nvvm_tex_1d_v4u32_f32: | |||
3388 | case Intrinsic::nvvm_tex_1d_level_v4u32_f32: | |||
3389 | case Intrinsic::nvvm_tex_1d_grad_v4u32_f32: | |||
3390 | case Intrinsic::nvvm_tex_1d_array_v4u32_s32: | |||
3391 | case Intrinsic::nvvm_tex_1d_array_v4u32_f32: | |||
3392 | case Intrinsic::nvvm_tex_1d_array_level_v4u32_f32: | |||
3393 | case Intrinsic::nvvm_tex_1d_array_grad_v4u32_f32: | |||
3394 | case Intrinsic::nvvm_tex_2d_v4u32_s32: | |||
3395 | case Intrinsic::nvvm_tex_2d_v4u32_f32: | |||
3396 | case Intrinsic::nvvm_tex_2d_level_v4u32_f32: | |||
3397 | case Intrinsic::nvvm_tex_2d_grad_v4u32_f32: | |||
3398 | case Intrinsic::nvvm_tex_2d_array_v4u32_s32: | |||
3399 | case Intrinsic::nvvm_tex_2d_array_v4u32_f32: | |||
3400 | case Intrinsic::nvvm_tex_2d_array_level_v4u32_f32: | |||
3401 | case Intrinsic::nvvm_tex_2d_array_grad_v4u32_f32: | |||
3402 | case Intrinsic::nvvm_tex_3d_v4u32_s32: | |||
3403 | case Intrinsic::nvvm_tex_3d_v4u32_f32: | |||
3404 | case Intrinsic::nvvm_tex_3d_level_v4u32_f32: | |||
3405 | case Intrinsic::nvvm_tex_3d_grad_v4u32_f32: | |||
3406 | case Intrinsic::nvvm_tld4_r_2d_v4s32_f32: | |||
3407 | case Intrinsic::nvvm_tld4_g_2d_v4s32_f32: | |||
3408 | case Intrinsic::nvvm_tld4_b_2d_v4s32_f32: | |||
3409 | case Intrinsic::nvvm_tld4_a_2d_v4s32_f32: | |||
3410 | case Intrinsic::nvvm_tld4_r_2d_v4u32_f32: | |||
3411 | case Intrinsic::nvvm_tld4_g_2d_v4u32_f32: | |||
3412 | case Intrinsic::nvvm_tld4_b_2d_v4u32_f32: | |||
3413 | case Intrinsic::nvvm_tld4_a_2d_v4u32_f32: | |||
3414 | case Intrinsic::nvvm_tex_unified_1d_v4s32_s32: | |||
3415 | case Intrinsic::nvvm_tex_unified_1d_v4s32_f32: | |||
3416 | case Intrinsic::nvvm_tex_unified_1d_level_v4s32_f32: | |||
3417 | case Intrinsic::nvvm_tex_unified_1d_grad_v4s32_f32: | |||
3418 | case Intrinsic::nvvm_tex_unified_1d_array_v4s32_s32: | |||
3419 | case Intrinsic::nvvm_tex_unified_1d_array_v4s32_f32: | |||
3420 | case Intrinsic::nvvm_tex_unified_1d_array_level_v4s32_f32: | |||
3421 | case Intrinsic::nvvm_tex_unified_1d_array_grad_v4s32_f32: | |||
3422 | case Intrinsic::nvvm_tex_unified_2d_v4s32_s32: | |||
3423 | case Intrinsic::nvvm_tex_unified_2d_v4s32_f32: | |||
3424 | case Intrinsic::nvvm_tex_unified_2d_level_v4s32_f32: | |||
3425 | case Intrinsic::nvvm_tex_unified_2d_grad_v4s32_f32: | |||
3426 | case Intrinsic::nvvm_tex_unified_2d_array_v4s32_s32: | |||
3427 | case Intrinsic::nvvm_tex_unified_2d_array_v4s32_f32: | |||
3428 | case Intrinsic::nvvm_tex_unified_2d_array_level_v4s32_f32: | |||
3429 | case Intrinsic::nvvm_tex_unified_2d_array_grad_v4s32_f32: | |||
3430 | case Intrinsic::nvvm_tex_unified_3d_v4s32_s32: | |||
3431 | case Intrinsic::nvvm_tex_unified_3d_v4s32_f32: | |||
3432 | case Intrinsic::nvvm_tex_unified_3d_level_v4s32_f32: | |||
3433 | case Intrinsic::nvvm_tex_unified_3d_grad_v4s32_f32: | |||
3434 | case Intrinsic::nvvm_tex_unified_1d_v4u32_s32: | |||
3435 | case Intrinsic::nvvm_tex_unified_1d_v4u32_f32: | |||
3436 | case Intrinsic::nvvm_tex_unified_1d_level_v4u32_f32: | |||
3437 | case Intrinsic::nvvm_tex_unified_1d_grad_v4u32_f32: | |||
3438 | case Intrinsic::nvvm_tex_unified_1d_array_v4u32_s32: | |||
3439 | case Intrinsic::nvvm_tex_unified_1d_array_v4u32_f32: | |||
3440 | case Intrinsic::nvvm_tex_unified_1d_array_level_v4u32_f32: | |||
3441 | case Intrinsic::nvvm_tex_unified_1d_array_grad_v4u32_f32: | |||
3442 | case Intrinsic::nvvm_tex_unified_2d_v4u32_s32: | |||
3443 | case Intrinsic::nvvm_tex_unified_2d_v4u32_f32: | |||
3444 | case Intrinsic::nvvm_tex_unified_2d_level_v4u32_f32: | |||
3445 | case Intrinsic::nvvm_tex_unified_2d_grad_v4u32_f32: | |||
3446 | case Intrinsic::nvvm_tex_unified_2d_array_v4u32_s32: | |||
3447 | case Intrinsic::nvvm_tex_unified_2d_array_v4u32_f32: | |||
3448 | case Intrinsic::nvvm_tex_unified_2d_array_level_v4u32_f32: | |||
3449 | case Intrinsic::nvvm_tex_unified_2d_array_grad_v4u32_f32: | |||
3450 | case Intrinsic::nvvm_tex_unified_3d_v4u32_s32: | |||
3451 | case Intrinsic::nvvm_tex_unified_3d_v4u32_f32: | |||
3452 | case Intrinsic::nvvm_tex_unified_3d_level_v4u32_f32: | |||
3453 | case Intrinsic::nvvm_tex_unified_3d_grad_v4u32_f32: | |||
3454 | case Intrinsic::nvvm_tex_unified_cube_v4s32_f32: | |||
3455 | case Intrinsic::nvvm_tex_unified_cube_level_v4s32_f32: | |||
3456 | case Intrinsic::nvvm_tex_unified_cube_array_v4s32_f32: | |||
3457 | case Intrinsic::nvvm_tex_unified_cube_array_level_v4s32_f32: | |||
3458 | case Intrinsic::nvvm_tex_unified_cube_v4u32_f32: | |||
3459 | case Intrinsic::nvvm_tex_unified_cube_level_v4u32_f32: | |||
3460 | case Intrinsic::nvvm_tex_unified_cube_array_v4u32_f32: | |||
3461 | case Intrinsic::nvvm_tex_unified_cube_array_level_v4u32_f32: | |||
3462 | case Intrinsic::nvvm_tld4_unified_r_2d_v4s32_f32: | |||
3463 | case Intrinsic::nvvm_tld4_unified_g_2d_v4s32_f32: | |||
3464 | case Intrinsic::nvvm_tld4_unified_b_2d_v4s32_f32: | |||
3465 | case Intrinsic::nvvm_tld4_unified_a_2d_v4s32_f32: | |||
3466 | case Intrinsic::nvvm_tld4_unified_r_2d_v4u32_f32: | |||
3467 | case Intrinsic::nvvm_tld4_unified_g_2d_v4u32_f32: | |||
3468 | case Intrinsic::nvvm_tld4_unified_b_2d_v4u32_f32: | |||
3469 | case Intrinsic::nvvm_tld4_unified_a_2d_v4u32_f32: { | |||
3470 | Info.opc = getOpcForTextureInstr(Intrinsic); | |||
3471 | Info.memVT = MVT::v4i32; | |||
3472 | Info.ptrVal = nullptr; | |||
3473 | Info.offset = 0; | |||
3474 | Info.vol = 0; | |||
3475 | Info.readMem = true; | |||
3476 | Info.writeMem = false; | |||
3477 | Info.align = 16; | |||
3478 | return true; | |||
3479 | } | |||
3480 | case Intrinsic::nvvm_suld_1d_i8_clamp: | |||
3481 | case Intrinsic::nvvm_suld_1d_v2i8_clamp: | |||
3482 | case Intrinsic::nvvm_suld_1d_v4i8_clamp: | |||
3483 | case Intrinsic::nvvm_suld_1d_array_i8_clamp: | |||
3484 | case Intrinsic::nvvm_suld_1d_array_v2i8_clamp: | |||
3485 | case Intrinsic::nvvm_suld_1d_array_v4i8_clamp: | |||
3486 | case Intrinsic::nvvm_suld_2d_i8_clamp: | |||
3487 | case Intrinsic::nvvm_suld_2d_v2i8_clamp: | |||
3488 | case Intrinsic::nvvm_suld_2d_v4i8_clamp: | |||
3489 | case Intrinsic::nvvm_suld_2d_array_i8_clamp: | |||
3490 | case Intrinsic::nvvm_suld_2d_array_v2i8_clamp: | |||
3491 | case Intrinsic::nvvm_suld_2d_array_v4i8_clamp: | |||
3492 | case Intrinsic::nvvm_suld_3d_i8_clamp: | |||
3493 | case Intrinsic::nvvm_suld_3d_v2i8_clamp: | |||
3494 | case Intrinsic::nvvm_suld_3d_v4i8_clamp: | |||
3495 | case Intrinsic::nvvm_suld_1d_i8_trap: | |||
3496 | case Intrinsic::nvvm_suld_1d_v2i8_trap: | |||
3497 | case Intrinsic::nvvm_suld_1d_v4i8_trap: | |||
3498 | case Intrinsic::nvvm_suld_1d_array_i8_trap: | |||
3499 | case Intrinsic::nvvm_suld_1d_array_v2i8_trap: | |||
3500 | case Intrinsic::nvvm_suld_1d_array_v4i8_trap: | |||
3501 | case Intrinsic::nvvm_suld_2d_i8_trap: | |||
3502 | case Intrinsic::nvvm_suld_2d_v2i8_trap: | |||
3503 | case Intrinsic::nvvm_suld_2d_v4i8_trap: | |||
3504 | case Intrinsic::nvvm_suld_2d_array_i8_trap: | |||
3505 | case Intrinsic::nvvm_suld_2d_array_v2i8_trap: | |||
3506 | case Intrinsic::nvvm_suld_2d_array_v4i8_trap: | |||
3507 | case Intrinsic::nvvm_suld_3d_i8_trap: | |||
3508 | case Intrinsic::nvvm_suld_3d_v2i8_trap: | |||
3509 | case Intrinsic::nvvm_suld_3d_v4i8_trap: | |||
3510 | case Intrinsic::nvvm_suld_1d_i8_zero: | |||
3511 | case Intrinsic::nvvm_suld_1d_v2i8_zero: | |||
3512 | case Intrinsic::nvvm_suld_1d_v4i8_zero: | |||
3513 | case Intrinsic::nvvm_suld_1d_array_i8_zero: | |||
3514 | case Intrinsic::nvvm_suld_1d_array_v2i8_zero: | |||
3515 | case Intrinsic::nvvm_suld_1d_array_v4i8_zero: | |||
3516 | case Intrinsic::nvvm_suld_2d_i8_zero: | |||
3517 | case Intrinsic::nvvm_suld_2d_v2i8_zero: | |||
3518 | case Intrinsic::nvvm_suld_2d_v4i8_zero: | |||
3519 | case Intrinsic::nvvm_suld_2d_array_i8_zero: | |||
3520 | case Intrinsic::nvvm_suld_2d_array_v2i8_zero: | |||
3521 | case Intrinsic::nvvm_suld_2d_array_v4i8_zero: | |||
3522 | case Intrinsic::nvvm_suld_3d_i8_zero: | |||
3523 | case Intrinsic::nvvm_suld_3d_v2i8_zero: | |||
3524 | case Intrinsic::nvvm_suld_3d_v4i8_zero: { | |||
3525 | Info.opc = getOpcForSurfaceInstr(Intrinsic); | |||
3526 | Info.memVT = MVT::i8; | |||
3527 | Info.ptrVal = nullptr; | |||
3528 | Info.offset = 0; | |||
3529 | Info.vol = 0; | |||
3530 | Info.readMem = true; | |||
3531 | Info.writeMem = false; | |||
3532 | Info.align = 16; | |||
3533 | return true; | |||
3534 | } | |||
3535 | case Intrinsic::nvvm_suld_1d_i16_clamp: | |||
3536 | case Intrinsic::nvvm_suld_1d_v2i16_clamp: | |||
3537 | case Intrinsic::nvvm_suld_1d_v4i16_clamp: | |||
3538 | case Intrinsic::nvvm_suld_1d_array_i16_clamp: | |||
3539 | case Intrinsic::nvvm_suld_1d_array_v2i16_clamp: | |||
3540 | case Intrinsic::nvvm_suld_1d_array_v4i16_clamp: | |||
3541 | case Intrinsic::nvvm_suld_2d_i16_clamp: | |||
3542 | case Intrinsic::nvvm_suld_2d_v2i16_clamp: | |||
3543 | case Intrinsic::nvvm_suld_2d_v4i16_clamp: | |||
3544 | case Intrinsic::nvvm_suld_2d_array_i16_clamp: | |||
3545 | case Intrinsic::nvvm_suld_2d_array_v2i16_clamp: | |||
3546 | case Intrinsic::nvvm_suld_2d_array_v4i16_clamp: | |||
3547 | case Intrinsic::nvvm_suld_3d_i16_clamp: | |||
3548 | case Intrinsic::nvvm_suld_3d_v2i16_clamp: | |||
3549 | case Intrinsic::nvvm_suld_3d_v4i16_clamp: | |||
3550 | case Intrinsic::nvvm_suld_1d_i16_trap: | |||
3551 | case Intrinsic::nvvm_suld_1d_v2i16_trap: | |||
3552 | case Intrinsic::nvvm_suld_1d_v4i16_trap: | |||
3553 | case Intrinsic::nvvm_suld_1d_array_i16_trap: | |||
3554 | case Intrinsic::nvvm_suld_1d_array_v2i16_trap: | |||
3555 | case Intrinsic::nvvm_suld_1d_array_v4i16_trap: | |||
3556 | case Intrinsic::nvvm_suld_2d_i16_trap: | |||
3557 | case Intrinsic::nvvm_suld_2d_v2i16_trap: | |||
3558 | case Intrinsic::nvvm_suld_2d_v4i16_trap: | |||
3559 | case Intrinsic::nvvm_suld_2d_array_i16_trap: | |||
3560 | case Intrinsic::nvvm_suld_2d_array_v2i16_trap: | |||
3561 | case Intrinsic::nvvm_suld_2d_array_v4i16_trap: | |||
3562 | case Intrinsic::nvvm_suld_3d_i16_trap: | |||
3563 | case Intrinsic::nvvm_suld_3d_v2i16_trap: | |||
3564 | case Intrinsic::nvvm_suld_3d_v4i16_trap: | |||
3565 | case Intrinsic::nvvm_suld_1d_i16_zero: | |||
3566 | case Intrinsic::nvvm_suld_1d_v2i16_zero: | |||
3567 | case Intrinsic::nvvm_suld_1d_v4i16_zero: | |||
3568 | case Intrinsic::nvvm_suld_1d_array_i16_zero: | |||
3569 | case Intrinsic::nvvm_suld_1d_array_v2i16_zero: | |||
3570 | case Intrinsic::nvvm_suld_1d_array_v4i16_zero: | |||
3571 | case Intrinsic::nvvm_suld_2d_i16_zero: | |||
3572 | case Intrinsic::nvvm_suld_2d_v2i16_zero: | |||
3573 | case Intrinsic::nvvm_suld_2d_v4i16_zero: | |||
3574 | case Intrinsic::nvvm_suld_2d_array_i16_zero: | |||
3575 | case Intrinsic::nvvm_suld_2d_array_v2i16_zero: | |||
3576 | case Intrinsic::nvvm_suld_2d_array_v4i16_zero: | |||
3577 | case Intrinsic::nvvm_suld_3d_i16_zero: | |||
3578 | case Intrinsic::nvvm_suld_3d_v2i16_zero: | |||
3579 | case Intrinsic::nvvm_suld_3d_v4i16_zero: { | |||
3580 | Info.opc = getOpcForSurfaceInstr(Intrinsic); | |||
3581 | Info.memVT = MVT::i16; | |||
3582 | Info.ptrVal = nullptr; | |||
3583 | Info.offset = 0; | |||
3584 | Info.vol = 0; | |||
3585 | Info.readMem = true; | |||
3586 | Info.writeMem = false; | |||
3587 | Info.align = 16; | |||
3588 | return true; | |||
3589 | } | |||
3590 | case Intrinsic::nvvm_suld_1d_i32_clamp: | |||
3591 | case Intrinsic::nvvm_suld_1d_v2i32_clamp: | |||
3592 | case Intrinsic::nvvm_suld_1d_v4i32_clamp: | |||
3593 | case Intrinsic::nvvm_suld_1d_array_i32_clamp: | |||
3594 | case Intrinsic::nvvm_suld_1d_array_v2i32_clamp: | |||
3595 | case Intrinsic::nvvm_suld_1d_array_v4i32_clamp: | |||
3596 | case Intrinsic::nvvm_suld_2d_i32_clamp: | |||
3597 | case Intrinsic::nvvm_suld_2d_v2i32_clamp: | |||
3598 | case Intrinsic::nvvm_suld_2d_v4i32_clamp: | |||
3599 | case Intrinsic::nvvm_suld_2d_array_i32_clamp: | |||
3600 | case Intrinsic::nvvm_suld_2d_array_v2i32_clamp: | |||
3601 | case Intrinsic::nvvm_suld_2d_array_v4i32_clamp: | |||
3602 | case Intrinsic::nvvm_suld_3d_i32_clamp: | |||
3603 | case Intrinsic::nvvm_suld_3d_v2i32_clamp: | |||
3604 | case Intrinsic::nvvm_suld_3d_v4i32_clamp: | |||
3605 | case Intrinsic::nvvm_suld_1d_i32_trap: | |||
3606 | case Intrinsic::nvvm_suld_1d_v2i32_trap: | |||
3607 | case Intrinsic::nvvm_suld_1d_v4i32_trap: | |||
3608 | case Intrinsic::nvvm_suld_1d_array_i32_trap: | |||
3609 | case Intrinsic::nvvm_suld_1d_array_v2i32_trap: | |||
3610 | case Intrinsic::nvvm_suld_1d_array_v4i32_trap: | |||
3611 | case Intrinsic::nvvm_suld_2d_i32_trap: | |||
3612 | case Intrinsic::nvvm_suld_2d_v2i32_trap: | |||
3613 | case Intrinsic::nvvm_suld_2d_v4i32_trap: | |||
3614 | case Intrinsic::nvvm_suld_2d_array_i32_trap: | |||
3615 | case Intrinsic::nvvm_suld_2d_array_v2i32_trap: | |||
3616 | case Intrinsic::nvvm_suld_2d_array_v4i32_trap: | |||
3617 | case Intrinsic::nvvm_suld_3d_i32_trap: | |||
3618 | case Intrinsic::nvvm_suld_3d_v2i32_trap: | |||
3619 | case Intrinsic::nvvm_suld_3d_v4i32_trap: | |||
3620 | case Intrinsic::nvvm_suld_1d_i32_zero: | |||
3621 | case Intrinsic::nvvm_suld_1d_v2i32_zero: | |||
3622 | case Intrinsic::nvvm_suld_1d_v4i32_zero: | |||
3623 | case Intrinsic::nvvm_suld_1d_array_i32_zero: | |||
3624 | case Intrinsic::nvvm_suld_1d_array_v2i32_zero: | |||
3625 | case Intrinsic::nvvm_suld_1d_array_v4i32_zero: | |||
3626 | case Intrinsic::nvvm_suld_2d_i32_zero: | |||
3627 | case Intrinsic::nvvm_suld_2d_v2i32_zero: | |||
3628 | case Intrinsic::nvvm_suld_2d_v4i32_zero: | |||
3629 | case Intrinsic::nvvm_suld_2d_array_i32_zero: | |||
3630 | case Intrinsic::nvvm_suld_2d_array_v2i32_zero: | |||
3631 | case Intrinsic::nvvm_suld_2d_array_v4i32_zero: | |||
3632 | case Intrinsic::nvvm_suld_3d_i32_zero: | |||
3633 | case Intrinsic::nvvm_suld_3d_v2i32_zero: | |||
3634 | case Intrinsic::nvvm_suld_3d_v4i32_zero: { | |||
3635 | Info.opc = getOpcForSurfaceInstr(Intrinsic); | |||
3636 | Info.memVT = MVT::i32; | |||
3637 | Info.ptrVal = nullptr; | |||
3638 | Info.offset = 0; | |||
3639 | Info.vol = 0; | |||
3640 | Info.readMem = true; | |||
3641 | Info.writeMem = false; | |||
3642 | Info.align = 16; | |||
3643 | return true; | |||
3644 | } | |||
3645 | case Intrinsic::nvvm_suld_1d_i64_clamp: | |||
3646 | case Intrinsic::nvvm_suld_1d_v2i64_clamp: | |||
3647 | case Intrinsic::nvvm_suld_1d_array_i64_clamp: | |||
3648 | case Intrinsic::nvvm_suld_1d_array_v2i64_clamp: | |||
3649 | case Intrinsic::nvvm_suld_2d_i64_clamp: | |||
3650 | case Intrinsic::nvvm_suld_2d_v2i64_clamp: | |||
3651 | case Intrinsic::nvvm_suld_2d_array_i64_clamp: | |||
3652 | case Intrinsic::nvvm_suld_2d_array_v2i64_clamp: | |||
3653 | case Intrinsic::nvvm_suld_3d_i64_clamp: | |||
3654 | case Intrinsic::nvvm_suld_3d_v2i64_clamp: | |||
3655 | case Intrinsic::nvvm_suld_1d_i64_trap: | |||
3656 | case Intrinsic::nvvm_suld_1d_v2i64_trap: | |||
3657 | case Intrinsic::nvvm_suld_1d_array_i64_trap: | |||
3658 | case Intrinsic::nvvm_suld_1d_array_v2i64_trap: | |||
3659 | case Intrinsic::nvvm_suld_2d_i64_trap: | |||
3660 | case Intrinsic::nvvm_suld_2d_v2i64_trap: | |||
3661 | case Intrinsic::nvvm_suld_2d_array_i64_trap: | |||
3662 | case Intrinsic::nvvm_suld_2d_array_v2i64_trap: | |||
3663 | case Intrinsic::nvvm_suld_3d_i64_trap: | |||
3664 | case Intrinsic::nvvm_suld_3d_v2i64_trap: | |||
3665 | case Intrinsic::nvvm_suld_1d_i64_zero: | |||
3666 | case Intrinsic::nvvm_suld_1d_v2i64_zero: | |||
3667 | case Intrinsic::nvvm_suld_1d_array_i64_zero: | |||
3668 | case Intrinsic::nvvm_suld_1d_array_v2i64_zero: | |||
3669 | case Intrinsic::nvvm_suld_2d_i64_zero: | |||
3670 | case Intrinsic::nvvm_suld_2d_v2i64_zero: | |||
3671 | case Intrinsic::nvvm_suld_2d_array_i64_zero: | |||
3672 | case Intrinsic::nvvm_suld_2d_array_v2i64_zero: | |||
3673 | case Intrinsic::nvvm_suld_3d_i64_zero: | |||
3674 | case Intrinsic::nvvm_suld_3d_v2i64_zero: { | |||
3675 | Info.opc = getOpcForSurfaceInstr(Intrinsic); | |||
3676 | Info.memVT = MVT::i64; | |||
3677 | Info.ptrVal = nullptr; | |||
3678 | Info.offset = 0; | |||
3679 | Info.vol = 0; | |||
3680 | Info.readMem = true; | |||
3681 | Info.writeMem = false; | |||
3682 | Info.align = 16; | |||
3683 | return true; | |||
3684 | } | |||
3685 | } | |||
3686 | return false; | |||
3687 | } | |||
3688 | ||||
3689 | /// isLegalAddressingMode - Return true if the addressing mode represented | |||
3690 | /// by AM is legal for this target, for a load/store of the specified type. | |||
3691 | /// Used to guide target specific optimizations, like loop strength reduction | |||
3692 | /// (LoopStrengthReduce.cpp) and memory optimization for address mode | |||
3693 | /// (CodeGenPrepare.cpp) | |||
3694 | bool NVPTXTargetLowering::isLegalAddressingMode(const AddrMode &AM, | |||
3695 | Type *Ty) const { | |||
3696 | ||||
3697 | // AddrMode - This represents an addressing mode of: | |||
3698 | // BaseGV + BaseOffs + BaseReg + Scale*ScaleReg | |||
3699 | // | |||
3700 | // The legal address modes are | |||
3701 | // - [avar] | |||
3702 | // - [areg] | |||
3703 | // - [areg+immoff] | |||
3704 | // - [immAddr] | |||
3705 | ||||
3706 | if (AM.BaseGV) { | |||
3707 | if (AM.BaseOffs || AM.HasBaseReg || AM.Scale) | |||
3708 | return false; | |||
3709 | return true; | |||
3710 | } | |||
3711 | ||||
3712 | switch (AM.Scale) { | |||
3713 | case 0: // "r", "r+i" or "i" is allowed | |||
3714 | break; | |||
3715 | case 1: | |||
3716 | if (AM.HasBaseReg) // "r+r+i" or "r+r" is not allowed. | |||
3717 | return false; | |||
3718 | // Otherwise we have r+i. | |||
3719 | break; | |||
3720 | default: | |||
3721 | // No scale > 1 is allowed | |||
3722 | return false; | |||
3723 | } | |||
3724 | return true; | |||
3725 | } | |||
3726 | ||||
3727 | //===----------------------------------------------------------------------===// | |||
3728 | // NVPTX Inline Assembly Support | |||
3729 | //===----------------------------------------------------------------------===// | |||
3730 | ||||
3731 | /// getConstraintType - Given a constraint letter, return the type of | |||
3732 | /// constraint it is for this target. | |||
3733 | NVPTXTargetLowering::ConstraintType | |||
3734 | NVPTXTargetLowering::getConstraintType(const std::string &Constraint) const { | |||
3735 | if (Constraint.size() == 1) { | |||
3736 | switch (Constraint[0]) { | |||
3737 | default: | |||
3738 | break; | |||
3739 | case 'b': | |||
3740 | case 'r': | |||
3741 | case 'h': | |||
3742 | case 'c': | |||
3743 | case 'l': | |||
3744 | case 'f': | |||
3745 | case 'd': | |||
3746 | case '0': | |||
3747 | case 'N': | |||
3748 | return C_RegisterClass; | |||
3749 | } | |||
3750 | } | |||
3751 | return TargetLowering::getConstraintType(Constraint); | |||
3752 | } | |||
3753 | ||||
3754 | std::pair<unsigned, const TargetRegisterClass *> | |||
3755 | NVPTXTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, | |||
3756 | MVT VT) const { | |||
3757 | if (Constraint.size() == 1) { | |||
3758 | switch (Constraint[0]) { | |||
3759 | case 'b': | |||
3760 | return std::make_pair(0U, &NVPTX::Int1RegsRegClass); | |||
3761 | case 'c': | |||
3762 | return std::make_pair(0U, &NVPTX::Int16RegsRegClass); | |||
3763 | case 'h': | |||
3764 | return std::make_pair(0U, &NVPTX::Int16RegsRegClass); | |||
3765 | case 'r': | |||
3766 | return std::make_pair(0U, &NVPTX::Int32RegsRegClass); | |||
3767 | case 'l': | |||
3768 | case 'N': | |||
3769 | return std::make_pair(0U, &NVPTX::Int64RegsRegClass); | |||
3770 | case 'f': | |||
3771 | return std::make_pair(0U, &NVPTX::Float32RegsRegClass); | |||
3772 | case 'd': | |||
3773 | return std::make_pair(0U, &NVPTX::Float64RegsRegClass); | |||
3774 | } | |||
3775 | } | |||
3776 | return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); | |||
3777 | } | |||
3778 | ||||
3779 | /// getFunctionAlignment - Return the Log2 alignment of this function. | |||
3780 | unsigned NVPTXTargetLowering::getFunctionAlignment(const Function *) const { | |||
3781 | return 4; | |||
3782 | } | |||
3783 | ||||
3784 | //===----------------------------------------------------------------------===// | |||
3785 | // NVPTX DAG Combining | |||
3786 | //===----------------------------------------------------------------------===// | |||
3787 | ||||
3788 | bool NVPTXTargetLowering::allowFMA(MachineFunction &MF, | |||
3789 | CodeGenOpt::Level OptLevel) const { | |||
3790 | const Function *F = MF.getFunction(); | |||
3791 | const TargetOptions &TO = MF.getTarget().Options; | |||
3792 | ||||
3793 | // Always honor command-line argument | |||
3794 | if (FMAContractLevelOpt.getNumOccurrences() > 0) { | |||
3795 | return FMAContractLevelOpt > 0; | |||
3796 | } else if (OptLevel == 0) { | |||
3797 | // Do not contract if we're not optimizing the code | |||
3798 | return false; | |||
3799 | } else if (TO.AllowFPOpFusion == FPOpFusion::Fast || TO.UnsafeFPMath) { | |||
3800 | // Honor TargetOptions flags that explicitly say fusion is okay | |||
3801 | return true; | |||
3802 | } else if (F->hasFnAttribute("unsafe-fp-math")) { | |||
3803 | // Check for unsafe-fp-math=true coming from Clang | |||
3804 | Attribute Attr = F->getFnAttribute("unsafe-fp-math"); | |||
3805 | StringRef Val = Attr.getValueAsString(); | |||
3806 | if (Val == "true") | |||
3807 | return true; | |||
3808 | } | |||
3809 | ||||
3810 | // We did not have a clear indication that fusion is allowed, so assume not | |||
3811 | return false; | |||
3812 | } | |||
3813 | ||||
3814 | /// PerformADDCombineWithOperands - Try DAG combinations for an ADD with | |||
3815 | /// operands N0 and N1. This is a helper for PerformADDCombine that is | |||
3816 | /// called with the default operands, and if that fails, with commuted | |||
3817 | /// operands. | |||
3818 | static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1, | |||
3819 | TargetLowering::DAGCombinerInfo &DCI, | |||
3820 | const NVPTXSubtarget &Subtarget, | |||
3821 | CodeGenOpt::Level OptLevel) { | |||
3822 | SelectionDAG &DAG = DCI.DAG; | |||
3823 | // Skip non-integer, non-scalar case | |||
3824 | EVT VT=N0.getValueType(); | |||
3825 | if (VT.isVector()) | |||
3826 | return SDValue(); | |||
3827 | ||||
3828 | // fold (add (mul a, b), c) -> (mad a, b, c) | |||
3829 | // | |||
3830 | if (N0.getOpcode() == ISD::MUL) { | |||
3831 | assert (VT.isInteger())((VT.isInteger()) ? static_cast<void> (0) : __assert_fail ("VT.isInteger()", "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn220585/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 3831, __PRETTY_FUNCTION__)); | |||
3832 | // For integer: | |||
3833 | // Since integer multiply-add costs the same as integer multiply | |||
3834 | // but is more costly than integer add, do the fusion only when | |||
3835 | // the mul is only used in the add. | |||
3836 | if (OptLevel==CodeGenOpt::None || VT != MVT::i32 || | |||
3837 | !N0.getNode()->hasOneUse()) | |||
3838 | return SDValue(); | |||
3839 | ||||
3840 | // Do the folding | |||
3841 | return DAG.getNode(NVPTXISD::IMAD, SDLoc(N), VT, | |||
3842 | N0.getOperand(0), N0.getOperand(1), N1); | |||
3843 | } | |||
3844 | else if (N0.getOpcode() == ISD::FMUL) { | |||
3845 | if (VT == MVT::f32 || VT == MVT::f64) { | |||
3846 | const auto *TLI = static_cast<const NVPTXTargetLowering *>( | |||
3847 | &DAG.getTargetLoweringInfo()); | |||
3848 | if (!TLI->allowFMA(DAG.getMachineFunction(), OptLevel)) | |||
3849 | return SDValue(); | |||
3850 | ||||
3851 | // For floating point: | |||
3852 | // Do the fusion only when the mul has less than 5 uses and all | |||
3853 | // are add. | |||
3854 | // The heuristic is that if a use is not an add, then that use | |||
3855 | // cannot be fused into fma, therefore mul is still needed anyway. | |||
3856 | // If there are more than 4 uses, even if they are all add, fusing | |||
3857 | // them will increase register pressue. | |||
3858 | // | |||
3859 | int numUses = 0; | |||
3860 | int nonAddCount = 0; | |||
3861 | for (SDNode::use_iterator UI = N0.getNode()->use_begin(), | |||
3862 | UE = N0.getNode()->use_end(); | |||
3863 | UI != UE; ++UI) { | |||
3864 | numUses++; | |||
3865 | SDNode *User = *UI; | |||
3866 | if (User->getOpcode() != ISD::FADD) | |||
3867 | ++nonAddCount; | |||
3868 | } | |||
3869 | if (numUses >= 5) | |||
3870 | return SDValue(); | |||
3871 | if (nonAddCount) { | |||
3872 | int orderNo = N->getIROrder(); | |||
3873 | int orderNo2 = N0.getNode()->getIROrder(); | |||
3874 | // simple heuristics here for considering potential register | |||
3875 | // pressure, the logics here is that the differnce are used | |||
3876 | // to measure the distance between def and use, the longer distance | |||
3877 | // more likely cause register pressure. | |||
3878 | if (orderNo - orderNo2 < 500) | |||
3879 | return SDValue(); | |||
3880 | ||||
3881 | // Now, check if at least one of the FMUL's operands is live beyond the node N, | |||
3882 | // which guarantees that the FMA will not increase register pressure at node N. | |||
3883 | bool opIsLive = false; | |||
3884 | const SDNode *left = N0.getOperand(0).getNode(); | |||
3885 | const SDNode *right = N0.getOperand(1).getNode(); | |||
3886 | ||||
3887 | if (dyn_cast<ConstantSDNode>(left) || dyn_cast<ConstantSDNode>(right)) | |||
3888 | opIsLive = true; | |||
3889 | ||||
3890 | if (!opIsLive) | |||
3891 | for (SDNode::use_iterator UI = left->use_begin(), UE = left->use_end(); UI != UE; ++UI) { | |||
3892 | SDNode *User = *UI; | |||
3893 | int orderNo3 = User->getIROrder(); | |||
3894 | if (orderNo3 > orderNo) { | |||
3895 | opIsLive = true; | |||
3896 | break; | |||
3897 | } | |||
3898 | } | |||
3899 | ||||
3900 | if (!opIsLive) | |||
3901 | for (SDNode::use_iterator UI = right->use_begin(), UE = right->use_end(); UI != UE; ++UI) { | |||
3902 | SDNode *User = *UI; | |||
3903 | int orderNo3 = User->getIROrder(); | |||
3904 | if (orderNo3 > orderNo) { | |||
3905 | opIsLive = true; | |||
3906 | break; | |||
3907 | } | |||
3908 | } | |||
3909 | ||||
3910 | if (!opIsLive) | |||
3911 | return SDValue(); | |||
3912 | } | |||
3913 | ||||
3914 | return DAG.getNode(ISD::FMA, SDLoc(N), VT, | |||
3915 | N0.getOperand(0), N0.getOperand(1), N1); | |||
3916 | } | |||
3917 | } | |||
3918 | ||||
3919 | return SDValue(); | |||
3920 | } | |||
3921 | ||||
3922 | /// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD. | |||
3923 | /// | |||
3924 | static SDValue PerformADDCombine(SDNode *N, | |||
3925 | TargetLowering::DAGCombinerInfo &DCI, | |||
3926 | const NVPTXSubtarget &Subtarget, | |||
3927 | CodeGenOpt::Level OptLevel) { | |||
3928 | SDValue N0 = N->getOperand(0); | |||
3929 | SDValue N1 = N->getOperand(1); | |||
3930 | ||||
3931 | // First try with the default operand order. | |||
3932 | SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI, Subtarget, | |||
3933 | OptLevel); | |||
3934 | if (Result.getNode()) | |||
3935 | return Result; | |||
3936 | ||||
3937 | // If that didn't work, try again with the operands commuted. | |||
3938 | return PerformADDCombineWithOperands(N, N1, N0, DCI, Subtarget, OptLevel); | |||
3939 | } | |||
3940 | ||||
3941 | static SDValue PerformANDCombine(SDNode *N, | |||
3942 | TargetLowering::DAGCombinerInfo &DCI) { | |||
3943 | // The type legalizer turns a vector load of i8 values into a zextload to i16 | |||
3944 | // registers, optionally ANY_EXTENDs it (if target type is integer), | |||
3945 | // and ANDs off the high 8 bits. Since we turn this load into a | |||
3946 | // target-specific DAG node, the DAG combiner fails to eliminate these AND | |||
3947 | // nodes. Do that here. | |||
3948 | SDValue Val = N->getOperand(0); | |||
3949 | SDValue Mask = N->getOperand(1); | |||
3950 | ||||
3951 | if (isa<ConstantSDNode>(Val)) { | |||
3952 | std::swap(Val, Mask); | |||
3953 | } | |||
3954 | ||||
3955 | SDValue AExt; | |||
3956 | // Generally, we will see zextload -> IMOV16rr -> ANY_EXTEND -> and | |||
3957 | if (Val.getOpcode() == ISD::ANY_EXTEND) { | |||
3958 | AExt = Val; | |||
3959 | Val = Val->getOperand(0); | |||
3960 | } | |||
3961 | ||||
3962 | if (Val->isMachineOpcode() && Val->getMachineOpcode() == NVPTX::IMOV16rr) { | |||
3963 | Val = Val->getOperand(0); | |||
3964 | } | |||
3965 | ||||
3966 | if (Val->getOpcode() == NVPTXISD::LoadV2 || | |||
3967 | Val->getOpcode() == NVPTXISD::LoadV4) { | |||
3968 | ConstantSDNode *MaskCnst = dyn_cast<ConstantSDNode>(Mask); | |||
3969 | if (!MaskCnst) { | |||
3970 | // Not an AND with a constant | |||
3971 | return SDValue(); | |||
3972 | } | |||
3973 | ||||
3974 | uint64_t MaskVal = MaskCnst->getZExtValue(); | |||
3975 | if (MaskVal != 0xff) { | |||
3976 | // Not an AND that chops off top 8 bits | |||
3977 | return SDValue(); | |||
3978 | } | |||
3979 | ||||
3980 | MemSDNode *Mem = dyn_cast<MemSDNode>(Val); | |||
3981 | if (!Mem) { | |||
3982 | // Not a MemSDNode?!? | |||
3983 | return SDValue(); | |||
3984 | } | |||
3985 | ||||
3986 | EVT MemVT = Mem->getMemoryVT(); | |||
3987 | if (MemVT != MVT::v2i8 && MemVT != MVT::v4i8) { | |||
3988 | // We only handle the i8 case | |||
3989 | return SDValue(); | |||
3990 | } | |||
3991 | ||||
3992 | unsigned ExtType = | |||
3993 | cast<ConstantSDNode>(Val->getOperand(Val->getNumOperands()-1))-> | |||
3994 | getZExtValue(); | |||
3995 | if (ExtType == ISD::SEXTLOAD) { | |||
3996 | // If for some reason the load is a sextload, the and is needed to zero | |||
3997 | // out the high 8 bits | |||
3998 | return SDValue(); | |||
3999 | } | |||
4000 | ||||
4001 | bool AddTo = false; | |||
4002 | if (AExt.getNode() != 0) { | |||
4003 | // Re-insert the ext as a zext. | |||
4004 | Val = DCI.DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), | |||
4005 | AExt.getValueType(), Val); | |||
4006 | AddTo = true; | |||
4007 | } | |||
4008 | ||||
4009 | // If we get here, the AND is unnecessary. Just replace it with the load | |||
4010 | DCI.CombineTo(N, Val, AddTo); | |||
4011 | } | |||
4012 | ||||
4013 | return SDValue(); | |||
4014 | } | |||
4015 | ||||
/// Signedness of a mul.wide operand as determined by
/// IsMulWideOperandDemotable.
enum OperandSignedness {
  Signed = 0,   // operand is a sign extension
  Unsigned = 1, // operand is a zero extension
  Unknown = 2   // signedness could not be determined
};
4021 | ||||
4022 | /// IsMulWideOperandDemotable - Checks if the provided DAG node is an operand | |||
4023 | /// that can be demoted to \p OptSize bits without loss of information. The | |||
4024 | /// signedness of the operand, if determinable, is placed in \p S. | |||
4025 | static bool IsMulWideOperandDemotable(SDValue Op, | |||
4026 | unsigned OptSize, | |||
4027 | OperandSignedness &S) { | |||
4028 | S = Unknown; | |||
4029 | ||||
4030 | if (Op.getOpcode() == ISD::SIGN_EXTEND || | |||
4031 | Op.getOpcode() == ISD::SIGN_EXTEND_INREG) { | |||
4032 | EVT OrigVT = Op.getOperand(0).getValueType(); | |||
4033 | if (OrigVT.getSizeInBits() <= OptSize) { | |||
4034 | S = Signed; | |||
4035 | return true; | |||
4036 | } | |||
4037 | } else if (Op.getOpcode() == ISD::ZERO_EXTEND) { | |||
4038 | EVT OrigVT = Op.getOperand(0).getValueType(); | |||
4039 | if (OrigVT.getSizeInBits() <= OptSize) { | |||
4040 | S = Unsigned; | |||
4041 | return true; | |||
4042 | } | |||
4043 | } | |||
4044 | ||||
4045 | return false; | |||
4046 | } | |||
4047 | ||||
4048 | /// AreMulWideOperandsDemotable - Checks if the given LHS and RHS operands can | |||
4049 | /// be demoted to \p OptSize bits without loss of information. If the operands | |||
4050 | /// contain a constant, it should appear as the RHS operand. The signedness of | |||
4051 | /// the operands is placed in \p IsSigned. | |||
4052 | static bool AreMulWideOperandsDemotable(SDValue LHS, SDValue RHS, | |||
4053 | unsigned OptSize, | |||
4054 | bool &IsSigned) { | |||
4055 | ||||
4056 | OperandSignedness LHSSign; | |||
4057 | ||||
4058 | // The LHS operand must be a demotable op | |||
4059 | if (!IsMulWideOperandDemotable(LHS, OptSize, LHSSign)) | |||
4060 | return false; | |||
4061 | ||||
4062 | // We should have been able to determine the signedness from the LHS | |||
4063 | if (LHSSign == Unknown) | |||
4064 | return false; | |||
4065 | ||||
4066 | IsSigned = (LHSSign == Signed); | |||
4067 | ||||
4068 | // The RHS can be a demotable op or a constant | |||
4069 | if (ConstantSDNode *CI = dyn_cast<ConstantSDNode>(RHS)) { | |||
4070 | APInt Val = CI->getAPIntValue(); | |||
4071 | if (LHSSign == Unsigned) { | |||
4072 | if (Val.isIntN(OptSize)) { | |||
4073 | return true; | |||
4074 | } | |||
4075 | return false; | |||
4076 | } else { | |||
4077 | if (Val.isSignedIntN(OptSize)) { | |||
4078 | return true; | |||
4079 | } | |||
4080 | return false; | |||
4081 | } | |||
4082 | } else { | |||
4083 | OperandSignedness RHSSign; | |||
4084 | if (!IsMulWideOperandDemotable(RHS, OptSize, RHSSign)) | |||
4085 | return false; | |||
4086 | ||||
4087 | if (LHSSign != RHSSign) | |||
4088 | return false; | |||
4089 | ||||
4090 | return true; | |||
4091 | } | |||
4092 | } | |||
4093 | ||||
4094 | /// TryMULWIDECombine - Attempt to replace a multiply of M bits with a multiply | |||
4095 | /// of M/2 bits that produces an M-bit result (i.e. mul.wide). This transform | |||
4096 | /// works on both multiply DAG nodes and SHL DAG nodes with a constant shift | |||
4097 | /// amount. | |||
4098 | static SDValue TryMULWIDECombine(SDNode *N, | |||
4099 | TargetLowering::DAGCombinerInfo &DCI) { | |||
4100 | EVT MulType = N->getValueType(0); | |||
4101 | if (MulType != MVT::i32 && MulType != MVT::i64) { | |||
4102 | return SDValue(); | |||
4103 | } | |||
4104 | ||||
4105 | unsigned OptSize = MulType.getSizeInBits() >> 1; | |||
4106 | SDValue LHS = N->getOperand(0); | |||
4107 | SDValue RHS = N->getOperand(1); | |||
4108 | ||||
4109 | // Canonicalize the multiply so the constant (if any) is on the right | |||
4110 | if (N->getOpcode() == ISD::MUL) { | |||
4111 | if (isa<ConstantSDNode>(LHS)) { | |||
4112 | std::swap(LHS, RHS); | |||
4113 | } | |||
4114 | } | |||
4115 | ||||
4116 | // If we have a SHL, determine the actual multiply amount | |||
4117 | if (N->getOpcode() == ISD::SHL) { | |||
4118 | ConstantSDNode *ShlRHS = dyn_cast<ConstantSDNode>(RHS); | |||
4119 | if (!ShlRHS) { | |||
4120 | return SDValue(); | |||
4121 | } | |||
4122 | ||||
4123 | APInt ShiftAmt = ShlRHS->getAPIntValue(); | |||
4124 | unsigned BitWidth = MulType.getSizeInBits(); | |||
4125 | if (ShiftAmt.sge(0) && ShiftAmt.slt(BitWidth)) { | |||
4126 | APInt MulVal = APInt(BitWidth, 1) << ShiftAmt; | |||
4127 | RHS = DCI.DAG.getConstant(MulVal, MulType); | |||
4128 | } else { | |||
4129 | return SDValue(); | |||
4130 | } | |||
4131 | } | |||
4132 | ||||
4133 | bool Signed; | |||
4134 | // Verify that our operands are demotable | |||
4135 | if (!AreMulWideOperandsDemotable(LHS, RHS, OptSize, Signed)) { | |||
4136 | return SDValue(); | |||
4137 | } | |||
4138 | ||||
4139 | EVT DemotedVT; | |||
4140 | if (MulType == MVT::i32) { | |||
4141 | DemotedVT = MVT::i16; | |||
4142 | } else { | |||
4143 | DemotedVT = MVT::i32; | |||
4144 | } | |||
4145 | ||||
4146 | // Truncate the operands to the correct size. Note that these are just for | |||
4147 | // type consistency and will (likely) be eliminated in later phases. | |||
4148 | SDValue TruncLHS = | |||
4149 | DCI.DAG.getNode(ISD::TRUNCATE, SDLoc(N), DemotedVT, LHS); | |||
4150 | SDValue TruncRHS = | |||
4151 | DCI.DAG.getNode(ISD::TRUNCATE, SDLoc(N), DemotedVT, RHS); | |||
4152 | ||||
4153 | unsigned Opc; | |||
4154 | if (Signed) { | |||
4155 | Opc = NVPTXISD::MUL_WIDE_SIGNED; | |||
4156 | } else { | |||
4157 | Opc = NVPTXISD::MUL_WIDE_UNSIGNED; | |||
4158 | } | |||
4159 | ||||
4160 | return DCI.DAG.getNode(Opc, SDLoc(N), MulType, TruncLHS, TruncRHS); | |||
4161 | } | |||
4162 | ||||
4163 | /// PerformMULCombine - Runs PTX-specific DAG combine patterns on MUL nodes. | |||
4164 | static SDValue PerformMULCombine(SDNode *N, | |||
4165 | TargetLowering::DAGCombinerInfo &DCI, | |||
4166 | CodeGenOpt::Level OptLevel) { | |||
4167 | if (OptLevel > 0) { | |||
4168 | // Try mul.wide combining at OptLevel > 0 | |||
4169 | SDValue Ret = TryMULWIDECombine(N, DCI); | |||
4170 | if (Ret.getNode()) | |||
4171 | return Ret; | |||
4172 | } | |||
4173 | ||||
4174 | return SDValue(); | |||
4175 | } | |||
4176 | ||||
4177 | /// PerformSHLCombine - Runs PTX-specific DAG combine patterns on SHL nodes. | |||
4178 | static SDValue PerformSHLCombine(SDNode *N, | |||
4179 | TargetLowering::DAGCombinerInfo &DCI, | |||
4180 | CodeGenOpt::Level OptLevel) { | |||
4181 | if (OptLevel > 0) { | |||
4182 | // Try mul.wide combining at OptLevel > 0 | |||
4183 | SDValue Ret = TryMULWIDECombine(N, DCI); | |||
4184 | if (Ret.getNode()) | |||
4185 | return Ret; | |||
4186 | } | |||
4187 | ||||
4188 | return SDValue(); | |||
4189 | } | |||
4190 | ||||
4191 | SDValue NVPTXTargetLowering::PerformDAGCombine(SDNode *N, | |||
4192 | DAGCombinerInfo &DCI) const { | |||
4193 | CodeGenOpt::Level OptLevel = getTargetMachine().getOptLevel(); | |||
4194 | switch (N->getOpcode()) { | |||
4195 | default: break; | |||
4196 | case ISD::ADD: | |||
4197 | case ISD::FADD: | |||
4198 | return PerformADDCombine(N, DCI, nvptxSubtarget, OptLevel); | |||
4199 | case ISD::MUL: | |||
4200 | return PerformMULCombine(N, DCI, OptLevel); | |||
4201 | case ISD::SHL: | |||
4202 | return PerformSHLCombine(N, DCI, OptLevel); | |||
4203 | case ISD::AND: | |||
4204 | return PerformANDCombine(N, DCI); | |||
4205 | } | |||
4206 | return SDValue(); | |||
4207 | } | |||
4208 | ||||
4209 | /// ReplaceVectorLoad - Convert vector loads into multi-output scalar loads. | |||
4210 | static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG, | |||
4211 | const DataLayout *TD, | |||
4212 | SmallVectorImpl<SDValue> &Results) { | |||
4213 | EVT ResVT = N->getValueType(0); | |||
4214 | SDLoc DL(N); | |||
4215 | ||||
4216 | assert(ResVT.isVector() && "Vector load must have vector type")((ResVT.isVector() && "Vector load must have vector type" ) ? static_cast<void> (0) : __assert_fail ("ResVT.isVector() && \"Vector load must have vector type\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn220585/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 4216, __PRETTY_FUNCTION__)); | |||
4217 | ||||
4218 | // We only handle "native" vector sizes for now, e.g. <4 x double> is not | |||
4219 | // legal. We can (and should) split that into 2 loads of <2 x double> here | |||
4220 | // but I'm leaving that as a TODO for now. | |||
4221 | assert(ResVT.isSimple() && "Can only handle simple types")((ResVT.isSimple() && "Can only handle simple types") ? static_cast<void> (0) : __assert_fail ("ResVT.isSimple() && \"Can only handle simple types\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn220585/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 4221, __PRETTY_FUNCTION__)); | |||
4222 | switch (ResVT.getSimpleVT().SimpleTy) { | |||
4223 | default: | |||
4224 | return; | |||
4225 | case MVT::v2i8: | |||
4226 | case MVT::v2i16: | |||
4227 | case MVT::v2i32: | |||
4228 | case MVT::v2i64: | |||
4229 | case MVT::v2f32: | |||
4230 | case MVT::v2f64: | |||
4231 | case MVT::v4i8: | |||
4232 | case MVT::v4i16: | |||
4233 | case MVT::v4i32: | |||
4234 | case MVT::v4f32: | |||
4235 | // This is a "native" vector type | |||
4236 | break; | |||
4237 | } | |||
4238 | ||||
4239 | LoadSDNode *LD = cast<LoadSDNode>(N); | |||
4240 | ||||
4241 | unsigned Align = LD->getAlignment(); | |||
4242 | unsigned PrefAlign = | |||
4243 | TD->getPrefTypeAlignment(ResVT.getTypeForEVT(*DAG.getContext())); | |||
4244 | if (Align < PrefAlign) { | |||
4245 | // This load is not sufficiently aligned, so bail out and let this vector | |||
4246 | // load be scalarized. Note that we may still be able to emit smaller | |||
4247 | // vector loads. For example, if we are loading a <4 x float> with an | |||
4248 | // alignment of 8, this check will fail but the legalizer will try again | |||
4249 | // with 2 x <2 x float>, which will succeed with an alignment of 8. | |||
4250 | return; | |||
4251 | } | |||
4252 | ||||
4253 | EVT EltVT = ResVT.getVectorElementType(); | |||
4254 | unsigned NumElts = ResVT.getVectorNumElements(); | |||
4255 | ||||
4256 | // Since LoadV2 is a target node, we cannot rely on DAG type legalization. | |||
4257 | // Therefore, we must ensure the type is legal. For i1 and i8, we set the | |||
4258 | // loaded type to i16 and propagate the "real" type as the memory type. | |||
4259 | bool NeedTrunc = false; | |||
4260 | if (EltVT.getSizeInBits() < 16) { | |||
4261 | EltVT = MVT::i16; | |||
4262 | NeedTrunc = true; | |||
4263 | } | |||
4264 | ||||
4265 | unsigned Opcode = 0; | |||
4266 | SDVTList LdResVTs; | |||
4267 | ||||
4268 | switch (NumElts) { | |||
4269 | default: | |||
4270 | return; | |||
4271 | case 2: | |||
4272 | Opcode = NVPTXISD::LoadV2; | |||
4273 | LdResVTs = DAG.getVTList(EltVT, EltVT, MVT::Other); | |||
4274 | break; | |||
4275 | case 4: { | |||
4276 | Opcode = NVPTXISD::LoadV4; | |||
4277 | EVT ListVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other }; | |||
4278 | LdResVTs = DAG.getVTList(ListVTs); | |||
4279 | break; | |||
4280 | } | |||
4281 | } | |||
4282 | ||||
4283 | SmallVector<SDValue, 8> OtherOps; | |||
4284 | ||||
4285 | // Copy regular operands | |||
4286 | for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) | |||
4287 | OtherOps.push_back(N->getOperand(i)); | |||
4288 | ||||
4289 | // The select routine does not have access to the LoadSDNode instance, so | |||
4290 | // pass along the extension information | |||
4291 | OtherOps.push_back(DAG.getIntPtrConstant(LD->getExtensionType())); | |||
4292 | ||||
4293 | SDValue NewLD = DAG.getMemIntrinsicNode(Opcode, DL, LdResVTs, OtherOps, | |||
4294 | LD->getMemoryVT(), | |||
4295 | LD->getMemOperand()); | |||
4296 | ||||
4297 | SmallVector<SDValue, 4> ScalarRes; | |||
4298 | ||||
4299 | for (unsigned i = 0; i < NumElts; ++i) { | |||
4300 | SDValue Res = NewLD.getValue(i); | |||
4301 | if (NeedTrunc) | |||
4302 | Res = DAG.getNode(ISD::TRUNCATE, DL, ResVT.getVectorElementType(), Res); | |||
4303 | ScalarRes.push_back(Res); | |||
4304 | } | |||
4305 | ||||
4306 | SDValue LoadChain = NewLD.getValue(NumElts); | |||
4307 | ||||
4308 | SDValue BuildVec = DAG.getNode(ISD::BUILD_VECTOR, DL, ResVT, ScalarRes); | |||
4309 | ||||
4310 | Results.push_back(BuildVec); | |||
4311 | Results.push_back(LoadChain); | |||
4312 | } | |||
4313 | ||||
4314 | static void ReplaceINTRINSIC_W_CHAIN(SDNode *N, SelectionDAG &DAG, | |||
4315 | SmallVectorImpl<SDValue> &Results) { | |||
4316 | SDValue Chain = N->getOperand(0); | |||
4317 | SDValue Intrin = N->getOperand(1); | |||
4318 | SDLoc DL(N); | |||
4319 | ||||
4320 | // Get the intrinsic ID | |||
4321 | unsigned IntrinNo = cast<ConstantSDNode>(Intrin.getNode())->getZExtValue(); | |||
4322 | switch (IntrinNo) { | |||
4323 | default: | |||
4324 | return; | |||
4325 | case Intrinsic::nvvm_ldg_global_i: | |||
4326 | case Intrinsic::nvvm_ldg_global_f: | |||
4327 | case Intrinsic::nvvm_ldg_global_p: | |||
4328 | case Intrinsic::nvvm_ldu_global_i: | |||
4329 | case Intrinsic::nvvm_ldu_global_f: | |||
4330 | case Intrinsic::nvvm_ldu_global_p: { | |||
4331 | EVT ResVT = N->getValueType(0); | |||
4332 | ||||
4333 | if (ResVT.isVector()) { | |||
4334 | // Vector LDG/LDU | |||
4335 | ||||
4336 | unsigned NumElts = ResVT.getVectorNumElements(); | |||
4337 | EVT EltVT = ResVT.getVectorElementType(); | |||
4338 | ||||
4339 | // Since LDU/LDG are target nodes, we cannot rely on DAG type | |||
4340 | // legalization. | |||
4341 | // Therefore, we must ensure the type is legal. For i1 and i8, we set the | |||
4342 | // loaded type to i16 and propagate the "real" type as the memory type. | |||
4343 | bool NeedTrunc = false; | |||
4344 | if (EltVT.getSizeInBits() < 16) { | |||
4345 | EltVT = MVT::i16; | |||
4346 | NeedTrunc = true; | |||
4347 | } | |||
4348 | ||||
4349 | unsigned Opcode = 0; | |||
4350 | SDVTList LdResVTs; | |||
4351 | ||||
4352 | switch (NumElts) { | |||
4353 | default: | |||
4354 | return; | |||
4355 | case 2: | |||
4356 | switch (IntrinNo) { | |||
4357 | default: | |||
4358 | return; | |||
4359 | case Intrinsic::nvvm_ldg_global_i: | |||
4360 | case Intrinsic::nvvm_ldg_global_f: | |||
4361 | case Intrinsic::nvvm_ldg_global_p: | |||
4362 | Opcode = NVPTXISD::LDGV2; | |||
4363 | break; | |||
4364 | case Intrinsic::nvvm_ldu_global_i: | |||
4365 | case Intrinsic::nvvm_ldu_global_f: | |||
4366 | case Intrinsic::nvvm_ldu_global_p: | |||
4367 | Opcode = NVPTXISD::LDUV2; | |||
4368 | break; | |||
4369 | } | |||
4370 | LdResVTs = DAG.getVTList(EltVT, EltVT, MVT::Other); | |||
4371 | break; | |||
4372 | case 4: { | |||
4373 | switch (IntrinNo) { | |||
4374 | default: | |||
4375 | return; | |||
4376 | case Intrinsic::nvvm_ldg_global_i: | |||
4377 | case Intrinsic::nvvm_ldg_global_f: | |||
4378 | case Intrinsic::nvvm_ldg_global_p: | |||
4379 | Opcode = NVPTXISD::LDGV4; | |||
4380 | break; | |||
4381 | case Intrinsic::nvvm_ldu_global_i: | |||
4382 | case Intrinsic::nvvm_ldu_global_f: | |||
4383 | case Intrinsic::nvvm_ldu_global_p: | |||
4384 | Opcode = NVPTXISD::LDUV4; | |||
4385 | break; | |||
4386 | } | |||
4387 | EVT ListVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other }; | |||
4388 | LdResVTs = DAG.getVTList(ListVTs); | |||
4389 | break; | |||
4390 | } | |||
4391 | } | |||
4392 | ||||
4393 | SmallVector<SDValue, 8> OtherOps; | |||
4394 | ||||
4395 | // Copy regular operands | |||
4396 | ||||
4397 | OtherOps.push_back(Chain); // Chain | |||
4398 | // Skip operand 1 (intrinsic ID) | |||
4399 | // Others | |||
4400 | for (unsigned i = 2, e = N->getNumOperands(); i != e; ++i) | |||
4401 | OtherOps.push_back(N->getOperand(i)); | |||
4402 | ||||
4403 | MemIntrinsicSDNode *MemSD = cast<MemIntrinsicSDNode>(N); | |||
4404 | ||||
4405 | SDValue NewLD = DAG.getMemIntrinsicNode(Opcode, DL, LdResVTs, OtherOps, | |||
4406 | MemSD->getMemoryVT(), | |||
4407 | MemSD->getMemOperand()); | |||
4408 | ||||
4409 | SmallVector<SDValue, 4> ScalarRes; | |||
4410 | ||||
4411 | for (unsigned i = 0; i < NumElts; ++i) { | |||
4412 | SDValue Res = NewLD.getValue(i); | |||
4413 | if (NeedTrunc) | |||
4414 | Res = | |||
4415 | DAG.getNode(ISD::TRUNCATE, DL, ResVT.getVectorElementType(), Res); | |||
4416 | ScalarRes.push_back(Res); | |||
4417 | } | |||
4418 | ||||
4419 | SDValue LoadChain = NewLD.getValue(NumElts); | |||
4420 | ||||
4421 | SDValue BuildVec = | |||
4422 | DAG.getNode(ISD::BUILD_VECTOR, DL, ResVT, ScalarRes); | |||
4423 | ||||
4424 | Results.push_back(BuildVec); | |||
4425 | Results.push_back(LoadChain); | |||
4426 | } else { | |||
4427 | // i8 LDG/LDU | |||
4428 | assert(ResVT.isSimple() && ResVT.getSimpleVT().SimpleTy == MVT::i8 &&((ResVT.isSimple() && ResVT.getSimpleVT().SimpleTy == MVT::i8 && "Custom handling of non-i8 ldu/ldg?") ? static_cast <void> (0) : __assert_fail ("ResVT.isSimple() && ResVT.getSimpleVT().SimpleTy == MVT::i8 && \"Custom handling of non-i8 ldu/ldg?\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn220585/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 4429, __PRETTY_FUNCTION__)) | |||
4429 | "Custom handling of non-i8 ldu/ldg?")((ResVT.isSimple() && ResVT.getSimpleVT().SimpleTy == MVT::i8 && "Custom handling of non-i8 ldu/ldg?") ? static_cast <void> (0) : __assert_fail ("ResVT.isSimple() && ResVT.getSimpleVT().SimpleTy == MVT::i8 && \"Custom handling of non-i8 ldu/ldg?\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn220585/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 4429, __PRETTY_FUNCTION__)); | |||
4430 | ||||
4431 | // Just copy all operands as-is | |||
4432 | SmallVector<SDValue, 4> Ops; | |||
4433 | for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) | |||
4434 | Ops.push_back(N->getOperand(i)); | |||
4435 | ||||
4436 | // Force output to i16 | |||
4437 | SDVTList LdResVTs = DAG.getVTList(MVT::i16, MVT::Other); | |||
4438 | ||||
4439 | MemIntrinsicSDNode *MemSD = cast<MemIntrinsicSDNode>(N); | |||
4440 | ||||
4441 | // We make sure the memory type is i8, which will be used during isel | |||
4442 | // to select the proper instruction. | |||
4443 | SDValue NewLD = | |||
4444 | DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, LdResVTs, Ops, | |||
4445 | MVT::i8, MemSD->getMemOperand()); | |||
4446 | ||||
4447 | Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, | |||
4448 | NewLD.getValue(0))); | |||
4449 | Results.push_back(NewLD.getValue(1)); | |||
4450 | } | |||
4451 | } | |||
4452 | } | |||
4453 | } | |||
4454 | ||||
4455 | void NVPTXTargetLowering::ReplaceNodeResults( | |||
4456 | SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const { | |||
4457 | switch (N->getOpcode()) { | |||
4458 | default: | |||
4459 | report_fatal_error("Unhandled custom legalization"); | |||
4460 | case ISD::LOAD: | |||
4461 | ReplaceLoadVector(N, DAG, getDataLayout(), Results); | |||
4462 | return; | |||
4463 | case ISD::INTRINSIC_W_CHAIN: | |||
4464 | ReplaceINTRINSIC_W_CHAIN(N, DAG, Results); | |||
4465 | return; | |||
4466 | } | |||
4467 | } | |||
4468 | ||||
4469 | // Pin NVPTXSection's and NVPTXTargetObjectFile's vtables to this file. | |||
4470 | void NVPTXSection::anchor() {} | |||
4471 | ||||
4472 | NVPTXTargetObjectFile::~NVPTXTargetObjectFile() { | |||
4473 | delete TextSection; | |||
4474 | delete DataSection; | |||
4475 | delete BSSSection; | |||
4476 | delete ReadOnlySection; | |||
4477 | ||||
4478 | delete StaticCtorSection; | |||
4479 | delete StaticDtorSection; | |||
4480 | delete LSDASection; | |||
4481 | delete EHFrameSection; | |||
4482 | delete DwarfAbbrevSection; | |||
4483 | delete DwarfInfoSection; | |||
4484 | delete DwarfLineSection; | |||
4485 | delete DwarfFrameSection; | |||
4486 | delete DwarfPubTypesSection; | |||
4487 | delete DwarfDebugInlineSection; | |||
4488 | delete DwarfStrSection; | |||
4489 | delete DwarfLocSection; | |||
4490 | delete DwarfARangesSection; | |||
4491 | delete DwarfRangesSection; | |||
4492 | delete DwarfMacroInfoSection; | |||
4493 | } |