File: | lib/Target/NVPTX/NVPTXISelLowering.cpp |
Location: | line 1604, column 13 |
Description: | Called C++ object pointer is null |
1 | // | |||
2 | // The LLVM Compiler Infrastructure | |||
3 | // | |||
4 | // This file is distributed under the University of Illinois Open Source | |||
5 | // License. See LICENSE.TXT for details. | |||
6 | // | |||
7 | //===----------------------------------------------------------------------===// | |||
8 | // | |||
9 | // This file defines the interfaces that NVPTX uses to lower LLVM code into a | |||
10 | // selection DAG. | |||
11 | // | |||
12 | //===----------------------------------------------------------------------===// | |||
13 | ||||
14 | #include "NVPTXISelLowering.h" | |||
15 | #include "NVPTX.h" | |||
16 | #include "NVPTXTargetMachine.h" | |||
17 | #include "NVPTXTargetObjectFile.h" | |||
18 | #include "NVPTXUtilities.h" | |||
19 | #include "llvm/CodeGen/Analysis.h" | |||
20 | #include "llvm/CodeGen/MachineFrameInfo.h" | |||
21 | #include "llvm/CodeGen/MachineFunction.h" | |||
22 | #include "llvm/CodeGen/MachineInstrBuilder.h" | |||
23 | #include "llvm/CodeGen/MachineRegisterInfo.h" | |||
24 | #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" | |||
25 | #include "llvm/IR/CallSite.h" | |||
26 | #include "llvm/IR/DerivedTypes.h" | |||
27 | #include "llvm/IR/Function.h" | |||
28 | #include "llvm/IR/GlobalValue.h" | |||
29 | #include "llvm/IR/IntrinsicInst.h" | |||
30 | #include "llvm/IR/Intrinsics.h" | |||
31 | #include "llvm/IR/Module.h" | |||
32 | #include "llvm/MC/MCSectionELF.h" | |||
33 | #include "llvm/Support/CommandLine.h" | |||
34 | #include "llvm/Support/Debug.h" | |||
35 | #include "llvm/Support/ErrorHandling.h" | |||
36 | #include "llvm/Support/MathExtras.h" | |||
37 | #include "llvm/Support/raw_ostream.h" | |||
38 | #include <sstream> | |||
39 | ||||
40 | #undef DEBUG_TYPE"nvptx-lower" | |||
41 | #define DEBUG_TYPE"nvptx-lower" "nvptx-lower" | |||
42 | ||||
43 | using namespace llvm; | |||
44 | ||||
45 | static unsigned int uniqueCallSite = 0; | |||
46 | ||||
47 | static cl::opt<bool> sched4reg( | |||
48 | "nvptx-sched4reg", | |||
49 | cl::desc("NVPTX Specific: schedule for register pressue"), cl::init(false)); | |||
50 | ||||
51 | static cl::opt<unsigned> | |||
52 | FMAContractLevelOpt("nvptx-fma-level", cl::ZeroOrMore, cl::Hidden, | |||
53 | cl::desc("NVPTX Specific: FMA contraction (0: don't do it" | |||
54 | " 1: do it 2: do it aggressively"), | |||
55 | cl::init(2)); | |||
56 | ||||
57 | static bool IsPTXVectorType(MVT VT) { | |||
58 | switch (VT.SimpleTy) { | |||
59 | default: | |||
60 | return false; | |||
61 | case MVT::v2i1: | |||
62 | case MVT::v4i1: | |||
63 | case MVT::v2i8: | |||
64 | case MVT::v4i8: | |||
65 | case MVT::v2i16: | |||
66 | case MVT::v4i16: | |||
67 | case MVT::v2i32: | |||
68 | case MVT::v4i32: | |||
69 | case MVT::v2i64: | |||
70 | case MVT::v2f32: | |||
71 | case MVT::v4f32: | |||
72 | case MVT::v2f64: | |||
73 | return true; | |||
74 | } | |||
75 | } | |||
76 | ||||
77 | /// ComputePTXValueVTs - For the given Type \p Ty, returns the set of primitive | |||
78 | /// EVTs that compose it. Unlike ComputeValueVTs, this will break apart vectors | |||
79 | /// into their primitive components. | |||
80 | /// NOTE: This is a band-aid for code that expects ComputeValueVTs to return the | |||
81 | /// same number of types as the Ins/Outs arrays in LowerFormalArguments, | |||
82 | /// LowerCall, and LowerReturn. | |||
83 | static void ComputePTXValueVTs(const TargetLowering &TLI, Type *Ty, | |||
84 | SmallVectorImpl<EVT> &ValueVTs, | |||
85 | SmallVectorImpl<uint64_t> *Offsets = nullptr, | |||
86 | uint64_t StartingOffset = 0) { | |||
87 | SmallVector<EVT, 16> TempVTs; | |||
88 | SmallVector<uint64_t, 16> TempOffsets; | |||
89 | ||||
90 | ComputeValueVTs(TLI, Ty, TempVTs, &TempOffsets, StartingOffset); | |||
91 | for (unsigned i = 0, e = TempVTs.size(); i != e; ++i) { | |||
92 | EVT VT = TempVTs[i]; | |||
93 | uint64_t Off = TempOffsets[i]; | |||
94 | if (VT.isVector()) | |||
95 | for (unsigned j = 0, je = VT.getVectorNumElements(); j != je; ++j) { | |||
96 | ValueVTs.push_back(VT.getVectorElementType()); | |||
97 | if (Offsets) | |||
98 | Offsets->push_back(Off+j*VT.getVectorElementType().getStoreSize()); | |||
99 | } | |||
100 | else { | |||
101 | ValueVTs.push_back(VT); | |||
102 | if (Offsets) | |||
103 | Offsets->push_back(Off); | |||
104 | } | |||
105 | } | |||
106 | } | |||
107 | ||||
108 | // NVPTXTargetLowering Constructor. | |||
109 | NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM, | |||
110 | const NVPTXSubtarget &STI) | |||
111 | : TargetLowering(TM), nvTM(&TM), STI(STI) { | |||
112 | ||||
113 | // always lower memset, memcpy, and memmove intrinsics to load/store | |||
114 | // instructions, rather | |||
115 | // then generating calls to memset, mempcy or memmove. | |||
116 | MaxStoresPerMemset = (unsigned) 0xFFFFFFFF; | |||
117 | MaxStoresPerMemcpy = (unsigned) 0xFFFFFFFF; | |||
118 | MaxStoresPerMemmove = (unsigned) 0xFFFFFFFF; | |||
119 | ||||
120 | setBooleanContents(ZeroOrNegativeOneBooleanContent); | |||
121 | setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); | |||
122 | ||||
123 | // Jump is Expensive. Don't create extra control flow for 'and', 'or' | |||
124 | // condition branches. | |||
125 | setJumpIsExpensive(true); | |||
126 | ||||
127 | // By default, use the Source scheduling | |||
128 | if (sched4reg) | |||
129 | setSchedulingPreference(Sched::RegPressure); | |||
130 | else | |||
131 | setSchedulingPreference(Sched::Source); | |||
132 | ||||
133 | addRegisterClass(MVT::i1, &NVPTX::Int1RegsRegClass); | |||
134 | addRegisterClass(MVT::i16, &NVPTX::Int16RegsRegClass); | |||
135 | addRegisterClass(MVT::i32, &NVPTX::Int32RegsRegClass); | |||
136 | addRegisterClass(MVT::i64, &NVPTX::Int64RegsRegClass); | |||
137 | addRegisterClass(MVT::f32, &NVPTX::Float32RegsRegClass); | |||
138 | addRegisterClass(MVT::f64, &NVPTX::Float64RegsRegClass); | |||
139 | ||||
140 | // Operations not directly supported by NVPTX. | |||
141 | setOperationAction(ISD::SELECT_CC, MVT::f32, Expand); | |||
142 | setOperationAction(ISD::SELECT_CC, MVT::f64, Expand); | |||
143 | setOperationAction(ISD::SELECT_CC, MVT::i1, Expand); | |||
144 | setOperationAction(ISD::SELECT_CC, MVT::i8, Expand); | |||
145 | setOperationAction(ISD::SELECT_CC, MVT::i16, Expand); | |||
146 | setOperationAction(ISD::SELECT_CC, MVT::i32, Expand); | |||
147 | setOperationAction(ISD::SELECT_CC, MVT::i64, Expand); | |||
148 | setOperationAction(ISD::BR_CC, MVT::f32, Expand); | |||
149 | setOperationAction(ISD::BR_CC, MVT::f64, Expand); | |||
150 | setOperationAction(ISD::BR_CC, MVT::i1, Expand); | |||
151 | setOperationAction(ISD::BR_CC, MVT::i8, Expand); | |||
152 | setOperationAction(ISD::BR_CC, MVT::i16, Expand); | |||
153 | setOperationAction(ISD::BR_CC, MVT::i32, Expand); | |||
154 | setOperationAction(ISD::BR_CC, MVT::i64, Expand); | |||
155 | // Some SIGN_EXTEND_INREG can be done using cvt instruction. | |||
156 | // For others we will expand to a SHL/SRA pair. | |||
157 | setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i64, Legal); | |||
158 | setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal); | |||
159 | setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Legal); | |||
160 | setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal); | |||
161 | setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); | |||
162 | ||||
163 | setOperationAction(ISD::SHL_PARTS, MVT::i32 , Custom); | |||
164 | setOperationAction(ISD::SRA_PARTS, MVT::i32 , Custom); | |||
165 | setOperationAction(ISD::SRL_PARTS, MVT::i32 , Custom); | |||
166 | setOperationAction(ISD::SHL_PARTS, MVT::i64 , Custom); | |||
167 | setOperationAction(ISD::SRA_PARTS, MVT::i64 , Custom); | |||
168 | setOperationAction(ISD::SRL_PARTS, MVT::i64 , Custom); | |||
169 | ||||
170 | if (STI.hasROT64()) { | |||
171 | setOperationAction(ISD::ROTL, MVT::i64, Legal); | |||
172 | setOperationAction(ISD::ROTR, MVT::i64, Legal); | |||
173 | } else { | |||
174 | setOperationAction(ISD::ROTL, MVT::i64, Expand); | |||
175 | setOperationAction(ISD::ROTR, MVT::i64, Expand); | |||
176 | } | |||
177 | if (STI.hasROT32()) { | |||
178 | setOperationAction(ISD::ROTL, MVT::i32, Legal); | |||
179 | setOperationAction(ISD::ROTR, MVT::i32, Legal); | |||
180 | } else { | |||
181 | setOperationAction(ISD::ROTL, MVT::i32, Expand); | |||
182 | setOperationAction(ISD::ROTR, MVT::i32, Expand); | |||
183 | } | |||
184 | ||||
185 | setOperationAction(ISD::ROTL, MVT::i16, Expand); | |||
186 | setOperationAction(ISD::ROTR, MVT::i16, Expand); | |||
187 | setOperationAction(ISD::ROTL, MVT::i8, Expand); | |||
188 | setOperationAction(ISD::ROTR, MVT::i8, Expand); | |||
189 | setOperationAction(ISD::BSWAP, MVT::i16, Expand); | |||
190 | setOperationAction(ISD::BSWAP, MVT::i32, Expand); | |||
191 | setOperationAction(ISD::BSWAP, MVT::i64, Expand); | |||
192 | ||||
193 | // Indirect branch is not supported. | |||
194 | // This also disables Jump Table creation. | |||
195 | setOperationAction(ISD::BR_JT, MVT::Other, Expand); | |||
196 | setOperationAction(ISD::BRIND, MVT::Other, Expand); | |||
197 | ||||
198 | setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); | |||
199 | setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); | |||
200 | ||||
201 | // We want to legalize constant related memmove and memcopy | |||
202 | // intrinsics. | |||
203 | setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); | |||
204 | ||||
205 | // Turn FP extload into load/fextend | |||
206 | setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand); | |||
207 | setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand); | |||
208 | setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand); | |||
209 | // Turn FP truncstore into trunc + store. | |||
210 | setTruncStoreAction(MVT::f32, MVT::f16, Expand); | |||
211 | setTruncStoreAction(MVT::f64, MVT::f16, Expand); | |||
212 | setTruncStoreAction(MVT::f64, MVT::f32, Expand); | |||
213 | ||||
214 | // PTX does not support load / store predicate registers | |||
215 | setOperationAction(ISD::LOAD, MVT::i1, Custom); | |||
216 | setOperationAction(ISD::STORE, MVT::i1, Custom); | |||
217 | ||||
218 | for (MVT VT : MVT::integer_valuetypes()) { | |||
219 | setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); | |||
220 | setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote); | |||
221 | setTruncStoreAction(VT, MVT::i1, Expand); | |||
222 | } | |||
223 | ||||
224 | // This is legal in NVPTX | |||
225 | setOperationAction(ISD::ConstantFP, MVT::f64, Legal); | |||
226 | setOperationAction(ISD::ConstantFP, MVT::f32, Legal); | |||
227 | ||||
228 | // TRAP can be lowered to PTX trap | |||
229 | setOperationAction(ISD::TRAP, MVT::Other, Legal); | |||
230 | ||||
231 | setOperationAction(ISD::ADDC, MVT::i64, Expand); | |||
232 | setOperationAction(ISD::ADDE, MVT::i64, Expand); | |||
233 | ||||
234 | // Register custom handling for vector loads/stores | |||
235 | for (MVT VT : MVT::vector_valuetypes()) { | |||
236 | if (IsPTXVectorType(VT)) { | |||
237 | setOperationAction(ISD::LOAD, VT, Custom); | |||
238 | setOperationAction(ISD::STORE, VT, Custom); | |||
239 | setOperationAction(ISD::INTRINSIC_W_CHAIN, VT, Custom); | |||
240 | } | |||
241 | } | |||
242 | ||||
243 | // Custom handling for i8 intrinsics | |||
244 | setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom); | |||
245 | ||||
246 | setOperationAction(ISD::CTLZ, MVT::i16, Legal); | |||
247 | setOperationAction(ISD::CTLZ, MVT::i32, Legal); | |||
248 | setOperationAction(ISD::CTLZ, MVT::i64, Legal); | |||
249 | setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16, Legal); | |||
250 | setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Legal); | |||
251 | setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Legal); | |||
252 | setOperationAction(ISD::CTTZ, MVT::i16, Expand); | |||
253 | setOperationAction(ISD::CTTZ, MVT::i32, Expand); | |||
254 | setOperationAction(ISD::CTTZ, MVT::i64, Expand); | |||
255 | setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16, Expand); | |||
256 | setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand); | |||
257 | setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand); | |||
258 | setOperationAction(ISD::CTPOP, MVT::i16, Legal); | |||
259 | setOperationAction(ISD::CTPOP, MVT::i32, Legal); | |||
260 | setOperationAction(ISD::CTPOP, MVT::i64, Legal); | |||
261 | ||||
262 | // PTX does not directly support SELP of i1, so promote to i32 first | |||
263 | setOperationAction(ISD::SELECT, MVT::i1, Custom); | |||
264 | ||||
265 | // We have some custom DAG combine patterns for these nodes | |||
266 | setTargetDAGCombine(ISD::ADD); | |||
267 | setTargetDAGCombine(ISD::AND); | |||
268 | setTargetDAGCombine(ISD::FADD); | |||
269 | setTargetDAGCombine(ISD::MUL); | |||
270 | setTargetDAGCombine(ISD::SHL); | |||
271 | ||||
272 | // Now deduce the information based on the above mentioned | |||
273 | // actions | |||
274 | computeRegisterProperties(STI.getRegisterInfo()); | |||
275 | } | |||
276 | ||||
277 | const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const { | |||
278 | switch ((NVPTXISD::NodeType)Opcode) { | |||
279 | case NVPTXISD::FIRST_NUMBER: | |||
280 | break; | |||
281 | case NVPTXISD::CALL: | |||
282 | return "NVPTXISD::CALL"; | |||
283 | case NVPTXISD::RET_FLAG: | |||
284 | return "NVPTXISD::RET_FLAG"; | |||
285 | case NVPTXISD::LOAD_PARAM: | |||
286 | return "NVPTXISD::LOAD_PARAM"; | |||
287 | case NVPTXISD::Wrapper: | |||
288 | return "NVPTXISD::Wrapper"; | |||
289 | case NVPTXISD::DeclareParam: | |||
290 | return "NVPTXISD::DeclareParam"; | |||
291 | case NVPTXISD::DeclareScalarParam: | |||
292 | return "NVPTXISD::DeclareScalarParam"; | |||
293 | case NVPTXISD::DeclareRet: | |||
294 | return "NVPTXISD::DeclareRet"; | |||
295 | case NVPTXISD::DeclareScalarRet: | |||
296 | return "NVPTXISD::DeclareScalarRet"; | |||
297 | case NVPTXISD::DeclareRetParam: | |||
298 | return "NVPTXISD::DeclareRetParam"; | |||
299 | case NVPTXISD::PrintCall: | |||
300 | return "NVPTXISD::PrintCall"; | |||
301 | case NVPTXISD::PrintCallUni: | |||
302 | return "NVPTXISD::PrintCallUni"; | |||
303 | case NVPTXISD::LoadParam: | |||
304 | return "NVPTXISD::LoadParam"; | |||
305 | case NVPTXISD::LoadParamV2: | |||
306 | return "NVPTXISD::LoadParamV2"; | |||
307 | case NVPTXISD::LoadParamV4: | |||
308 | return "NVPTXISD::LoadParamV4"; | |||
309 | case NVPTXISD::StoreParam: | |||
310 | return "NVPTXISD::StoreParam"; | |||
311 | case NVPTXISD::StoreParamV2: | |||
312 | return "NVPTXISD::StoreParamV2"; | |||
313 | case NVPTXISD::StoreParamV4: | |||
314 | return "NVPTXISD::StoreParamV4"; | |||
315 | case NVPTXISD::StoreParamS32: | |||
316 | return "NVPTXISD::StoreParamS32"; | |||
317 | case NVPTXISD::StoreParamU32: | |||
318 | return "NVPTXISD::StoreParamU32"; | |||
319 | case NVPTXISD::CallArgBegin: | |||
320 | return "NVPTXISD::CallArgBegin"; | |||
321 | case NVPTXISD::CallArg: | |||
322 | return "NVPTXISD::CallArg"; | |||
323 | case NVPTXISD::LastCallArg: | |||
324 | return "NVPTXISD::LastCallArg"; | |||
325 | case NVPTXISD::CallArgEnd: | |||
326 | return "NVPTXISD::CallArgEnd"; | |||
327 | case NVPTXISD::CallVoid: | |||
328 | return "NVPTXISD::CallVoid"; | |||
329 | case NVPTXISD::CallVal: | |||
330 | return "NVPTXISD::CallVal"; | |||
331 | case NVPTXISD::CallSymbol: | |||
332 | return "NVPTXISD::CallSymbol"; | |||
333 | case NVPTXISD::Prototype: | |||
334 | return "NVPTXISD::Prototype"; | |||
335 | case NVPTXISD::MoveParam: | |||
336 | return "NVPTXISD::MoveParam"; | |||
337 | case NVPTXISD::StoreRetval: | |||
338 | return "NVPTXISD::StoreRetval"; | |||
339 | case NVPTXISD::StoreRetvalV2: | |||
340 | return "NVPTXISD::StoreRetvalV2"; | |||
341 | case NVPTXISD::StoreRetvalV4: | |||
342 | return "NVPTXISD::StoreRetvalV4"; | |||
343 | case NVPTXISD::PseudoUseParam: | |||
344 | return "NVPTXISD::PseudoUseParam"; | |||
345 | case NVPTXISD::RETURN: | |||
346 | return "NVPTXISD::RETURN"; | |||
347 | case NVPTXISD::CallSeqBegin: | |||
348 | return "NVPTXISD::CallSeqBegin"; | |||
349 | case NVPTXISD::CallSeqEnd: | |||
350 | return "NVPTXISD::CallSeqEnd"; | |||
351 | case NVPTXISD::CallPrototype: | |||
352 | return "NVPTXISD::CallPrototype"; | |||
353 | case NVPTXISD::LoadV2: | |||
354 | return "NVPTXISD::LoadV2"; | |||
355 | case NVPTXISD::LoadV4: | |||
356 | return "NVPTXISD::LoadV4"; | |||
357 | case NVPTXISD::LDGV2: | |||
358 | return "NVPTXISD::LDGV2"; | |||
359 | case NVPTXISD::LDGV4: | |||
360 | return "NVPTXISD::LDGV4"; | |||
361 | case NVPTXISD::LDUV2: | |||
362 | return "NVPTXISD::LDUV2"; | |||
363 | case NVPTXISD::LDUV4: | |||
364 | return "NVPTXISD::LDUV4"; | |||
365 | case NVPTXISD::StoreV2: | |||
366 | return "NVPTXISD::StoreV2"; | |||
367 | case NVPTXISD::StoreV4: | |||
368 | return "NVPTXISD::StoreV4"; | |||
369 | case NVPTXISD::FUN_SHFL_CLAMP: | |||
370 | return "NVPTXISD::FUN_SHFL_CLAMP"; | |||
371 | case NVPTXISD::FUN_SHFR_CLAMP: | |||
372 | return "NVPTXISD::FUN_SHFR_CLAMP"; | |||
373 | case NVPTXISD::IMAD: | |||
374 | return "NVPTXISD::IMAD"; | |||
375 | case NVPTXISD::Dummy: | |||
376 | return "NVPTXISD::Dummy"; | |||
377 | case NVPTXISD::MUL_WIDE_SIGNED: | |||
378 | return "NVPTXISD::MUL_WIDE_SIGNED"; | |||
379 | case NVPTXISD::MUL_WIDE_UNSIGNED: | |||
380 | return "NVPTXISD::MUL_WIDE_UNSIGNED"; | |||
381 | case NVPTXISD::Tex1DFloatS32: return "NVPTXISD::Tex1DFloatS32"; | |||
382 | case NVPTXISD::Tex1DFloatFloat: return "NVPTXISD::Tex1DFloatFloat"; | |||
383 | case NVPTXISD::Tex1DFloatFloatLevel: | |||
384 | return "NVPTXISD::Tex1DFloatFloatLevel"; | |||
385 | case NVPTXISD::Tex1DFloatFloatGrad: | |||
386 | return "NVPTXISD::Tex1DFloatFloatGrad"; | |||
387 | case NVPTXISD::Tex1DS32S32: return "NVPTXISD::Tex1DS32S32"; | |||
388 | case NVPTXISD::Tex1DS32Float: return "NVPTXISD::Tex1DS32Float"; | |||
389 | case NVPTXISD::Tex1DS32FloatLevel: | |||
390 | return "NVPTXISD::Tex1DS32FloatLevel"; | |||
391 | case NVPTXISD::Tex1DS32FloatGrad: | |||
392 | return "NVPTXISD::Tex1DS32FloatGrad"; | |||
393 | case NVPTXISD::Tex1DU32S32: return "NVPTXISD::Tex1DU32S32"; | |||
394 | case NVPTXISD::Tex1DU32Float: return "NVPTXISD::Tex1DU32Float"; | |||
395 | case NVPTXISD::Tex1DU32FloatLevel: | |||
396 | return "NVPTXISD::Tex1DU32FloatLevel"; | |||
397 | case NVPTXISD::Tex1DU32FloatGrad: | |||
398 | return "NVPTXISD::Tex1DU32FloatGrad"; | |||
399 | case NVPTXISD::Tex1DArrayFloatS32: return "NVPTXISD::Tex1DArrayFloatS32"; | |||
400 | case NVPTXISD::Tex1DArrayFloatFloat: return "NVPTXISD::Tex1DArrayFloatFloat"; | |||
401 | case NVPTXISD::Tex1DArrayFloatFloatLevel: | |||
402 | return "NVPTXISD::Tex1DArrayFloatFloatLevel"; | |||
403 | case NVPTXISD::Tex1DArrayFloatFloatGrad: | |||
404 | return "NVPTXISD::Tex1DArrayFloatFloatGrad"; | |||
405 | case NVPTXISD::Tex1DArrayS32S32: return "NVPTXISD::Tex1DArrayS32S32"; | |||
406 | case NVPTXISD::Tex1DArrayS32Float: return "NVPTXISD::Tex1DArrayS32Float"; | |||
407 | case NVPTXISD::Tex1DArrayS32FloatLevel: | |||
408 | return "NVPTXISD::Tex1DArrayS32FloatLevel"; | |||
409 | case NVPTXISD::Tex1DArrayS32FloatGrad: | |||
410 | return "NVPTXISD::Tex1DArrayS32FloatGrad"; | |||
411 | case NVPTXISD::Tex1DArrayU32S32: return "NVPTXISD::Tex1DArrayU32S32"; | |||
412 | case NVPTXISD::Tex1DArrayU32Float: return "NVPTXISD::Tex1DArrayU32Float"; | |||
413 | case NVPTXISD::Tex1DArrayU32FloatLevel: | |||
414 | return "NVPTXISD::Tex1DArrayU32FloatLevel"; | |||
415 | case NVPTXISD::Tex1DArrayU32FloatGrad: | |||
416 | return "NVPTXISD::Tex1DArrayU32FloatGrad"; | |||
417 | case NVPTXISD::Tex2DFloatS32: return "NVPTXISD::Tex2DFloatS32"; | |||
418 | case NVPTXISD::Tex2DFloatFloat: return "NVPTXISD::Tex2DFloatFloat"; | |||
419 | case NVPTXISD::Tex2DFloatFloatLevel: | |||
420 | return "NVPTXISD::Tex2DFloatFloatLevel"; | |||
421 | case NVPTXISD::Tex2DFloatFloatGrad: | |||
422 | return "NVPTXISD::Tex2DFloatFloatGrad"; | |||
423 | case NVPTXISD::Tex2DS32S32: return "NVPTXISD::Tex2DS32S32"; | |||
424 | case NVPTXISD::Tex2DS32Float: return "NVPTXISD::Tex2DS32Float"; | |||
425 | case NVPTXISD::Tex2DS32FloatLevel: | |||
426 | return "NVPTXISD::Tex2DS32FloatLevel"; | |||
427 | case NVPTXISD::Tex2DS32FloatGrad: | |||
428 | return "NVPTXISD::Tex2DS32FloatGrad"; | |||
429 | case NVPTXISD::Tex2DU32S32: return "NVPTXISD::Tex2DU32S32"; | |||
430 | case NVPTXISD::Tex2DU32Float: return "NVPTXISD::Tex2DU32Float"; | |||
431 | case NVPTXISD::Tex2DU32FloatLevel: | |||
432 | return "NVPTXISD::Tex2DU32FloatLevel"; | |||
433 | case NVPTXISD::Tex2DU32FloatGrad: | |||
434 | return "NVPTXISD::Tex2DU32FloatGrad"; | |||
435 | case NVPTXISD::Tex2DArrayFloatS32: return "NVPTXISD::Tex2DArrayFloatS32"; | |||
436 | case NVPTXISD::Tex2DArrayFloatFloat: return "NVPTXISD::Tex2DArrayFloatFloat"; | |||
437 | case NVPTXISD::Tex2DArrayFloatFloatLevel: | |||
438 | return "NVPTXISD::Tex2DArrayFloatFloatLevel"; | |||
439 | case NVPTXISD::Tex2DArrayFloatFloatGrad: | |||
440 | return "NVPTXISD::Tex2DArrayFloatFloatGrad"; | |||
441 | case NVPTXISD::Tex2DArrayS32S32: return "NVPTXISD::Tex2DArrayS32S32"; | |||
442 | case NVPTXISD::Tex2DArrayS32Float: return "NVPTXISD::Tex2DArrayS32Float"; | |||
443 | case NVPTXISD::Tex2DArrayS32FloatLevel: | |||
444 | return "NVPTXISD::Tex2DArrayS32FloatLevel"; | |||
445 | case NVPTXISD::Tex2DArrayS32FloatGrad: | |||
446 | return "NVPTXISD::Tex2DArrayS32FloatGrad"; | |||
447 | case NVPTXISD::Tex2DArrayU32S32: return "NVPTXISD::Tex2DArrayU32S32"; | |||
448 | case NVPTXISD::Tex2DArrayU32Float: return "NVPTXISD::Tex2DArrayU32Float"; | |||
449 | case NVPTXISD::Tex2DArrayU32FloatLevel: | |||
450 | return "NVPTXISD::Tex2DArrayU32FloatLevel"; | |||
451 | case NVPTXISD::Tex2DArrayU32FloatGrad: | |||
452 | return "NVPTXISD::Tex2DArrayU32FloatGrad"; | |||
453 | case NVPTXISD::Tex3DFloatS32: return "NVPTXISD::Tex3DFloatS32"; | |||
454 | case NVPTXISD::Tex3DFloatFloat: return "NVPTXISD::Tex3DFloatFloat"; | |||
455 | case NVPTXISD::Tex3DFloatFloatLevel: | |||
456 | return "NVPTXISD::Tex3DFloatFloatLevel"; | |||
457 | case NVPTXISD::Tex3DFloatFloatGrad: | |||
458 | return "NVPTXISD::Tex3DFloatFloatGrad"; | |||
459 | case NVPTXISD::Tex3DS32S32: return "NVPTXISD::Tex3DS32S32"; | |||
460 | case NVPTXISD::Tex3DS32Float: return "NVPTXISD::Tex3DS32Float"; | |||
461 | case NVPTXISD::Tex3DS32FloatLevel: | |||
462 | return "NVPTXISD::Tex3DS32FloatLevel"; | |||
463 | case NVPTXISD::Tex3DS32FloatGrad: | |||
464 | return "NVPTXISD::Tex3DS32FloatGrad"; | |||
465 | case NVPTXISD::Tex3DU32S32: return "NVPTXISD::Tex3DU32S32"; | |||
466 | case NVPTXISD::Tex3DU32Float: return "NVPTXISD::Tex3DU32Float"; | |||
467 | case NVPTXISD::Tex3DU32FloatLevel: | |||
468 | return "NVPTXISD::Tex3DU32FloatLevel"; | |||
469 | case NVPTXISD::Tex3DU32FloatGrad: | |||
470 | return "NVPTXISD::Tex3DU32FloatGrad"; | |||
471 | case NVPTXISD::TexCubeFloatFloat: return "NVPTXISD::TexCubeFloatFloat"; | |||
472 | case NVPTXISD::TexCubeFloatFloatLevel: | |||
473 | return "NVPTXISD::TexCubeFloatFloatLevel"; | |||
474 | case NVPTXISD::TexCubeS32Float: return "NVPTXISD::TexCubeS32Float"; | |||
475 | case NVPTXISD::TexCubeS32FloatLevel: | |||
476 | return "NVPTXISD::TexCubeS32FloatLevel"; | |||
477 | case NVPTXISD::TexCubeU32Float: return "NVPTXISD::TexCubeU32Float"; | |||
478 | case NVPTXISD::TexCubeU32FloatLevel: | |||
479 | return "NVPTXISD::TexCubeU32FloatLevel"; | |||
480 | case NVPTXISD::TexCubeArrayFloatFloat: | |||
481 | return "NVPTXISD::TexCubeArrayFloatFloat"; | |||
482 | case NVPTXISD::TexCubeArrayFloatFloatLevel: | |||
483 | return "NVPTXISD::TexCubeArrayFloatFloatLevel"; | |||
484 | case NVPTXISD::TexCubeArrayS32Float: | |||
485 | return "NVPTXISD::TexCubeArrayS32Float"; | |||
486 | case NVPTXISD::TexCubeArrayS32FloatLevel: | |||
487 | return "NVPTXISD::TexCubeArrayS32FloatLevel"; | |||
488 | case NVPTXISD::TexCubeArrayU32Float: | |||
489 | return "NVPTXISD::TexCubeArrayU32Float"; | |||
490 | case NVPTXISD::TexCubeArrayU32FloatLevel: | |||
491 | return "NVPTXISD::TexCubeArrayU32FloatLevel"; | |||
492 | case NVPTXISD::Tld4R2DFloatFloat: | |||
493 | return "NVPTXISD::Tld4R2DFloatFloat"; | |||
494 | case NVPTXISD::Tld4G2DFloatFloat: | |||
495 | return "NVPTXISD::Tld4G2DFloatFloat"; | |||
496 | case NVPTXISD::Tld4B2DFloatFloat: | |||
497 | return "NVPTXISD::Tld4B2DFloatFloat"; | |||
498 | case NVPTXISD::Tld4A2DFloatFloat: | |||
499 | return "NVPTXISD::Tld4A2DFloatFloat"; | |||
500 | case NVPTXISD::Tld4R2DS64Float: | |||
501 | return "NVPTXISD::Tld4R2DS64Float"; | |||
502 | case NVPTXISD::Tld4G2DS64Float: | |||
503 | return "NVPTXISD::Tld4G2DS64Float"; | |||
504 | case NVPTXISD::Tld4B2DS64Float: | |||
505 | return "NVPTXISD::Tld4B2DS64Float"; | |||
506 | case NVPTXISD::Tld4A2DS64Float: | |||
507 | return "NVPTXISD::Tld4A2DS64Float"; | |||
508 | case NVPTXISD::Tld4R2DU64Float: | |||
509 | return "NVPTXISD::Tld4R2DU64Float"; | |||
510 | case NVPTXISD::Tld4G2DU64Float: | |||
511 | return "NVPTXISD::Tld4G2DU64Float"; | |||
512 | case NVPTXISD::Tld4B2DU64Float: | |||
513 | return "NVPTXISD::Tld4B2DU64Float"; | |||
514 | case NVPTXISD::Tld4A2DU64Float: | |||
515 | return "NVPTXISD::Tld4A2DU64Float"; | |||
516 | ||||
517 | case NVPTXISD::TexUnified1DFloatS32: | |||
518 | return "NVPTXISD::TexUnified1DFloatS32"; | |||
519 | case NVPTXISD::TexUnified1DFloatFloat: | |||
520 | return "NVPTXISD::TexUnified1DFloatFloat"; | |||
521 | case NVPTXISD::TexUnified1DFloatFloatLevel: | |||
522 | return "NVPTXISD::TexUnified1DFloatFloatLevel"; | |||
523 | case NVPTXISD::TexUnified1DFloatFloatGrad: | |||
524 | return "NVPTXISD::TexUnified1DFloatFloatGrad"; | |||
525 | case NVPTXISD::TexUnified1DS32S32: | |||
526 | return "NVPTXISD::TexUnified1DS32S32"; | |||
527 | case NVPTXISD::TexUnified1DS32Float: | |||
528 | return "NVPTXISD::TexUnified1DS32Float"; | |||
529 | case NVPTXISD::TexUnified1DS32FloatLevel: | |||
530 | return "NVPTXISD::TexUnified1DS32FloatLevel"; | |||
531 | case NVPTXISD::TexUnified1DS32FloatGrad: | |||
532 | return "NVPTXISD::TexUnified1DS32FloatGrad"; | |||
533 | case NVPTXISD::TexUnified1DU32S32: | |||
534 | return "NVPTXISD::TexUnified1DU32S32"; | |||
535 | case NVPTXISD::TexUnified1DU32Float: | |||
536 | return "NVPTXISD::TexUnified1DU32Float"; | |||
537 | case NVPTXISD::TexUnified1DU32FloatLevel: | |||
538 | return "NVPTXISD::TexUnified1DU32FloatLevel"; | |||
539 | case NVPTXISD::TexUnified1DU32FloatGrad: | |||
540 | return "NVPTXISD::TexUnified1DU32FloatGrad"; | |||
541 | case NVPTXISD::TexUnified1DArrayFloatS32: | |||
542 | return "NVPTXISD::TexUnified1DArrayFloatS32"; | |||
543 | case NVPTXISD::TexUnified1DArrayFloatFloat: | |||
544 | return "NVPTXISD::TexUnified1DArrayFloatFloat"; | |||
545 | case NVPTXISD::TexUnified1DArrayFloatFloatLevel: | |||
546 | return "NVPTXISD::TexUnified1DArrayFloatFloatLevel"; | |||
547 | case NVPTXISD::TexUnified1DArrayFloatFloatGrad: | |||
548 | return "NVPTXISD::TexUnified1DArrayFloatFloatGrad"; | |||
549 | case NVPTXISD::TexUnified1DArrayS32S32: | |||
550 | return "NVPTXISD::TexUnified1DArrayS32S32"; | |||
551 | case NVPTXISD::TexUnified1DArrayS32Float: | |||
552 | return "NVPTXISD::TexUnified1DArrayS32Float"; | |||
553 | case NVPTXISD::TexUnified1DArrayS32FloatLevel: | |||
554 | return "NVPTXISD::TexUnified1DArrayS32FloatLevel"; | |||
555 | case NVPTXISD::TexUnified1DArrayS32FloatGrad: | |||
556 | return "NVPTXISD::TexUnified1DArrayS32FloatGrad"; | |||
557 | case NVPTXISD::TexUnified1DArrayU32S32: | |||
558 | return "NVPTXISD::TexUnified1DArrayU32S32"; | |||
559 | case NVPTXISD::TexUnified1DArrayU32Float: | |||
560 | return "NVPTXISD::TexUnified1DArrayU32Float"; | |||
561 | case NVPTXISD::TexUnified1DArrayU32FloatLevel: | |||
562 | return "NVPTXISD::TexUnified1DArrayU32FloatLevel"; | |||
563 | case NVPTXISD::TexUnified1DArrayU32FloatGrad: | |||
564 | return "NVPTXISD::TexUnified1DArrayU32FloatGrad"; | |||
565 | case NVPTXISD::TexUnified2DFloatS32: | |||
566 | return "NVPTXISD::TexUnified2DFloatS32"; | |||
567 | case NVPTXISD::TexUnified2DFloatFloat: | |||
568 | return "NVPTXISD::TexUnified2DFloatFloat"; | |||
569 | case NVPTXISD::TexUnified2DFloatFloatLevel: | |||
570 | return "NVPTXISD::TexUnified2DFloatFloatLevel"; | |||
571 | case NVPTXISD::TexUnified2DFloatFloatGrad: | |||
572 | return "NVPTXISD::TexUnified2DFloatFloatGrad"; | |||
573 | case NVPTXISD::TexUnified2DS32S32: | |||
574 | return "NVPTXISD::TexUnified2DS32S32"; | |||
575 | case NVPTXISD::TexUnified2DS32Float: | |||
576 | return "NVPTXISD::TexUnified2DS32Float"; | |||
577 | case NVPTXISD::TexUnified2DS32FloatLevel: | |||
578 | return "NVPTXISD::TexUnified2DS32FloatLevel"; | |||
579 | case NVPTXISD::TexUnified2DS32FloatGrad: | |||
580 | return "NVPTXISD::TexUnified2DS32FloatGrad"; | |||
581 | case NVPTXISD::TexUnified2DU32S32: | |||
582 | return "NVPTXISD::TexUnified2DU32S32"; | |||
583 | case NVPTXISD::TexUnified2DU32Float: | |||
584 | return "NVPTXISD::TexUnified2DU32Float"; | |||
585 | case NVPTXISD::TexUnified2DU32FloatLevel: | |||
586 | return "NVPTXISD::TexUnified2DU32FloatLevel"; | |||
587 | case NVPTXISD::TexUnified2DU32FloatGrad: | |||
588 | return "NVPTXISD::TexUnified2DU32FloatGrad"; | |||
589 | case NVPTXISD::TexUnified2DArrayFloatS32: | |||
590 | return "NVPTXISD::TexUnified2DArrayFloatS32"; | |||
591 | case NVPTXISD::TexUnified2DArrayFloatFloat: | |||
592 | return "NVPTXISD::TexUnified2DArrayFloatFloat"; | |||
593 | case NVPTXISD::TexUnified2DArrayFloatFloatLevel: | |||
594 | return "NVPTXISD::TexUnified2DArrayFloatFloatLevel"; | |||
595 | case NVPTXISD::TexUnified2DArrayFloatFloatGrad: | |||
596 | return "NVPTXISD::TexUnified2DArrayFloatFloatGrad"; | |||
597 | case NVPTXISD::TexUnified2DArrayS32S32: | |||
598 | return "NVPTXISD::TexUnified2DArrayS32S32"; | |||
599 | case NVPTXISD::TexUnified2DArrayS32Float: | |||
600 | return "NVPTXISD::TexUnified2DArrayS32Float"; | |||
601 | case NVPTXISD::TexUnified2DArrayS32FloatLevel: | |||
602 | return "NVPTXISD::TexUnified2DArrayS32FloatLevel"; | |||
603 | case NVPTXISD::TexUnified2DArrayS32FloatGrad: | |||
604 | return "NVPTXISD::TexUnified2DArrayS32FloatGrad"; | |||
605 | case NVPTXISD::TexUnified2DArrayU32S32: | |||
606 | return "NVPTXISD::TexUnified2DArrayU32S32"; | |||
607 | case NVPTXISD::TexUnified2DArrayU32Float: | |||
608 | return "NVPTXISD::TexUnified2DArrayU32Float"; | |||
609 | case NVPTXISD::TexUnified2DArrayU32FloatLevel: | |||
610 | return "NVPTXISD::TexUnified2DArrayU32FloatLevel"; | |||
611 | case NVPTXISD::TexUnified2DArrayU32FloatGrad: | |||
612 | return "NVPTXISD::TexUnified2DArrayU32FloatGrad"; | |||
613 | case NVPTXISD::TexUnified3DFloatS32: | |||
614 | return "NVPTXISD::TexUnified3DFloatS32"; | |||
615 | case NVPTXISD::TexUnified3DFloatFloat: | |||
616 | return "NVPTXISD::TexUnified3DFloatFloat"; | |||
617 | case NVPTXISD::TexUnified3DFloatFloatLevel: | |||
618 | return "NVPTXISD::TexUnified3DFloatFloatLevel"; | |||
619 | case NVPTXISD::TexUnified3DFloatFloatGrad: | |||
620 | return "NVPTXISD::TexUnified3DFloatFloatGrad"; | |||
621 | case NVPTXISD::TexUnified3DS32S32: | |||
622 | return "NVPTXISD::TexUnified3DS32S32"; | |||
623 | case NVPTXISD::TexUnified3DS32Float: | |||
624 | return "NVPTXISD::TexUnified3DS32Float"; | |||
625 | case NVPTXISD::TexUnified3DS32FloatLevel: | |||
626 | return "NVPTXISD::TexUnified3DS32FloatLevel"; | |||
627 | case NVPTXISD::TexUnified3DS32FloatGrad: | |||
628 | return "NVPTXISD::TexUnified3DS32FloatGrad"; | |||
629 | case NVPTXISD::TexUnified3DU32S32: | |||
630 | return "NVPTXISD::TexUnified3DU32S32"; | |||
631 | case NVPTXISD::TexUnified3DU32Float: | |||
632 | return "NVPTXISD::TexUnified3DU32Float"; | |||
633 | case NVPTXISD::TexUnified3DU32FloatLevel: | |||
634 | return "NVPTXISD::TexUnified3DU32FloatLevel"; | |||
635 | case NVPTXISD::TexUnified3DU32FloatGrad: | |||
636 | return "NVPTXISD::TexUnified3DU32FloatGrad"; | |||
637 | case NVPTXISD::TexUnifiedCubeFloatFloat: | |||
638 | return "NVPTXISD::TexUnifiedCubeFloatFloat"; | |||
639 | case NVPTXISD::TexUnifiedCubeFloatFloatLevel: | |||
640 | return "NVPTXISD::TexUnifiedCubeFloatFloatLevel"; | |||
641 | case NVPTXISD::TexUnifiedCubeS32Float: | |||
642 | return "NVPTXISD::TexUnifiedCubeS32Float"; | |||
643 | case NVPTXISD::TexUnifiedCubeS32FloatLevel: | |||
644 | return "NVPTXISD::TexUnifiedCubeS32FloatLevel"; | |||
645 | case NVPTXISD::TexUnifiedCubeU32Float: | |||
646 | return "NVPTXISD::TexUnifiedCubeU32Float"; | |||
647 | case NVPTXISD::TexUnifiedCubeU32FloatLevel: | |||
648 | return "NVPTXISD::TexUnifiedCubeU32FloatLevel"; | |||
649 | case NVPTXISD::TexUnifiedCubeArrayFloatFloat: | |||
650 | return "NVPTXISD::TexUnifiedCubeArrayFloatFloat"; | |||
651 | case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel: | |||
652 | return "NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel"; | |||
653 | case NVPTXISD::TexUnifiedCubeArrayS32Float: | |||
654 | return "NVPTXISD::TexUnifiedCubeArrayS32Float"; | |||
655 | case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel: | |||
656 | return "NVPTXISD::TexUnifiedCubeArrayS32FloatLevel"; | |||
657 | case NVPTXISD::TexUnifiedCubeArrayU32Float: | |||
658 | return "NVPTXISD::TexUnifiedCubeArrayU32Float"; | |||
659 | case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel: | |||
660 | return "NVPTXISD::TexUnifiedCubeArrayU32FloatLevel"; | |||
661 | case NVPTXISD::Tld4UnifiedR2DFloatFloat: | |||
662 | return "NVPTXISD::Tld4UnifiedR2DFloatFloat"; | |||
663 | case NVPTXISD::Tld4UnifiedG2DFloatFloat: | |||
664 | return "NVPTXISD::Tld4UnifiedG2DFloatFloat"; | |||
665 | case NVPTXISD::Tld4UnifiedB2DFloatFloat: | |||
666 | return "NVPTXISD::Tld4UnifiedB2DFloatFloat"; | |||
667 | case NVPTXISD::Tld4UnifiedA2DFloatFloat: | |||
668 | return "NVPTXISD::Tld4UnifiedA2DFloatFloat"; | |||
669 | case NVPTXISD::Tld4UnifiedR2DS64Float: | |||
670 | return "NVPTXISD::Tld4UnifiedR2DS64Float"; | |||
671 | case NVPTXISD::Tld4UnifiedG2DS64Float: | |||
672 | return "NVPTXISD::Tld4UnifiedG2DS64Float"; | |||
673 | case NVPTXISD::Tld4UnifiedB2DS64Float: | |||
674 | return "NVPTXISD::Tld4UnifiedB2DS64Float"; | |||
675 | case NVPTXISD::Tld4UnifiedA2DS64Float: | |||
676 | return "NVPTXISD::Tld4UnifiedA2DS64Float"; | |||
677 | case NVPTXISD::Tld4UnifiedR2DU64Float: | |||
678 | return "NVPTXISD::Tld4UnifiedR2DU64Float"; | |||
679 | case NVPTXISD::Tld4UnifiedG2DU64Float: | |||
680 | return "NVPTXISD::Tld4UnifiedG2DU64Float"; | |||
681 | case NVPTXISD::Tld4UnifiedB2DU64Float: | |||
682 | return "NVPTXISD::Tld4UnifiedB2DU64Float"; | |||
683 | case NVPTXISD::Tld4UnifiedA2DU64Float: | |||
684 | return "NVPTXISD::Tld4UnifiedA2DU64Float"; | |||
685 | ||||
686 | case NVPTXISD::Suld1DI8Clamp: return "NVPTXISD::Suld1DI8Clamp"; | |||
687 | case NVPTXISD::Suld1DI16Clamp: return "NVPTXISD::Suld1DI16Clamp"; | |||
688 | case NVPTXISD::Suld1DI32Clamp: return "NVPTXISD::Suld1DI32Clamp"; | |||
689 | case NVPTXISD::Suld1DI64Clamp: return "NVPTXISD::Suld1DI64Clamp"; | |||
690 | case NVPTXISD::Suld1DV2I8Clamp: return "NVPTXISD::Suld1DV2I8Clamp"; | |||
691 | case NVPTXISD::Suld1DV2I16Clamp: return "NVPTXISD::Suld1DV2I16Clamp"; | |||
692 | case NVPTXISD::Suld1DV2I32Clamp: return "NVPTXISD::Suld1DV2I32Clamp"; | |||
693 | case NVPTXISD::Suld1DV2I64Clamp: return "NVPTXISD::Suld1DV2I64Clamp"; | |||
694 | case NVPTXISD::Suld1DV4I8Clamp: return "NVPTXISD::Suld1DV4I8Clamp"; | |||
695 | case NVPTXISD::Suld1DV4I16Clamp: return "NVPTXISD::Suld1DV4I16Clamp"; | |||
696 | case NVPTXISD::Suld1DV4I32Clamp: return "NVPTXISD::Suld1DV4I32Clamp"; | |||
697 | ||||
698 | case NVPTXISD::Suld1DArrayI8Clamp: return "NVPTXISD::Suld1DArrayI8Clamp"; | |||
699 | case NVPTXISD::Suld1DArrayI16Clamp: return "NVPTXISD::Suld1DArrayI16Clamp"; | |||
700 | case NVPTXISD::Suld1DArrayI32Clamp: return "NVPTXISD::Suld1DArrayI32Clamp"; | |||
701 | case NVPTXISD::Suld1DArrayI64Clamp: return "NVPTXISD::Suld1DArrayI64Clamp"; | |||
702 | case NVPTXISD::Suld1DArrayV2I8Clamp: return "NVPTXISD::Suld1DArrayV2I8Clamp"; | |||
703 | case NVPTXISD::Suld1DArrayV2I16Clamp:return "NVPTXISD::Suld1DArrayV2I16Clamp"; | |||
704 | case NVPTXISD::Suld1DArrayV2I32Clamp:return "NVPTXISD::Suld1DArrayV2I32Clamp"; | |||
705 | case NVPTXISD::Suld1DArrayV2I64Clamp:return "NVPTXISD::Suld1DArrayV2I64Clamp"; | |||
706 | case NVPTXISD::Suld1DArrayV4I8Clamp: return "NVPTXISD::Suld1DArrayV4I8Clamp"; | |||
707 | case NVPTXISD::Suld1DArrayV4I16Clamp:return "NVPTXISD::Suld1DArrayV4I16Clamp"; | |||
708 | case NVPTXISD::Suld1DArrayV4I32Clamp:return "NVPTXISD::Suld1DArrayV4I32Clamp"; | |||
709 | ||||
710 | case NVPTXISD::Suld2DI8Clamp: return "NVPTXISD::Suld2DI8Clamp"; | |||
711 | case NVPTXISD::Suld2DI16Clamp: return "NVPTXISD::Suld2DI16Clamp"; | |||
712 | case NVPTXISD::Suld2DI32Clamp: return "NVPTXISD::Suld2DI32Clamp"; | |||
713 | case NVPTXISD::Suld2DI64Clamp: return "NVPTXISD::Suld2DI64Clamp"; | |||
714 | case NVPTXISD::Suld2DV2I8Clamp: return "NVPTXISD::Suld2DV2I8Clamp"; | |||
715 | case NVPTXISD::Suld2DV2I16Clamp: return "NVPTXISD::Suld2DV2I16Clamp"; | |||
716 | case NVPTXISD::Suld2DV2I32Clamp: return "NVPTXISD::Suld2DV2I32Clamp"; | |||
717 | case NVPTXISD::Suld2DV2I64Clamp: return "NVPTXISD::Suld2DV2I64Clamp"; | |||
718 | case NVPTXISD::Suld2DV4I8Clamp: return "NVPTXISD::Suld2DV4I8Clamp"; | |||
719 | case NVPTXISD::Suld2DV4I16Clamp: return "NVPTXISD::Suld2DV4I16Clamp"; | |||
720 | case NVPTXISD::Suld2DV4I32Clamp: return "NVPTXISD::Suld2DV4I32Clamp"; | |||
721 | ||||
722 | case NVPTXISD::Suld2DArrayI8Clamp: return "NVPTXISD::Suld2DArrayI8Clamp"; | |||
723 | case NVPTXISD::Suld2DArrayI16Clamp: return "NVPTXISD::Suld2DArrayI16Clamp"; | |||
724 | case NVPTXISD::Suld2DArrayI32Clamp: return "NVPTXISD::Suld2DArrayI32Clamp"; | |||
725 | case NVPTXISD::Suld2DArrayI64Clamp: return "NVPTXISD::Suld2DArrayI64Clamp"; | |||
726 | case NVPTXISD::Suld2DArrayV2I8Clamp: return "NVPTXISD::Suld2DArrayV2I8Clamp"; | |||
727 | case NVPTXISD::Suld2DArrayV2I16Clamp:return "NVPTXISD::Suld2DArrayV2I16Clamp"; | |||
728 | case NVPTXISD::Suld2DArrayV2I32Clamp:return "NVPTXISD::Suld2DArrayV2I32Clamp"; | |||
729 | case NVPTXISD::Suld2DArrayV2I64Clamp:return "NVPTXISD::Suld2DArrayV2I64Clamp"; | |||
730 | case NVPTXISD::Suld2DArrayV4I8Clamp: return "NVPTXISD::Suld2DArrayV4I8Clamp"; | |||
731 | case NVPTXISD::Suld2DArrayV4I16Clamp:return "NVPTXISD::Suld2DArrayV4I16Clamp"; | |||
732 | case NVPTXISD::Suld2DArrayV4I32Clamp:return "NVPTXISD::Suld2DArrayV4I32Clamp"; | |||
733 | ||||
734 | case NVPTXISD::Suld3DI8Clamp: return "NVPTXISD::Suld3DI8Clamp"; | |||
735 | case NVPTXISD::Suld3DI16Clamp: return "NVPTXISD::Suld3DI16Clamp"; | |||
736 | case NVPTXISD::Suld3DI32Clamp: return "NVPTXISD::Suld3DI32Clamp"; | |||
737 | case NVPTXISD::Suld3DI64Clamp: return "NVPTXISD::Suld3DI64Clamp"; | |||
738 | case NVPTXISD::Suld3DV2I8Clamp: return "NVPTXISD::Suld3DV2I8Clamp"; | |||
739 | case NVPTXISD::Suld3DV2I16Clamp: return "NVPTXISD::Suld3DV2I16Clamp"; | |||
740 | case NVPTXISD::Suld3DV2I32Clamp: return "NVPTXISD::Suld3DV2I32Clamp"; | |||
741 | case NVPTXISD::Suld3DV2I64Clamp: return "NVPTXISD::Suld3DV2I64Clamp"; | |||
742 | case NVPTXISD::Suld3DV4I8Clamp: return "NVPTXISD::Suld3DV4I8Clamp"; | |||
743 | case NVPTXISD::Suld3DV4I16Clamp: return "NVPTXISD::Suld3DV4I16Clamp"; | |||
744 | case NVPTXISD::Suld3DV4I32Clamp: return "NVPTXISD::Suld3DV4I32Clamp"; | |||
745 | ||||
746 | case NVPTXISD::Suld1DI8Trap: return "NVPTXISD::Suld1DI8Trap"; | |||
747 | case NVPTXISD::Suld1DI16Trap: return "NVPTXISD::Suld1DI16Trap"; | |||
748 | case NVPTXISD::Suld1DI32Trap: return "NVPTXISD::Suld1DI32Trap"; | |||
749 | case NVPTXISD::Suld1DI64Trap: return "NVPTXISD::Suld1DI64Trap"; | |||
750 | case NVPTXISD::Suld1DV2I8Trap: return "NVPTXISD::Suld1DV2I8Trap"; | |||
751 | case NVPTXISD::Suld1DV2I16Trap: return "NVPTXISD::Suld1DV2I16Trap"; | |||
752 | case NVPTXISD::Suld1DV2I32Trap: return "NVPTXISD::Suld1DV2I32Trap"; | |||
753 | case NVPTXISD::Suld1DV2I64Trap: return "NVPTXISD::Suld1DV2I64Trap"; | |||
754 | case NVPTXISD::Suld1DV4I8Trap: return "NVPTXISD::Suld1DV4I8Trap"; | |||
755 | case NVPTXISD::Suld1DV4I16Trap: return "NVPTXISD::Suld1DV4I16Trap"; | |||
756 | case NVPTXISD::Suld1DV4I32Trap: return "NVPTXISD::Suld1DV4I32Trap"; | |||
757 | ||||
758 | case NVPTXISD::Suld1DArrayI8Trap: return "NVPTXISD::Suld1DArrayI8Trap"; | |||
759 | case NVPTXISD::Suld1DArrayI16Trap: return "NVPTXISD::Suld1DArrayI16Trap"; | |||
760 | case NVPTXISD::Suld1DArrayI32Trap: return "NVPTXISD::Suld1DArrayI32Trap"; | |||
761 | case NVPTXISD::Suld1DArrayI64Trap: return "NVPTXISD::Suld1DArrayI64Trap"; | |||
762 | case NVPTXISD::Suld1DArrayV2I8Trap: return "NVPTXISD::Suld1DArrayV2I8Trap"; | |||
763 | case NVPTXISD::Suld1DArrayV2I16Trap: return "NVPTXISD::Suld1DArrayV2I16Trap"; | |||
764 | case NVPTXISD::Suld1DArrayV2I32Trap: return "NVPTXISD::Suld1DArrayV2I32Trap"; | |||
765 | case NVPTXISD::Suld1DArrayV2I64Trap: return "NVPTXISD::Suld1DArrayV2I64Trap"; | |||
766 | case NVPTXISD::Suld1DArrayV4I8Trap: return "NVPTXISD::Suld1DArrayV4I8Trap"; | |||
767 | case NVPTXISD::Suld1DArrayV4I16Trap: return "NVPTXISD::Suld1DArrayV4I16Trap"; | |||
768 | case NVPTXISD::Suld1DArrayV4I32Trap: return "NVPTXISD::Suld1DArrayV4I32Trap"; | |||
769 | ||||
770 | case NVPTXISD::Suld2DI8Trap: return "NVPTXISD::Suld2DI8Trap"; | |||
771 | case NVPTXISD::Suld2DI16Trap: return "NVPTXISD::Suld2DI16Trap"; | |||
772 | case NVPTXISD::Suld2DI32Trap: return "NVPTXISD::Suld2DI32Trap"; | |||
773 | case NVPTXISD::Suld2DI64Trap: return "NVPTXISD::Suld2DI64Trap"; | |||
774 | case NVPTXISD::Suld2DV2I8Trap: return "NVPTXISD::Suld2DV2I8Trap"; | |||
775 | case NVPTXISD::Suld2DV2I16Trap: return "NVPTXISD::Suld2DV2I16Trap"; | |||
776 | case NVPTXISD::Suld2DV2I32Trap: return "NVPTXISD::Suld2DV2I32Trap"; | |||
777 | case NVPTXISD::Suld2DV2I64Trap: return "NVPTXISD::Suld2DV2I64Trap"; | |||
778 | case NVPTXISD::Suld2DV4I8Trap: return "NVPTXISD::Suld2DV4I8Trap"; | |||
779 | case NVPTXISD::Suld2DV4I16Trap: return "NVPTXISD::Suld2DV4I16Trap"; | |||
780 | case NVPTXISD::Suld2DV4I32Trap: return "NVPTXISD::Suld2DV4I32Trap"; | |||
781 | ||||
782 | case NVPTXISD::Suld2DArrayI8Trap: return "NVPTXISD::Suld2DArrayI8Trap"; | |||
783 | case NVPTXISD::Suld2DArrayI16Trap: return "NVPTXISD::Suld2DArrayI16Trap"; | |||
784 | case NVPTXISD::Suld2DArrayI32Trap: return "NVPTXISD::Suld2DArrayI32Trap"; | |||
785 | case NVPTXISD::Suld2DArrayI64Trap: return "NVPTXISD::Suld2DArrayI64Trap"; | |||
786 | case NVPTXISD::Suld2DArrayV2I8Trap: return "NVPTXISD::Suld2DArrayV2I8Trap"; | |||
787 | case NVPTXISD::Suld2DArrayV2I16Trap: return "NVPTXISD::Suld2DArrayV2I16Trap"; | |||
788 | case NVPTXISD::Suld2DArrayV2I32Trap: return "NVPTXISD::Suld2DArrayV2I32Trap"; | |||
789 | case NVPTXISD::Suld2DArrayV2I64Trap: return "NVPTXISD::Suld2DArrayV2I64Trap"; | |||
790 | case NVPTXISD::Suld2DArrayV4I8Trap: return "NVPTXISD::Suld2DArrayV4I8Trap"; | |||
791 | case NVPTXISD::Suld2DArrayV4I16Trap: return "NVPTXISD::Suld2DArrayV4I16Trap"; | |||
792 | case NVPTXISD::Suld2DArrayV4I32Trap: return "NVPTXISD::Suld2DArrayV4I32Trap"; | |||
793 | ||||
794 | case NVPTXISD::Suld3DI8Trap: return "NVPTXISD::Suld3DI8Trap"; | |||
795 | case NVPTXISD::Suld3DI16Trap: return "NVPTXISD::Suld3DI16Trap"; | |||
796 | case NVPTXISD::Suld3DI32Trap: return "NVPTXISD::Suld3DI32Trap"; | |||
797 | case NVPTXISD::Suld3DI64Trap: return "NVPTXISD::Suld3DI64Trap"; | |||
798 | case NVPTXISD::Suld3DV2I8Trap: return "NVPTXISD::Suld3DV2I8Trap"; | |||
799 | case NVPTXISD::Suld3DV2I16Trap: return "NVPTXISD::Suld3DV2I16Trap"; | |||
800 | case NVPTXISD::Suld3DV2I32Trap: return "NVPTXISD::Suld3DV2I32Trap"; | |||
801 | case NVPTXISD::Suld3DV2I64Trap: return "NVPTXISD::Suld3DV2I64Trap"; | |||
802 | case NVPTXISD::Suld3DV4I8Trap: return "NVPTXISD::Suld3DV4I8Trap"; | |||
803 | case NVPTXISD::Suld3DV4I16Trap: return "NVPTXISD::Suld3DV4I16Trap"; | |||
804 | case NVPTXISD::Suld3DV4I32Trap: return "NVPTXISD::Suld3DV4I32Trap"; | |||
805 | ||||
806 | case NVPTXISD::Suld1DI8Zero: return "NVPTXISD::Suld1DI8Zero"; | |||
807 | case NVPTXISD::Suld1DI16Zero: return "NVPTXISD::Suld1DI16Zero"; | |||
808 | case NVPTXISD::Suld1DI32Zero: return "NVPTXISD::Suld1DI32Zero"; | |||
809 | case NVPTXISD::Suld1DI64Zero: return "NVPTXISD::Suld1DI64Zero"; | |||
810 | case NVPTXISD::Suld1DV2I8Zero: return "NVPTXISD::Suld1DV2I8Zero"; | |||
811 | case NVPTXISD::Suld1DV2I16Zero: return "NVPTXISD::Suld1DV2I16Zero"; | |||
812 | case NVPTXISD::Suld1DV2I32Zero: return "NVPTXISD::Suld1DV2I32Zero"; | |||
813 | case NVPTXISD::Suld1DV2I64Zero: return "NVPTXISD::Suld1DV2I64Zero"; | |||
814 | case NVPTXISD::Suld1DV4I8Zero: return "NVPTXISD::Suld1DV4I8Zero"; | |||
815 | case NVPTXISD::Suld1DV4I16Zero: return "NVPTXISD::Suld1DV4I16Zero"; | |||
816 | case NVPTXISD::Suld1DV4I32Zero: return "NVPTXISD::Suld1DV4I32Zero"; | |||
817 | ||||
818 | case NVPTXISD::Suld1DArrayI8Zero: return "NVPTXISD::Suld1DArrayI8Zero"; | |||
819 | case NVPTXISD::Suld1DArrayI16Zero: return "NVPTXISD::Suld1DArrayI16Zero"; | |||
820 | case NVPTXISD::Suld1DArrayI32Zero: return "NVPTXISD::Suld1DArrayI32Zero"; | |||
821 | case NVPTXISD::Suld1DArrayI64Zero: return "NVPTXISD::Suld1DArrayI64Zero"; | |||
822 | case NVPTXISD::Suld1DArrayV2I8Zero: return "NVPTXISD::Suld1DArrayV2I8Zero"; | |||
823 | case NVPTXISD::Suld1DArrayV2I16Zero: return "NVPTXISD::Suld1DArrayV2I16Zero"; | |||
824 | case NVPTXISD::Suld1DArrayV2I32Zero: return "NVPTXISD::Suld1DArrayV2I32Zero"; | |||
825 | case NVPTXISD::Suld1DArrayV2I64Zero: return "NVPTXISD::Suld1DArrayV2I64Zero"; | |||
826 | case NVPTXISD::Suld1DArrayV4I8Zero: return "NVPTXISD::Suld1DArrayV4I8Zero"; | |||
827 | case NVPTXISD::Suld1DArrayV4I16Zero: return "NVPTXISD::Suld1DArrayV4I16Zero"; | |||
828 | case NVPTXISD::Suld1DArrayV4I32Zero: return "NVPTXISD::Suld1DArrayV4I32Zero"; | |||
829 | ||||
830 | case NVPTXISD::Suld2DI8Zero: return "NVPTXISD::Suld2DI8Zero"; | |||
831 | case NVPTXISD::Suld2DI16Zero: return "NVPTXISD::Suld2DI16Zero"; | |||
832 | case NVPTXISD::Suld2DI32Zero: return "NVPTXISD::Suld2DI32Zero"; | |||
833 | case NVPTXISD::Suld2DI64Zero: return "NVPTXISD::Suld2DI64Zero"; | |||
834 | case NVPTXISD::Suld2DV2I8Zero: return "NVPTXISD::Suld2DV2I8Zero"; | |||
835 | case NVPTXISD::Suld2DV2I16Zero: return "NVPTXISD::Suld2DV2I16Zero"; | |||
836 | case NVPTXISD::Suld2DV2I32Zero: return "NVPTXISD::Suld2DV2I32Zero"; | |||
837 | case NVPTXISD::Suld2DV2I64Zero: return "NVPTXISD::Suld2DV2I64Zero"; | |||
838 | case NVPTXISD::Suld2DV4I8Zero: return "NVPTXISD::Suld2DV4I8Zero"; | |||
839 | case NVPTXISD::Suld2DV4I16Zero: return "NVPTXISD::Suld2DV4I16Zero"; | |||
840 | case NVPTXISD::Suld2DV4I32Zero: return "NVPTXISD::Suld2DV4I32Zero"; | |||
841 | ||||
842 | case NVPTXISD::Suld2DArrayI8Zero: return "NVPTXISD::Suld2DArrayI8Zero"; | |||
843 | case NVPTXISD::Suld2DArrayI16Zero: return "NVPTXISD::Suld2DArrayI16Zero"; | |||
844 | case NVPTXISD::Suld2DArrayI32Zero: return "NVPTXISD::Suld2DArrayI32Zero"; | |||
845 | case NVPTXISD::Suld2DArrayI64Zero: return "NVPTXISD::Suld2DArrayI64Zero"; | |||
846 | case NVPTXISD::Suld2DArrayV2I8Zero: return "NVPTXISD::Suld2DArrayV2I8Zero"; | |||
847 | case NVPTXISD::Suld2DArrayV2I16Zero: return "NVPTXISD::Suld2DArrayV2I16Zero"; | |||
848 | case NVPTXISD::Suld2DArrayV2I32Zero: return "NVPTXISD::Suld2DArrayV2I32Zero"; | |||
849 | case NVPTXISD::Suld2DArrayV2I64Zero: return "NVPTXISD::Suld2DArrayV2I64Zero"; | |||
850 | case NVPTXISD::Suld2DArrayV4I8Zero: return "NVPTXISD::Suld2DArrayV4I8Zero"; | |||
851 | case NVPTXISD::Suld2DArrayV4I16Zero: return "NVPTXISD::Suld2DArrayV4I16Zero"; | |||
852 | case NVPTXISD::Suld2DArrayV4I32Zero: return "NVPTXISD::Suld2DArrayV4I32Zero"; | |||
853 | ||||
854 | case NVPTXISD::Suld3DI8Zero: return "NVPTXISD::Suld3DI8Zero"; | |||
855 | case NVPTXISD::Suld3DI16Zero: return "NVPTXISD::Suld3DI16Zero"; | |||
856 | case NVPTXISD::Suld3DI32Zero: return "NVPTXISD::Suld3DI32Zero"; | |||
857 | case NVPTXISD::Suld3DI64Zero: return "NVPTXISD::Suld3DI64Zero"; | |||
858 | case NVPTXISD::Suld3DV2I8Zero: return "NVPTXISD::Suld3DV2I8Zero"; | |||
859 | case NVPTXISD::Suld3DV2I16Zero: return "NVPTXISD::Suld3DV2I16Zero"; | |||
860 | case NVPTXISD::Suld3DV2I32Zero: return "NVPTXISD::Suld3DV2I32Zero"; | |||
861 | case NVPTXISD::Suld3DV2I64Zero: return "NVPTXISD::Suld3DV2I64Zero"; | |||
862 | case NVPTXISD::Suld3DV4I8Zero: return "NVPTXISD::Suld3DV4I8Zero"; | |||
863 | case NVPTXISD::Suld3DV4I16Zero: return "NVPTXISD::Suld3DV4I16Zero"; | |||
864 | case NVPTXISD::Suld3DV4I32Zero: return "NVPTXISD::Suld3DV4I32Zero"; | |||
865 | } | |||
866 | return nullptr; | |||
867 | } | |||
868 | ||||
869 | TargetLoweringBase::LegalizeTypeAction | |||
870 | NVPTXTargetLowering::getPreferredVectorAction(EVT VT) const { | |||
871 | if (VT.getVectorNumElements() != 1 && VT.getScalarType() == MVT::i1) | |||
872 | return TypeSplitVector; | |||
873 | ||||
874 | return TargetLoweringBase::getPreferredVectorAction(VT); | |||
875 | } | |||
876 | ||||
877 | SDValue | |||
878 | NVPTXTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { | |||
879 | SDLoc dl(Op); | |||
880 | const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); | |||
881 | Op = DAG.getTargetGlobalAddress(GV, dl, getPointerTy()); | |||
882 | return DAG.getNode(NVPTXISD::Wrapper, dl, getPointerTy(), Op); | |||
883 | } | |||
884 | ||||
885 | std::string | |||
886 | NVPTXTargetLowering::getPrototype(Type *retTy, const ArgListTy &Args, | |||
887 | const SmallVectorImpl<ISD::OutputArg> &Outs, | |||
888 | unsigned retAlignment, | |||
889 | const ImmutableCallSite *CS) const { | |||
890 | ||||
891 | bool isABI = (STI.getSmVersion() >= 20); | |||
892 | assert(isABI && "Non-ABI compilation is not supported")((isABI && "Non-ABI compilation is not supported") ? static_cast <void> (0) : __assert_fail ("isABI && \"Non-ABI compilation is not supported\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 892, __PRETTY_FUNCTION__)); | |||
893 | if (!isABI) | |||
894 | return ""; | |||
895 | ||||
896 | std::stringstream O; | |||
897 | O << "prototype_" << uniqueCallSite << " : .callprototype "; | |||
898 | ||||
899 | if (retTy->getTypeID() == Type::VoidTyID) { | |||
900 | O << "()"; | |||
901 | } else { | |||
902 | O << "("; | |||
903 | if (retTy->isFloatingPointTy() || retTy->isIntegerTy()) { | |||
904 | unsigned size = 0; | |||
905 | if (const IntegerType *ITy = dyn_cast<IntegerType>(retTy)) { | |||
906 | size = ITy->getBitWidth(); | |||
907 | if (size < 32) | |||
908 | size = 32; | |||
909 | } else { | |||
910 | assert(retTy->isFloatingPointTy() &&((retTy->isFloatingPointTy() && "Floating point type expected here" ) ? static_cast<void> (0) : __assert_fail ("retTy->isFloatingPointTy() && \"Floating point type expected here\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 911, __PRETTY_FUNCTION__)) | |||
911 | "Floating point type expected here")((retTy->isFloatingPointTy() && "Floating point type expected here" ) ? static_cast<void> (0) : __assert_fail ("retTy->isFloatingPointTy() && \"Floating point type expected here\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 911, __PRETTY_FUNCTION__)); | |||
912 | size = retTy->getPrimitiveSizeInBits(); | |||
913 | } | |||
914 | ||||
915 | O << ".param .b" << size << " _"; | |||
916 | } else if (isa<PointerType>(retTy)) { | |||
917 | O << ".param .b" << getPointerTy().getSizeInBits() << " _"; | |||
918 | } else if ((retTy->getTypeID() == Type::StructTyID) || | |||
919 | isa<VectorType>(retTy)) { | |||
920 | O << ".param .align " | |||
921 | << retAlignment | |||
922 | << " .b8 _[" | |||
923 | << getDataLayout()->getTypeAllocSize(retTy) << "]"; | |||
924 | } else { | |||
925 | llvm_unreachable("Unknown return type")::llvm::llvm_unreachable_internal("Unknown return type", "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 925); | |||
926 | } | |||
927 | O << ") "; | |||
928 | } | |||
929 | O << "_ ("; | |||
930 | ||||
931 | bool first = true; | |||
932 | MVT thePointerTy = getPointerTy(); | |||
933 | ||||
934 | unsigned OIdx = 0; | |||
935 | for (unsigned i = 0, e = Args.size(); i != e; ++i, ++OIdx) { | |||
936 | Type *Ty = Args[i].Ty; | |||
937 | if (!first) { | |||
938 | O << ", "; | |||
939 | } | |||
940 | first = false; | |||
941 | ||||
942 | if (!Outs[OIdx].Flags.isByVal()) { | |||
943 | if (Ty->isAggregateType() || Ty->isVectorTy()) { | |||
944 | unsigned align = 0; | |||
945 | const CallInst *CallI = cast<CallInst>(CS->getInstruction()); | |||
946 | const DataLayout *TD = getDataLayout(); | |||
947 | // +1 because index 0 is reserved for return type alignment | |||
948 | if (!llvm::getAlign(*CallI, i + 1, align)) | |||
949 | align = TD->getABITypeAlignment(Ty); | |||
950 | unsigned sz = TD->getTypeAllocSize(Ty); | |||
951 | O << ".param .align " << align << " .b8 "; | |||
952 | O << "_"; | |||
953 | O << "[" << sz << "]"; | |||
954 | // update the index for Outs | |||
955 | SmallVector<EVT, 16> vtparts; | |||
956 | ComputeValueVTs(*this, Ty, vtparts); | |||
957 | if (unsigned len = vtparts.size()) | |||
958 | OIdx += len - 1; | |||
959 | continue; | |||
960 | } | |||
961 | // i8 types in IR will be i16 types in SDAG | |||
962 | assert((getValueType(Ty) == Outs[OIdx].VT ||(((getValueType(Ty) == Outs[OIdx].VT || (getValueType(Ty) == MVT ::i8 && Outs[OIdx].VT == MVT::i16)) && "type mismatch between callee prototype and arguments" ) ? static_cast<void> (0) : __assert_fail ("(getValueType(Ty) == Outs[OIdx].VT || (getValueType(Ty) == MVT::i8 && Outs[OIdx].VT == MVT::i16)) && \"type mismatch between callee prototype and arguments\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 964, __PRETTY_FUNCTION__)) | |||
963 | (getValueType(Ty) == MVT::i8 && Outs[OIdx].VT == MVT::i16)) &&(((getValueType(Ty) == Outs[OIdx].VT || (getValueType(Ty) == MVT ::i8 && Outs[OIdx].VT == MVT::i16)) && "type mismatch between callee prototype and arguments" ) ? static_cast<void> (0) : __assert_fail ("(getValueType(Ty) == Outs[OIdx].VT || (getValueType(Ty) == MVT::i8 && Outs[OIdx].VT == MVT::i16)) && \"type mismatch between callee prototype and arguments\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 964, __PRETTY_FUNCTION__)) | |||
964 | "type mismatch between callee prototype and arguments")(((getValueType(Ty) == Outs[OIdx].VT || (getValueType(Ty) == MVT ::i8 && Outs[OIdx].VT == MVT::i16)) && "type mismatch between callee prototype and arguments" ) ? static_cast<void> (0) : __assert_fail ("(getValueType(Ty) == Outs[OIdx].VT || (getValueType(Ty) == MVT::i8 && Outs[OIdx].VT == MVT::i16)) && \"type mismatch between callee prototype and arguments\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 964, __PRETTY_FUNCTION__)); | |||
965 | // scalar type | |||
966 | unsigned sz = 0; | |||
967 | if (isa<IntegerType>(Ty)) { | |||
968 | sz = cast<IntegerType>(Ty)->getBitWidth(); | |||
969 | if (sz < 32) | |||
970 | sz = 32; | |||
971 | } else if (isa<PointerType>(Ty)) | |||
972 | sz = thePointerTy.getSizeInBits(); | |||
973 | else | |||
974 | sz = Ty->getPrimitiveSizeInBits(); | |||
975 | O << ".param .b" << sz << " "; | |||
976 | O << "_"; | |||
977 | continue; | |||
978 | } | |||
979 | const PointerType *PTy = dyn_cast<PointerType>(Ty); | |||
980 | assert(PTy && "Param with byval attribute should be a pointer type")((PTy && "Param with byval attribute should be a pointer type" ) ? static_cast<void> (0) : __assert_fail ("PTy && \"Param with byval attribute should be a pointer type\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 980, __PRETTY_FUNCTION__)); | |||
981 | Type *ETy = PTy->getElementType(); | |||
982 | ||||
983 | unsigned align = Outs[OIdx].Flags.getByValAlign(); | |||
984 | unsigned sz = getDataLayout()->getTypeAllocSize(ETy); | |||
985 | O << ".param .align " << align << " .b8 "; | |||
986 | O << "_"; | |||
987 | O << "[" << sz << "]"; | |||
988 | } | |||
989 | O << ");"; | |||
990 | return O.str(); | |||
991 | } | |||
992 | ||||
993 | unsigned | |||
994 | NVPTXTargetLowering::getArgumentAlignment(SDValue Callee, | |||
995 | const ImmutableCallSite *CS, | |||
996 | Type *Ty, | |||
997 | unsigned Idx) const { | |||
998 | const DataLayout *TD = getDataLayout(); | |||
999 | unsigned Align = 0; | |||
1000 | const Value *DirectCallee = CS->getCalledFunction(); | |||
1001 | ||||
1002 | if (!DirectCallee) { | |||
1003 | // We don't have a direct function symbol, but that may be because of | |||
1004 | // constant cast instructions in the call. | |||
1005 | const Instruction *CalleeI = CS->getInstruction(); | |||
1006 | assert(CalleeI && "Call target is not a function or derived value?")((CalleeI && "Call target is not a function or derived value?" ) ? static_cast<void> (0) : __assert_fail ("CalleeI && \"Call target is not a function or derived value?\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 1006, __PRETTY_FUNCTION__)); | |||
1007 | ||||
1008 | // With bitcast'd call targets, the instruction will be the call | |||
1009 | if (isa<CallInst>(CalleeI)) { | |||
1010 | // Check if we have call alignment metadata | |||
1011 | if (llvm::getAlign(*cast<CallInst>(CalleeI), Idx, Align)) | |||
1012 | return Align; | |||
1013 | ||||
1014 | const Value *CalleeV = cast<CallInst>(CalleeI)->getCalledValue(); | |||
1015 | // Ignore any bitcast instructions | |||
1016 | while(isa<ConstantExpr>(CalleeV)) { | |||
1017 | const ConstantExpr *CE = cast<ConstantExpr>(CalleeV); | |||
1018 | if (!CE->isCast()) | |||
1019 | break; | |||
1020 | // Look through the bitcast | |||
1021 | CalleeV = cast<ConstantExpr>(CalleeV)->getOperand(0); | |||
1022 | } | |||
1023 | ||||
1024 | // We have now looked past all of the bitcasts. Do we finally have a | |||
1025 | // Function? | |||
1026 | if (isa<Function>(CalleeV)) | |||
1027 | DirectCallee = CalleeV; | |||
1028 | } | |||
1029 | } | |||
1030 | ||||
1031 | // Check for function alignment information if we found that the | |||
1032 | // ultimate target is a Function | |||
1033 | if (DirectCallee) | |||
1034 | if (llvm::getAlign(*cast<Function>(DirectCallee), Idx, Align)) | |||
1035 | return Align; | |||
1036 | ||||
1037 | // Call is indirect or alignment information is not available, fall back to | |||
1038 | // the ABI type alignment | |||
1039 | return TD->getABITypeAlignment(Ty); | |||
1040 | } | |||
1041 | ||||
1042 | SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, | |||
1043 | SmallVectorImpl<SDValue> &InVals) const { | |||
1044 | SelectionDAG &DAG = CLI.DAG; | |||
1045 | SDLoc dl = CLI.DL; | |||
1046 | SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; | |||
1047 | SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; | |||
1048 | SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; | |||
1049 | SDValue Chain = CLI.Chain; | |||
1050 | SDValue Callee = CLI.Callee; | |||
1051 | bool &isTailCall = CLI.IsTailCall; | |||
1052 | ArgListTy &Args = CLI.getArgs(); | |||
1053 | Type *retTy = CLI.RetTy; | |||
| ||||
1054 | ImmutableCallSite *CS = CLI.CS; | |||
1055 | ||||
1056 | bool isABI = (STI.getSmVersion() >= 20); | |||
1057 | assert(isABI && "Non-ABI compilation is not supported")((isABI && "Non-ABI compilation is not supported") ? static_cast <void> (0) : __assert_fail ("isABI && \"Non-ABI compilation is not supported\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 1057, __PRETTY_FUNCTION__)); | |||
1058 | if (!isABI) | |||
1059 | return Chain; | |||
1060 | const DataLayout *TD = getDataLayout(); | |||
1061 | MachineFunction &MF = DAG.getMachineFunction(); | |||
1062 | const Function *F = MF.getFunction(); | |||
1063 | ||||
1064 | SDValue tempChain = Chain; | |||
1065 | Chain = DAG.getCALLSEQ_START(Chain, | |||
1066 | DAG.getIntPtrConstant(uniqueCallSite, dl, true), | |||
1067 | dl); | |||
1068 | SDValue InFlag = Chain.getValue(1); | |||
1069 | ||||
1070 | unsigned paramCount = 0; | |||
1071 | // Args.size() and Outs.size() need not match. | |||
1072 | // Outs.size() will be larger | |||
1073 | // * if there is an aggregate argument with multiple fields (each field | |||
1074 | // showing up separately in Outs) | |||
1075 | // * if there is a vector argument with more than typical vector-length | |||
1076 | // elements (generally if more than 4) where each vector element is | |||
1077 | // individually present in Outs. | |||
1078 | // So a different index should be used for indexing into Outs/OutVals. | |||
1079 | // See similar issue in LowerFormalArguments. | |||
1080 | unsigned OIdx = 0; | |||
1081 | // Declare the .params or .reg need to pass values | |||
1082 | // to the function | |||
1083 | for (unsigned i = 0, e = Args.size(); i != e; ++i, ++OIdx) { | |||
1084 | EVT VT = Outs[OIdx].VT; | |||
1085 | Type *Ty = Args[i].Ty; | |||
1086 | ||||
1087 | if (!Outs[OIdx].Flags.isByVal()) { | |||
1088 | if (Ty->isAggregateType()) { | |||
1089 | // aggregate | |||
1090 | SmallVector<EVT, 16> vtparts; | |||
1091 | SmallVector<uint64_t, 16> Offsets; | |||
1092 | ComputePTXValueVTs(*this, Ty, vtparts, &Offsets, 0); | |||
1093 | ||||
1094 | unsigned align = getArgumentAlignment(Callee, CS, Ty, paramCount + 1); | |||
1095 | // declare .param .align <align> .b8 .param<n>[<size>]; | |||
1096 | unsigned sz = TD->getTypeAllocSize(Ty); | |||
1097 | SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); | |||
1098 | SDValue DeclareParamOps[] = { Chain, DAG.getConstant(align, dl, | |||
1099 | MVT::i32), | |||
1100 | DAG.getConstant(paramCount, dl, MVT::i32), | |||
1101 | DAG.getConstant(sz, dl, MVT::i32), | |||
1102 | InFlag }; | |||
1103 | Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs, | |||
1104 | DeclareParamOps); | |||
1105 | InFlag = Chain.getValue(1); | |||
1106 | for (unsigned j = 0, je = vtparts.size(); j != je; ++j) { | |||
1107 | EVT elemtype = vtparts[j]; | |||
1108 | unsigned ArgAlign = GreatestCommonDivisor64(align, Offsets[j]); | |||
1109 | if (elemtype.isInteger() && (sz < 8)) | |||
1110 | sz = 8; | |||
1111 | SDValue StVal = OutVals[OIdx]; | |||
1112 | if (elemtype.getSizeInBits() < 16) { | |||
1113 | StVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, StVal); | |||
1114 | } | |||
1115 | SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); | |||
1116 | SDValue CopyParamOps[] = { Chain, | |||
1117 | DAG.getConstant(paramCount, dl, MVT::i32), | |||
1118 | DAG.getConstant(Offsets[j], dl, MVT::i32), | |||
1119 | StVal, InFlag }; | |||
1120 | Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl, | |||
1121 | CopyParamVTs, CopyParamOps, | |||
1122 | elemtype, MachinePointerInfo(), | |||
1123 | ArgAlign); | |||
1124 | InFlag = Chain.getValue(1); | |||
1125 | ++OIdx; | |||
1126 | } | |||
1127 | if (vtparts.size() > 0) | |||
1128 | --OIdx; | |||
1129 | ++paramCount; | |||
1130 | continue; | |||
1131 | } | |||
1132 | if (Ty->isVectorTy()) { | |||
1133 | EVT ObjectVT = getValueType(Ty); | |||
1134 | unsigned align = getArgumentAlignment(Callee, CS, Ty, paramCount + 1); | |||
1135 | // declare .param .align <align> .b8 .param<n>[<size>]; | |||
1136 | unsigned sz = TD->getTypeAllocSize(Ty); | |||
1137 | SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); | |||
1138 | SDValue DeclareParamOps[] = { Chain, | |||
1139 | DAG.getConstant(align, dl, MVT::i32), | |||
1140 | DAG.getConstant(paramCount, dl, MVT::i32), | |||
1141 | DAG.getConstant(sz, dl, MVT::i32), | |||
1142 | InFlag }; | |||
1143 | Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs, | |||
1144 | DeclareParamOps); | |||
1145 | InFlag = Chain.getValue(1); | |||
1146 | unsigned NumElts = ObjectVT.getVectorNumElements(); | |||
1147 | EVT EltVT = ObjectVT.getVectorElementType(); | |||
1148 | EVT MemVT = EltVT; | |||
1149 | bool NeedExtend = false; | |||
1150 | if (EltVT.getSizeInBits() < 16) { | |||
1151 | NeedExtend = true; | |||
1152 | EltVT = MVT::i16; | |||
1153 | } | |||
1154 | ||||
1155 | // V1 store | |||
1156 | if (NumElts == 1) { | |||
1157 | SDValue Elt = OutVals[OIdx++]; | |||
1158 | if (NeedExtend) | |||
1159 | Elt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Elt); | |||
1160 | ||||
1161 | SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); | |||
1162 | SDValue CopyParamOps[] = { Chain, | |||
1163 | DAG.getConstant(paramCount, dl, MVT::i32), | |||
1164 | DAG.getConstant(0, dl, MVT::i32), Elt, | |||
1165 | InFlag }; | |||
1166 | Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl, | |||
1167 | CopyParamVTs, CopyParamOps, | |||
1168 | MemVT, MachinePointerInfo()); | |||
1169 | InFlag = Chain.getValue(1); | |||
1170 | } else if (NumElts == 2) { | |||
1171 | SDValue Elt0 = OutVals[OIdx++]; | |||
1172 | SDValue Elt1 = OutVals[OIdx++]; | |||
1173 | if (NeedExtend) { | |||
1174 | Elt0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Elt0); | |||
1175 | Elt1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Elt1); | |||
1176 | } | |||
1177 | ||||
1178 | SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); | |||
1179 | SDValue CopyParamOps[] = { Chain, | |||
1180 | DAG.getConstant(paramCount, dl, MVT::i32), | |||
1181 | DAG.getConstant(0, dl, MVT::i32), Elt0, | |||
1182 | Elt1, InFlag }; | |||
1183 | Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParamV2, dl, | |||
1184 | CopyParamVTs, CopyParamOps, | |||
1185 | MemVT, MachinePointerInfo()); | |||
1186 | InFlag = Chain.getValue(1); | |||
1187 | } else { | |||
1188 | unsigned curOffset = 0; | |||
1189 | // V4 stores | |||
1190 | // We have at least 4 elements (<3 x Ty> expands to 4 elements) and | |||
1191 | // the | |||
1192 | // vector will be expanded to a power of 2 elements, so we know we can | |||
1193 | // always round up to the next multiple of 4 when creating the vector | |||
1194 | // stores. | |||
1195 | // e.g. 4 elem => 1 st.v4 | |||
1196 | // 6 elem => 2 st.v4 | |||
1197 | // 8 elem => 2 st.v4 | |||
1198 | // 11 elem => 3 st.v4 | |||
1199 | unsigned VecSize = 4; | |||
1200 | if (EltVT.getSizeInBits() == 64) | |||
1201 | VecSize = 2; | |||
1202 | ||||
1203 | // This is potentially only part of a vector, so assume all elements | |||
1204 | // are packed together. | |||
1205 | unsigned PerStoreOffset = MemVT.getStoreSizeInBits() / 8 * VecSize; | |||
1206 | ||||
1207 | for (unsigned i = 0; i < NumElts; i += VecSize) { | |||
1208 | // Get values | |||
1209 | SDValue StoreVal; | |||
1210 | SmallVector<SDValue, 8> Ops; | |||
1211 | Ops.push_back(Chain); | |||
1212 | Ops.push_back(DAG.getConstant(paramCount, dl, MVT::i32)); | |||
1213 | Ops.push_back(DAG.getConstant(curOffset, dl, MVT::i32)); | |||
1214 | ||||
1215 | unsigned Opc = NVPTXISD::StoreParamV2; | |||
1216 | ||||
1217 | StoreVal = OutVals[OIdx++]; | |||
1218 | if (NeedExtend) | |||
1219 | StoreVal = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal); | |||
1220 | Ops.push_back(StoreVal); | |||
1221 | ||||
1222 | if (i + 1 < NumElts) { | |||
1223 | StoreVal = OutVals[OIdx++]; | |||
1224 | if (NeedExtend) | |||
1225 | StoreVal = | |||
1226 | DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal); | |||
1227 | } else { | |||
1228 | StoreVal = DAG.getUNDEF(EltVT); | |||
1229 | } | |||
1230 | Ops.push_back(StoreVal); | |||
1231 | ||||
1232 | if (VecSize == 4) { | |||
1233 | Opc = NVPTXISD::StoreParamV4; | |||
1234 | if (i + 2 < NumElts) { | |||
1235 | StoreVal = OutVals[OIdx++]; | |||
1236 | if (NeedExtend) | |||
1237 | StoreVal = | |||
1238 | DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal); | |||
1239 | } else { | |||
1240 | StoreVal = DAG.getUNDEF(EltVT); | |||
1241 | } | |||
1242 | Ops.push_back(StoreVal); | |||
1243 | ||||
1244 | if (i + 3 < NumElts) { | |||
1245 | StoreVal = OutVals[OIdx++]; | |||
1246 | if (NeedExtend) | |||
1247 | StoreVal = | |||
1248 | DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal); | |||
1249 | } else { | |||
1250 | StoreVal = DAG.getUNDEF(EltVT); | |||
1251 | } | |||
1252 | Ops.push_back(StoreVal); | |||
1253 | } | |||
1254 | ||||
1255 | Ops.push_back(InFlag); | |||
1256 | ||||
1257 | SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); | |||
1258 | Chain = DAG.getMemIntrinsicNode(Opc, dl, CopyParamVTs, Ops, | |||
1259 | MemVT, MachinePointerInfo()); | |||
1260 | InFlag = Chain.getValue(1); | |||
1261 | curOffset += PerStoreOffset; | |||
1262 | } | |||
1263 | } | |||
1264 | ++paramCount; | |||
1265 | --OIdx; | |||
1266 | continue; | |||
1267 | } | |||
1268 | // Plain scalar | |||
1269 | // for ABI, declare .param .b<size> .param<n>; | |||
1270 | unsigned sz = VT.getSizeInBits(); | |||
1271 | bool needExtend = false; | |||
1272 | if (VT.isInteger()) { | |||
1273 | if (sz < 16) | |||
1274 | needExtend = true; | |||
1275 | if (sz < 32) | |||
1276 | sz = 32; | |||
1277 | } | |||
1278 | SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); | |||
1279 | SDValue DeclareParamOps[] = { Chain, | |||
1280 | DAG.getConstant(paramCount, dl, MVT::i32), | |||
1281 | DAG.getConstant(sz, dl, MVT::i32), | |||
1282 | DAG.getConstant(0, dl, MVT::i32), InFlag }; | |||
1283 | Chain = DAG.getNode(NVPTXISD::DeclareScalarParam, dl, DeclareParamVTs, | |||
1284 | DeclareParamOps); | |||
1285 | InFlag = Chain.getValue(1); | |||
1286 | SDValue OutV = OutVals[OIdx]; | |||
1287 | if (needExtend) { | |||
1288 | // zext/sext i1 to i16 | |||
1289 | unsigned opc = ISD::ZERO_EXTEND; | |||
1290 | if (Outs[OIdx].Flags.isSExt()) | |||
1291 | opc = ISD::SIGN_EXTEND; | |||
1292 | OutV = DAG.getNode(opc, dl, MVT::i16, OutV); | |||
1293 | } | |||
1294 | SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); | |||
1295 | SDValue CopyParamOps[] = { Chain, | |||
1296 | DAG.getConstant(paramCount, dl, MVT::i32), | |||
1297 | DAG.getConstant(0, dl, MVT::i32), OutV, | |||
1298 | InFlag }; | |||
1299 | ||||
1300 | unsigned opcode = NVPTXISD::StoreParam; | |||
1301 | if (Outs[OIdx].Flags.isZExt()) | |||
1302 | opcode = NVPTXISD::StoreParamU32; | |||
1303 | else if (Outs[OIdx].Flags.isSExt()) | |||
1304 | opcode = NVPTXISD::StoreParamS32; | |||
1305 | Chain = DAG.getMemIntrinsicNode(opcode, dl, CopyParamVTs, CopyParamOps, | |||
1306 | VT, MachinePointerInfo()); | |||
1307 | ||||
1308 | InFlag = Chain.getValue(1); | |||
1309 | ++paramCount; | |||
1310 | continue; | |||
1311 | } | |||
1312 | // struct or vector | |||
1313 | SmallVector<EVT, 16> vtparts; | |||
1314 | SmallVector<uint64_t, 16> Offsets; | |||
1315 | const PointerType *PTy = dyn_cast<PointerType>(Args[i].Ty); | |||
1316 | assert(PTy && "Type of a byval parameter should be pointer")((PTy && "Type of a byval parameter should be pointer" ) ? static_cast<void> (0) : __assert_fail ("PTy && \"Type of a byval parameter should be pointer\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 1316, __PRETTY_FUNCTION__)); | |||
1317 | ComputePTXValueVTs(*this, PTy->getElementType(), vtparts, &Offsets, 0); | |||
1318 | ||||
1319 | // declare .param .align <align> .b8 .param<n>[<size>]; | |||
1320 | unsigned sz = Outs[OIdx].Flags.getByValSize(); | |||
1321 | SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); | |||
1322 | unsigned ArgAlign = Outs[OIdx].Flags.getByValAlign(); | |||
1323 | // The ByValAlign in the Outs[OIdx].Flags is alway set at this point, | |||
1324 | // so we don't need to worry about natural alignment or not. | |||
1325 | // See TargetLowering::LowerCallTo(). | |||
1326 | SDValue DeclareParamOps[] = { | |||
1327 | Chain, DAG.getConstant(Outs[OIdx].Flags.getByValAlign(), dl, MVT::i32), | |||
1328 | DAG.getConstant(paramCount, dl, MVT::i32), | |||
1329 | DAG.getConstant(sz, dl, MVT::i32), InFlag | |||
1330 | }; | |||
1331 | Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs, | |||
1332 | DeclareParamOps); | |||
1333 | InFlag = Chain.getValue(1); | |||
1334 | for (unsigned j = 0, je = vtparts.size(); j != je; ++j) { | |||
1335 | EVT elemtype = vtparts[j]; | |||
1336 | int curOffset = Offsets[j]; | |||
1337 | unsigned PartAlign = GreatestCommonDivisor64(ArgAlign, curOffset); | |||
1338 | SDValue srcAddr = | |||
1339 | DAG.getNode(ISD::ADD, dl, getPointerTy(), OutVals[OIdx], | |||
1340 | DAG.getConstant(curOffset, dl, getPointerTy())); | |||
1341 | SDValue theVal = DAG.getLoad(elemtype, dl, tempChain, srcAddr, | |||
1342 | MachinePointerInfo(), false, false, false, | |||
1343 | PartAlign); | |||
1344 | if (elemtype.getSizeInBits() < 16) { | |||
1345 | theVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, theVal); | |||
1346 | } | |||
1347 | SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); | |||
1348 | SDValue CopyParamOps[] = { Chain, | |||
1349 | DAG.getConstant(paramCount, dl, MVT::i32), | |||
1350 | DAG.getConstant(curOffset, dl, MVT::i32), | |||
1351 | theVal, InFlag }; | |||
1352 | Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl, CopyParamVTs, | |||
1353 | CopyParamOps, elemtype, | |||
1354 | MachinePointerInfo()); | |||
1355 | ||||
1356 | InFlag = Chain.getValue(1); | |||
1357 | } | |||
1358 | ++paramCount; | |||
1359 | } | |||
1360 | ||||
1361 | GlobalAddressSDNode *Func = dyn_cast<GlobalAddressSDNode>(Callee.getNode()); | |||
1362 | unsigned retAlignment = 0; | |||
1363 | ||||
1364 | // Handle Result | |||
1365 | if (Ins.size() > 0) { | |||
1366 | SmallVector<EVT, 16> resvtparts; | |||
1367 | ComputeValueVTs(*this, retTy, resvtparts); | |||
1368 | ||||
1369 | // Declare | |||
1370 | // .param .align 16 .b8 retval0[<size-in-bytes>], or | |||
1371 | // .param .b<size-in-bits> retval0 | |||
1372 | unsigned resultsz = TD->getTypeAllocSizeInBits(retTy); | |||
1373 | // Emit ".param .b<size-in-bits> retval0" instead of byte arrays only for | |||
1374 | // these three types to match the logic in | |||
1375 | // NVPTXAsmPrinter::printReturnValStr and NVPTXTargetLowering::getPrototype. | |||
1376 | // Plus, this behavior is consistent with nvcc's. | |||
1377 | if (retTy->isFloatingPointTy() || retTy->isIntegerTy() || | |||
1378 | retTy->isPointerTy()) { | |||
1379 | // Scalar needs to be at least 32bit wide | |||
1380 | if (resultsz < 32) | |||
1381 | resultsz = 32; | |||
1382 | SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue); | |||
1383 | SDValue DeclareRetOps[] = { Chain, DAG.getConstant(1, dl, MVT::i32), | |||
1384 | DAG.getConstant(resultsz, dl, MVT::i32), | |||
1385 | DAG.getConstant(0, dl, MVT::i32), InFlag }; | |||
1386 | Chain = DAG.getNode(NVPTXISD::DeclareRet, dl, DeclareRetVTs, | |||
1387 | DeclareRetOps); | |||
1388 | InFlag = Chain.getValue(1); | |||
1389 | } else { | |||
1390 | retAlignment = getArgumentAlignment(Callee, CS, retTy, 0); | |||
1391 | SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue); | |||
1392 | SDValue DeclareRetOps[] = { Chain, | |||
1393 | DAG.getConstant(retAlignment, dl, MVT::i32), | |||
1394 | DAG.getConstant(resultsz / 8, dl, MVT::i32), | |||
1395 | DAG.getConstant(0, dl, MVT::i32), InFlag }; | |||
1396 | Chain = DAG.getNode(NVPTXISD::DeclareRetParam, dl, DeclareRetVTs, | |||
1397 | DeclareRetOps); | |||
1398 | InFlag = Chain.getValue(1); | |||
1399 | } | |||
1400 | } | |||
1401 | ||||
1402 | if (!Func) { | |||
1403 | // This is indirect function call case : PTX requires a prototype of the | |||
1404 | // form | |||
1405 | // proto_0 : .callprototype(.param .b32 _) _ (.param .b32 _); | |||
1406 | // to be emitted, and the label has to used as the last arg of call | |||
1407 | // instruction. | |||
1408 | // The prototype is embedded in a string and put as the operand for a | |||
1409 | // CallPrototype SDNode which will print out to the value of the string. | |||
1410 | SDVTList ProtoVTs = DAG.getVTList(MVT::Other, MVT::Glue); | |||
1411 | std::string Proto = getPrototype(retTy, Args, Outs, retAlignment, CS); | |||
1412 | const char *ProtoStr = | |||
1413 | nvTM->getManagedStrPool()->getManagedString(Proto.c_str())->c_str(); | |||
1414 | SDValue ProtoOps[] = { | |||
1415 | Chain, DAG.getTargetExternalSymbol(ProtoStr, MVT::i32), InFlag, | |||
1416 | }; | |||
1417 | Chain = DAG.getNode(NVPTXISD::CallPrototype, dl, ProtoVTs, ProtoOps); | |||
1418 | InFlag = Chain.getValue(1); | |||
1419 | } | |||
1420 | // Op to just print "call" | |||
1421 | SDVTList PrintCallVTs = DAG.getVTList(MVT::Other, MVT::Glue); | |||
1422 | SDValue PrintCallOps[] = { | |||
1423 | Chain, DAG.getConstant((Ins.size() == 0) ? 0 : 1, dl, MVT::i32), InFlag | |||
1424 | }; | |||
1425 | Chain = DAG.getNode(Func ? (NVPTXISD::PrintCallUni) : (NVPTXISD::PrintCall), | |||
1426 | dl, PrintCallVTs, PrintCallOps); | |||
1427 | InFlag = Chain.getValue(1); | |||
1428 | ||||
1429 | // Ops to print out the function name | |||
1430 | SDVTList CallVoidVTs = DAG.getVTList(MVT::Other, MVT::Glue); | |||
1431 | SDValue CallVoidOps[] = { Chain, Callee, InFlag }; | |||
1432 | Chain = DAG.getNode(NVPTXISD::CallVoid, dl, CallVoidVTs, CallVoidOps); | |||
1433 | InFlag = Chain.getValue(1); | |||
1434 | ||||
1435 | // Ops to print out the param list | |||
1436 | SDVTList CallArgBeginVTs = DAG.getVTList(MVT::Other, MVT::Glue); | |||
1437 | SDValue CallArgBeginOps[] = { Chain, InFlag }; | |||
1438 | Chain = DAG.getNode(NVPTXISD::CallArgBegin, dl, CallArgBeginVTs, | |||
1439 | CallArgBeginOps); | |||
1440 | InFlag = Chain.getValue(1); | |||
1441 | ||||
1442 | for (unsigned i = 0, e = paramCount; i != e; ++i) { | |||
1443 | unsigned opcode; | |||
1444 | if (i == (e - 1)) | |||
1445 | opcode = NVPTXISD::LastCallArg; | |||
1446 | else | |||
1447 | opcode = NVPTXISD::CallArg; | |||
1448 | SDVTList CallArgVTs = DAG.getVTList(MVT::Other, MVT::Glue); | |||
1449 | SDValue CallArgOps[] = { Chain, DAG.getConstant(1, dl, MVT::i32), | |||
1450 | DAG.getConstant(i, dl, MVT::i32), InFlag }; | |||
1451 | Chain = DAG.getNode(opcode, dl, CallArgVTs, CallArgOps); | |||
1452 | InFlag = Chain.getValue(1); | |||
1453 | } | |||
1454 | SDVTList CallArgEndVTs = DAG.getVTList(MVT::Other, MVT::Glue); | |||
1455 | SDValue CallArgEndOps[] = { Chain, | |||
1456 | DAG.getConstant(Func ? 1 : 0, dl, MVT::i32), | |||
1457 | InFlag }; | |||
1458 | Chain = DAG.getNode(NVPTXISD::CallArgEnd, dl, CallArgEndVTs, CallArgEndOps); | |||
1459 | InFlag = Chain.getValue(1); | |||
1460 | ||||
1461 | if (!Func) { | |||
1462 | SDVTList PrototypeVTs = DAG.getVTList(MVT::Other, MVT::Glue); | |||
1463 | SDValue PrototypeOps[] = { Chain, | |||
1464 | DAG.getConstant(uniqueCallSite, dl, MVT::i32), | |||
1465 | InFlag }; | |||
1466 | Chain = DAG.getNode(NVPTXISD::Prototype, dl, PrototypeVTs, PrototypeOps); | |||
1467 | InFlag = Chain.getValue(1); | |||
1468 | } | |||
1469 | ||||
1470 | // Generate loads from param memory/moves from registers for result | |||
1471 | if (Ins.size() > 0) { | |||
1472 | if (retTy && retTy->isVectorTy()) { | |||
1473 | EVT ObjectVT = getValueType(retTy); | |||
1474 | unsigned NumElts = ObjectVT.getVectorNumElements(); | |||
1475 | EVT EltVT = ObjectVT.getVectorElementType(); | |||
1476 | assert(STI.getTargetLowering()->getNumRegisters(F->getContext(),((STI.getTargetLowering()->getNumRegisters(F->getContext (), ObjectVT) == NumElts && "Vector was not scalarized" ) ? static_cast<void> (0) : __assert_fail ("STI.getTargetLowering()->getNumRegisters(F->getContext(), ObjectVT) == NumElts && \"Vector was not scalarized\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 1478, __PRETTY_FUNCTION__)) | |||
1477 | ObjectVT) == NumElts &&((STI.getTargetLowering()->getNumRegisters(F->getContext (), ObjectVT) == NumElts && "Vector was not scalarized" ) ? static_cast<void> (0) : __assert_fail ("STI.getTargetLowering()->getNumRegisters(F->getContext(), ObjectVT) == NumElts && \"Vector was not scalarized\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 1478, __PRETTY_FUNCTION__)) | |||
1478 | "Vector was not scalarized")((STI.getTargetLowering()->getNumRegisters(F->getContext (), ObjectVT) == NumElts && "Vector was not scalarized" ) ? static_cast<void> (0) : __assert_fail ("STI.getTargetLowering()->getNumRegisters(F->getContext(), ObjectVT) == NumElts && \"Vector was not scalarized\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 1478, __PRETTY_FUNCTION__)); | |||
1479 | unsigned sz = EltVT.getSizeInBits(); | |||
1480 | bool needTruncate = sz < 8; | |||
1481 | ||||
1482 | if (NumElts == 1) { | |||
1483 | // Just a simple load | |||
1484 | SmallVector<EVT, 4> LoadRetVTs; | |||
1485 | if (EltVT == MVT::i1 || EltVT == MVT::i8) { | |||
1486 | // If loading i1/i8 result, generate | |||
1487 | // load.b8 i16 | |||
1488 | // if i1 | |||
1489 | // trunc i16 to i1 | |||
1490 | LoadRetVTs.push_back(MVT::i16); | |||
1491 | } else | |||
1492 | LoadRetVTs.push_back(EltVT); | |||
1493 | LoadRetVTs.push_back(MVT::Other); | |||
1494 | LoadRetVTs.push_back(MVT::Glue); | |||
1495 | SDValue LoadRetOps[] = {Chain, DAG.getConstant(1, dl, MVT::i32), | |||
1496 | DAG.getConstant(0, dl, MVT::i32), InFlag}; | |||
1497 | SDValue retval = DAG.getMemIntrinsicNode( | |||
1498 | NVPTXISD::LoadParam, dl, | |||
1499 | DAG.getVTList(LoadRetVTs), LoadRetOps, EltVT, MachinePointerInfo()); | |||
1500 | Chain = retval.getValue(1); | |||
1501 | InFlag = retval.getValue(2); | |||
1502 | SDValue Ret0 = retval; | |||
1503 | if (needTruncate) | |||
1504 | Ret0 = DAG.getNode(ISD::TRUNCATE, dl, EltVT, Ret0); | |||
1505 | InVals.push_back(Ret0); | |||
1506 | } else if (NumElts == 2) { | |||
1507 | // LoadV2 | |||
1508 | SmallVector<EVT, 4> LoadRetVTs; | |||
1509 | if (EltVT == MVT::i1 || EltVT == MVT::i8) { | |||
1510 | // If loading i1/i8 result, generate | |||
1511 | // load.b8 i16 | |||
1512 | // if i1 | |||
1513 | // trunc i16 to i1 | |||
1514 | LoadRetVTs.push_back(MVT::i16); | |||
1515 | LoadRetVTs.push_back(MVT::i16); | |||
1516 | } else { | |||
1517 | LoadRetVTs.push_back(EltVT); | |||
1518 | LoadRetVTs.push_back(EltVT); | |||
1519 | } | |||
1520 | LoadRetVTs.push_back(MVT::Other); | |||
1521 | LoadRetVTs.push_back(MVT::Glue); | |||
1522 | SDValue LoadRetOps[] = {Chain, DAG.getConstant(1, dl, MVT::i32), | |||
1523 | DAG.getConstant(0, dl, MVT::i32), InFlag}; | |||
1524 | SDValue retval = DAG.getMemIntrinsicNode( | |||
1525 | NVPTXISD::LoadParamV2, dl, | |||
1526 | DAG.getVTList(LoadRetVTs), LoadRetOps, EltVT, MachinePointerInfo()); | |||
1527 | Chain = retval.getValue(2); | |||
1528 | InFlag = retval.getValue(3); | |||
1529 | SDValue Ret0 = retval.getValue(0); | |||
1530 | SDValue Ret1 = retval.getValue(1); | |||
1531 | if (needTruncate) { | |||
1532 | Ret0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ret0); | |||
1533 | InVals.push_back(Ret0); | |||
1534 | Ret1 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ret1); | |||
1535 | InVals.push_back(Ret1); | |||
1536 | } else { | |||
1537 | InVals.push_back(Ret0); | |||
1538 | InVals.push_back(Ret1); | |||
1539 | } | |||
1540 | } else { | |||
1541 | // Split into N LoadV4 | |||
1542 | unsigned Ofst = 0; | |||
1543 | unsigned VecSize = 4; | |||
1544 | unsigned Opc = NVPTXISD::LoadParamV4; | |||
1545 | if (EltVT.getSizeInBits() == 64) { | |||
1546 | VecSize = 2; | |||
1547 | Opc = NVPTXISD::LoadParamV2; | |||
1548 | } | |||
1549 | EVT VecVT = EVT::getVectorVT(F->getContext(), EltVT, VecSize); | |||
1550 | for (unsigned i = 0; i < NumElts; i += VecSize) { | |||
1551 | SmallVector<EVT, 8> LoadRetVTs; | |||
1552 | if (EltVT == MVT::i1 || EltVT == MVT::i8) { | |||
1553 | // If loading i1/i8 result, generate | |||
1554 | // load.b8 i16 | |||
1555 | // if i1 | |||
1556 | // trunc i16 to i1 | |||
1557 | for (unsigned j = 0; j < VecSize; ++j) | |||
1558 | LoadRetVTs.push_back(MVT::i16); | |||
1559 | } else { | |||
1560 | for (unsigned j = 0; j < VecSize; ++j) | |||
1561 | LoadRetVTs.push_back(EltVT); | |||
1562 | } | |||
1563 | LoadRetVTs.push_back(MVT::Other); | |||
1564 | LoadRetVTs.push_back(MVT::Glue); | |||
1565 | SDValue LoadRetOps[] = {Chain, DAG.getConstant(1, dl, MVT::i32), | |||
1566 | DAG.getConstant(Ofst, dl, MVT::i32), InFlag}; | |||
1567 | SDValue retval = DAG.getMemIntrinsicNode( | |||
1568 | Opc, dl, DAG.getVTList(LoadRetVTs), | |||
1569 | LoadRetOps, EltVT, MachinePointerInfo()); | |||
1570 | if (VecSize == 2) { | |||
1571 | Chain = retval.getValue(2); | |||
1572 | InFlag = retval.getValue(3); | |||
1573 | } else { | |||
1574 | Chain = retval.getValue(4); | |||
1575 | InFlag = retval.getValue(5); | |||
1576 | } | |||
1577 | ||||
1578 | for (unsigned j = 0; j < VecSize; ++j) { | |||
1579 | if (i + j >= NumElts) | |||
1580 | break; | |||
1581 | SDValue Elt = retval.getValue(j); | |||
1582 | if (needTruncate) | |||
1583 | Elt = DAG.getNode(ISD::TRUNCATE, dl, EltVT, Elt); | |||
1584 | InVals.push_back(Elt); | |||
1585 | } | |||
1586 | Ofst += TD->getTypeAllocSize(VecVT.getTypeForEVT(F->getContext())); | |||
1587 | } | |||
1588 | } | |||
1589 | } else { | |||
1590 | SmallVector<EVT, 16> VTs; | |||
1591 | SmallVector<uint64_t, 16> Offsets; | |||
1592 | ComputePTXValueVTs(*this, retTy, VTs, &Offsets, 0); | |||
1593 | assert(VTs.size() == Ins.size() && "Bad value decomposition")((VTs.size() == Ins.size() && "Bad value decomposition" ) ? static_cast<void> (0) : __assert_fail ("VTs.size() == Ins.size() && \"Bad value decomposition\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 1593, __PRETTY_FUNCTION__)); | |||
1594 | unsigned RetAlign = getArgumentAlignment(Callee, CS, retTy, 0); | |||
1595 | for (unsigned i = 0, e = Ins.size(); i != e; ++i) { | |||
1596 | unsigned sz = VTs[i].getSizeInBits(); | |||
1597 | unsigned AlignI = GreatestCommonDivisor64(RetAlign, Offsets[i]); | |||
1598 | bool needTruncate = sz < 8; | |||
1599 | if (VTs[i].isInteger() && (sz < 8)) | |||
1600 | sz = 8; | |||
1601 | ||||
1602 | SmallVector<EVT, 4> LoadRetVTs; | |||
1603 | EVT TheLoadType = VTs[i]; | |||
1604 | if (retTy->isIntegerTy() && | |||
| ||||
1605 | TD->getTypeAllocSizeInBits(retTy) < 32) { | |||
1606 | // This is for integer types only, and specifically not for | |||
1607 | // aggregates. | |||
1608 | LoadRetVTs.push_back(MVT::i32); | |||
1609 | TheLoadType = MVT::i32; | |||
1610 | } else if (sz < 16) { | |||
1611 | // If loading i1/i8 result, generate | |||
1612 | // load i8 (-> i16) | |||
1613 | // trunc i16 to i1/i8 | |||
1614 | LoadRetVTs.push_back(MVT::i16); | |||
1615 | } else | |||
1616 | LoadRetVTs.push_back(Ins[i].VT); | |||
1617 | LoadRetVTs.push_back(MVT::Other); | |||
1618 | LoadRetVTs.push_back(MVT::Glue); | |||
1619 | ||||
1620 | SDValue LoadRetOps[] = {Chain, DAG.getConstant(1, dl, MVT::i32), | |||
1621 | DAG.getConstant(Offsets[i], dl, MVT::i32), | |||
1622 | InFlag}; | |||
1623 | SDValue retval = DAG.getMemIntrinsicNode( | |||
1624 | NVPTXISD::LoadParam, dl, | |||
1625 | DAG.getVTList(LoadRetVTs), LoadRetOps, | |||
1626 | TheLoadType, MachinePointerInfo(), AlignI); | |||
1627 | Chain = retval.getValue(1); | |||
1628 | InFlag = retval.getValue(2); | |||
1629 | SDValue Ret0 = retval.getValue(0); | |||
1630 | if (needTruncate) | |||
1631 | Ret0 = DAG.getNode(ISD::TRUNCATE, dl, Ins[i].VT, Ret0); | |||
1632 | InVals.push_back(Ret0); | |||
1633 | } | |||
1634 | } | |||
1635 | } | |||
1636 | ||||
1637 | Chain = DAG.getCALLSEQ_END(Chain, | |||
1638 | DAG.getIntPtrConstant(uniqueCallSite, dl, true), | |||
1639 | DAG.getIntPtrConstant(uniqueCallSite + 1, dl, | |||
1640 | true), | |||
1641 | InFlag, dl); | |||
1642 | uniqueCallSite++; | |||
1643 | ||||
1644 | // set isTailCall to false for now, until we figure out how to express | |||
1645 | // tail call optimization in PTX | |||
1646 | isTailCall = false; | |||
1647 | return Chain; | |||
1648 | } | |||
1649 | ||||
1650 | // By default CONCAT_VECTORS is lowered by ExpandVectorBuildThroughStack() | |||
1651 | // (see LegalizeDAG.cpp). This is slow and uses local memory. | |||
1652 | // We use extract/insert/build vector just as what LegalizeOp() does in llvm 2.5 | |||
1653 | SDValue | |||
1654 | NVPTXTargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const { | |||
1655 | SDNode *Node = Op.getNode(); | |||
1656 | SDLoc dl(Node); | |||
1657 | SmallVector<SDValue, 8> Ops; | |||
1658 | unsigned NumOperands = Node->getNumOperands(); | |||
1659 | for (unsigned i = 0; i < NumOperands; ++i) { | |||
1660 | SDValue SubOp = Node->getOperand(i); | |||
1661 | EVT VVT = SubOp.getNode()->getValueType(0); | |||
1662 | EVT EltVT = VVT.getVectorElementType(); | |||
1663 | unsigned NumSubElem = VVT.getVectorNumElements(); | |||
1664 | for (unsigned j = 0; j < NumSubElem; ++j) { | |||
1665 | Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, SubOp, | |||
1666 | DAG.getIntPtrConstant(j, dl))); | |||
1667 | } | |||
1668 | } | |||
1669 | return DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0), Ops); | |||
1670 | } | |||
1671 | ||||
1672 | /// LowerShiftRightParts - Lower SRL_PARTS, SRA_PARTS, which | |||
1673 | /// 1) returns two i32 values and take a 2 x i32 value to shift plus a shift | |||
1674 | /// amount, or | |||
1675 | /// 2) returns two i64 values and take a 2 x i64 value to shift plus a shift | |||
1676 | /// amount. | |||
1677 | SDValue NVPTXTargetLowering::LowerShiftRightParts(SDValue Op, | |||
1678 | SelectionDAG &DAG) const { | |||
1679 | assert(Op.getNumOperands() == 3 && "Not a double-shift!")((Op.getNumOperands() == 3 && "Not a double-shift!") ? static_cast<void> (0) : __assert_fail ("Op.getNumOperands() == 3 && \"Not a double-shift!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 1679, __PRETTY_FUNCTION__)); | |||
1680 | assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS)((Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD:: SRL_PARTS) ? static_cast<void> (0) : __assert_fail ("Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 1680, __PRETTY_FUNCTION__)); | |||
1681 | ||||
1682 | EVT VT = Op.getValueType(); | |||
1683 | unsigned VTBits = VT.getSizeInBits(); | |||
1684 | SDLoc dl(Op); | |||
1685 | SDValue ShOpLo = Op.getOperand(0); | |||
1686 | SDValue ShOpHi = Op.getOperand(1); | |||
1687 | SDValue ShAmt = Op.getOperand(2); | |||
1688 | unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL; | |||
1689 | ||||
1690 | if (VTBits == 32 && STI.getSmVersion() >= 35) { | |||
1691 | ||||
1692 | // For 32bit and sm35, we can use the funnel shift 'shf' instruction. | |||
1693 | // {dHi, dLo} = {aHi, aLo} >> Amt | |||
1694 | // dHi = aHi >> Amt | |||
1695 | // dLo = shf.r.clamp aLo, aHi, Amt | |||
1696 | ||||
1697 | SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt); | |||
1698 | SDValue Lo = DAG.getNode(NVPTXISD::FUN_SHFR_CLAMP, dl, VT, ShOpLo, ShOpHi, | |||
1699 | ShAmt); | |||
1700 | ||||
1701 | SDValue Ops[2] = { Lo, Hi }; | |||
1702 | return DAG.getMergeValues(Ops, dl); | |||
1703 | } | |||
1704 | else { | |||
1705 | ||||
1706 | // {dHi, dLo} = {aHi, aLo} >> Amt | |||
1707 | // - if (Amt>=size) then | |||
1708 | // dLo = aHi >> (Amt-size) | |||
1709 | // dHi = aHi >> Amt (this is either all 0 or all 1) | |||
1710 | // else | |||
1711 | // dLo = (aLo >>logic Amt) | (aHi << (size-Amt)) | |||
1712 | // dHi = aHi >> Amt | |||
1713 | ||||
1714 | SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, | |||
1715 | DAG.getConstant(VTBits, dl, MVT::i32), | |||
1716 | ShAmt); | |||
1717 | SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt); | |||
1718 | SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt, | |||
1719 | DAG.getConstant(VTBits, dl, MVT::i32)); | |||
1720 | SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt); | |||
1721 | SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); | |||
1722 | SDValue TrueVal = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt); | |||
1723 | ||||
1724 | SDValue Cmp = DAG.getSetCC(dl, MVT::i1, ShAmt, | |||
1725 | DAG.getConstant(VTBits, dl, MVT::i32), | |||
1726 | ISD::SETGE); | |||
1727 | SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt); | |||
1728 | SDValue Lo = DAG.getNode(ISD::SELECT, dl, VT, Cmp, TrueVal, FalseVal); | |||
1729 | ||||
1730 | SDValue Ops[2] = { Lo, Hi }; | |||
1731 | return DAG.getMergeValues(Ops, dl); | |||
1732 | } | |||
1733 | } | |||
1734 | ||||
1735 | /// LowerShiftLeftParts - Lower SHL_PARTS, which | |||
1736 | /// 1) returns two i32 values and take a 2 x i32 value to shift plus a shift | |||
1737 | /// amount, or | |||
1738 | /// 2) returns two i64 values and take a 2 x i64 value to shift plus a shift | |||
1739 | /// amount. | |||
1740 | SDValue NVPTXTargetLowering::LowerShiftLeftParts(SDValue Op, | |||
1741 | SelectionDAG &DAG) const { | |||
1742 | assert(Op.getNumOperands() == 3 && "Not a double-shift!")((Op.getNumOperands() == 3 && "Not a double-shift!") ? static_cast<void> (0) : __assert_fail ("Op.getNumOperands() == 3 && \"Not a double-shift!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 1742, __PRETTY_FUNCTION__)); | |||
1743 | assert(Op.getOpcode() == ISD::SHL_PARTS)((Op.getOpcode() == ISD::SHL_PARTS) ? static_cast<void> (0) : __assert_fail ("Op.getOpcode() == ISD::SHL_PARTS", "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 1743, __PRETTY_FUNCTION__)); | |||
1744 | ||||
1745 | EVT VT = Op.getValueType(); | |||
1746 | unsigned VTBits = VT.getSizeInBits(); | |||
1747 | SDLoc dl(Op); | |||
1748 | SDValue ShOpLo = Op.getOperand(0); | |||
1749 | SDValue ShOpHi = Op.getOperand(1); | |||
1750 | SDValue ShAmt = Op.getOperand(2); | |||
1751 | ||||
1752 | if (VTBits == 32 && STI.getSmVersion() >= 35) { | |||
1753 | ||||
1754 | // For 32bit and sm35, we can use the funnel shift 'shf' instruction. | |||
1755 | // {dHi, dLo} = {aHi, aLo} << Amt | |||
1756 | // dHi = shf.l.clamp aLo, aHi, Amt | |||
1757 | // dLo = aLo << Amt | |||
1758 | ||||
1759 | SDValue Hi = DAG.getNode(NVPTXISD::FUN_SHFL_CLAMP, dl, VT, ShOpLo, ShOpHi, | |||
1760 | ShAmt); | |||
1761 | SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt); | |||
1762 | ||||
1763 | SDValue Ops[2] = { Lo, Hi }; | |||
1764 | return DAG.getMergeValues(Ops, dl); | |||
1765 | } | |||
1766 | else { | |||
1767 | ||||
1768 | // {dHi, dLo} = {aHi, aLo} << Amt | |||
1769 | // - if (Amt>=size) then | |||
1770 | // dLo = aLo << Amt (all 0) | |||
1771 | // dLo = aLo << (Amt-size) | |||
1772 | // else | |||
1773 | // dLo = aLo << Amt | |||
1774 | // dHi = (aHi << Amt) | (aLo >> (size-Amt)) | |||
1775 | ||||
1776 | SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, | |||
1777 | DAG.getConstant(VTBits, dl, MVT::i32), | |||
1778 | ShAmt); | |||
1779 | SDValue Tmp1 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt); | |||
1780 | SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt, | |||
1781 | DAG.getConstant(VTBits, dl, MVT::i32)); | |||
1782 | SDValue Tmp2 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt); | |||
1783 | SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); | |||
1784 | SDValue TrueVal = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt); | |||
1785 | ||||
1786 | SDValue Cmp = DAG.getSetCC(dl, MVT::i1, ShAmt, | |||
1787 | DAG.getConstant(VTBits, dl, MVT::i32), | |||
1788 | ISD::SETGE); | |||
1789 | SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt); | |||
1790 | SDValue Hi = DAG.getNode(ISD::SELECT, dl, VT, Cmp, TrueVal, FalseVal); | |||
1791 | ||||
1792 | SDValue Ops[2] = { Lo, Hi }; | |||
1793 | return DAG.getMergeValues(Ops, dl); | |||
1794 | } | |||
1795 | } | |||
1796 | ||||
1797 | SDValue | |||
1798 | NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { | |||
1799 | switch (Op.getOpcode()) { | |||
1800 | case ISD::RETURNADDR: | |||
1801 | return SDValue(); | |||
1802 | case ISD::FRAMEADDR: | |||
1803 | return SDValue(); | |||
1804 | case ISD::GlobalAddress: | |||
1805 | return LowerGlobalAddress(Op, DAG); | |||
1806 | case ISD::INTRINSIC_W_CHAIN: | |||
1807 | return Op; | |||
1808 | case ISD::BUILD_VECTOR: | |||
1809 | case ISD::EXTRACT_SUBVECTOR: | |||
1810 | return Op; | |||
1811 | case ISD::CONCAT_VECTORS: | |||
1812 | return LowerCONCAT_VECTORS(Op, DAG); | |||
1813 | case ISD::STORE: | |||
1814 | return LowerSTORE(Op, DAG); | |||
1815 | case ISD::LOAD: | |||
1816 | return LowerLOAD(Op, DAG); | |||
1817 | case ISD::SHL_PARTS: | |||
1818 | return LowerShiftLeftParts(Op, DAG); | |||
1819 | case ISD::SRA_PARTS: | |||
1820 | case ISD::SRL_PARTS: | |||
1821 | return LowerShiftRightParts(Op, DAG); | |||
1822 | case ISD::SELECT: | |||
1823 | return LowerSelect(Op, DAG); | |||
1824 | default: | |||
1825 | llvm_unreachable("Custom lowering not defined for operation")::llvm::llvm_unreachable_internal("Custom lowering not defined for operation" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 1825); | |||
1826 | } | |||
1827 | } | |||
1828 | ||||
1829 | SDValue NVPTXTargetLowering::LowerSelect(SDValue Op, SelectionDAG &DAG) const { | |||
1830 | SDValue Op0 = Op->getOperand(0); | |||
1831 | SDValue Op1 = Op->getOperand(1); | |||
1832 | SDValue Op2 = Op->getOperand(2); | |||
1833 | SDLoc DL(Op.getNode()); | |||
1834 | ||||
1835 | assert(Op.getValueType() == MVT::i1 && "Custom lowering enabled only for i1")((Op.getValueType() == MVT::i1 && "Custom lowering enabled only for i1" ) ? static_cast<void> (0) : __assert_fail ("Op.getValueType() == MVT::i1 && \"Custom lowering enabled only for i1\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 1835, __PRETTY_FUNCTION__)); | |||
1836 | ||||
1837 | Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op1); | |||
1838 | Op2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op2); | |||
1839 | SDValue Select = DAG.getNode(ISD::SELECT, DL, MVT::i32, Op0, Op1, Op2); | |||
1840 | SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Select); | |||
1841 | ||||
1842 | return Trunc; | |||
1843 | } | |||
1844 | ||||
1845 | SDValue NVPTXTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { | |||
1846 | if (Op.getValueType() == MVT::i1) | |||
1847 | return LowerLOADi1(Op, DAG); | |||
1848 | else | |||
1849 | return SDValue(); | |||
1850 | } | |||
1851 | ||||
1852 | // v = ld i1* addr | |||
1853 | // => | |||
1854 | // v1 = ld i8* addr (-> i16) | |||
1855 | // v = trunc i16 to i1 | |||
1856 | SDValue NVPTXTargetLowering::LowerLOADi1(SDValue Op, SelectionDAG &DAG) const { | |||
1857 | SDNode *Node = Op.getNode(); | |||
1858 | LoadSDNode *LD = cast<LoadSDNode>(Node); | |||
1859 | SDLoc dl(Node); | |||
1860 | assert(LD->getExtensionType() == ISD::NON_EXTLOAD)((LD->getExtensionType() == ISD::NON_EXTLOAD) ? static_cast <void> (0) : __assert_fail ("LD->getExtensionType() == ISD::NON_EXTLOAD" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 1860, __PRETTY_FUNCTION__)); | |||
1861 | assert(Node->getValueType(0) == MVT::i1 &&((Node->getValueType(0) == MVT::i1 && "Custom lowering for i1 load only" ) ? static_cast<void> (0) : __assert_fail ("Node->getValueType(0) == MVT::i1 && \"Custom lowering for i1 load only\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 1862, __PRETTY_FUNCTION__)) | |||
1862 | "Custom lowering for i1 load only")((Node->getValueType(0) == MVT::i1 && "Custom lowering for i1 load only" ) ? static_cast<void> (0) : __assert_fail ("Node->getValueType(0) == MVT::i1 && \"Custom lowering for i1 load only\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 1862, __PRETTY_FUNCTION__)); | |||
1863 | SDValue newLD = | |||
1864 | DAG.getLoad(MVT::i16, dl, LD->getChain(), LD->getBasePtr(), | |||
1865 | LD->getPointerInfo(), LD->isVolatile(), LD->isNonTemporal(), | |||
1866 | LD->isInvariant(), LD->getAlignment()); | |||
1867 | SDValue result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, newLD); | |||
1868 | // The legalizer (the caller) is expecting two values from the legalized | |||
1869 | // load, so we build a MergeValues node for it. See ExpandUnalignedLoad() | |||
1870 | // in LegalizeDAG.cpp which also uses MergeValues. | |||
1871 | SDValue Ops[] = { result, LD->getChain() }; | |||
1872 | return DAG.getMergeValues(Ops, dl); | |||
1873 | } | |||
1874 | ||||
1875 | SDValue NVPTXTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { | |||
1876 | EVT ValVT = Op.getOperand(1).getValueType(); | |||
1877 | if (ValVT == MVT::i1) | |||
1878 | return LowerSTOREi1(Op, DAG); | |||
1879 | else if (ValVT.isVector()) | |||
1880 | return LowerSTOREVector(Op, DAG); | |||
1881 | else | |||
1882 | return SDValue(); | |||
1883 | } | |||
1884 | ||||
1885 | SDValue | |||
1886 | NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const { | |||
1887 | SDNode *N = Op.getNode(); | |||
1888 | SDValue Val = N->getOperand(1); | |||
1889 | SDLoc DL(N); | |||
1890 | EVT ValVT = Val.getValueType(); | |||
1891 | ||||
1892 | if (ValVT.isVector()) { | |||
1893 | // We only handle "native" vector sizes for now, e.g. <4 x double> is not | |||
1894 | // legal. We can (and should) split that into 2 stores of <2 x double> here | |||
1895 | // but I'm leaving that as a TODO for now. | |||
1896 | if (!ValVT.isSimple()) | |||
1897 | return SDValue(); | |||
1898 | switch (ValVT.getSimpleVT().SimpleTy) { | |||
1899 | default: | |||
1900 | return SDValue(); | |||
1901 | case MVT::v2i8: | |||
1902 | case MVT::v2i16: | |||
1903 | case MVT::v2i32: | |||
1904 | case MVT::v2i64: | |||
1905 | case MVT::v2f32: | |||
1906 | case MVT::v2f64: | |||
1907 | case MVT::v4i8: | |||
1908 | case MVT::v4i16: | |||
1909 | case MVT::v4i32: | |||
1910 | case MVT::v4f32: | |||
1911 | // This is a "native" vector type | |||
1912 | break; | |||
1913 | } | |||
1914 | ||||
1915 | MemSDNode *MemSD = cast<MemSDNode>(N); | |||
1916 | const DataLayout *TD = getDataLayout(); | |||
1917 | ||||
1918 | unsigned Align = MemSD->getAlignment(); | |||
1919 | unsigned PrefAlign = | |||
1920 | TD->getPrefTypeAlignment(ValVT.getTypeForEVT(*DAG.getContext())); | |||
1921 | if (Align < PrefAlign) { | |||
1922 | // This store is not sufficiently aligned, so bail out and let this vector | |||
1923 | // store be scalarized. Note that we may still be able to emit smaller | |||
1924 | // vector stores. For example, if we are storing a <4 x float> with an | |||
1925 | // alignment of 8, this check will fail but the legalizer will try again | |||
1926 | // with 2 x <2 x float>, which will succeed with an alignment of 8. | |||
1927 | return SDValue(); | |||
1928 | } | |||
1929 | ||||
1930 | unsigned Opcode = 0; | |||
1931 | EVT EltVT = ValVT.getVectorElementType(); | |||
1932 | unsigned NumElts = ValVT.getVectorNumElements(); | |||
1933 | ||||
1934 | // Since StoreV2 is a target node, we cannot rely on DAG type legalization. | |||
1935 | // Therefore, we must ensure the type is legal. For i1 and i8, we set the | |||
1936 | // stored type to i16 and propagate the "real" type as the memory type. | |||
1937 | bool NeedExt = false; | |||
1938 | if (EltVT.getSizeInBits() < 16) | |||
1939 | NeedExt = true; | |||
1940 | ||||
1941 | switch (NumElts) { | |||
1942 | default: | |||
1943 | return SDValue(); | |||
1944 | case 2: | |||
1945 | Opcode = NVPTXISD::StoreV2; | |||
1946 | break; | |||
1947 | case 4: { | |||
1948 | Opcode = NVPTXISD::StoreV4; | |||
1949 | break; | |||
1950 | } | |||
1951 | } | |||
1952 | ||||
1953 | SmallVector<SDValue, 8> Ops; | |||
1954 | ||||
1955 | // First is the chain | |||
1956 | Ops.push_back(N->getOperand(0)); | |||
1957 | ||||
1958 | // Then the split values | |||
1959 | for (unsigned i = 0; i < NumElts; ++i) { | |||
1960 | SDValue ExtVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Val, | |||
1961 | DAG.getIntPtrConstant(i, DL)); | |||
1962 | if (NeedExt) | |||
1963 | ExtVal = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i16, ExtVal); | |||
1964 | Ops.push_back(ExtVal); | |||
1965 | } | |||
1966 | ||||
1967 | // Then any remaining arguments | |||
1968 | Ops.append(N->op_begin() + 2, N->op_end()); | |||
1969 | ||||
1970 | SDValue NewSt = DAG.getMemIntrinsicNode( | |||
1971 | Opcode, DL, DAG.getVTList(MVT::Other), Ops, | |||
1972 | MemSD->getMemoryVT(), MemSD->getMemOperand()); | |||
1973 | ||||
1974 | //return DCI.CombineTo(N, NewSt, true); | |||
1975 | return NewSt; | |||
1976 | } | |||
1977 | ||||
1978 | return SDValue(); | |||
1979 | } | |||
1980 | ||||
1981 | // st i1 v, addr | |||
1982 | // => | |||
1983 | // v1 = zxt v to i16 | |||
1984 | // st.u8 i16, addr | |||
1985 | SDValue NVPTXTargetLowering::LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const { | |||
1986 | SDNode *Node = Op.getNode(); | |||
1987 | SDLoc dl(Node); | |||
1988 | StoreSDNode *ST = cast<StoreSDNode>(Node); | |||
1989 | SDValue Tmp1 = ST->getChain(); | |||
1990 | SDValue Tmp2 = ST->getBasePtr(); | |||
1991 | SDValue Tmp3 = ST->getValue(); | |||
1992 | assert(Tmp3.getValueType() == MVT::i1 && "Custom lowering for i1 store only")((Tmp3.getValueType() == MVT::i1 && "Custom lowering for i1 store only" ) ? static_cast<void> (0) : __assert_fail ("Tmp3.getValueType() == MVT::i1 && \"Custom lowering for i1 store only\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 1992, __PRETTY_FUNCTION__)); | |||
1993 | unsigned Alignment = ST->getAlignment(); | |||
1994 | bool isVolatile = ST->isVolatile(); | |||
1995 | bool isNonTemporal = ST->isNonTemporal(); | |||
1996 | Tmp3 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Tmp3); | |||
1997 | SDValue Result = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, | |||
1998 | ST->getPointerInfo(), MVT::i8, isNonTemporal, | |||
1999 | isVolatile, Alignment); | |||
2000 | return Result; | |||
2001 | } | |||
2002 | ||||
2003 | SDValue NVPTXTargetLowering::getExtSymb(SelectionDAG &DAG, const char *inname, | |||
2004 | int idx, EVT v) const { | |||
2005 | std::string *name = nvTM->getManagedStrPool()->getManagedString(inname); | |||
2006 | std::stringstream suffix; | |||
2007 | suffix << idx; | |||
2008 | *name += suffix.str(); | |||
2009 | return DAG.getTargetExternalSymbol(name->c_str(), v); | |||
2010 | } | |||
2011 | ||||
2012 | SDValue | |||
2013 | NVPTXTargetLowering::getParamSymbol(SelectionDAG &DAG, int idx, EVT v) const { | |||
2014 | std::string ParamSym; | |||
2015 | raw_string_ostream ParamStr(ParamSym); | |||
2016 | ||||
2017 | ParamStr << DAG.getMachineFunction().getName() << "_param_" << idx; | |||
2018 | ParamStr.flush(); | |||
2019 | ||||
2020 | std::string *SavedStr = | |||
2021 | nvTM->getManagedStrPool()->getManagedString(ParamSym.c_str()); | |||
2022 | return DAG.getTargetExternalSymbol(SavedStr->c_str(), v); | |||
2023 | } | |||
2024 | ||||
2025 | SDValue NVPTXTargetLowering::getParamHelpSymbol(SelectionDAG &DAG, int idx) { | |||
2026 | return getExtSymb(DAG, ".HLPPARAM", idx); | |||
2027 | } | |||
2028 | ||||
2029 | // Check to see if the kernel argument is image*_t or sampler_t | |||
2030 | ||||
2031 | bool llvm::isImageOrSamplerVal(const Value *arg, const Module *context) { | |||
2032 | static const char *const specialTypes[] = { "struct._image2d_t", | |||
2033 | "struct._image3d_t", | |||
2034 | "struct._sampler_t" }; | |||
2035 | ||||
2036 | const Type *Ty = arg->getType(); | |||
2037 | const PointerType *PTy = dyn_cast<PointerType>(Ty); | |||
2038 | ||||
2039 | if (!PTy) | |||
2040 | return false; | |||
2041 | ||||
2042 | if (!context) | |||
2043 | return false; | |||
2044 | ||||
2045 | const StructType *STy = dyn_cast<StructType>(PTy->getElementType()); | |||
2046 | const std::string TypeName = STy && !STy->isLiteral() ? STy->getName() : ""; | |||
2047 | ||||
2048 | for (int i = 0, e = array_lengthof(specialTypes); i != e; ++i) | |||
2049 | if (TypeName == specialTypes[i]) | |||
2050 | return true; | |||
2051 | ||||
2052 | return false; | |||
2053 | } | |||
2054 | ||||
// Lower the incoming formal arguments of the current function.
//
// The IR arguments of the function are visited in order and, for each one,
// one or more SDValues are appended to InVals:
//   * image/sampler arguments become an i32 constant holding (position + 1);
//   * arguments without uses become UNDEF nodes, one per entry they occupy
//     in Ins;
//   * non-byval aggregates are loaded part by part from the parameter symbol
//     plus the part's offset (sign/zero-extending loads are used when the Ins
//     type is wider than the part);
//   * non-byval vectors are loaded as one scalar, one 2-element vector, or a
//     sequence of 4-element vectors (2-element when the element is 64 bits
//     wide), and the individual elements are then extracted;
//   * other non-byval values are loaded directly from the parameter symbol;
//   * byval arguments become a MoveParam node on the parameter symbol, which
//     is additionally wrapped in the nvvm_ptr_local_to_gen intrinsic when the
//     function is not a kernel.
//
// Two indices are maintained: `idx` names the parameter symbol and IR order,
// `InsIdx` indexes Ins. Because the loop header increments InsIdx on every
// iteration, branches that consume several Ins entries advance InsIdx past
// all of them and then step back by one before `continue`.
//
// The incoming Chain is returned unchanged.
2055 | SDValue NVPTXTargetLowering::LowerFormalArguments( | |||
2056 | SDValue Chain, CallingConv::ID CallConv, bool isVarArg, | |||
2057 | const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc dl, SelectionDAG &DAG, | |||
2058 | SmallVectorImpl<SDValue> &InVals) const { | |||
2059 | MachineFunction &MF = DAG.getMachineFunction(); | |||
2060 | const DataLayout *TD = getDataLayout(); | |||
2061 | ||||
2062 | const Function *F = MF.getFunction(); | |||
2063 | const AttributeSet &PAL = F->getAttributes(); | |||
2064 | const TargetLowering *TLI = STI.getTargetLowering(); | |||
2065 | ||||
2066 | SDValue Root = DAG.getRoot(); | |||
2067 | std::vector<SDValue> OutChains; | |||
2068 | ||||
2069 | bool isKernel = llvm::isKernelFunction(*F); | |||
2070 | bool isABI = (STI.getSmVersion() >= 20); | |||
2071 | assert(isABI && "Non-ABI compilation is not supported")((isABI && "Non-ABI compilation is not supported") ? static_cast <void> (0) : __assert_fail ("isABI && \"Non-ABI compilation is not supported\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 2071, __PRETTY_FUNCTION__)); | |||
// This early return can only be taken when the assert above is compiled out.
2072 | if (!isABI) | |||
2073 | return Chain; | |||
2074 | ||||
2075 | std::vector<Type *> argTypes; | |||
2076 | std::vector<const Argument *> theArgs; | |||
2077 | for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end(); | |||
2078 | I != E; ++I) { | |||
2079 | theArgs.push_back(I); | |||
2080 | argTypes.push_back(I->getType()); | |||
2081 | } | |||
2082 | // argTypes.size() (or theArgs.size()) and Ins.size() need not match. | |||
2083 | // Ins.size() will be larger | |||
2084 | // * if there is an aggregate argument with multiple fields (each field | |||
2085 | // showing up separately in Ins) | |||
2086 | // * if there is a vector argument with more than typical vector-length | |||
2087 | // elements (generally if more than 4) where each vector element is | |||
2088 | // individually present in Ins. | |||
2089 | // So a different index should be used for indexing into Ins. | |||
2090 | // See similar issue in LowerCall. | |||
2091 | unsigned InsIdx = 0; | |||
2092 | ||||
2093 | int idx = 0; | |||
2094 | for (unsigned i = 0, e = theArgs.size(); i != e; ++i, ++idx, ++InsIdx) { | |||
2095 | Type *Ty = argTypes[i]; | |||
2096 | ||||
2097 | // If the kernel argument is image*_t or sampler_t, convert it to | |||
2098 | // a i32 constant holding the parameter position. This can later | |||
2099 | // matched in the AsmPrinter to output the correct mangled name. | |||
2100 | if (isImageOrSamplerVal( | |||
2101 | theArgs[i], | |||
2102 | (theArgs[i]->getParent() ? theArgs[i]->getParent()->getParent() | |||
2103 | : nullptr))) { | |||
2104 | assert(isKernel && "Only kernels can have image/sampler params")((isKernel && "Only kernels can have image/sampler params" ) ? static_cast<void> (0) : __assert_fail ("isKernel && \"Only kernels can have image/sampler params\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 2104, __PRETTY_FUNCTION__)); | |||
2105 | InVals.push_back(DAG.getConstant(i + 1, dl, MVT::i32)); | |||
2106 | continue; | |||
2107 | } | |||
2108 | ||||
2109 | if (theArgs[i]->use_empty()) { | |||
2110 | // argument is dead | |||
2111 | if (Ty->isAggregateType()) { | |||
2112 | SmallVector<EVT, 16> vtparts; | |||
2113 | ||||
2114 | ComputePTXValueVTs(*this, Ty, vtparts); | |||
2115 | assert(vtparts.size() > 0 && "empty aggregate type not expected")((vtparts.size() > 0 && "empty aggregate type not expected" ) ? static_cast<void> (0) : __assert_fail ("vtparts.size() > 0 && \"empty aggregate type not expected\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 2115, __PRETTY_FUNCTION__)); | |||
2116 | for (unsigned parti = 0, parte = vtparts.size(); parti != parte; | |||
2117 | ++parti) { | |||
2118 | InVals.push_back(DAG.getNode(ISD::UNDEF, dl, Ins[InsIdx].VT)); | |||
2119 | ++InsIdx; | |||
2120 | } | |||
// Step back once: the enclosing loop header increments InsIdx again.
2121 | if (vtparts.size() > 0) | |||
2122 | --InsIdx; | |||
2123 | continue; | |||
2124 | } | |||
2125 | if (Ty->isVectorTy()) { | |||
2126 | EVT ObjectVT = getValueType(Ty); | |||
2127 | unsigned NumRegs = TLI->getNumRegisters(F->getContext(), ObjectVT); | |||
2128 | for (unsigned parti = 0; parti < NumRegs; ++parti) { | |||
2129 | InVals.push_back(DAG.getNode(ISD::UNDEF, dl, Ins[InsIdx].VT)); | |||
2130 | ++InsIdx; | |||
2131 | } | |||
2132 | if (NumRegs > 0) | |||
2133 | --InsIdx; | |||
2134 | continue; | |||
2135 | } | |||
2136 | InVals.push_back(DAG.getNode(ISD::UNDEF, dl, Ins[InsIdx].VT)); | |||
2137 | continue; | |||
2138 | } | |||
2139 | ||||
2140 | // In the following cases, assign a node order of "idx+1" | |||
2141 | // to newly created nodes. The SDNodes for params have to | |||
2142 | // appear in the same order as their order of appearance | |||
2143 | // in the original function. "idx+1" holds that order. | |||
2144 | if (!PAL.hasAttribute(i + 1, Attribute::ByVal)) { | |||
2145 | if (Ty->isAggregateType()) { | |||
2146 | SmallVector<EVT, 16> vtparts; | |||
2147 | SmallVector<uint64_t, 16> offsets; | |||
2148 | ||||
2149 | // NOTE: Here, we lose the ability to issue vector loads for vectors | |||
2150 | // that are a part of a struct. This should be investigated in the | |||
2151 | // future. | |||
2152 | ComputePTXValueVTs(*this, Ty, vtparts, &offsets, 0); | |||
2153 | assert(vtparts.size() > 0 && "empty aggregate type not expected")((vtparts.size() > 0 && "empty aggregate type not expected" ) ? static_cast<void> (0) : __assert_fail ("vtparts.size() > 0 && \"empty aggregate type not expected\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 2153, __PRETTY_FUNCTION__)); | |||
2154 | bool aggregateIsPacked = false; | |||
2155 | if (StructType *STy = llvm::dyn_cast<StructType>(Ty)) | |||
2156 | aggregateIsPacked = STy->isPacked(); | |||
2157 | ||||
2158 | SDValue Arg = getParamSymbol(DAG, idx, getPointerTy()); | |||
2159 | for (unsigned parti = 0, parte = vtparts.size(); parti != parte; | |||
2160 | ++parti) { | |||
2161 | EVT partVT = vtparts[parti]; | |||
2162 | Value *srcValue = Constant::getNullValue( | |||
2163 | PointerType::get(partVT.getTypeForEVT(F->getContext()), | |||
2164 | llvm::ADDRESS_SPACE_PARAM)); | |||
2165 | SDValue srcAddr = | |||
2166 | DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, | |||
2167 | DAG.getConstant(offsets[parti], dl, getPointerTy())); | |||
// Parts of a packed struct are loaded with alignment 1.
2168 | unsigned partAlign = | |||
2169 | aggregateIsPacked ? 1 | |||
2170 | : TD->getABITypeAlignment( | |||
2171 | partVT.getTypeForEVT(F->getContext())); | |||
2172 | SDValue p; | |||
2173 | if (Ins[InsIdx].VT.getSizeInBits() > partVT.getSizeInBits()) { | |||
2174 | ISD::LoadExtType ExtOp = Ins[InsIdx].Flags.isSExt() ? | |||
2175 | ISD::SEXTLOAD : ISD::ZEXTLOAD; | |||
2176 | p = DAG.getExtLoad(ExtOp, dl, Ins[InsIdx].VT, Root, srcAddr, | |||
2177 | MachinePointerInfo(srcValue), partVT, false, | |||
2178 | false, false, partAlign); | |||
2179 | } else { | |||
2180 | p = DAG.getLoad(partVT, dl, Root, srcAddr, | |||
2181 | MachinePointerInfo(srcValue), false, false, false, | |||
2182 | partAlign); | |||
2183 | } | |||
2184 | if (p.getNode()) | |||
2185 | p.getNode()->setIROrder(idx + 1); | |||
2186 | InVals.push_back(p); | |||
2187 | ++InsIdx; | |||
2188 | } | |||
2189 | if (vtparts.size() > 0) | |||
2190 | --InsIdx; | |||
2191 | continue; | |||
2192 | } | |||
2193 | if (Ty->isVectorTy()) { | |||
2194 | EVT ObjectVT = getValueType(Ty); | |||
2195 | SDValue Arg = getParamSymbol(DAG, idx, getPointerTy()); | |||
2196 | unsigned NumElts = ObjectVT.getVectorNumElements(); | |||
2197 | assert(TLI->getNumRegisters(F->getContext(), ObjectVT) == NumElts &&((TLI->getNumRegisters(F->getContext(), ObjectVT) == NumElts && "Vector was not scalarized") ? static_cast<void > (0) : __assert_fail ("TLI->getNumRegisters(F->getContext(), ObjectVT) == NumElts && \"Vector was not scalarized\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 2198, __PRETTY_FUNCTION__)) | |||
2198 | "Vector was not scalarized")((TLI->getNumRegisters(F->getContext(), ObjectVT) == NumElts && "Vector was not scalarized") ? static_cast<void > (0) : __assert_fail ("TLI->getNumRegisters(F->getContext(), ObjectVT) == NumElts && \"Vector was not scalarized\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 2198, __PRETTY_FUNCTION__)); | |||
2199 | EVT EltVT = ObjectVT.getVectorElementType(); | |||
2200 | ||||
2201 | // V1 load | |||
2202 | // f32 = load ... | |||
2203 | if (NumElts == 1) { | |||
2204 | // We only have one element, so just directly load it | |||
2205 | Value *SrcValue = Constant::getNullValue(PointerType::get( | |||
2206 | EltVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM)); | |||
2207 | SDValue P = DAG.getLoad( | |||
2208 | EltVT, dl, Root, Arg, MachinePointerInfo(SrcValue), false, | |||
2209 | false, true, | |||
2210 | TD->getABITypeAlignment(EltVT.getTypeForEVT(F->getContext()))); | |||
2211 | if (P.getNode()) | |||
2212 | P.getNode()->setIROrder(idx + 1); | |||
2213 | ||||
2214 | if (Ins[InsIdx].VT.getSizeInBits() > EltVT.getSizeInBits()) | |||
2215 | P = DAG.getNode(ISD::ANY_EXTEND, dl, Ins[InsIdx].VT, P); | |||
2216 | InVals.push_back(P); | |||
2217 | ++InsIdx; | |||
2218 | } else if (NumElts == 2) { | |||
2219 | // V2 load | |||
2220 | // f32,f32 = load ... | |||
2221 | EVT VecVT = EVT::getVectorVT(F->getContext(), EltVT, 2); | |||
2222 | Value *SrcValue = Constant::getNullValue(PointerType::get( | |||
2223 | VecVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM)); | |||
2224 | SDValue P = DAG.getLoad( | |||
2225 | VecVT, dl, Root, Arg, MachinePointerInfo(SrcValue), false, | |||
2226 | false, true, | |||
2227 | TD->getABITypeAlignment(VecVT.getTypeForEVT(F->getContext()))); | |||
2228 | if (P.getNode()) | |||
2229 | P.getNode()->setIROrder(idx + 1); | |||
2230 | ||||
2231 | SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, P, | |||
2232 | DAG.getIntPtrConstant(0, dl)); | |||
2233 | SDValue Elt1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, P, | |||
2234 | DAG.getIntPtrConstant(1, dl)); | |||
2235 | ||||
2236 | if (Ins[InsIdx].VT.getSizeInBits() > EltVT.getSizeInBits()) { | |||
2237 | Elt0 = DAG.getNode(ISD::ANY_EXTEND, dl, Ins[InsIdx].VT, Elt0); | |||
2238 | Elt1 = DAG.getNode(ISD::ANY_EXTEND, dl, Ins[InsIdx].VT, Elt1); | |||
2239 | } | |||
2240 | ||||
2241 | InVals.push_back(Elt0); | |||
2242 | InVals.push_back(Elt1); | |||
2243 | InsIdx += 2; | |||
2244 | } else { | |||
2245 | // V4 loads | |||
2246 | // We have at least 4 elements (<3 x Ty> expands to 4 elements) and | |||
2247 | // the | |||
2248 | // vector will be expanded to a power of 2 elements, so we know we can | |||
2249 | // always round up to the next multiple of 4 when creating the vector | |||
2250 | // loads. | |||
2251 | // e.g. 4 elem => 1 ld.v4 | |||
2252 | // 6 elem => 2 ld.v4 | |||
2253 | // 8 elem => 2 ld.v4 | |||
2254 | // 11 elem => 3 ld.v4 | |||
2255 | unsigned VecSize = 4; | |||
2256 | if (EltVT.getSizeInBits() == 64) { | |||
2257 | VecSize = 2; | |||
2258 | } | |||
2259 | EVT VecVT = EVT::getVectorVT(F->getContext(), EltVT, VecSize); | |||
2260 | unsigned Ofst = 0; | |||
// NOTE: this `i` shadows the argument index `i` of the enclosing loop.
// Within this loop InsIdx is not advanced; it is bumped by NumElts afterwards.
2261 | for (unsigned i = 0; i < NumElts; i += VecSize) { | |||
2262 | Value *SrcValue = Constant::getNullValue( | |||
2263 | PointerType::get(VecVT.getTypeForEVT(F->getContext()), | |||
2264 | llvm::ADDRESS_SPACE_PARAM)); | |||
2265 | SDValue SrcAddr = | |||
2266 | DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, | |||
2267 | DAG.getConstant(Ofst, dl, getPointerTy())); | |||
2268 | SDValue P = DAG.getLoad( | |||
2269 | VecVT, dl, Root, SrcAddr, MachinePointerInfo(SrcValue), false, | |||
2270 | false, true, | |||
2271 | TD->getABITypeAlignment(VecVT.getTypeForEVT(F->getContext()))); | |||
2272 | if (P.getNode()) | |||
2273 | P.getNode()->setIROrder(idx + 1); | |||
2274 | ||||
2275 | for (unsigned j = 0; j < VecSize; ++j) { | |||
2276 | if (i + j >= NumElts) | |||
2277 | break; | |||
2278 | SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, P, | |||
2279 | DAG.getIntPtrConstant(j, dl)); | |||
2280 | if (Ins[InsIdx].VT.getSizeInBits() > EltVT.getSizeInBits()) | |||
2281 | Elt = DAG.getNode(ISD::ANY_EXTEND, dl, Ins[InsIdx].VT, Elt); | |||
2282 | InVals.push_back(Elt); | |||
2283 | } | |||
2284 | Ofst += TD->getTypeAllocSize(VecVT.getTypeForEVT(F->getContext())); | |||
2285 | } | |||
2286 | InsIdx += NumElts; | |||
2287 | } | |||
2288 | ||||
2289 | if (NumElts > 0) | |||
2290 | --InsIdx; | |||
2291 | continue; | |||
2292 | } | |||
2293 | // A plain scalar. | |||
2294 | EVT ObjectVT = getValueType(Ty); | |||
2295 | // If ABI, load from the param symbol | |||
2296 | SDValue Arg = getParamSymbol(DAG, idx, getPointerTy()); | |||
2297 | Value *srcValue = Constant::getNullValue(PointerType::get( | |||
2298 | ObjectVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM)); | |||
2299 | SDValue p; | |||
2300 | if (ObjectVT.getSizeInBits() < Ins[InsIdx].VT.getSizeInBits()) { | |||
2301 | ISD::LoadExtType ExtOp = Ins[InsIdx].Flags.isSExt() ? | |||
2302 | ISD::SEXTLOAD : ISD::ZEXTLOAD; | |||
2303 | p = DAG.getExtLoad(ExtOp, dl, Ins[InsIdx].VT, Root, Arg, | |||
2304 | MachinePointerInfo(srcValue), ObjectVT, false, false, | |||
2305 | false, | |||
2306 | TD->getABITypeAlignment(ObjectVT.getTypeForEVT(F->getContext()))); | |||
2307 | } else { | |||
2308 | p = DAG.getLoad(Ins[InsIdx].VT, dl, Root, Arg, | |||
2309 | MachinePointerInfo(srcValue), false, false, false, | |||
2310 | TD->getABITypeAlignment(ObjectVT.getTypeForEVT(F->getContext()))); | |||
2311 | } | |||
2312 | if (p.getNode()) | |||
2313 | p.getNode()->setIROrder(idx + 1); | |||
2314 | InVals.push_back(p); | |||
2315 | continue; | |||
2316 | } | |||
2317 | ||||
2318 | // Param has ByVal attribute | |||
2319 | // Return MoveParam(param symbol). | |||
2320 | // Ideally, the param symbol can be returned directly, | |||
2321 | // but when SDNode builder decides to use it in a CopyToReg(), | |||
2322 | // machine instruction fails because TargetExternalSymbol | |||
2323 | // (not lowered) is target dependent, and CopyToReg assumes | |||
2324 | // the source is lowered. | |||
2325 | EVT ObjectVT = getValueType(Ty); | |||
2326 | assert(ObjectVT == Ins[InsIdx].VT &&((ObjectVT == Ins[InsIdx].VT && "Ins type did not match function type" ) ? static_cast<void> (0) : __assert_fail ("ObjectVT == Ins[InsIdx].VT && \"Ins type did not match function type\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 2327, __PRETTY_FUNCTION__)) | |||
2327 | "Ins type did not match function type")((ObjectVT == Ins[InsIdx].VT && "Ins type did not match function type" ) ? static_cast<void> (0) : __assert_fail ("ObjectVT == Ins[InsIdx].VT && \"Ins type did not match function type\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 2327, __PRETTY_FUNCTION__)); | |||
2328 | SDValue Arg = getParamSymbol(DAG, idx, getPointerTy()); | |||
2329 | SDValue p = DAG.getNode(NVPTXISD::MoveParam, dl, ObjectVT, Arg); | |||
2330 | if (p.getNode()) | |||
2331 | p.getNode()->setIROrder(idx + 1); | |||
2332 | if (isKernel) | |||
2333 | InVals.push_back(p); | |||
2334 | else { | |||
2335 | SDValue p2 = DAG.getNode( | |||
2336 | ISD::INTRINSIC_WO_CHAIN, dl, ObjectVT, | |||
2337 | DAG.getConstant(Intrinsic::nvvm_ptr_local_to_gen, dl, MVT::i32), p); | |||
2338 | InVals.push_back(p2); | |||
2339 | } | |||
2340 | } | |||
2341 | ||||
2342 | // Clang will check explicit VarArg and issue error if any. However, Clang | |||
2343 | // will let code with | |||
2344 | // implicit var arg like f() pass. See bug 617733. | |||
2345 | // We treat this case as if the arg list is empty. | |||
2346 | // if (F.isVarArg()) { | |||
2347 | // assert(0 && "VarArg not supported yet!"); | |||
2348 | //} | |||
2349 | ||||
// Nothing in this function appends to OutChains, so as written the root is
// never replaced here.
2350 | if (!OutChains.empty()) | |||
2351 | DAG.setRoot(DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains)); | |||
2352 | ||||
2353 | return Chain; | |||
2354 | } | |||
2355 | ||||
2356 | ||||
2357 | SDValue | |||
2358 | NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, | |||
2359 | bool isVarArg, | |||
2360 | const SmallVectorImpl<ISD::OutputArg> &Outs, | |||
2361 | const SmallVectorImpl<SDValue> &OutVals, | |||
2362 | SDLoc dl, SelectionDAG &DAG) const { | |||
2363 | MachineFunction &MF = DAG.getMachineFunction(); | |||
2364 | const Function *F = MF.getFunction(); | |||
2365 | Type *RetTy = F->getReturnType(); | |||
2366 | const DataLayout *TD = getDataLayout(); | |||
2367 | ||||
2368 | bool isABI = (STI.getSmVersion() >= 20); | |||
2369 | assert(isABI && "Non-ABI compilation is not supported")((isABI && "Non-ABI compilation is not supported") ? static_cast <void> (0) : __assert_fail ("isABI && \"Non-ABI compilation is not supported\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 2369, __PRETTY_FUNCTION__)); | |||
2370 | if (!isABI) | |||
2371 | return Chain; | |||
2372 | ||||
2373 | if (VectorType *VTy = dyn_cast<VectorType>(RetTy)) { | |||
2374 | // If we have a vector type, the OutVals array will be the scalarized | |||
2375 | // components and we have combine them into 1 or more vector stores. | |||
2376 | unsigned NumElts = VTy->getNumElements(); | |||
2377 | assert(NumElts == Outs.size() && "Bad scalarization of return value")((NumElts == Outs.size() && "Bad scalarization of return value" ) ? static_cast<void> (0) : __assert_fail ("NumElts == Outs.size() && \"Bad scalarization of return value\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 2377, __PRETTY_FUNCTION__)); | |||
2378 | ||||
2379 | // const_cast can be removed in later LLVM versions | |||
2380 | EVT EltVT = getValueType(RetTy).getVectorElementType(); | |||
2381 | bool NeedExtend = false; | |||
2382 | if (EltVT.getSizeInBits() < 16) | |||
2383 | NeedExtend = true; | |||
2384 | ||||
2385 | // V1 store | |||
2386 | if (NumElts == 1) { | |||
2387 | SDValue StoreVal = OutVals[0]; | |||
2388 | // We only have one element, so just directly store it | |||
2389 | if (NeedExtend) | |||
2390 | StoreVal = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal); | |||
2391 | SDValue Ops[] = { Chain, DAG.getConstant(0, dl, MVT::i32), StoreVal }; | |||
2392 | Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreRetval, dl, | |||
2393 | DAG.getVTList(MVT::Other), Ops, | |||
2394 | EltVT, MachinePointerInfo()); | |||
2395 | ||||
2396 | } else if (NumElts == 2) { | |||
2397 | // V2 store | |||
2398 | SDValue StoreVal0 = OutVals[0]; | |||
2399 | SDValue StoreVal1 = OutVals[1]; | |||
2400 | ||||
2401 | if (NeedExtend) { | |||
2402 | StoreVal0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal0); | |||
2403 | StoreVal1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal1); | |||
2404 | } | |||
2405 | ||||
2406 | SDValue Ops[] = { Chain, DAG.getConstant(0, dl, MVT::i32), StoreVal0, | |||
2407 | StoreVal1 }; | |||
2408 | Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreRetvalV2, dl, | |||
2409 | DAG.getVTList(MVT::Other), Ops, | |||
2410 | EltVT, MachinePointerInfo()); | |||
2411 | } else { | |||
2412 | // V4 stores | |||
2413 | // We have at least 4 elements (<3 x Ty> expands to 4 elements) and the | |||
2414 | // vector will be expanded to a power of 2 elements, so we know we can | |||
2415 | // always round up to the next multiple of 4 when creating the vector | |||
2416 | // stores. | |||
2417 | // e.g. 4 elem => 1 st.v4 | |||
2418 | // 6 elem => 2 st.v4 | |||
2419 | // 8 elem => 2 st.v4 | |||
2420 | // 11 elem => 3 st.v4 | |||
2421 | ||||
2422 | unsigned VecSize = 4; | |||
2423 | if (OutVals[0].getValueType().getSizeInBits() == 64) | |||
2424 | VecSize = 2; | |||
2425 | ||||
2426 | unsigned Offset = 0; | |||
2427 | ||||
2428 | EVT VecVT = | |||
2429 | EVT::getVectorVT(F->getContext(), EltVT, VecSize); | |||
2430 | unsigned PerStoreOffset = | |||
2431 | TD->getTypeAllocSize(VecVT.getTypeForEVT(F->getContext())); | |||
2432 | ||||
2433 | for (unsigned i = 0; i < NumElts; i += VecSize) { | |||
2434 | // Get values | |||
2435 | SDValue StoreVal; | |||
2436 | SmallVector<SDValue, 8> Ops; | |||
2437 | Ops.push_back(Chain); | |||
2438 | Ops.push_back(DAG.getConstant(Offset, dl, MVT::i32)); | |||
2439 | unsigned Opc = NVPTXISD::StoreRetvalV2; | |||
2440 | EVT ExtendedVT = (NeedExtend) ? MVT::i16 : OutVals[0].getValueType(); | |||
2441 | ||||
2442 | StoreVal = OutVals[i]; | |||
2443 | if (NeedExtend) | |||
2444 | StoreVal = DAG.getNode(ISD::ZERO_EXTEND, dl, ExtendedVT, StoreVal); | |||
2445 | Ops.push_back(StoreVal); | |||
2446 | ||||
2447 | if (i + 1 < NumElts) { | |||
2448 | StoreVal = OutVals[i + 1]; | |||
2449 | if (NeedExtend) | |||
2450 | StoreVal = DAG.getNode(ISD::ZERO_EXTEND, dl, ExtendedVT, StoreVal); | |||
2451 | } else { | |||
2452 | StoreVal = DAG.getUNDEF(ExtendedVT); | |||
2453 | } | |||
2454 | Ops.push_back(StoreVal); | |||
2455 | ||||
2456 | if (VecSize == 4) { | |||
2457 | Opc = NVPTXISD::StoreRetvalV4; | |||
2458 | if (i + 2 < NumElts) { | |||
2459 | StoreVal = OutVals[i + 2]; | |||
2460 | if (NeedExtend) | |||
2461 | StoreVal = | |||
2462 | DAG.getNode(ISD::ZERO_EXTEND, dl, ExtendedVT, StoreVal); | |||
2463 | } else { | |||
2464 | StoreVal = DAG.getUNDEF(ExtendedVT); | |||
2465 | } | |||
2466 | Ops.push_back(StoreVal); | |||
2467 | ||||
2468 | if (i + 3 < NumElts) { | |||
2469 | StoreVal = OutVals[i + 3]; | |||
2470 | if (NeedExtend) | |||
2471 | StoreVal = | |||
2472 | DAG.getNode(ISD::ZERO_EXTEND, dl, ExtendedVT, StoreVal); | |||
2473 | } else { | |||
2474 | StoreVal = DAG.getUNDEF(ExtendedVT); | |||
2475 | } | |||
2476 | Ops.push_back(StoreVal); | |||
2477 | } | |||
2478 | ||||
2479 | // Chain = DAG.getNode(Opc, dl, MVT::Other, &Ops[0], Ops.size()); | |||
2480 | Chain = | |||
2481 | DAG.getMemIntrinsicNode(Opc, dl, DAG.getVTList(MVT::Other), Ops, | |||
2482 | EltVT, MachinePointerInfo()); | |||
2483 | Offset += PerStoreOffset; | |||
2484 | } | |||
2485 | } | |||
2486 | } else { | |||
2487 | SmallVector<EVT, 16> ValVTs; | |||
2488 | SmallVector<uint64_t, 16> Offsets; | |||
2489 | ComputePTXValueVTs(*this, RetTy, ValVTs, &Offsets, 0); | |||
2490 | assert(ValVTs.size() == OutVals.size() && "Bad return value decomposition")((ValVTs.size() == OutVals.size() && "Bad return value decomposition" ) ? static_cast<void> (0) : __assert_fail ("ValVTs.size() == OutVals.size() && \"Bad return value decomposition\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 2490, __PRETTY_FUNCTION__)); | |||
2491 | ||||
2492 | for (unsigned i = 0, e = Outs.size(); i != e; ++i) { | |||
2493 | SDValue theVal = OutVals[i]; | |||
2494 | EVT TheValType = theVal.getValueType(); | |||
2495 | unsigned numElems = 1; | |||
2496 | if (TheValType.isVector()) | |||
2497 | numElems = TheValType.getVectorNumElements(); | |||
2498 | for (unsigned j = 0, je = numElems; j != je; ++j) { | |||
2499 | SDValue TmpVal = theVal; | |||
2500 | if (TheValType.isVector()) | |||
2501 | TmpVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, | |||
2502 | TheValType.getVectorElementType(), TmpVal, | |||
2503 | DAG.getIntPtrConstant(j, dl)); | |||
2504 | EVT TheStoreType = ValVTs[i]; | |||
2505 | if (RetTy->isIntegerTy() && | |||
2506 | TD->getTypeAllocSizeInBits(RetTy) < 32) { | |||
2507 | // The following zero-extension is for integer types only, and | |||
2508 | // specifically not for aggregates. | |||
2509 | TmpVal = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, TmpVal); | |||
2510 | TheStoreType = MVT::i32; | |||
2511 | } | |||
2512 | else if (TmpVal.getValueType().getSizeInBits() < 16) | |||
2513 | TmpVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, TmpVal); | |||
2514 | ||||
2515 | SDValue Ops[] = { | |||
2516 | Chain, | |||
2517 | DAG.getConstant(Offsets[i], dl, MVT::i32), | |||
2518 | TmpVal }; | |||
2519 | Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreRetval, dl, | |||
2520 | DAG.getVTList(MVT::Other), Ops, | |||
2521 | TheStoreType, | |||
2522 | MachinePointerInfo()); | |||
2523 | } | |||
2524 | } | |||
2525 | } | |||
2526 | ||||
2527 | return DAG.getNode(NVPTXISD::RET_FLAG, dl, MVT::Other, Chain); | |||
2528 | } | |||
2529 | ||||
2530 | ||||
2531 | void NVPTXTargetLowering::LowerAsmOperandForConstraint( | |||
2532 | SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops, | |||
2533 | SelectionDAG &DAG) const { | |||
2534 | if (Constraint.length() > 1) | |||
2535 | return; | |||
2536 | else | |||
2537 | TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); | |||
2538 | } | |||
2539 | ||||
2540 | // NVPTX suuport vector of legal types of any length in Intrinsics because the | |||
2541 | // NVPTX specific type legalizer | |||
2542 | // will legalize them to the PTX supported length. | |||
2543 | bool NVPTXTargetLowering::isTypeSupportedInIntrinsic(MVT VT) const { | |||
2544 | if (isTypeLegal(VT)) | |||
2545 | return true; | |||
2546 | if (VT.isVector()) { | |||
2547 | MVT eVT = VT.getVectorElementType(); | |||
2548 | if (isTypeLegal(eVT)) | |||
2549 | return true; | |||
2550 | } | |||
2551 | return false; | |||
2552 | } | |||
2553 | ||||
2554 | static unsigned getOpcForTextureInstr(unsigned Intrinsic) { | |||
2555 | switch (Intrinsic) { | |||
2556 | default: | |||
2557 | return 0; | |||
2558 | ||||
2559 | case Intrinsic::nvvm_tex_1d_v4f32_s32: | |||
2560 | return NVPTXISD::Tex1DFloatS32; | |||
2561 | case Intrinsic::nvvm_tex_1d_v4f32_f32: | |||
2562 | return NVPTXISD::Tex1DFloatFloat; | |||
2563 | case Intrinsic::nvvm_tex_1d_level_v4f32_f32: | |||
2564 | return NVPTXISD::Tex1DFloatFloatLevel; | |||
2565 | case Intrinsic::nvvm_tex_1d_grad_v4f32_f32: | |||
2566 | return NVPTXISD::Tex1DFloatFloatGrad; | |||
2567 | case Intrinsic::nvvm_tex_1d_v4s32_s32: | |||
2568 | return NVPTXISD::Tex1DS32S32; | |||
2569 | case Intrinsic::nvvm_tex_1d_v4s32_f32: | |||
2570 | return NVPTXISD::Tex1DS32Float; | |||
2571 | case Intrinsic::nvvm_tex_1d_level_v4s32_f32: | |||
2572 | return NVPTXISD::Tex1DS32FloatLevel; | |||
2573 | case Intrinsic::nvvm_tex_1d_grad_v4s32_f32: | |||
2574 | return NVPTXISD::Tex1DS32FloatGrad; | |||
2575 | case Intrinsic::nvvm_tex_1d_v4u32_s32: | |||
2576 | return NVPTXISD::Tex1DU32S32; | |||
2577 | case Intrinsic::nvvm_tex_1d_v4u32_f32: | |||
2578 | return NVPTXISD::Tex1DU32Float; | |||
2579 | case Intrinsic::nvvm_tex_1d_level_v4u32_f32: | |||
2580 | return NVPTXISD::Tex1DU32FloatLevel; | |||
2581 | case Intrinsic::nvvm_tex_1d_grad_v4u32_f32: | |||
2582 | return NVPTXISD::Tex1DU32FloatGrad; | |||
2583 | ||||
2584 | case Intrinsic::nvvm_tex_1d_array_v4f32_s32: | |||
2585 | return NVPTXISD::Tex1DArrayFloatS32; | |||
2586 | case Intrinsic::nvvm_tex_1d_array_v4f32_f32: | |||
2587 | return NVPTXISD::Tex1DArrayFloatFloat; | |||
2588 | case Intrinsic::nvvm_tex_1d_array_level_v4f32_f32: | |||
2589 | return NVPTXISD::Tex1DArrayFloatFloatLevel; | |||
2590 | case Intrinsic::nvvm_tex_1d_array_grad_v4f32_f32: | |||
2591 | return NVPTXISD::Tex1DArrayFloatFloatGrad; | |||
2592 | case Intrinsic::nvvm_tex_1d_array_v4s32_s32: | |||
2593 | return NVPTXISD::Tex1DArrayS32S32; | |||
2594 | case Intrinsic::nvvm_tex_1d_array_v4s32_f32: | |||
2595 | return NVPTXISD::Tex1DArrayS32Float; | |||
2596 | case Intrinsic::nvvm_tex_1d_array_level_v4s32_f32: | |||
2597 | return NVPTXISD::Tex1DArrayS32FloatLevel; | |||
2598 | case Intrinsic::nvvm_tex_1d_array_grad_v4s32_f32: | |||
2599 | return NVPTXISD::Tex1DArrayS32FloatGrad; | |||
2600 | case Intrinsic::nvvm_tex_1d_array_v4u32_s32: | |||
2601 | return NVPTXISD::Tex1DArrayU32S32; | |||
2602 | case Intrinsic::nvvm_tex_1d_array_v4u32_f32: | |||
2603 | return NVPTXISD::Tex1DArrayU32Float; | |||
2604 | case Intrinsic::nvvm_tex_1d_array_level_v4u32_f32: | |||
2605 | return NVPTXISD::Tex1DArrayU32FloatLevel; | |||
2606 | case Intrinsic::nvvm_tex_1d_array_grad_v4u32_f32: | |||
2607 | return NVPTXISD::Tex1DArrayU32FloatGrad; | |||
2608 | ||||
2609 | case Intrinsic::nvvm_tex_2d_v4f32_s32: | |||
2610 | return NVPTXISD::Tex2DFloatS32; | |||
2611 | case Intrinsic::nvvm_tex_2d_v4f32_f32: | |||
2612 | return NVPTXISD::Tex2DFloatFloat; | |||
2613 | case Intrinsic::nvvm_tex_2d_level_v4f32_f32: | |||
2614 | return NVPTXISD::Tex2DFloatFloatLevel; | |||
2615 | case Intrinsic::nvvm_tex_2d_grad_v4f32_f32: | |||
2616 | return NVPTXISD::Tex2DFloatFloatGrad; | |||
2617 | case Intrinsic::nvvm_tex_2d_v4s32_s32: | |||
2618 | return NVPTXISD::Tex2DS32S32; | |||
2619 | case Intrinsic::nvvm_tex_2d_v4s32_f32: | |||
2620 | return NVPTXISD::Tex2DS32Float; | |||
2621 | case Intrinsic::nvvm_tex_2d_level_v4s32_f32: | |||
2622 | return NVPTXISD::Tex2DS32FloatLevel; | |||
2623 | case Intrinsic::nvvm_tex_2d_grad_v4s32_f32: | |||
2624 | return NVPTXISD::Tex2DS32FloatGrad; | |||
2625 | case Intrinsic::nvvm_tex_2d_v4u32_s32: | |||
2626 | return NVPTXISD::Tex2DU32S32; | |||
2627 | case Intrinsic::nvvm_tex_2d_v4u32_f32: | |||
2628 | return NVPTXISD::Tex2DU32Float; | |||
2629 | case Intrinsic::nvvm_tex_2d_level_v4u32_f32: | |||
2630 | return NVPTXISD::Tex2DU32FloatLevel; | |||
2631 | case Intrinsic::nvvm_tex_2d_grad_v4u32_f32: | |||
2632 | return NVPTXISD::Tex2DU32FloatGrad; | |||
2633 | ||||
2634 | case Intrinsic::nvvm_tex_2d_array_v4f32_s32: | |||
2635 | return NVPTXISD::Tex2DArrayFloatS32; | |||
2636 | case Intrinsic::nvvm_tex_2d_array_v4f32_f32: | |||
2637 | return NVPTXISD::Tex2DArrayFloatFloat; | |||
2638 | case Intrinsic::nvvm_tex_2d_array_level_v4f32_f32: | |||
2639 | return NVPTXISD::Tex2DArrayFloatFloatLevel; | |||
2640 | case Intrinsic::nvvm_tex_2d_array_grad_v4f32_f32: | |||
2641 | return NVPTXISD::Tex2DArrayFloatFloatGrad; | |||
2642 | case Intrinsic::nvvm_tex_2d_array_v4s32_s32: | |||
2643 | return NVPTXISD::Tex2DArrayS32S32; | |||
2644 | case Intrinsic::nvvm_tex_2d_array_v4s32_f32: | |||
2645 | return NVPTXISD::Tex2DArrayS32Float; | |||
2646 | case Intrinsic::nvvm_tex_2d_array_level_v4s32_f32: | |||
2647 | return NVPTXISD::Tex2DArrayS32FloatLevel; | |||
2648 | case Intrinsic::nvvm_tex_2d_array_grad_v4s32_f32: | |||
2649 | return NVPTXISD::Tex2DArrayS32FloatGrad; | |||
2650 | case Intrinsic::nvvm_tex_2d_array_v4u32_s32: | |||
2651 | return NVPTXISD::Tex2DArrayU32S32; | |||
2652 | case Intrinsic::nvvm_tex_2d_array_v4u32_f32: | |||
2653 | return NVPTXISD::Tex2DArrayU32Float; | |||
2654 | case Intrinsic::nvvm_tex_2d_array_level_v4u32_f32: | |||
2655 | return NVPTXISD::Tex2DArrayU32FloatLevel; | |||
2656 | case Intrinsic::nvvm_tex_2d_array_grad_v4u32_f32: | |||
2657 | return NVPTXISD::Tex2DArrayU32FloatGrad; | |||
2658 | ||||
2659 | case Intrinsic::nvvm_tex_3d_v4f32_s32: | |||
2660 | return NVPTXISD::Tex3DFloatS32; | |||
2661 | case Intrinsic::nvvm_tex_3d_v4f32_f32: | |||
2662 | return NVPTXISD::Tex3DFloatFloat; | |||
2663 | case Intrinsic::nvvm_tex_3d_level_v4f32_f32: | |||
2664 | return NVPTXISD::Tex3DFloatFloatLevel; | |||
2665 | case Intrinsic::nvvm_tex_3d_grad_v4f32_f32: | |||
2666 | return NVPTXISD::Tex3DFloatFloatGrad; | |||
2667 | case Intrinsic::nvvm_tex_3d_v4s32_s32: | |||
2668 | return NVPTXISD::Tex3DS32S32; | |||
2669 | case Intrinsic::nvvm_tex_3d_v4s32_f32: | |||
2670 | return NVPTXISD::Tex3DS32Float; | |||
2671 | case Intrinsic::nvvm_tex_3d_level_v4s32_f32: | |||
2672 | return NVPTXISD::Tex3DS32FloatLevel; | |||
2673 | case Intrinsic::nvvm_tex_3d_grad_v4s32_f32: | |||
2674 | return NVPTXISD::Tex3DS32FloatGrad; | |||
2675 | case Intrinsic::nvvm_tex_3d_v4u32_s32: | |||
2676 | return NVPTXISD::Tex3DU32S32; | |||
2677 | case Intrinsic::nvvm_tex_3d_v4u32_f32: | |||
2678 | return NVPTXISD::Tex3DU32Float; | |||
2679 | case Intrinsic::nvvm_tex_3d_level_v4u32_f32: | |||
2680 | return NVPTXISD::Tex3DU32FloatLevel; | |||
2681 | case Intrinsic::nvvm_tex_3d_grad_v4u32_f32: | |||
2682 | return NVPTXISD::Tex3DU32FloatGrad; | |||
2683 | ||||
2684 | case Intrinsic::nvvm_tex_cube_v4f32_f32: | |||
2685 | return NVPTXISD::TexCubeFloatFloat; | |||
2686 | case Intrinsic::nvvm_tex_cube_level_v4f32_f32: | |||
2687 | return NVPTXISD::TexCubeFloatFloatLevel; | |||
2688 | case Intrinsic::nvvm_tex_cube_v4s32_f32: | |||
2689 | return NVPTXISD::TexCubeS32Float; | |||
2690 | case Intrinsic::nvvm_tex_cube_level_v4s32_f32: | |||
2691 | return NVPTXISD::TexCubeS32FloatLevel; | |||
2692 | case Intrinsic::nvvm_tex_cube_v4u32_f32: | |||
2693 | return NVPTXISD::TexCubeU32Float; | |||
2694 | case Intrinsic::nvvm_tex_cube_level_v4u32_f32: | |||
2695 | return NVPTXISD::TexCubeU32FloatLevel; | |||
2696 | ||||
2697 | case Intrinsic::nvvm_tex_cube_array_v4f32_f32: | |||
2698 | return NVPTXISD::TexCubeArrayFloatFloat; | |||
2699 | case Intrinsic::nvvm_tex_cube_array_level_v4f32_f32: | |||
2700 | return NVPTXISD::TexCubeArrayFloatFloatLevel; | |||
2701 | case Intrinsic::nvvm_tex_cube_array_v4s32_f32: | |||
2702 | return NVPTXISD::TexCubeArrayS32Float; | |||
2703 | case Intrinsic::nvvm_tex_cube_array_level_v4s32_f32: | |||
2704 | return NVPTXISD::TexCubeArrayS32FloatLevel; | |||
2705 | case Intrinsic::nvvm_tex_cube_array_v4u32_f32: | |||
2706 | return NVPTXISD::TexCubeArrayU32Float; | |||
2707 | case Intrinsic::nvvm_tex_cube_array_level_v4u32_f32: | |||
2708 | return NVPTXISD::TexCubeArrayU32FloatLevel; | |||
2709 | ||||
2710 | case Intrinsic::nvvm_tld4_r_2d_v4f32_f32: | |||
2711 | return NVPTXISD::Tld4R2DFloatFloat; | |||
2712 | case Intrinsic::nvvm_tld4_g_2d_v4f32_f32: | |||
2713 | return NVPTXISD::Tld4G2DFloatFloat; | |||
2714 | case Intrinsic::nvvm_tld4_b_2d_v4f32_f32: | |||
2715 | return NVPTXISD::Tld4B2DFloatFloat; | |||
2716 | case Intrinsic::nvvm_tld4_a_2d_v4f32_f32: | |||
2717 | return NVPTXISD::Tld4A2DFloatFloat; | |||
2718 | case Intrinsic::nvvm_tld4_r_2d_v4s32_f32: | |||
2719 | return NVPTXISD::Tld4R2DS64Float; | |||
2720 | case Intrinsic::nvvm_tld4_g_2d_v4s32_f32: | |||
2721 | return NVPTXISD::Tld4G2DS64Float; | |||
2722 | case Intrinsic::nvvm_tld4_b_2d_v4s32_f32: | |||
2723 | return NVPTXISD::Tld4B2DS64Float; | |||
2724 | case Intrinsic::nvvm_tld4_a_2d_v4s32_f32: | |||
2725 | return NVPTXISD::Tld4A2DS64Float; | |||
2726 | case Intrinsic::nvvm_tld4_r_2d_v4u32_f32: | |||
2727 | return NVPTXISD::Tld4R2DU64Float; | |||
2728 | case Intrinsic::nvvm_tld4_g_2d_v4u32_f32: | |||
2729 | return NVPTXISD::Tld4G2DU64Float; | |||
2730 | case Intrinsic::nvvm_tld4_b_2d_v4u32_f32: | |||
2731 | return NVPTXISD::Tld4B2DU64Float; | |||
2732 | case Intrinsic::nvvm_tld4_a_2d_v4u32_f32: | |||
2733 | return NVPTXISD::Tld4A2DU64Float; | |||
2734 | ||||
2735 | case Intrinsic::nvvm_tex_unified_1d_v4f32_s32: | |||
2736 | return NVPTXISD::TexUnified1DFloatS32; | |||
2737 | case Intrinsic::nvvm_tex_unified_1d_v4f32_f32: | |||
2738 | return NVPTXISD::TexUnified1DFloatFloat; | |||
2739 | case Intrinsic::nvvm_tex_unified_1d_level_v4f32_f32: | |||
2740 | return NVPTXISD::TexUnified1DFloatFloatLevel; | |||
2741 | case Intrinsic::nvvm_tex_unified_1d_grad_v4f32_f32: | |||
2742 | return NVPTXISD::TexUnified1DFloatFloatGrad; | |||
2743 | case Intrinsic::nvvm_tex_unified_1d_v4s32_s32: | |||
2744 | return NVPTXISD::TexUnified1DS32S32; | |||
2745 | case Intrinsic::nvvm_tex_unified_1d_v4s32_f32: | |||
2746 | return NVPTXISD::TexUnified1DS32Float; | |||
2747 | case Intrinsic::nvvm_tex_unified_1d_level_v4s32_f32: | |||
2748 | return NVPTXISD::TexUnified1DS32FloatLevel; | |||
2749 | case Intrinsic::nvvm_tex_unified_1d_grad_v4s32_f32: | |||
2750 | return NVPTXISD::TexUnified1DS32FloatGrad; | |||
2751 | case Intrinsic::nvvm_tex_unified_1d_v4u32_s32: | |||
2752 | return NVPTXISD::TexUnified1DU32S32; | |||
2753 | case Intrinsic::nvvm_tex_unified_1d_v4u32_f32: | |||
2754 | return NVPTXISD::TexUnified1DU32Float; | |||
2755 | case Intrinsic::nvvm_tex_unified_1d_level_v4u32_f32: | |||
2756 | return NVPTXISD::TexUnified1DU32FloatLevel; | |||
2757 | case Intrinsic::nvvm_tex_unified_1d_grad_v4u32_f32: | |||
2758 | return NVPTXISD::TexUnified1DU32FloatGrad; | |||
2759 | ||||
2760 | case Intrinsic::nvvm_tex_unified_1d_array_v4f32_s32: | |||
2761 | return NVPTXISD::TexUnified1DArrayFloatS32; | |||
2762 | case Intrinsic::nvvm_tex_unified_1d_array_v4f32_f32: | |||
2763 | return NVPTXISD::TexUnified1DArrayFloatFloat; | |||
2764 | case Intrinsic::nvvm_tex_unified_1d_array_level_v4f32_f32: | |||
2765 | return NVPTXISD::TexUnified1DArrayFloatFloatLevel; | |||
2766 | case Intrinsic::nvvm_tex_unified_1d_array_grad_v4f32_f32: | |||
2767 | return NVPTXISD::TexUnified1DArrayFloatFloatGrad; | |||
2768 | case Intrinsic::nvvm_tex_unified_1d_array_v4s32_s32: | |||
2769 | return NVPTXISD::TexUnified1DArrayS32S32; | |||
2770 | case Intrinsic::nvvm_tex_unified_1d_array_v4s32_f32: | |||
2771 | return NVPTXISD::TexUnified1DArrayS32Float; | |||
2772 | case Intrinsic::nvvm_tex_unified_1d_array_level_v4s32_f32: | |||
2773 | return NVPTXISD::TexUnified1DArrayS32FloatLevel; | |||
2774 | case Intrinsic::nvvm_tex_unified_1d_array_grad_v4s32_f32: | |||
2775 | return NVPTXISD::TexUnified1DArrayS32FloatGrad; | |||
2776 | case Intrinsic::nvvm_tex_unified_1d_array_v4u32_s32: | |||
2777 | return NVPTXISD::TexUnified1DArrayU32S32; | |||
2778 | case Intrinsic::nvvm_tex_unified_1d_array_v4u32_f32: | |||
2779 | return NVPTXISD::TexUnified1DArrayU32Float; | |||
2780 | case Intrinsic::nvvm_tex_unified_1d_array_level_v4u32_f32: | |||
2781 | return NVPTXISD::TexUnified1DArrayU32FloatLevel; | |||
2782 | case Intrinsic::nvvm_tex_unified_1d_array_grad_v4u32_f32: | |||
2783 | return NVPTXISD::TexUnified1DArrayU32FloatGrad; | |||
2784 | ||||
2785 | case Intrinsic::nvvm_tex_unified_2d_v4f32_s32: | |||
2786 | return NVPTXISD::TexUnified2DFloatS32; | |||
2787 | case Intrinsic::nvvm_tex_unified_2d_v4f32_f32: | |||
2788 | return NVPTXISD::TexUnified2DFloatFloat; | |||
2789 | case Intrinsic::nvvm_tex_unified_2d_level_v4f32_f32: | |||
2790 | return NVPTXISD::TexUnified2DFloatFloatLevel; | |||
2791 | case Intrinsic::nvvm_tex_unified_2d_grad_v4f32_f32: | |||
2792 | return NVPTXISD::TexUnified2DFloatFloatGrad; | |||
2793 | case Intrinsic::nvvm_tex_unified_2d_v4s32_s32: | |||
2794 | return NVPTXISD::TexUnified2DS32S32; | |||
2795 | case Intrinsic::nvvm_tex_unified_2d_v4s32_f32: | |||
2796 | return NVPTXISD::TexUnified2DS32Float; | |||
2797 | case Intrinsic::nvvm_tex_unified_2d_level_v4s32_f32: | |||
2798 | return NVPTXISD::TexUnified2DS32FloatLevel; | |||
2799 | case Intrinsic::nvvm_tex_unified_2d_grad_v4s32_f32: | |||
2800 | return NVPTXISD::TexUnified2DS32FloatGrad; | |||
2801 | case Intrinsic::nvvm_tex_unified_2d_v4u32_s32: | |||
2802 | return NVPTXISD::TexUnified2DU32S32; | |||
2803 | case Intrinsic::nvvm_tex_unified_2d_v4u32_f32: | |||
2804 | return NVPTXISD::TexUnified2DU32Float; | |||
2805 | case Intrinsic::nvvm_tex_unified_2d_level_v4u32_f32: | |||
2806 | return NVPTXISD::TexUnified2DU32FloatLevel; | |||
2807 | case Intrinsic::nvvm_tex_unified_2d_grad_v4u32_f32: | |||
2808 | return NVPTXISD::TexUnified2DU32FloatGrad; | |||
2809 | ||||
2810 | case Intrinsic::nvvm_tex_unified_2d_array_v4f32_s32: | |||
2811 | return NVPTXISD::TexUnified2DArrayFloatS32; | |||
2812 | case Intrinsic::nvvm_tex_unified_2d_array_v4f32_f32: | |||
2813 | return NVPTXISD::TexUnified2DArrayFloatFloat; | |||
2814 | case Intrinsic::nvvm_tex_unified_2d_array_level_v4f32_f32: | |||
2815 | return NVPTXISD::TexUnified2DArrayFloatFloatLevel; | |||
2816 | case Intrinsic::nvvm_tex_unified_2d_array_grad_v4f32_f32: | |||
2817 | return NVPTXISD::TexUnified2DArrayFloatFloatGrad; | |||
2818 | case Intrinsic::nvvm_tex_unified_2d_array_v4s32_s32: | |||
2819 | return NVPTXISD::TexUnified2DArrayS32S32; | |||
2820 | case Intrinsic::nvvm_tex_unified_2d_array_v4s32_f32: | |||
2821 | return NVPTXISD::TexUnified2DArrayS32Float; | |||
2822 | case Intrinsic::nvvm_tex_unified_2d_array_level_v4s32_f32: | |||
2823 | return NVPTXISD::TexUnified2DArrayS32FloatLevel; | |||
2824 | case Intrinsic::nvvm_tex_unified_2d_array_grad_v4s32_f32: | |||
2825 | return NVPTXISD::TexUnified2DArrayS32FloatGrad; | |||
2826 | case Intrinsic::nvvm_tex_unified_2d_array_v4u32_s32: | |||
2827 | return NVPTXISD::TexUnified2DArrayU32S32; | |||
2828 | case Intrinsic::nvvm_tex_unified_2d_array_v4u32_f32: | |||
2829 | return NVPTXISD::TexUnified2DArrayU32Float; | |||
2830 | case Intrinsic::nvvm_tex_unified_2d_array_level_v4u32_f32: | |||
2831 | return NVPTXISD::TexUnified2DArrayU32FloatLevel; | |||
2832 | case Intrinsic::nvvm_tex_unified_2d_array_grad_v4u32_f32: | |||
2833 | return NVPTXISD::TexUnified2DArrayU32FloatGrad; | |||
2834 | ||||
2835 | case Intrinsic::nvvm_tex_unified_3d_v4f32_s32: | |||
2836 | return NVPTXISD::TexUnified3DFloatS32; | |||
2837 | case Intrinsic::nvvm_tex_unified_3d_v4f32_f32: | |||
2838 | return NVPTXISD::TexUnified3DFloatFloat; | |||
2839 | case Intrinsic::nvvm_tex_unified_3d_level_v4f32_f32: | |||
2840 | return NVPTXISD::TexUnified3DFloatFloatLevel; | |||
2841 | case Intrinsic::nvvm_tex_unified_3d_grad_v4f32_f32: | |||
2842 | return NVPTXISD::TexUnified3DFloatFloatGrad; | |||
2843 | case Intrinsic::nvvm_tex_unified_3d_v4s32_s32: | |||
2844 | return NVPTXISD::TexUnified3DS32S32; | |||
2845 | case Intrinsic::nvvm_tex_unified_3d_v4s32_f32: | |||
2846 | return NVPTXISD::TexUnified3DS32Float; | |||
2847 | case Intrinsic::nvvm_tex_unified_3d_level_v4s32_f32: | |||
2848 | return NVPTXISD::TexUnified3DS32FloatLevel; | |||
2849 | case Intrinsic::nvvm_tex_unified_3d_grad_v4s32_f32: | |||
2850 | return NVPTXISD::TexUnified3DS32FloatGrad; | |||
2851 | case Intrinsic::nvvm_tex_unified_3d_v4u32_s32: | |||
2852 | return NVPTXISD::TexUnified3DU32S32; | |||
2853 | case Intrinsic::nvvm_tex_unified_3d_v4u32_f32: | |||
2854 | return NVPTXISD::TexUnified3DU32Float; | |||
2855 | case Intrinsic::nvvm_tex_unified_3d_level_v4u32_f32: | |||
2856 | return NVPTXISD::TexUnified3DU32FloatLevel; | |||
2857 | case Intrinsic::nvvm_tex_unified_3d_grad_v4u32_f32: | |||
2858 | return NVPTXISD::TexUnified3DU32FloatGrad; | |||
2859 | ||||
2860 | case Intrinsic::nvvm_tex_unified_cube_v4f32_f32: | |||
2861 | return NVPTXISD::TexUnifiedCubeFloatFloat; | |||
2862 | case Intrinsic::nvvm_tex_unified_cube_level_v4f32_f32: | |||
2863 | return NVPTXISD::TexUnifiedCubeFloatFloatLevel; | |||
2864 | case Intrinsic::nvvm_tex_unified_cube_v4s32_f32: | |||
2865 | return NVPTXISD::TexUnifiedCubeS32Float; | |||
2866 | case Intrinsic::nvvm_tex_unified_cube_level_v4s32_f32: | |||
2867 | return NVPTXISD::TexUnifiedCubeS32FloatLevel; | |||
2868 | case Intrinsic::nvvm_tex_unified_cube_v4u32_f32: | |||
2869 | return NVPTXISD::TexUnifiedCubeU32Float; | |||
2870 | case Intrinsic::nvvm_tex_unified_cube_level_v4u32_f32: | |||
2871 | return NVPTXISD::TexUnifiedCubeU32FloatLevel; | |||
2872 | ||||
2873 | case Intrinsic::nvvm_tex_unified_cube_array_v4f32_f32: | |||
2874 | return NVPTXISD::TexUnifiedCubeArrayFloatFloat; | |||
2875 | case Intrinsic::nvvm_tex_unified_cube_array_level_v4f32_f32: | |||
2876 | return NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel; | |||
2877 | case Intrinsic::nvvm_tex_unified_cube_array_v4s32_f32: | |||
2878 | return NVPTXISD::TexUnifiedCubeArrayS32Float; | |||
2879 | case Intrinsic::nvvm_tex_unified_cube_array_level_v4s32_f32: | |||
2880 | return NVPTXISD::TexUnifiedCubeArrayS32FloatLevel; | |||
2881 | case Intrinsic::nvvm_tex_unified_cube_array_v4u32_f32: | |||
2882 | return NVPTXISD::TexUnifiedCubeArrayU32Float; | |||
2883 | case Intrinsic::nvvm_tex_unified_cube_array_level_v4u32_f32: | |||
2884 | return NVPTXISD::TexUnifiedCubeArrayU32FloatLevel; | |||
2885 | ||||
2886 | case Intrinsic::nvvm_tld4_unified_r_2d_v4f32_f32: | |||
2887 | return NVPTXISD::Tld4UnifiedR2DFloatFloat; | |||
2888 | case Intrinsic::nvvm_tld4_unified_g_2d_v4f32_f32: | |||
2889 | return NVPTXISD::Tld4UnifiedG2DFloatFloat; | |||
2890 | case Intrinsic::nvvm_tld4_unified_b_2d_v4f32_f32: | |||
2891 | return NVPTXISD::Tld4UnifiedB2DFloatFloat; | |||
2892 | case Intrinsic::nvvm_tld4_unified_a_2d_v4f32_f32: | |||
2893 | return NVPTXISD::Tld4UnifiedA2DFloatFloat; | |||
2894 | case Intrinsic::nvvm_tld4_unified_r_2d_v4s32_f32: | |||
2895 | return NVPTXISD::Tld4UnifiedR2DS64Float; | |||
2896 | case Intrinsic::nvvm_tld4_unified_g_2d_v4s32_f32: | |||
2897 | return NVPTXISD::Tld4UnifiedG2DS64Float; | |||
2898 | case Intrinsic::nvvm_tld4_unified_b_2d_v4s32_f32: | |||
2899 | return NVPTXISD::Tld4UnifiedB2DS64Float; | |||
2900 | case Intrinsic::nvvm_tld4_unified_a_2d_v4s32_f32: | |||
2901 | return NVPTXISD::Tld4UnifiedA2DS64Float; | |||
2902 | case Intrinsic::nvvm_tld4_unified_r_2d_v4u32_f32: | |||
2903 | return NVPTXISD::Tld4UnifiedR2DU64Float; | |||
2904 | case Intrinsic::nvvm_tld4_unified_g_2d_v4u32_f32: | |||
2905 | return NVPTXISD::Tld4UnifiedG2DU64Float; | |||
2906 | case Intrinsic::nvvm_tld4_unified_b_2d_v4u32_f32: | |||
2907 | return NVPTXISD::Tld4UnifiedB2DU64Float; | |||
2908 | case Intrinsic::nvvm_tld4_unified_a_2d_v4u32_f32: | |||
2909 | return NVPTXISD::Tld4UnifiedA2DU64Float; | |||
2910 | } | |||
2911 | } | |||
2912 | ||||
2913 | static unsigned getOpcForSurfaceInstr(unsigned Intrinsic) { | |||
2914 | switch (Intrinsic) { | |||
2915 | default: | |||
2916 | return 0; | |||
2917 | case Intrinsic::nvvm_suld_1d_i8_clamp: | |||
2918 | return NVPTXISD::Suld1DI8Clamp; | |||
2919 | case Intrinsic::nvvm_suld_1d_i16_clamp: | |||
2920 | return NVPTXISD::Suld1DI16Clamp; | |||
2921 | case Intrinsic::nvvm_suld_1d_i32_clamp: | |||
2922 | return NVPTXISD::Suld1DI32Clamp; | |||
2923 | case Intrinsic::nvvm_suld_1d_i64_clamp: | |||
2924 | return NVPTXISD::Suld1DI64Clamp; | |||
2925 | case Intrinsic::nvvm_suld_1d_v2i8_clamp: | |||
2926 | return NVPTXISD::Suld1DV2I8Clamp; | |||
2927 | case Intrinsic::nvvm_suld_1d_v2i16_clamp: | |||
2928 | return NVPTXISD::Suld1DV2I16Clamp; | |||
2929 | case Intrinsic::nvvm_suld_1d_v2i32_clamp: | |||
2930 | return NVPTXISD::Suld1DV2I32Clamp; | |||
2931 | case Intrinsic::nvvm_suld_1d_v2i64_clamp: | |||
2932 | return NVPTXISD::Suld1DV2I64Clamp; | |||
2933 | case Intrinsic::nvvm_suld_1d_v4i8_clamp: | |||
2934 | return NVPTXISD::Suld1DV4I8Clamp; | |||
2935 | case Intrinsic::nvvm_suld_1d_v4i16_clamp: | |||
2936 | return NVPTXISD::Suld1DV4I16Clamp; | |||
2937 | case Intrinsic::nvvm_suld_1d_v4i32_clamp: | |||
2938 | return NVPTXISD::Suld1DV4I32Clamp; | |||
2939 | case Intrinsic::nvvm_suld_1d_array_i8_clamp: | |||
2940 | return NVPTXISD::Suld1DArrayI8Clamp; | |||
2941 | case Intrinsic::nvvm_suld_1d_array_i16_clamp: | |||
2942 | return NVPTXISD::Suld1DArrayI16Clamp; | |||
2943 | case Intrinsic::nvvm_suld_1d_array_i32_clamp: | |||
2944 | return NVPTXISD::Suld1DArrayI32Clamp; | |||
2945 | case Intrinsic::nvvm_suld_1d_array_i64_clamp: | |||
2946 | return NVPTXISD::Suld1DArrayI64Clamp; | |||
2947 | case Intrinsic::nvvm_suld_1d_array_v2i8_clamp: | |||
2948 | return NVPTXISD::Suld1DArrayV2I8Clamp; | |||
2949 | case Intrinsic::nvvm_suld_1d_array_v2i16_clamp: | |||
2950 | return NVPTXISD::Suld1DArrayV2I16Clamp; | |||
2951 | case Intrinsic::nvvm_suld_1d_array_v2i32_clamp: | |||
2952 | return NVPTXISD::Suld1DArrayV2I32Clamp; | |||
2953 | case Intrinsic::nvvm_suld_1d_array_v2i64_clamp: | |||
2954 | return NVPTXISD::Suld1DArrayV2I64Clamp; | |||
2955 | case Intrinsic::nvvm_suld_1d_array_v4i8_clamp: | |||
2956 | return NVPTXISD::Suld1DArrayV4I8Clamp; | |||
2957 | case Intrinsic::nvvm_suld_1d_array_v4i16_clamp: | |||
2958 | return NVPTXISD::Suld1DArrayV4I16Clamp; | |||
2959 | case Intrinsic::nvvm_suld_1d_array_v4i32_clamp: | |||
2960 | return NVPTXISD::Suld1DArrayV4I32Clamp; | |||
2961 | case Intrinsic::nvvm_suld_2d_i8_clamp: | |||
2962 | return NVPTXISD::Suld2DI8Clamp; | |||
2963 | case Intrinsic::nvvm_suld_2d_i16_clamp: | |||
2964 | return NVPTXISD::Suld2DI16Clamp; | |||
2965 | case Intrinsic::nvvm_suld_2d_i32_clamp: | |||
2966 | return NVPTXISD::Suld2DI32Clamp; | |||
2967 | case Intrinsic::nvvm_suld_2d_i64_clamp: | |||
2968 | return NVPTXISD::Suld2DI64Clamp; | |||
2969 | case Intrinsic::nvvm_suld_2d_v2i8_clamp: | |||
2970 | return NVPTXISD::Suld2DV2I8Clamp; | |||
2971 | case Intrinsic::nvvm_suld_2d_v2i16_clamp: | |||
2972 | return NVPTXISD::Suld2DV2I16Clamp; | |||
2973 | case Intrinsic::nvvm_suld_2d_v2i32_clamp: | |||
2974 | return NVPTXISD::Suld2DV2I32Clamp; | |||
2975 | case Intrinsic::nvvm_suld_2d_v2i64_clamp: | |||
2976 | return NVPTXISD::Suld2DV2I64Clamp; | |||
2977 | case Intrinsic::nvvm_suld_2d_v4i8_clamp: | |||
2978 | return NVPTXISD::Suld2DV4I8Clamp; | |||
2979 | case Intrinsic::nvvm_suld_2d_v4i16_clamp: | |||
2980 | return NVPTXISD::Suld2DV4I16Clamp; | |||
2981 | case Intrinsic::nvvm_suld_2d_v4i32_clamp: | |||
2982 | return NVPTXISD::Suld2DV4I32Clamp; | |||
2983 | case Intrinsic::nvvm_suld_2d_array_i8_clamp: | |||
2984 | return NVPTXISD::Suld2DArrayI8Clamp; | |||
2985 | case Intrinsic::nvvm_suld_2d_array_i16_clamp: | |||
2986 | return NVPTXISD::Suld2DArrayI16Clamp; | |||
2987 | case Intrinsic::nvvm_suld_2d_array_i32_clamp: | |||
2988 | return NVPTXISD::Suld2DArrayI32Clamp; | |||
2989 | case Intrinsic::nvvm_suld_2d_array_i64_clamp: | |||
2990 | return NVPTXISD::Suld2DArrayI64Clamp; | |||
2991 | case Intrinsic::nvvm_suld_2d_array_v2i8_clamp: | |||
2992 | return NVPTXISD::Suld2DArrayV2I8Clamp; | |||
2993 | case Intrinsic::nvvm_suld_2d_array_v2i16_clamp: | |||
2994 | return NVPTXISD::Suld2DArrayV2I16Clamp; | |||
2995 | case Intrinsic::nvvm_suld_2d_array_v2i32_clamp: | |||
2996 | return NVPTXISD::Suld2DArrayV2I32Clamp; | |||
2997 | case Intrinsic::nvvm_suld_2d_array_v2i64_clamp: | |||
2998 | return NVPTXISD::Suld2DArrayV2I64Clamp; | |||
2999 | case Intrinsic::nvvm_suld_2d_array_v4i8_clamp: | |||
3000 | return NVPTXISD::Suld2DArrayV4I8Clamp; | |||
3001 | case Intrinsic::nvvm_suld_2d_array_v4i16_clamp: | |||
3002 | return NVPTXISD::Suld2DArrayV4I16Clamp; | |||
3003 | case Intrinsic::nvvm_suld_2d_array_v4i32_clamp: | |||
3004 | return NVPTXISD::Suld2DArrayV4I32Clamp; | |||
3005 | case Intrinsic::nvvm_suld_3d_i8_clamp: | |||
3006 | return NVPTXISD::Suld3DI8Clamp; | |||
3007 | case Intrinsic::nvvm_suld_3d_i16_clamp: | |||
3008 | return NVPTXISD::Suld3DI16Clamp; | |||
3009 | case Intrinsic::nvvm_suld_3d_i32_clamp: | |||
3010 | return NVPTXISD::Suld3DI32Clamp; | |||
3011 | case Intrinsic::nvvm_suld_3d_i64_clamp: | |||
3012 | return NVPTXISD::Suld3DI64Clamp; | |||
3013 | case Intrinsic::nvvm_suld_3d_v2i8_clamp: | |||
3014 | return NVPTXISD::Suld3DV2I8Clamp; | |||
3015 | case Intrinsic::nvvm_suld_3d_v2i16_clamp: | |||
3016 | return NVPTXISD::Suld3DV2I16Clamp; | |||
3017 | case Intrinsic::nvvm_suld_3d_v2i32_clamp: | |||
3018 | return NVPTXISD::Suld3DV2I32Clamp; | |||
3019 | case Intrinsic::nvvm_suld_3d_v2i64_clamp: | |||
3020 | return NVPTXISD::Suld3DV2I64Clamp; | |||
3021 | case Intrinsic::nvvm_suld_3d_v4i8_clamp: | |||
3022 | return NVPTXISD::Suld3DV4I8Clamp; | |||
3023 | case Intrinsic::nvvm_suld_3d_v4i16_clamp: | |||
3024 | return NVPTXISD::Suld3DV4I16Clamp; | |||
3025 | case Intrinsic::nvvm_suld_3d_v4i32_clamp: | |||
3026 | return NVPTXISD::Suld3DV4I32Clamp; | |||
3027 | case Intrinsic::nvvm_suld_1d_i8_trap: | |||
3028 | return NVPTXISD::Suld1DI8Trap; | |||
3029 | case Intrinsic::nvvm_suld_1d_i16_trap: | |||
3030 | return NVPTXISD::Suld1DI16Trap; | |||
3031 | case Intrinsic::nvvm_suld_1d_i32_trap: | |||
3032 | return NVPTXISD::Suld1DI32Trap; | |||
3033 | case Intrinsic::nvvm_suld_1d_i64_trap: | |||
3034 | return NVPTXISD::Suld1DI64Trap; | |||
3035 | case Intrinsic::nvvm_suld_1d_v2i8_trap: | |||
3036 | return NVPTXISD::Suld1DV2I8Trap; | |||
3037 | case Intrinsic::nvvm_suld_1d_v2i16_trap: | |||
3038 | return NVPTXISD::Suld1DV2I16Trap; | |||
3039 | case Intrinsic::nvvm_suld_1d_v2i32_trap: | |||
3040 | return NVPTXISD::Suld1DV2I32Trap; | |||
3041 | case Intrinsic::nvvm_suld_1d_v2i64_trap: | |||
3042 | return NVPTXISD::Suld1DV2I64Trap; | |||
3043 | case Intrinsic::nvvm_suld_1d_v4i8_trap: | |||
3044 | return NVPTXISD::Suld1DV4I8Trap; | |||
3045 | case Intrinsic::nvvm_suld_1d_v4i16_trap: | |||
3046 | return NVPTXISD::Suld1DV4I16Trap; | |||
3047 | case Intrinsic::nvvm_suld_1d_v4i32_trap: | |||
3048 | return NVPTXISD::Suld1DV4I32Trap; | |||
3049 | case Intrinsic::nvvm_suld_1d_array_i8_trap: | |||
3050 | return NVPTXISD::Suld1DArrayI8Trap; | |||
3051 | case Intrinsic::nvvm_suld_1d_array_i16_trap: | |||
3052 | return NVPTXISD::Suld1DArrayI16Trap; | |||
3053 | case Intrinsic::nvvm_suld_1d_array_i32_trap: | |||
3054 | return NVPTXISD::Suld1DArrayI32Trap; | |||
3055 | case Intrinsic::nvvm_suld_1d_array_i64_trap: | |||
3056 | return NVPTXISD::Suld1DArrayI64Trap; | |||
3057 | case Intrinsic::nvvm_suld_1d_array_v2i8_trap: | |||
3058 | return NVPTXISD::Suld1DArrayV2I8Trap; | |||
3059 | case Intrinsic::nvvm_suld_1d_array_v2i16_trap: | |||
3060 | return NVPTXISD::Suld1DArrayV2I16Trap; | |||
3061 | case Intrinsic::nvvm_suld_1d_array_v2i32_trap: | |||
3062 | return NVPTXISD::Suld1DArrayV2I32Trap; | |||
3063 | case Intrinsic::nvvm_suld_1d_array_v2i64_trap: | |||
3064 | return NVPTXISD::Suld1DArrayV2I64Trap; | |||
3065 | case Intrinsic::nvvm_suld_1d_array_v4i8_trap: | |||
3066 | return NVPTXISD::Suld1DArrayV4I8Trap; | |||
3067 | case Intrinsic::nvvm_suld_1d_array_v4i16_trap: | |||
3068 | return NVPTXISD::Suld1DArrayV4I16Trap; | |||
3069 | case Intrinsic::nvvm_suld_1d_array_v4i32_trap: | |||
3070 | return NVPTXISD::Suld1DArrayV4I32Trap; | |||
3071 | case Intrinsic::nvvm_suld_2d_i8_trap: | |||
3072 | return NVPTXISD::Suld2DI8Trap; | |||
3073 | case Intrinsic::nvvm_suld_2d_i16_trap: | |||
3074 | return NVPTXISD::Suld2DI16Trap; | |||
3075 | case Intrinsic::nvvm_suld_2d_i32_trap: | |||
3076 | return NVPTXISD::Suld2DI32Trap; | |||
3077 | case Intrinsic::nvvm_suld_2d_i64_trap: | |||
3078 | return NVPTXISD::Suld2DI64Trap; | |||
3079 | case Intrinsic::nvvm_suld_2d_v2i8_trap: | |||
3080 | return NVPTXISD::Suld2DV2I8Trap; | |||
3081 | case Intrinsic::nvvm_suld_2d_v2i16_trap: | |||
3082 | return NVPTXISD::Suld2DV2I16Trap; | |||
3083 | case Intrinsic::nvvm_suld_2d_v2i32_trap: | |||
3084 | return NVPTXISD::Suld2DV2I32Trap; | |||
3085 | case Intrinsic::nvvm_suld_2d_v2i64_trap: | |||
3086 | return NVPTXISD::Suld2DV2I64Trap; | |||
3087 | case Intrinsic::nvvm_suld_2d_v4i8_trap: | |||
3088 | return NVPTXISD::Suld2DV4I8Trap; | |||
3089 | case Intrinsic::nvvm_suld_2d_v4i16_trap: | |||
3090 | return NVPTXISD::Suld2DV4I16Trap; | |||
3091 | case Intrinsic::nvvm_suld_2d_v4i32_trap: | |||
3092 | return NVPTXISD::Suld2DV4I32Trap; | |||
3093 | case Intrinsic::nvvm_suld_2d_array_i8_trap: | |||
3094 | return NVPTXISD::Suld2DArrayI8Trap; | |||
3095 | case Intrinsic::nvvm_suld_2d_array_i16_trap: | |||
3096 | return NVPTXISD::Suld2DArrayI16Trap; | |||
3097 | case Intrinsic::nvvm_suld_2d_array_i32_trap: | |||
3098 | return NVPTXISD::Suld2DArrayI32Trap; | |||
3099 | case Intrinsic::nvvm_suld_2d_array_i64_trap: | |||
3100 | return NVPTXISD::Suld2DArrayI64Trap; | |||
3101 | case Intrinsic::nvvm_suld_2d_array_v2i8_trap: | |||
3102 | return NVPTXISD::Suld2DArrayV2I8Trap; | |||
3103 | case Intrinsic::nvvm_suld_2d_array_v2i16_trap: | |||
3104 | return NVPTXISD::Suld2DArrayV2I16Trap; | |||
3105 | case Intrinsic::nvvm_suld_2d_array_v2i32_trap: | |||
3106 | return NVPTXISD::Suld2DArrayV2I32Trap; | |||
3107 | case Intrinsic::nvvm_suld_2d_array_v2i64_trap: | |||
3108 | return NVPTXISD::Suld2DArrayV2I64Trap; | |||
3109 | case Intrinsic::nvvm_suld_2d_array_v4i8_trap: | |||
3110 | return NVPTXISD::Suld2DArrayV4I8Trap; | |||
3111 | case Intrinsic::nvvm_suld_2d_array_v4i16_trap: | |||
3112 | return NVPTXISD::Suld2DArrayV4I16Trap; | |||
3113 | case Intrinsic::nvvm_suld_2d_array_v4i32_trap: | |||
3114 | return NVPTXISD::Suld2DArrayV4I32Trap; | |||
3115 | case Intrinsic::nvvm_suld_3d_i8_trap: | |||
3116 | return NVPTXISD::Suld3DI8Trap; | |||
3117 | case Intrinsic::nvvm_suld_3d_i16_trap: | |||
3118 | return NVPTXISD::Suld3DI16Trap; | |||
3119 | case Intrinsic::nvvm_suld_3d_i32_trap: | |||
3120 | return NVPTXISD::Suld3DI32Trap; | |||
3121 | case Intrinsic::nvvm_suld_3d_i64_trap: | |||
3122 | return NVPTXISD::Suld3DI64Trap; | |||
3123 | case Intrinsic::nvvm_suld_3d_v2i8_trap: | |||
3124 | return NVPTXISD::Suld3DV2I8Trap; | |||
3125 | case Intrinsic::nvvm_suld_3d_v2i16_trap: | |||
3126 | return NVPTXISD::Suld3DV2I16Trap; | |||
3127 | case Intrinsic::nvvm_suld_3d_v2i32_trap: | |||
3128 | return NVPTXISD::Suld3DV2I32Trap; | |||
3129 | case Intrinsic::nvvm_suld_3d_v2i64_trap: | |||
3130 | return NVPTXISD::Suld3DV2I64Trap; | |||
3131 | case Intrinsic::nvvm_suld_3d_v4i8_trap: | |||
3132 | return NVPTXISD::Suld3DV4I8Trap; | |||
3133 | case Intrinsic::nvvm_suld_3d_v4i16_trap: | |||
3134 | return NVPTXISD::Suld3DV4I16Trap; | |||
3135 | case Intrinsic::nvvm_suld_3d_v4i32_trap: | |||
3136 | return NVPTXISD::Suld3DV4I32Trap; | |||
3137 | case Intrinsic::nvvm_suld_1d_i8_zero: | |||
3138 | return NVPTXISD::Suld1DI8Zero; | |||
3139 | case Intrinsic::nvvm_suld_1d_i16_zero: | |||
3140 | return NVPTXISD::Suld1DI16Zero; | |||
3141 | case Intrinsic::nvvm_suld_1d_i32_zero: | |||
3142 | return NVPTXISD::Suld1DI32Zero; | |||
3143 | case Intrinsic::nvvm_suld_1d_i64_zero: | |||
3144 | return NVPTXISD::Suld1DI64Zero; | |||
3145 | case Intrinsic::nvvm_suld_1d_v2i8_zero: | |||
3146 | return NVPTXISD::Suld1DV2I8Zero; | |||
3147 | case Intrinsic::nvvm_suld_1d_v2i16_zero: | |||
3148 | return NVPTXISD::Suld1DV2I16Zero; | |||
3149 | case Intrinsic::nvvm_suld_1d_v2i32_zero: | |||
3150 | return NVPTXISD::Suld1DV2I32Zero; | |||
3151 | case Intrinsic::nvvm_suld_1d_v2i64_zero: | |||
3152 | return NVPTXISD::Suld1DV2I64Zero; | |||
3153 | case Intrinsic::nvvm_suld_1d_v4i8_zero: | |||
3154 | return NVPTXISD::Suld1DV4I8Zero; | |||
3155 | case Intrinsic::nvvm_suld_1d_v4i16_zero: | |||
3156 | return NVPTXISD::Suld1DV4I16Zero; | |||
3157 | case Intrinsic::nvvm_suld_1d_v4i32_zero: | |||
3158 | return NVPTXISD::Suld1DV4I32Zero; | |||
3159 | case Intrinsic::nvvm_suld_1d_array_i8_zero: | |||
3160 | return NVPTXISD::Suld1DArrayI8Zero; | |||
3161 | case Intrinsic::nvvm_suld_1d_array_i16_zero: | |||
3162 | return NVPTXISD::Suld1DArrayI16Zero; | |||
3163 | case Intrinsic::nvvm_suld_1d_array_i32_zero: | |||
3164 | return NVPTXISD::Suld1DArrayI32Zero; | |||
3165 | case Intrinsic::nvvm_suld_1d_array_i64_zero: | |||
3166 | return NVPTXISD::Suld1DArrayI64Zero; | |||
3167 | case Intrinsic::nvvm_suld_1d_array_v2i8_zero: | |||
3168 | return NVPTXISD::Suld1DArrayV2I8Zero; | |||
3169 | case Intrinsic::nvvm_suld_1d_array_v2i16_zero: | |||
3170 | return NVPTXISD::Suld1DArrayV2I16Zero; | |||
3171 | case Intrinsic::nvvm_suld_1d_array_v2i32_zero: | |||
3172 | return NVPTXISD::Suld1DArrayV2I32Zero; | |||
3173 | case Intrinsic::nvvm_suld_1d_array_v2i64_zero: | |||
3174 | return NVPTXISD::Suld1DArrayV2I64Zero; | |||
3175 | case Intrinsic::nvvm_suld_1d_array_v4i8_zero: | |||
3176 | return NVPTXISD::Suld1DArrayV4I8Zero; | |||
3177 | case Intrinsic::nvvm_suld_1d_array_v4i16_zero: | |||
3178 | return NVPTXISD::Suld1DArrayV4I16Zero; | |||
3179 | case Intrinsic::nvvm_suld_1d_array_v4i32_zero: | |||
3180 | return NVPTXISD::Suld1DArrayV4I32Zero; | |||
3181 | case Intrinsic::nvvm_suld_2d_i8_zero: | |||
3182 | return NVPTXISD::Suld2DI8Zero; | |||
3183 | case Intrinsic::nvvm_suld_2d_i16_zero: | |||
3184 | return NVPTXISD::Suld2DI16Zero; | |||
3185 | case Intrinsic::nvvm_suld_2d_i32_zero: | |||
3186 | return NVPTXISD::Suld2DI32Zero; | |||
3187 | case Intrinsic::nvvm_suld_2d_i64_zero: | |||
3188 | return NVPTXISD::Suld2DI64Zero; | |||
3189 | case Intrinsic::nvvm_suld_2d_v2i8_zero: | |||
3190 | return NVPTXISD::Suld2DV2I8Zero; | |||
3191 | case Intrinsic::nvvm_suld_2d_v2i16_zero: | |||
3192 | return NVPTXISD::Suld2DV2I16Zero; | |||
3193 | case Intrinsic::nvvm_suld_2d_v2i32_zero: | |||
3194 | return NVPTXISD::Suld2DV2I32Zero; | |||
3195 | case Intrinsic::nvvm_suld_2d_v2i64_zero: | |||
3196 | return NVPTXISD::Suld2DV2I64Zero; | |||
3197 | case Intrinsic::nvvm_suld_2d_v4i8_zero: | |||
3198 | return NVPTXISD::Suld2DV4I8Zero; | |||
3199 | case Intrinsic::nvvm_suld_2d_v4i16_zero: | |||
3200 | return NVPTXISD::Suld2DV4I16Zero; | |||
3201 | case Intrinsic::nvvm_suld_2d_v4i32_zero: | |||
3202 | return NVPTXISD::Suld2DV4I32Zero; | |||
3203 | case Intrinsic::nvvm_suld_2d_array_i8_zero: | |||
3204 | return NVPTXISD::Suld2DArrayI8Zero; | |||
3205 | case Intrinsic::nvvm_suld_2d_array_i16_zero: | |||
3206 | return NVPTXISD::Suld2DArrayI16Zero; | |||
3207 | case Intrinsic::nvvm_suld_2d_array_i32_zero: | |||
3208 | return NVPTXISD::Suld2DArrayI32Zero; | |||
3209 | case Intrinsic::nvvm_suld_2d_array_i64_zero: | |||
3210 | return NVPTXISD::Suld2DArrayI64Zero; | |||
3211 | case Intrinsic::nvvm_suld_2d_array_v2i8_zero: | |||
3212 | return NVPTXISD::Suld2DArrayV2I8Zero; | |||
3213 | case Intrinsic::nvvm_suld_2d_array_v2i16_zero: | |||
3214 | return NVPTXISD::Suld2DArrayV2I16Zero; | |||
3215 | case Intrinsic::nvvm_suld_2d_array_v2i32_zero: | |||
3216 | return NVPTXISD::Suld2DArrayV2I32Zero; | |||
3217 | case Intrinsic::nvvm_suld_2d_array_v2i64_zero: | |||
3218 | return NVPTXISD::Suld2DArrayV2I64Zero; | |||
3219 | case Intrinsic::nvvm_suld_2d_array_v4i8_zero: | |||
3220 | return NVPTXISD::Suld2DArrayV4I8Zero; | |||
3221 | case Intrinsic::nvvm_suld_2d_array_v4i16_zero: | |||
3222 | return NVPTXISD::Suld2DArrayV4I16Zero; | |||
3223 | case Intrinsic::nvvm_suld_2d_array_v4i32_zero: | |||
3224 | return NVPTXISD::Suld2DArrayV4I32Zero; | |||
3225 | case Intrinsic::nvvm_suld_3d_i8_zero: | |||
3226 | return NVPTXISD::Suld3DI8Zero; | |||
3227 | case Intrinsic::nvvm_suld_3d_i16_zero: | |||
3228 | return NVPTXISD::Suld3DI16Zero; | |||
3229 | case Intrinsic::nvvm_suld_3d_i32_zero: | |||
3230 | return NVPTXISD::Suld3DI32Zero; | |||
3231 | case Intrinsic::nvvm_suld_3d_i64_zero: | |||
3232 | return NVPTXISD::Suld3DI64Zero; | |||
3233 | case Intrinsic::nvvm_suld_3d_v2i8_zero: | |||
3234 | return NVPTXISD::Suld3DV2I8Zero; | |||
3235 | case Intrinsic::nvvm_suld_3d_v2i16_zero: | |||
3236 | return NVPTXISD::Suld3DV2I16Zero; | |||
3237 | case Intrinsic::nvvm_suld_3d_v2i32_zero: | |||
3238 | return NVPTXISD::Suld3DV2I32Zero; | |||
3239 | case Intrinsic::nvvm_suld_3d_v2i64_zero: | |||
3240 | return NVPTXISD::Suld3DV2I64Zero; | |||
3241 | case Intrinsic::nvvm_suld_3d_v4i8_zero: | |||
3242 | return NVPTXISD::Suld3DV4I8Zero; | |||
3243 | case Intrinsic::nvvm_suld_3d_v4i16_zero: | |||
3244 | return NVPTXISD::Suld3DV4I16Zero; | |||
3245 | case Intrinsic::nvvm_suld_3d_v4i32_zero: | |||
3246 | return NVPTXISD::Suld3DV4I32Zero; | |||
3247 | } | |||
3248 | } | |||
3249 | ||||
3250 | // llvm.ptx.memcpy.const and llvm.ptx.memmove.const need to be modeled as | |||
3251 | // TgtMemIntrinsic | |||
3252 | // because we need information that is only available from the "Value" type | |||
3253 | // of the destination | |||
3254 | // pointer, in particular its address space. | |||
3255 | bool NVPTXTargetLowering::getTgtMemIntrinsic( | |||
3256 | IntrinsicInfo &Info, const CallInst &I, unsigned Intrinsic) const { | |||
3257 | switch (Intrinsic) { | |||
3258 | default: | |||
3259 | return false; | |||
3260 | ||||
3261 | case Intrinsic::nvvm_atomic_load_add_f32: | |||
3262 | Info.opc = ISD::INTRINSIC_W_CHAIN; | |||
3263 | Info.memVT = MVT::f32; | |||
3264 | Info.ptrVal = I.getArgOperand(0); | |||
3265 | Info.offset = 0; | |||
3266 | Info.vol = 0; | |||
3267 | Info.readMem = true; | |||
3268 | Info.writeMem = true; | |||
3269 | Info.align = 0; | |||
3270 | return true; | |||
3271 | ||||
3272 | case Intrinsic::nvvm_atomic_load_inc_32: | |||
3273 | case Intrinsic::nvvm_atomic_load_dec_32: | |||
3274 | Info.opc = ISD::INTRINSIC_W_CHAIN; | |||
3275 | Info.memVT = MVT::i32; | |||
3276 | Info.ptrVal = I.getArgOperand(0); | |||
3277 | Info.offset = 0; | |||
3278 | Info.vol = 0; | |||
3279 | Info.readMem = true; | |||
3280 | Info.writeMem = true; | |||
3281 | Info.align = 0; | |||
3282 | return true; | |||
3283 | ||||
3284 | case Intrinsic::nvvm_ldu_global_i: | |||
3285 | case Intrinsic::nvvm_ldu_global_f: | |||
3286 | case Intrinsic::nvvm_ldu_global_p: { | |||
3287 | ||||
3288 | Info.opc = ISD::INTRINSIC_W_CHAIN; | |||
3289 | if (Intrinsic == Intrinsic::nvvm_ldu_global_i) | |||
3290 | Info.memVT = getValueType(I.getType()); | |||
3291 | else if(Intrinsic == Intrinsic::nvvm_ldu_global_p) | |||
3292 | Info.memVT = getPointerTy(); | |||
3293 | else | |||
3294 | Info.memVT = getValueType(I.getType()); | |||
3295 | Info.ptrVal = I.getArgOperand(0); | |||
3296 | Info.offset = 0; | |||
3297 | Info.vol = 0; | |||
3298 | Info.readMem = true; | |||
3299 | Info.writeMem = false; | |||
3300 | Info.align = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue(); | |||
3301 | ||||
3302 | return true; | |||
3303 | } | |||
3304 | case Intrinsic::nvvm_ldg_global_i: | |||
3305 | case Intrinsic::nvvm_ldg_global_f: | |||
3306 | case Intrinsic::nvvm_ldg_global_p: { | |||
3307 | ||||
3308 | Info.opc = ISD::INTRINSIC_W_CHAIN; | |||
3309 | if (Intrinsic == Intrinsic::nvvm_ldg_global_i) | |||
3310 | Info.memVT = getValueType(I.getType()); | |||
3311 | else if(Intrinsic == Intrinsic::nvvm_ldg_global_p) | |||
3312 | Info.memVT = getPointerTy(); | |||
3313 | else | |||
3314 | Info.memVT = getValueType(I.getType()); | |||
3315 | Info.ptrVal = I.getArgOperand(0); | |||
3316 | Info.offset = 0; | |||
3317 | Info.vol = 0; | |||
3318 | Info.readMem = true; | |||
3319 | Info.writeMem = false; | |||
3320 | Info.align = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue(); | |||
3321 | ||||
3322 | return true; | |||
3323 | } | |||
3324 | ||||
3325 | case Intrinsic::nvvm_tex_1d_v4f32_s32: | |||
3326 | case Intrinsic::nvvm_tex_1d_v4f32_f32: | |||
3327 | case Intrinsic::nvvm_tex_1d_level_v4f32_f32: | |||
3328 | case Intrinsic::nvvm_tex_1d_grad_v4f32_f32: | |||
3329 | case Intrinsic::nvvm_tex_1d_array_v4f32_s32: | |||
3330 | case Intrinsic::nvvm_tex_1d_array_v4f32_f32: | |||
3331 | case Intrinsic::nvvm_tex_1d_array_level_v4f32_f32: | |||
3332 | case Intrinsic::nvvm_tex_1d_array_grad_v4f32_f32: | |||
3333 | case Intrinsic::nvvm_tex_2d_v4f32_s32: | |||
3334 | case Intrinsic::nvvm_tex_2d_v4f32_f32: | |||
3335 | case Intrinsic::nvvm_tex_2d_level_v4f32_f32: | |||
3336 | case Intrinsic::nvvm_tex_2d_grad_v4f32_f32: | |||
3337 | case Intrinsic::nvvm_tex_2d_array_v4f32_s32: | |||
3338 | case Intrinsic::nvvm_tex_2d_array_v4f32_f32: | |||
3339 | case Intrinsic::nvvm_tex_2d_array_level_v4f32_f32: | |||
3340 | case Intrinsic::nvvm_tex_2d_array_grad_v4f32_f32: | |||
3341 | case Intrinsic::nvvm_tex_3d_v4f32_s32: | |||
3342 | case Intrinsic::nvvm_tex_3d_v4f32_f32: | |||
3343 | case Intrinsic::nvvm_tex_3d_level_v4f32_f32: | |||
3344 | case Intrinsic::nvvm_tex_3d_grad_v4f32_f32: | |||
3345 | case Intrinsic::nvvm_tex_cube_v4f32_f32: | |||
3346 | case Intrinsic::nvvm_tex_cube_level_v4f32_f32: | |||
3347 | case Intrinsic::nvvm_tex_cube_array_v4f32_f32: | |||
3348 | case Intrinsic::nvvm_tex_cube_array_level_v4f32_f32: | |||
3349 | case Intrinsic::nvvm_tld4_r_2d_v4f32_f32: | |||
3350 | case Intrinsic::nvvm_tld4_g_2d_v4f32_f32: | |||
3351 | case Intrinsic::nvvm_tld4_b_2d_v4f32_f32: | |||
3352 | case Intrinsic::nvvm_tld4_a_2d_v4f32_f32: | |||
3353 | case Intrinsic::nvvm_tex_unified_1d_v4f32_s32: | |||
3354 | case Intrinsic::nvvm_tex_unified_1d_v4f32_f32: | |||
3355 | case Intrinsic::nvvm_tex_unified_1d_level_v4f32_f32: | |||
3356 | case Intrinsic::nvvm_tex_unified_1d_grad_v4f32_f32: | |||
3357 | case Intrinsic::nvvm_tex_unified_1d_array_v4f32_s32: | |||
3358 | case Intrinsic::nvvm_tex_unified_1d_array_v4f32_f32: | |||
3359 | case Intrinsic::nvvm_tex_unified_1d_array_level_v4f32_f32: | |||
3360 | case Intrinsic::nvvm_tex_unified_1d_array_grad_v4f32_f32: | |||
3361 | case Intrinsic::nvvm_tex_unified_2d_v4f32_s32: | |||
3362 | case Intrinsic::nvvm_tex_unified_2d_v4f32_f32: | |||
3363 | case Intrinsic::nvvm_tex_unified_2d_level_v4f32_f32: | |||
3364 | case Intrinsic::nvvm_tex_unified_2d_grad_v4f32_f32: | |||
3365 | case Intrinsic::nvvm_tex_unified_2d_array_v4f32_s32: | |||
3366 | case Intrinsic::nvvm_tex_unified_2d_array_v4f32_f32: | |||
3367 | case Intrinsic::nvvm_tex_unified_2d_array_level_v4f32_f32: | |||
3368 | case Intrinsic::nvvm_tex_unified_2d_array_grad_v4f32_f32: | |||
3369 | case Intrinsic::nvvm_tex_unified_3d_v4f32_s32: | |||
3370 | case Intrinsic::nvvm_tex_unified_3d_v4f32_f32: | |||
3371 | case Intrinsic::nvvm_tex_unified_3d_level_v4f32_f32: | |||
3372 | case Intrinsic::nvvm_tex_unified_3d_grad_v4f32_f32: | |||
3373 | case Intrinsic::nvvm_tex_unified_cube_v4f32_f32: | |||
3374 | case Intrinsic::nvvm_tex_unified_cube_level_v4f32_f32: | |||
3375 | case Intrinsic::nvvm_tex_unified_cube_array_v4f32_f32: | |||
3376 | case Intrinsic::nvvm_tex_unified_cube_array_level_v4f32_f32: | |||
3377 | case Intrinsic::nvvm_tld4_unified_r_2d_v4f32_f32: | |||
3378 | case Intrinsic::nvvm_tld4_unified_g_2d_v4f32_f32: | |||
3379 | case Intrinsic::nvvm_tld4_unified_b_2d_v4f32_f32: | |||
3380 | case Intrinsic::nvvm_tld4_unified_a_2d_v4f32_f32: { | |||
3381 | Info.opc = getOpcForTextureInstr(Intrinsic); | |||
3382 | Info.memVT = MVT::v4f32; | |||
3383 | Info.ptrVal = nullptr; | |||
3384 | Info.offset = 0; | |||
3385 | Info.vol = 0; | |||
3386 | Info.readMem = true; | |||
3387 | Info.writeMem = false; | |||
3388 | Info.align = 16; | |||
3389 | return true; | |||
3390 | } | |||
3391 | case Intrinsic::nvvm_tex_1d_v4s32_s32: | |||
3392 | case Intrinsic::nvvm_tex_1d_v4s32_f32: | |||
3393 | case Intrinsic::nvvm_tex_1d_level_v4s32_f32: | |||
3394 | case Intrinsic::nvvm_tex_1d_grad_v4s32_f32: | |||
3395 | case Intrinsic::nvvm_tex_1d_array_v4s32_s32: | |||
3396 | case Intrinsic::nvvm_tex_1d_array_v4s32_f32: | |||
3397 | case Intrinsic::nvvm_tex_1d_array_level_v4s32_f32: | |||
3398 | case Intrinsic::nvvm_tex_1d_array_grad_v4s32_f32: | |||
3399 | case Intrinsic::nvvm_tex_2d_v4s32_s32: | |||
3400 | case Intrinsic::nvvm_tex_2d_v4s32_f32: | |||
3401 | case Intrinsic::nvvm_tex_2d_level_v4s32_f32: | |||
3402 | case Intrinsic::nvvm_tex_2d_grad_v4s32_f32: | |||
3403 | case Intrinsic::nvvm_tex_2d_array_v4s32_s32: | |||
3404 | case Intrinsic::nvvm_tex_2d_array_v4s32_f32: | |||
3405 | case Intrinsic::nvvm_tex_2d_array_level_v4s32_f32: | |||
3406 | case Intrinsic::nvvm_tex_2d_array_grad_v4s32_f32: | |||
3407 | case Intrinsic::nvvm_tex_3d_v4s32_s32: | |||
3408 | case Intrinsic::nvvm_tex_3d_v4s32_f32: | |||
3409 | case Intrinsic::nvvm_tex_3d_level_v4s32_f32: | |||
3410 | case Intrinsic::nvvm_tex_3d_grad_v4s32_f32: | |||
3411 | case Intrinsic::nvvm_tex_cube_v4s32_f32: | |||
3412 | case Intrinsic::nvvm_tex_cube_level_v4s32_f32: | |||
3413 | case Intrinsic::nvvm_tex_cube_array_v4s32_f32: | |||
3414 | case Intrinsic::nvvm_tex_cube_array_level_v4s32_f32: | |||
3415 | case Intrinsic::nvvm_tex_cube_v4u32_f32: | |||
3416 | case Intrinsic::nvvm_tex_cube_level_v4u32_f32: | |||
3417 | case Intrinsic::nvvm_tex_cube_array_v4u32_f32: | |||
3418 | case Intrinsic::nvvm_tex_cube_array_level_v4u32_f32: | |||
3419 | case Intrinsic::nvvm_tex_1d_v4u32_s32: | |||
3420 | case Intrinsic::nvvm_tex_1d_v4u32_f32: | |||
3421 | case Intrinsic::nvvm_tex_1d_level_v4u32_f32: | |||
3422 | case Intrinsic::nvvm_tex_1d_grad_v4u32_f32: | |||
3423 | case Intrinsic::nvvm_tex_1d_array_v4u32_s32: | |||
3424 | case Intrinsic::nvvm_tex_1d_array_v4u32_f32: | |||
3425 | case Intrinsic::nvvm_tex_1d_array_level_v4u32_f32: | |||
3426 | case Intrinsic::nvvm_tex_1d_array_grad_v4u32_f32: | |||
3427 | case Intrinsic::nvvm_tex_2d_v4u32_s32: | |||
3428 | case Intrinsic::nvvm_tex_2d_v4u32_f32: | |||
3429 | case Intrinsic::nvvm_tex_2d_level_v4u32_f32: | |||
3430 | case Intrinsic::nvvm_tex_2d_grad_v4u32_f32: | |||
3431 | case Intrinsic::nvvm_tex_2d_array_v4u32_s32: | |||
3432 | case Intrinsic::nvvm_tex_2d_array_v4u32_f32: | |||
3433 | case Intrinsic::nvvm_tex_2d_array_level_v4u32_f32: | |||
3434 | case Intrinsic::nvvm_tex_2d_array_grad_v4u32_f32: | |||
3435 | case Intrinsic::nvvm_tex_3d_v4u32_s32: | |||
3436 | case Intrinsic::nvvm_tex_3d_v4u32_f32: | |||
3437 | case Intrinsic::nvvm_tex_3d_level_v4u32_f32: | |||
3438 | case Intrinsic::nvvm_tex_3d_grad_v4u32_f32: | |||
3439 | case Intrinsic::nvvm_tld4_r_2d_v4s32_f32: | |||
3440 | case Intrinsic::nvvm_tld4_g_2d_v4s32_f32: | |||
3441 | case Intrinsic::nvvm_tld4_b_2d_v4s32_f32: | |||
3442 | case Intrinsic::nvvm_tld4_a_2d_v4s32_f32: | |||
3443 | case Intrinsic::nvvm_tld4_r_2d_v4u32_f32: | |||
3444 | case Intrinsic::nvvm_tld4_g_2d_v4u32_f32: | |||
3445 | case Intrinsic::nvvm_tld4_b_2d_v4u32_f32: | |||
3446 | case Intrinsic::nvvm_tld4_a_2d_v4u32_f32: | |||
3447 | case Intrinsic::nvvm_tex_unified_1d_v4s32_s32: | |||
3448 | case Intrinsic::nvvm_tex_unified_1d_v4s32_f32: | |||
3449 | case Intrinsic::nvvm_tex_unified_1d_level_v4s32_f32: | |||
3450 | case Intrinsic::nvvm_tex_unified_1d_grad_v4s32_f32: | |||
3451 | case Intrinsic::nvvm_tex_unified_1d_array_v4s32_s32: | |||
3452 | case Intrinsic::nvvm_tex_unified_1d_array_v4s32_f32: | |||
3453 | case Intrinsic::nvvm_tex_unified_1d_array_level_v4s32_f32: | |||
3454 | case Intrinsic::nvvm_tex_unified_1d_array_grad_v4s32_f32: | |||
3455 | case Intrinsic::nvvm_tex_unified_2d_v4s32_s32: | |||
3456 | case Intrinsic::nvvm_tex_unified_2d_v4s32_f32: | |||
3457 | case Intrinsic::nvvm_tex_unified_2d_level_v4s32_f32: | |||
3458 | case Intrinsic::nvvm_tex_unified_2d_grad_v4s32_f32: | |||
3459 | case Intrinsic::nvvm_tex_unified_2d_array_v4s32_s32: | |||
3460 | case Intrinsic::nvvm_tex_unified_2d_array_v4s32_f32: | |||
3461 | case Intrinsic::nvvm_tex_unified_2d_array_level_v4s32_f32: | |||
3462 | case Intrinsic::nvvm_tex_unified_2d_array_grad_v4s32_f32: | |||
3463 | case Intrinsic::nvvm_tex_unified_3d_v4s32_s32: | |||
3464 | case Intrinsic::nvvm_tex_unified_3d_v4s32_f32: | |||
3465 | case Intrinsic::nvvm_tex_unified_3d_level_v4s32_f32: | |||
3466 | case Intrinsic::nvvm_tex_unified_3d_grad_v4s32_f32: | |||
3467 | case Intrinsic::nvvm_tex_unified_1d_v4u32_s32: | |||
3468 | case Intrinsic::nvvm_tex_unified_1d_v4u32_f32: | |||
3469 | case Intrinsic::nvvm_tex_unified_1d_level_v4u32_f32: | |||
3470 | case Intrinsic::nvvm_tex_unified_1d_grad_v4u32_f32: | |||
3471 | case Intrinsic::nvvm_tex_unified_1d_array_v4u32_s32: | |||
3472 | case Intrinsic::nvvm_tex_unified_1d_array_v4u32_f32: | |||
3473 | case Intrinsic::nvvm_tex_unified_1d_array_level_v4u32_f32: | |||
3474 | case Intrinsic::nvvm_tex_unified_1d_array_grad_v4u32_f32: | |||
3475 | case Intrinsic::nvvm_tex_unified_2d_v4u32_s32: | |||
3476 | case Intrinsic::nvvm_tex_unified_2d_v4u32_f32: | |||
3477 | case Intrinsic::nvvm_tex_unified_2d_level_v4u32_f32: | |||
3478 | case Intrinsic::nvvm_tex_unified_2d_grad_v4u32_f32: | |||
3479 | case Intrinsic::nvvm_tex_unified_2d_array_v4u32_s32: | |||
3480 | case Intrinsic::nvvm_tex_unified_2d_array_v4u32_f32: | |||
3481 | case Intrinsic::nvvm_tex_unified_2d_array_level_v4u32_f32: | |||
3482 | case Intrinsic::nvvm_tex_unified_2d_array_grad_v4u32_f32: | |||
3483 | case Intrinsic::nvvm_tex_unified_3d_v4u32_s32: | |||
3484 | case Intrinsic::nvvm_tex_unified_3d_v4u32_f32: | |||
3485 | case Intrinsic::nvvm_tex_unified_3d_level_v4u32_f32: | |||
3486 | case Intrinsic::nvvm_tex_unified_3d_grad_v4u32_f32: | |||
3487 | case Intrinsic::nvvm_tex_unified_cube_v4s32_f32: | |||
3488 | case Intrinsic::nvvm_tex_unified_cube_level_v4s32_f32: | |||
3489 | case Intrinsic::nvvm_tex_unified_cube_array_v4s32_f32: | |||
3490 | case Intrinsic::nvvm_tex_unified_cube_array_level_v4s32_f32: | |||
3491 | case Intrinsic::nvvm_tex_unified_cube_v4u32_f32: | |||
3492 | case Intrinsic::nvvm_tex_unified_cube_level_v4u32_f32: | |||
3493 | case Intrinsic::nvvm_tex_unified_cube_array_v4u32_f32: | |||
3494 | case Intrinsic::nvvm_tex_unified_cube_array_level_v4u32_f32: | |||
3495 | case Intrinsic::nvvm_tld4_unified_r_2d_v4s32_f32: | |||
3496 | case Intrinsic::nvvm_tld4_unified_g_2d_v4s32_f32: | |||
3497 | case Intrinsic::nvvm_tld4_unified_b_2d_v4s32_f32: | |||
3498 | case Intrinsic::nvvm_tld4_unified_a_2d_v4s32_f32: | |||
3499 | case Intrinsic::nvvm_tld4_unified_r_2d_v4u32_f32: | |||
3500 | case Intrinsic::nvvm_tld4_unified_g_2d_v4u32_f32: | |||
3501 | case Intrinsic::nvvm_tld4_unified_b_2d_v4u32_f32: | |||
3502 | case Intrinsic::nvvm_tld4_unified_a_2d_v4u32_f32: { | |||
3503 | Info.opc = getOpcForTextureInstr(Intrinsic); | |||
3504 | Info.memVT = MVT::v4i32; | |||
3505 | Info.ptrVal = nullptr; | |||
3506 | Info.offset = 0; | |||
3507 | Info.vol = 0; | |||
3508 | Info.readMem = true; | |||
3509 | Info.writeMem = false; | |||
3510 | Info.align = 16; | |||
3511 | return true; | |||
3512 | } | |||
3513 | case Intrinsic::nvvm_suld_1d_i8_clamp: | |||
3514 | case Intrinsic::nvvm_suld_1d_v2i8_clamp: | |||
3515 | case Intrinsic::nvvm_suld_1d_v4i8_clamp: | |||
3516 | case Intrinsic::nvvm_suld_1d_array_i8_clamp: | |||
3517 | case Intrinsic::nvvm_suld_1d_array_v2i8_clamp: | |||
3518 | case Intrinsic::nvvm_suld_1d_array_v4i8_clamp: | |||
3519 | case Intrinsic::nvvm_suld_2d_i8_clamp: | |||
3520 | case Intrinsic::nvvm_suld_2d_v2i8_clamp: | |||
3521 | case Intrinsic::nvvm_suld_2d_v4i8_clamp: | |||
3522 | case Intrinsic::nvvm_suld_2d_array_i8_clamp: | |||
3523 | case Intrinsic::nvvm_suld_2d_array_v2i8_clamp: | |||
3524 | case Intrinsic::nvvm_suld_2d_array_v4i8_clamp: | |||
3525 | case Intrinsic::nvvm_suld_3d_i8_clamp: | |||
3526 | case Intrinsic::nvvm_suld_3d_v2i8_clamp: | |||
3527 | case Intrinsic::nvvm_suld_3d_v4i8_clamp: | |||
3528 | case Intrinsic::nvvm_suld_1d_i8_trap: | |||
3529 | case Intrinsic::nvvm_suld_1d_v2i8_trap: | |||
3530 | case Intrinsic::nvvm_suld_1d_v4i8_trap: | |||
3531 | case Intrinsic::nvvm_suld_1d_array_i8_trap: | |||
3532 | case Intrinsic::nvvm_suld_1d_array_v2i8_trap: | |||
3533 | case Intrinsic::nvvm_suld_1d_array_v4i8_trap: | |||
3534 | case Intrinsic::nvvm_suld_2d_i8_trap: | |||
3535 | case Intrinsic::nvvm_suld_2d_v2i8_trap: | |||
3536 | case Intrinsic::nvvm_suld_2d_v4i8_trap: | |||
3537 | case Intrinsic::nvvm_suld_2d_array_i8_trap: | |||
3538 | case Intrinsic::nvvm_suld_2d_array_v2i8_trap: | |||
3539 | case Intrinsic::nvvm_suld_2d_array_v4i8_trap: | |||
3540 | case Intrinsic::nvvm_suld_3d_i8_trap: | |||
3541 | case Intrinsic::nvvm_suld_3d_v2i8_trap: | |||
3542 | case Intrinsic::nvvm_suld_3d_v4i8_trap: | |||
3543 | case Intrinsic::nvvm_suld_1d_i8_zero: | |||
3544 | case Intrinsic::nvvm_suld_1d_v2i8_zero: | |||
3545 | case Intrinsic::nvvm_suld_1d_v4i8_zero: | |||
3546 | case Intrinsic::nvvm_suld_1d_array_i8_zero: | |||
3547 | case Intrinsic::nvvm_suld_1d_array_v2i8_zero: | |||
3548 | case Intrinsic::nvvm_suld_1d_array_v4i8_zero: | |||
3549 | case Intrinsic::nvvm_suld_2d_i8_zero: | |||
3550 | case Intrinsic::nvvm_suld_2d_v2i8_zero: | |||
3551 | case Intrinsic::nvvm_suld_2d_v4i8_zero: | |||
3552 | case Intrinsic::nvvm_suld_2d_array_i8_zero: | |||
3553 | case Intrinsic::nvvm_suld_2d_array_v2i8_zero: | |||
3554 | case Intrinsic::nvvm_suld_2d_array_v4i8_zero: | |||
3555 | case Intrinsic::nvvm_suld_3d_i8_zero: | |||
3556 | case Intrinsic::nvvm_suld_3d_v2i8_zero: | |||
3557 | case Intrinsic::nvvm_suld_3d_v4i8_zero: { | |||
3558 | Info.opc = getOpcForSurfaceInstr(Intrinsic); | |||
3559 | Info.memVT = MVT::i8; | |||
3560 | Info.ptrVal = nullptr; | |||
3561 | Info.offset = 0; | |||
3562 | Info.vol = 0; | |||
3563 | Info.readMem = true; | |||
3564 | Info.writeMem = false; | |||
3565 | Info.align = 16; | |||
3566 | return true; | |||
3567 | } | |||
3568 | case Intrinsic::nvvm_suld_1d_i16_clamp: | |||
3569 | case Intrinsic::nvvm_suld_1d_v2i16_clamp: | |||
3570 | case Intrinsic::nvvm_suld_1d_v4i16_clamp: | |||
3571 | case Intrinsic::nvvm_suld_1d_array_i16_clamp: | |||
3572 | case Intrinsic::nvvm_suld_1d_array_v2i16_clamp: | |||
3573 | case Intrinsic::nvvm_suld_1d_array_v4i16_clamp: | |||
3574 | case Intrinsic::nvvm_suld_2d_i16_clamp: | |||
3575 | case Intrinsic::nvvm_suld_2d_v2i16_clamp: | |||
3576 | case Intrinsic::nvvm_suld_2d_v4i16_clamp: | |||
3577 | case Intrinsic::nvvm_suld_2d_array_i16_clamp: | |||
3578 | case Intrinsic::nvvm_suld_2d_array_v2i16_clamp: | |||
3579 | case Intrinsic::nvvm_suld_2d_array_v4i16_clamp: | |||
3580 | case Intrinsic::nvvm_suld_3d_i16_clamp: | |||
3581 | case Intrinsic::nvvm_suld_3d_v2i16_clamp: | |||
3582 | case Intrinsic::nvvm_suld_3d_v4i16_clamp: | |||
3583 | case Intrinsic::nvvm_suld_1d_i16_trap: | |||
3584 | case Intrinsic::nvvm_suld_1d_v2i16_trap: | |||
3585 | case Intrinsic::nvvm_suld_1d_v4i16_trap: | |||
3586 | case Intrinsic::nvvm_suld_1d_array_i16_trap: | |||
3587 | case Intrinsic::nvvm_suld_1d_array_v2i16_trap: | |||
3588 | case Intrinsic::nvvm_suld_1d_array_v4i16_trap: | |||
3589 | case Intrinsic::nvvm_suld_2d_i16_trap: | |||
3590 | case Intrinsic::nvvm_suld_2d_v2i16_trap: | |||
3591 | case Intrinsic::nvvm_suld_2d_v4i16_trap: | |||
3592 | case Intrinsic::nvvm_suld_2d_array_i16_trap: | |||
3593 | case Intrinsic::nvvm_suld_2d_array_v2i16_trap: | |||
3594 | case Intrinsic::nvvm_suld_2d_array_v4i16_trap: | |||
3595 | case Intrinsic::nvvm_suld_3d_i16_trap: | |||
3596 | case Intrinsic::nvvm_suld_3d_v2i16_trap: | |||
3597 | case Intrinsic::nvvm_suld_3d_v4i16_trap: | |||
3598 | case Intrinsic::nvvm_suld_1d_i16_zero: | |||
3599 | case Intrinsic::nvvm_suld_1d_v2i16_zero: | |||
3600 | case Intrinsic::nvvm_suld_1d_v4i16_zero: | |||
3601 | case Intrinsic::nvvm_suld_1d_array_i16_zero: | |||
3602 | case Intrinsic::nvvm_suld_1d_array_v2i16_zero: | |||
3603 | case Intrinsic::nvvm_suld_1d_array_v4i16_zero: | |||
3604 | case Intrinsic::nvvm_suld_2d_i16_zero: | |||
3605 | case Intrinsic::nvvm_suld_2d_v2i16_zero: | |||
3606 | case Intrinsic::nvvm_suld_2d_v4i16_zero: | |||
3607 | case Intrinsic::nvvm_suld_2d_array_i16_zero: | |||
3608 | case Intrinsic::nvvm_suld_2d_array_v2i16_zero: | |||
3609 | case Intrinsic::nvvm_suld_2d_array_v4i16_zero: | |||
3610 | case Intrinsic::nvvm_suld_3d_i16_zero: | |||
3611 | case Intrinsic::nvvm_suld_3d_v2i16_zero: | |||
3612 | case Intrinsic::nvvm_suld_3d_v4i16_zero: { | |||
3613 | Info.opc = getOpcForSurfaceInstr(Intrinsic); | |||
3614 | Info.memVT = MVT::i16; | |||
3615 | Info.ptrVal = nullptr; | |||
3616 | Info.offset = 0; | |||
3617 | Info.vol = 0; | |||
3618 | Info.readMem = true; | |||
3619 | Info.writeMem = false; | |||
3620 | Info.align = 16; | |||
3621 | return true; | |||
3622 | } | |||
3623 | case Intrinsic::nvvm_suld_1d_i32_clamp: | |||
3624 | case Intrinsic::nvvm_suld_1d_v2i32_clamp: | |||
3625 | case Intrinsic::nvvm_suld_1d_v4i32_clamp: | |||
3626 | case Intrinsic::nvvm_suld_1d_array_i32_clamp: | |||
3627 | case Intrinsic::nvvm_suld_1d_array_v2i32_clamp: | |||
3628 | case Intrinsic::nvvm_suld_1d_array_v4i32_clamp: | |||
3629 | case Intrinsic::nvvm_suld_2d_i32_clamp: | |||
3630 | case Intrinsic::nvvm_suld_2d_v2i32_clamp: | |||
3631 | case Intrinsic::nvvm_suld_2d_v4i32_clamp: | |||
3632 | case Intrinsic::nvvm_suld_2d_array_i32_clamp: | |||
3633 | case Intrinsic::nvvm_suld_2d_array_v2i32_clamp: | |||
3634 | case Intrinsic::nvvm_suld_2d_array_v4i32_clamp: | |||
3635 | case Intrinsic::nvvm_suld_3d_i32_clamp: | |||
3636 | case Intrinsic::nvvm_suld_3d_v2i32_clamp: | |||
3637 | case Intrinsic::nvvm_suld_3d_v4i32_clamp: | |||
3638 | case Intrinsic::nvvm_suld_1d_i32_trap: | |||
3639 | case Intrinsic::nvvm_suld_1d_v2i32_trap: | |||
3640 | case Intrinsic::nvvm_suld_1d_v4i32_trap: | |||
3641 | case Intrinsic::nvvm_suld_1d_array_i32_trap: | |||
3642 | case Intrinsic::nvvm_suld_1d_array_v2i32_trap: | |||
3643 | case Intrinsic::nvvm_suld_1d_array_v4i32_trap: | |||
3644 | case Intrinsic::nvvm_suld_2d_i32_trap: | |||
3645 | case Intrinsic::nvvm_suld_2d_v2i32_trap: | |||
3646 | case Intrinsic::nvvm_suld_2d_v4i32_trap: | |||
3647 | case Intrinsic::nvvm_suld_2d_array_i32_trap: | |||
3648 | case Intrinsic::nvvm_suld_2d_array_v2i32_trap: | |||
3649 | case Intrinsic::nvvm_suld_2d_array_v4i32_trap: | |||
3650 | case Intrinsic::nvvm_suld_3d_i32_trap: | |||
3651 | case Intrinsic::nvvm_suld_3d_v2i32_trap: | |||
3652 | case Intrinsic::nvvm_suld_3d_v4i32_trap: | |||
3653 | case Intrinsic::nvvm_suld_1d_i32_zero: | |||
3654 | case Intrinsic::nvvm_suld_1d_v2i32_zero: | |||
3655 | case Intrinsic::nvvm_suld_1d_v4i32_zero: | |||
3656 | case Intrinsic::nvvm_suld_1d_array_i32_zero: | |||
3657 | case Intrinsic::nvvm_suld_1d_array_v2i32_zero: | |||
3658 | case Intrinsic::nvvm_suld_1d_array_v4i32_zero: | |||
3659 | case Intrinsic::nvvm_suld_2d_i32_zero: | |||
3660 | case Intrinsic::nvvm_suld_2d_v2i32_zero: | |||
3661 | case Intrinsic::nvvm_suld_2d_v4i32_zero: | |||
3662 | case Intrinsic::nvvm_suld_2d_array_i32_zero: | |||
3663 | case Intrinsic::nvvm_suld_2d_array_v2i32_zero: | |||
3664 | case Intrinsic::nvvm_suld_2d_array_v4i32_zero: | |||
3665 | case Intrinsic::nvvm_suld_3d_i32_zero: | |||
3666 | case Intrinsic::nvvm_suld_3d_v2i32_zero: | |||
3667 | case Intrinsic::nvvm_suld_3d_v4i32_zero: { | |||
3668 | Info.opc = getOpcForSurfaceInstr(Intrinsic); | |||
3669 | Info.memVT = MVT::i32; | |||
3670 | Info.ptrVal = nullptr; | |||
3671 | Info.offset = 0; | |||
3672 | Info.vol = 0; | |||
3673 | Info.readMem = true; | |||
3674 | Info.writeMem = false; | |||
3675 | Info.align = 16; | |||
3676 | return true; | |||
3677 | } | |||
3678 | case Intrinsic::nvvm_suld_1d_i64_clamp: | |||
3679 | case Intrinsic::nvvm_suld_1d_v2i64_clamp: | |||
3680 | case Intrinsic::nvvm_suld_1d_array_i64_clamp: | |||
3681 | case Intrinsic::nvvm_suld_1d_array_v2i64_clamp: | |||
3682 | case Intrinsic::nvvm_suld_2d_i64_clamp: | |||
3683 | case Intrinsic::nvvm_suld_2d_v2i64_clamp: | |||
3684 | case Intrinsic::nvvm_suld_2d_array_i64_clamp: | |||
3685 | case Intrinsic::nvvm_suld_2d_array_v2i64_clamp: | |||
3686 | case Intrinsic::nvvm_suld_3d_i64_clamp: | |||
3687 | case Intrinsic::nvvm_suld_3d_v2i64_clamp: | |||
3688 | case Intrinsic::nvvm_suld_1d_i64_trap: | |||
3689 | case Intrinsic::nvvm_suld_1d_v2i64_trap: | |||
3690 | case Intrinsic::nvvm_suld_1d_array_i64_trap: | |||
3691 | case Intrinsic::nvvm_suld_1d_array_v2i64_trap: | |||
3692 | case Intrinsic::nvvm_suld_2d_i64_trap: | |||
3693 | case Intrinsic::nvvm_suld_2d_v2i64_trap: | |||
3694 | case Intrinsic::nvvm_suld_2d_array_i64_trap: | |||
3695 | case Intrinsic::nvvm_suld_2d_array_v2i64_trap: | |||
3696 | case Intrinsic::nvvm_suld_3d_i64_trap: | |||
3697 | case Intrinsic::nvvm_suld_3d_v2i64_trap: | |||
3698 | case Intrinsic::nvvm_suld_1d_i64_zero: | |||
3699 | case Intrinsic::nvvm_suld_1d_v2i64_zero: | |||
3700 | case Intrinsic::nvvm_suld_1d_array_i64_zero: | |||
3701 | case Intrinsic::nvvm_suld_1d_array_v2i64_zero: | |||
3702 | case Intrinsic::nvvm_suld_2d_i64_zero: | |||
3703 | case Intrinsic::nvvm_suld_2d_v2i64_zero: | |||
3704 | case Intrinsic::nvvm_suld_2d_array_i64_zero: | |||
3705 | case Intrinsic::nvvm_suld_2d_array_v2i64_zero: | |||
3706 | case Intrinsic::nvvm_suld_3d_i64_zero: | |||
3707 | case Intrinsic::nvvm_suld_3d_v2i64_zero: { | |||
3708 | Info.opc = getOpcForSurfaceInstr(Intrinsic); | |||
3709 | Info.memVT = MVT::i64; | |||
3710 | Info.ptrVal = nullptr; | |||
3711 | Info.offset = 0; | |||
3712 | Info.vol = 0; | |||
3713 | Info.readMem = true; | |||
3714 | Info.writeMem = false; | |||
3715 | Info.align = 16; | |||
3716 | return true; | |||
3717 | } | |||
3718 | } | |||
3719 | return false; | |||
3720 | } | |||
3721 | ||||
3722 | /// isLegalAddressingMode - Return true if the addressing mode represented | |||
3723 | /// by AM is legal for this target, for a load/store of the specified type. | |||
3724 | /// Used to guide target specific optimizations, like loop strength reduction | |||
3725 | /// (LoopStrengthReduce.cpp) and memory optimization for address mode | |||
3726 | /// (CodeGenPrepare.cpp) | |||
3727 | bool NVPTXTargetLowering::isLegalAddressingMode(const AddrMode &AM, | |||
3728 | Type *Ty, | |||
3729 | unsigned AS) const { | |||
3730 | ||||
3731 | // AddrMode - This represents an addressing mode of: | |||
3732 | // BaseGV + BaseOffs + BaseReg + Scale*ScaleReg | |||
3733 | // | |||
3734 | // The legal address modes are | |||
3735 | // - [avar] | |||
3736 | // - [areg] | |||
3737 | // - [areg+immoff] | |||
3738 | // - [immAddr] | |||
3739 | ||||
3740 | if (AM.BaseGV) { | |||
3741 | if (AM.BaseOffs || AM.HasBaseReg || AM.Scale) | |||
3742 | return false; | |||
3743 | return true; | |||
3744 | } | |||
3745 | ||||
3746 | switch (AM.Scale) { | |||
3747 | case 0: // "r", "r+i" or "i" is allowed | |||
3748 | break; | |||
3749 | case 1: | |||
3750 | if (AM.HasBaseReg) // "r+r+i" or "r+r" is not allowed. | |||
3751 | return false; | |||
3752 | // Otherwise we have r+i. | |||
3753 | break; | |||
3754 | default: | |||
3755 | // No scale > 1 is allowed | |||
3756 | return false; | |||
3757 | } | |||
3758 | return true; | |||
3759 | } | |||
3760 | ||||
3761 | //===----------------------------------------------------------------------===// | |||
3762 | // NVPTX Inline Assembly Support | |||
3763 | //===----------------------------------------------------------------------===// | |||
3764 | ||||
3765 | /// getConstraintType - Given a constraint letter, return the type of | |||
3766 | /// constraint it is for this target. | |||
3767 | NVPTXTargetLowering::ConstraintType | |||
3768 | NVPTXTargetLowering::getConstraintType(const std::string &Constraint) const { | |||
3769 | if (Constraint.size() == 1) { | |||
3770 | switch (Constraint[0]) { | |||
3771 | default: | |||
3772 | break; | |||
3773 | case 'b': | |||
3774 | case 'r': | |||
3775 | case 'h': | |||
3776 | case 'c': | |||
3777 | case 'l': | |||
3778 | case 'f': | |||
3779 | case 'd': | |||
3780 | case '0': | |||
3781 | case 'N': | |||
3782 | return C_RegisterClass; | |||
3783 | } | |||
3784 | } | |||
3785 | return TargetLowering::getConstraintType(Constraint); | |||
3786 | } | |||
3787 | ||||
3788 | std::pair<unsigned, const TargetRegisterClass *> | |||
3789 | NVPTXTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, | |||
3790 | const std::string &Constraint, | |||
3791 | MVT VT) const { | |||
3792 | if (Constraint.size() == 1) { | |||
3793 | switch (Constraint[0]) { | |||
3794 | case 'b': | |||
3795 | return std::make_pair(0U, &NVPTX::Int1RegsRegClass); | |||
3796 | case 'c': | |||
3797 | return std::make_pair(0U, &NVPTX::Int16RegsRegClass); | |||
3798 | case 'h': | |||
3799 | return std::make_pair(0U, &NVPTX::Int16RegsRegClass); | |||
3800 | case 'r': | |||
3801 | return std::make_pair(0U, &NVPTX::Int32RegsRegClass); | |||
3802 | case 'l': | |||
3803 | case 'N': | |||
3804 | return std::make_pair(0U, &NVPTX::Int64RegsRegClass); | |||
3805 | case 'f': | |||
3806 | return std::make_pair(0U, &NVPTX::Float32RegsRegClass); | |||
3807 | case 'd': | |||
3808 | return std::make_pair(0U, &NVPTX::Float64RegsRegClass); | |||
3809 | } | |||
3810 | } | |||
3811 | return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); | |||
3812 | } | |||
3813 | ||||
3814 | /// getFunctionAlignment - Return the Log2 alignment of this function. | |||
3815 | unsigned NVPTXTargetLowering::getFunctionAlignment(const Function *) const { | |||
3816 | return 4; | |||
3817 | } | |||
3818 | ||||
3819 | //===----------------------------------------------------------------------===// | |||
3820 | // NVPTX DAG Combining | |||
3821 | //===----------------------------------------------------------------------===// | |||
3822 | ||||
3823 | bool NVPTXTargetLowering::allowFMA(MachineFunction &MF, | |||
3824 | CodeGenOpt::Level OptLevel) const { | |||
3825 | const Function *F = MF.getFunction(); | |||
3826 | const TargetOptions &TO = MF.getTarget().Options; | |||
3827 | ||||
3828 | // Always honor command-line argument | |||
3829 | if (FMAContractLevelOpt.getNumOccurrences() > 0) { | |||
3830 | return FMAContractLevelOpt > 0; | |||
3831 | } else if (OptLevel == 0) { | |||
3832 | // Do not contract if we're not optimizing the code | |||
3833 | return false; | |||
3834 | } else if (TO.AllowFPOpFusion == FPOpFusion::Fast || TO.UnsafeFPMath) { | |||
3835 | // Honor TargetOptions flags that explicitly say fusion is okay | |||
3836 | return true; | |||
3837 | } else if (F->hasFnAttribute("unsafe-fp-math")) { | |||
3838 | // Check for unsafe-fp-math=true coming from Clang | |||
3839 | Attribute Attr = F->getFnAttribute("unsafe-fp-math"); | |||
3840 | StringRef Val = Attr.getValueAsString(); | |||
3841 | if (Val == "true") | |||
3842 | return true; | |||
3843 | } | |||
3844 | ||||
3845 | // We did not have a clear indication that fusion is allowed, so assume not | |||
3846 | return false; | |||
3847 | } | |||
3848 | ||||
3849 | /// PerformADDCombineWithOperands - Try DAG combinations for an ADD with | |||
3850 | /// operands N0 and N1. This is a helper for PerformADDCombine that is | |||
3851 | /// called with the default operands, and if that fails, with commuted | |||
3852 | /// operands. | |||
3853 | static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1, | |||
3854 | TargetLowering::DAGCombinerInfo &DCI, | |||
3855 | const NVPTXSubtarget &Subtarget, | |||
3856 | CodeGenOpt::Level OptLevel) { | |||
3857 | SelectionDAG &DAG = DCI.DAG; | |||
3858 | // Skip non-integer, non-scalar case | |||
3859 | EVT VT=N0.getValueType(); | |||
3860 | if (VT.isVector()) | |||
3861 | return SDValue(); | |||
3862 | ||||
3863 | // fold (add (mul a, b), c) -> (mad a, b, c) | |||
3864 | // | |||
3865 | if (N0.getOpcode() == ISD::MUL) { | |||
3866 | assert (VT.isInteger())((VT.isInteger()) ? static_cast<void> (0) : __assert_fail ("VT.isInteger()", "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 3866, __PRETTY_FUNCTION__)); | |||
3867 | // For integer: | |||
3868 | // Since integer multiply-add costs the same as integer multiply | |||
3869 | // but is more costly than integer add, do the fusion only when | |||
3870 | // the mul is only used in the add. | |||
3871 | if (OptLevel==CodeGenOpt::None || VT != MVT::i32 || | |||
3872 | !N0.getNode()->hasOneUse()) | |||
3873 | return SDValue(); | |||
3874 | ||||
3875 | // Do the folding | |||
3876 | return DAG.getNode(NVPTXISD::IMAD, SDLoc(N), VT, | |||
3877 | N0.getOperand(0), N0.getOperand(1), N1); | |||
3878 | } | |||
3879 | else if (N0.getOpcode() == ISD::FMUL) { | |||
3880 | if (VT == MVT::f32 || VT == MVT::f64) { | |||
3881 | const auto *TLI = static_cast<const NVPTXTargetLowering *>( | |||
3882 | &DAG.getTargetLoweringInfo()); | |||
3883 | if (!TLI->allowFMA(DAG.getMachineFunction(), OptLevel)) | |||
3884 | return SDValue(); | |||
3885 | ||||
3886 | // For floating point: | |||
3887 | // Do the fusion only when the mul has less than 5 uses and all | |||
3888 | // are add. | |||
3889 | // The heuristic is that if a use is not an add, then that use | |||
3890 | // cannot be fused into fma, therefore mul is still needed anyway. | |||
3891 | // If there are more than 4 uses, even if they are all add, fusing | |||
3892 | // them will increase register pressue. | |||
3893 | // | |||
3894 | int numUses = 0; | |||
3895 | int nonAddCount = 0; | |||
3896 | for (SDNode::use_iterator UI = N0.getNode()->use_begin(), | |||
3897 | UE = N0.getNode()->use_end(); | |||
3898 | UI != UE; ++UI) { | |||
3899 | numUses++; | |||
3900 | SDNode *User = *UI; | |||
3901 | if (User->getOpcode() != ISD::FADD) | |||
3902 | ++nonAddCount; | |||
3903 | } | |||
3904 | if (numUses >= 5) | |||
3905 | return SDValue(); | |||
3906 | if (nonAddCount) { | |||
3907 | int orderNo = N->getIROrder(); | |||
3908 | int orderNo2 = N0.getNode()->getIROrder(); | |||
3909 | // simple heuristics here for considering potential register | |||
3910 | // pressure, the logics here is that the differnce are used | |||
3911 | // to measure the distance between def and use, the longer distance | |||
3912 | // more likely cause register pressure. | |||
3913 | if (orderNo - orderNo2 < 500) | |||
3914 | return SDValue(); | |||
3915 | ||||
3916 | // Now, check if at least one of the FMUL's operands is live beyond the node N, | |||
3917 | // which guarantees that the FMA will not increase register pressure at node N. | |||
3918 | bool opIsLive = false; | |||
3919 | const SDNode *left = N0.getOperand(0).getNode(); | |||
3920 | const SDNode *right = N0.getOperand(1).getNode(); | |||
3921 | ||||
3922 | if (isa<ConstantSDNode>(left) || isa<ConstantSDNode>(right)) | |||
3923 | opIsLive = true; | |||
3924 | ||||
3925 | if (!opIsLive) | |||
3926 | for (SDNode::use_iterator UI = left->use_begin(), UE = left->use_end(); UI != UE; ++UI) { | |||
3927 | SDNode *User = *UI; | |||
3928 | int orderNo3 = User->getIROrder(); | |||
3929 | if (orderNo3 > orderNo) { | |||
3930 | opIsLive = true; | |||
3931 | break; | |||
3932 | } | |||
3933 | } | |||
3934 | ||||
3935 | if (!opIsLive) | |||
3936 | for (SDNode::use_iterator UI = right->use_begin(), UE = right->use_end(); UI != UE; ++UI) { | |||
3937 | SDNode *User = *UI; | |||
3938 | int orderNo3 = User->getIROrder(); | |||
3939 | if (orderNo3 > orderNo) { | |||
3940 | opIsLive = true; | |||
3941 | break; | |||
3942 | } | |||
3943 | } | |||
3944 | ||||
3945 | if (!opIsLive) | |||
3946 | return SDValue(); | |||
3947 | } | |||
3948 | ||||
3949 | return DAG.getNode(ISD::FMA, SDLoc(N), VT, | |||
3950 | N0.getOperand(0), N0.getOperand(1), N1); | |||
3951 | } | |||
3952 | } | |||
3953 | ||||
3954 | return SDValue(); | |||
3955 | } | |||
3956 | ||||
3957 | /// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD. | |||
3958 | /// | |||
3959 | static SDValue PerformADDCombine(SDNode *N, | |||
3960 | TargetLowering::DAGCombinerInfo &DCI, | |||
3961 | const NVPTXSubtarget &Subtarget, | |||
3962 | CodeGenOpt::Level OptLevel) { | |||
3963 | SDValue N0 = N->getOperand(0); | |||
3964 | SDValue N1 = N->getOperand(1); | |||
3965 | ||||
3966 | // First try with the default operand order. | |||
3967 | SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI, Subtarget, | |||
3968 | OptLevel); | |||
3969 | if (Result.getNode()) | |||
3970 | return Result; | |||
3971 | ||||
3972 | // If that didn't work, try again with the operands commuted. | |||
3973 | return PerformADDCombineWithOperands(N, N1, N0, DCI, Subtarget, OptLevel); | |||
3974 | } | |||
3975 | ||||
3976 | static SDValue PerformANDCombine(SDNode *N, | |||
3977 | TargetLowering::DAGCombinerInfo &DCI) { | |||
3978 | // The type legalizer turns a vector load of i8 values into a zextload to i16 | |||
3979 | // registers, optionally ANY_EXTENDs it (if target type is integer), | |||
3980 | // and ANDs off the high 8 bits. Since we turn this load into a | |||
3981 | // target-specific DAG node, the DAG combiner fails to eliminate these AND | |||
3982 | // nodes. Do that here. | |||
3983 | SDValue Val = N->getOperand(0); | |||
3984 | SDValue Mask = N->getOperand(1); | |||
3985 | ||||
3986 | if (isa<ConstantSDNode>(Val)) { | |||
3987 | std::swap(Val, Mask); | |||
3988 | } | |||
3989 | ||||
3990 | SDValue AExt; | |||
3991 | // Generally, we will see zextload -> IMOV16rr -> ANY_EXTEND -> and | |||
3992 | if (Val.getOpcode() == ISD::ANY_EXTEND) { | |||
3993 | AExt = Val; | |||
3994 | Val = Val->getOperand(0); | |||
3995 | } | |||
3996 | ||||
3997 | if (Val->isMachineOpcode() && Val->getMachineOpcode() == NVPTX::IMOV16rr) { | |||
3998 | Val = Val->getOperand(0); | |||
3999 | } | |||
4000 | ||||
4001 | if (Val->getOpcode() == NVPTXISD::LoadV2 || | |||
4002 | Val->getOpcode() == NVPTXISD::LoadV4) { | |||
4003 | ConstantSDNode *MaskCnst = dyn_cast<ConstantSDNode>(Mask); | |||
4004 | if (!MaskCnst) { | |||
4005 | // Not an AND with a constant | |||
4006 | return SDValue(); | |||
4007 | } | |||
4008 | ||||
4009 | uint64_t MaskVal = MaskCnst->getZExtValue(); | |||
4010 | if (MaskVal != 0xff) { | |||
4011 | // Not an AND that chops off top 8 bits | |||
4012 | return SDValue(); | |||
4013 | } | |||
4014 | ||||
4015 | MemSDNode *Mem = dyn_cast<MemSDNode>(Val); | |||
4016 | if (!Mem) { | |||
4017 | // Not a MemSDNode?!? | |||
4018 | return SDValue(); | |||
4019 | } | |||
4020 | ||||
4021 | EVT MemVT = Mem->getMemoryVT(); | |||
4022 | if (MemVT != MVT::v2i8 && MemVT != MVT::v4i8) { | |||
4023 | // We only handle the i8 case | |||
4024 | return SDValue(); | |||
4025 | } | |||
4026 | ||||
4027 | unsigned ExtType = | |||
4028 | cast<ConstantSDNode>(Val->getOperand(Val->getNumOperands()-1))-> | |||
4029 | getZExtValue(); | |||
4030 | if (ExtType == ISD::SEXTLOAD) { | |||
4031 | // If for some reason the load is a sextload, the and is needed to zero | |||
4032 | // out the high 8 bits | |||
4033 | return SDValue(); | |||
4034 | } | |||
4035 | ||||
4036 | bool AddTo = false; | |||
4037 | if (AExt.getNode() != 0) { | |||
4038 | // Re-insert the ext as a zext. | |||
4039 | Val = DCI.DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), | |||
4040 | AExt.getValueType(), Val); | |||
4041 | AddTo = true; | |||
4042 | } | |||
4043 | ||||
4044 | // If we get here, the AND is unnecessary. Just replace it with the load | |||
4045 | DCI.CombineTo(N, Val, AddTo); | |||
4046 | } | |||
4047 | ||||
4048 | return SDValue(); | |||
4049 | } | |||
4050 | ||||
/// OperandSignedness - How a mul.wide operand candidate was extended from
/// its narrower original value.
enum OperandSignedness {
  Signed = 0,   ///< Produced by a sign extension.
  Unsigned = 1, ///< Produced by a zero extension.
  Unknown = 2   ///< Signedness could not be determined.
};
4056 | ||||
4057 | /// IsMulWideOperandDemotable - Checks if the provided DAG node is an operand | |||
4058 | /// that can be demoted to \p OptSize bits without loss of information. The | |||
4059 | /// signedness of the operand, if determinable, is placed in \p S. | |||
4060 | static bool IsMulWideOperandDemotable(SDValue Op, | |||
4061 | unsigned OptSize, | |||
4062 | OperandSignedness &S) { | |||
4063 | S = Unknown; | |||
4064 | ||||
4065 | if (Op.getOpcode() == ISD::SIGN_EXTEND || | |||
4066 | Op.getOpcode() == ISD::SIGN_EXTEND_INREG) { | |||
4067 | EVT OrigVT = Op.getOperand(0).getValueType(); | |||
4068 | if (OrigVT.getSizeInBits() <= OptSize) { | |||
4069 | S = Signed; | |||
4070 | return true; | |||
4071 | } | |||
4072 | } else if (Op.getOpcode() == ISD::ZERO_EXTEND) { | |||
4073 | EVT OrigVT = Op.getOperand(0).getValueType(); | |||
4074 | if (OrigVT.getSizeInBits() <= OptSize) { | |||
4075 | S = Unsigned; | |||
4076 | return true; | |||
4077 | } | |||
4078 | } | |||
4079 | ||||
4080 | return false; | |||
4081 | } | |||
4082 | ||||
4083 | /// AreMulWideOperandsDemotable - Checks if the given LHS and RHS operands can | |||
4084 | /// be demoted to \p OptSize bits without loss of information. If the operands | |||
4085 | /// contain a constant, it should appear as the RHS operand. The signedness of | |||
4086 | /// the operands is placed in \p IsSigned. | |||
4087 | static bool AreMulWideOperandsDemotable(SDValue LHS, SDValue RHS, | |||
4088 | unsigned OptSize, | |||
4089 | bool &IsSigned) { | |||
4090 | ||||
4091 | OperandSignedness LHSSign; | |||
4092 | ||||
4093 | // The LHS operand must be a demotable op | |||
4094 | if (!IsMulWideOperandDemotable(LHS, OptSize, LHSSign)) | |||
4095 | return false; | |||
4096 | ||||
4097 | // We should have been able to determine the signedness from the LHS | |||
4098 | if (LHSSign == Unknown) | |||
4099 | return false; | |||
4100 | ||||
4101 | IsSigned = (LHSSign == Signed); | |||
4102 | ||||
4103 | // The RHS can be a demotable op or a constant | |||
4104 | if (ConstantSDNode *CI = dyn_cast<ConstantSDNode>(RHS)) { | |||
4105 | APInt Val = CI->getAPIntValue(); | |||
4106 | if (LHSSign == Unsigned) { | |||
4107 | if (Val.isIntN(OptSize)) { | |||
4108 | return true; | |||
4109 | } | |||
4110 | return false; | |||
4111 | } else { | |||
4112 | if (Val.isSignedIntN(OptSize)) { | |||
4113 | return true; | |||
4114 | } | |||
4115 | return false; | |||
4116 | } | |||
4117 | } else { | |||
4118 | OperandSignedness RHSSign; | |||
4119 | if (!IsMulWideOperandDemotable(RHS, OptSize, RHSSign)) | |||
4120 | return false; | |||
4121 | ||||
4122 | if (LHSSign != RHSSign) | |||
4123 | return false; | |||
4124 | ||||
4125 | return true; | |||
4126 | } | |||
4127 | } | |||
4128 | ||||
4129 | /// TryMULWIDECombine - Attempt to replace a multiply of M bits with a multiply | |||
4130 | /// of M/2 bits that produces an M-bit result (i.e. mul.wide). This transform | |||
4131 | /// works on both multiply DAG nodes and SHL DAG nodes with a constant shift | |||
4132 | /// amount. | |||
4133 | static SDValue TryMULWIDECombine(SDNode *N, | |||
4134 | TargetLowering::DAGCombinerInfo &DCI) { | |||
4135 | EVT MulType = N->getValueType(0); | |||
4136 | if (MulType != MVT::i32 && MulType != MVT::i64) { | |||
4137 | return SDValue(); | |||
4138 | } | |||
4139 | ||||
4140 | SDLoc DL(N); | |||
4141 | unsigned OptSize = MulType.getSizeInBits() >> 1; | |||
4142 | SDValue LHS = N->getOperand(0); | |||
4143 | SDValue RHS = N->getOperand(1); | |||
4144 | ||||
4145 | // Canonicalize the multiply so the constant (if any) is on the right | |||
4146 | if (N->getOpcode() == ISD::MUL) { | |||
4147 | if (isa<ConstantSDNode>(LHS)) { | |||
4148 | std::swap(LHS, RHS); | |||
4149 | } | |||
4150 | } | |||
4151 | ||||
4152 | // If we have a SHL, determine the actual multiply amount | |||
4153 | if (N->getOpcode() == ISD::SHL) { | |||
4154 | ConstantSDNode *ShlRHS = dyn_cast<ConstantSDNode>(RHS); | |||
4155 | if (!ShlRHS) { | |||
4156 | return SDValue(); | |||
4157 | } | |||
4158 | ||||
4159 | APInt ShiftAmt = ShlRHS->getAPIntValue(); | |||
4160 | unsigned BitWidth = MulType.getSizeInBits(); | |||
4161 | if (ShiftAmt.sge(0) && ShiftAmt.slt(BitWidth)) { | |||
4162 | APInt MulVal = APInt(BitWidth, 1) << ShiftAmt; | |||
4163 | RHS = DCI.DAG.getConstant(MulVal, DL, MulType); | |||
4164 | } else { | |||
4165 | return SDValue(); | |||
4166 | } | |||
4167 | } | |||
4168 | ||||
4169 | bool Signed; | |||
4170 | // Verify that our operands are demotable | |||
4171 | if (!AreMulWideOperandsDemotable(LHS, RHS, OptSize, Signed)) { | |||
4172 | return SDValue(); | |||
4173 | } | |||
4174 | ||||
4175 | EVT DemotedVT; | |||
4176 | if (MulType == MVT::i32) { | |||
4177 | DemotedVT = MVT::i16; | |||
4178 | } else { | |||
4179 | DemotedVT = MVT::i32; | |||
4180 | } | |||
4181 | ||||
4182 | // Truncate the operands to the correct size. Note that these are just for | |||
4183 | // type consistency and will (likely) be eliminated in later phases. | |||
4184 | SDValue TruncLHS = | |||
4185 | DCI.DAG.getNode(ISD::TRUNCATE, DL, DemotedVT, LHS); | |||
4186 | SDValue TruncRHS = | |||
4187 | DCI.DAG.getNode(ISD::TRUNCATE, DL, DemotedVT, RHS); | |||
4188 | ||||
4189 | unsigned Opc; | |||
4190 | if (Signed) { | |||
4191 | Opc = NVPTXISD::MUL_WIDE_SIGNED; | |||
4192 | } else { | |||
4193 | Opc = NVPTXISD::MUL_WIDE_UNSIGNED; | |||
4194 | } | |||
4195 | ||||
4196 | return DCI.DAG.getNode(Opc, DL, MulType, TruncLHS, TruncRHS); | |||
4197 | } | |||
4198 | ||||
4199 | /// PerformMULCombine - Runs PTX-specific DAG combine patterns on MUL nodes. | |||
4200 | static SDValue PerformMULCombine(SDNode *N, | |||
4201 | TargetLowering::DAGCombinerInfo &DCI, | |||
4202 | CodeGenOpt::Level OptLevel) { | |||
4203 | if (OptLevel > 0) { | |||
4204 | // Try mul.wide combining at OptLevel > 0 | |||
4205 | SDValue Ret = TryMULWIDECombine(N, DCI); | |||
4206 | if (Ret.getNode()) | |||
4207 | return Ret; | |||
4208 | } | |||
4209 | ||||
4210 | return SDValue(); | |||
4211 | } | |||
4212 | ||||
4213 | /// PerformSHLCombine - Runs PTX-specific DAG combine patterns on SHL nodes. | |||
4214 | static SDValue PerformSHLCombine(SDNode *N, | |||
4215 | TargetLowering::DAGCombinerInfo &DCI, | |||
4216 | CodeGenOpt::Level OptLevel) { | |||
4217 | if (OptLevel > 0) { | |||
4218 | // Try mul.wide combining at OptLevel > 0 | |||
4219 | SDValue Ret = TryMULWIDECombine(N, DCI); | |||
4220 | if (Ret.getNode()) | |||
4221 | return Ret; | |||
4222 | } | |||
4223 | ||||
4224 | return SDValue(); | |||
4225 | } | |||
4226 | ||||
4227 | SDValue NVPTXTargetLowering::PerformDAGCombine(SDNode *N, | |||
4228 | DAGCombinerInfo &DCI) const { | |||
4229 | CodeGenOpt::Level OptLevel = getTargetMachine().getOptLevel(); | |||
4230 | switch (N->getOpcode()) { | |||
4231 | default: break; | |||
4232 | case ISD::ADD: | |||
4233 | case ISD::FADD: | |||
4234 | return PerformADDCombine(N, DCI, STI, OptLevel); | |||
4235 | case ISD::MUL: | |||
4236 | return PerformMULCombine(N, DCI, OptLevel); | |||
4237 | case ISD::SHL: | |||
4238 | return PerformSHLCombine(N, DCI, OptLevel); | |||
4239 | case ISD::AND: | |||
4240 | return PerformANDCombine(N, DCI); | |||
4241 | } | |||
4242 | return SDValue(); | |||
4243 | } | |||
4244 | ||||
4245 | /// ReplaceVectorLoad - Convert vector loads into multi-output scalar loads. | |||
4246 | static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG, | |||
4247 | const DataLayout *TD, | |||
4248 | SmallVectorImpl<SDValue> &Results) { | |||
4249 | EVT ResVT = N->getValueType(0); | |||
4250 | SDLoc DL(N); | |||
4251 | ||||
4252 | assert(ResVT.isVector() && "Vector load must have vector type")((ResVT.isVector() && "Vector load must have vector type" ) ? static_cast<void> (0) : __assert_fail ("ResVT.isVector() && \"Vector load must have vector type\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 4252, __PRETTY_FUNCTION__)); | |||
4253 | ||||
4254 | // We only handle "native" vector sizes for now, e.g. <4 x double> is not | |||
4255 | // legal. We can (and should) split that into 2 loads of <2 x double> here | |||
4256 | // but I'm leaving that as a TODO for now. | |||
4257 | assert(ResVT.isSimple() && "Can only handle simple types")((ResVT.isSimple() && "Can only handle simple types") ? static_cast<void> (0) : __assert_fail ("ResVT.isSimple() && \"Can only handle simple types\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 4257, __PRETTY_FUNCTION__)); | |||
4258 | switch (ResVT.getSimpleVT().SimpleTy) { | |||
4259 | default: | |||
4260 | return; | |||
4261 | case MVT::v2i8: | |||
4262 | case MVT::v2i16: | |||
4263 | case MVT::v2i32: | |||
4264 | case MVT::v2i64: | |||
4265 | case MVT::v2f32: | |||
4266 | case MVT::v2f64: | |||
4267 | case MVT::v4i8: | |||
4268 | case MVT::v4i16: | |||
4269 | case MVT::v4i32: | |||
4270 | case MVT::v4f32: | |||
4271 | // This is a "native" vector type | |||
4272 | break; | |||
4273 | } | |||
4274 | ||||
4275 | LoadSDNode *LD = cast<LoadSDNode>(N); | |||
4276 | ||||
4277 | unsigned Align = LD->getAlignment(); | |||
4278 | unsigned PrefAlign = | |||
4279 | TD->getPrefTypeAlignment(ResVT.getTypeForEVT(*DAG.getContext())); | |||
4280 | if (Align < PrefAlign) { | |||
4281 | // This load is not sufficiently aligned, so bail out and let this vector | |||
4282 | // load be scalarized. Note that we may still be able to emit smaller | |||
4283 | // vector loads. For example, if we are loading a <4 x float> with an | |||
4284 | // alignment of 8, this check will fail but the legalizer will try again | |||
4285 | // with 2 x <2 x float>, which will succeed with an alignment of 8. | |||
4286 | return; | |||
4287 | } | |||
4288 | ||||
4289 | EVT EltVT = ResVT.getVectorElementType(); | |||
4290 | unsigned NumElts = ResVT.getVectorNumElements(); | |||
4291 | ||||
4292 | // Since LoadV2 is a target node, we cannot rely on DAG type legalization. | |||
4293 | // Therefore, we must ensure the type is legal. For i1 and i8, we set the | |||
4294 | // loaded type to i16 and propagate the "real" type as the memory type. | |||
4295 | bool NeedTrunc = false; | |||
4296 | if (EltVT.getSizeInBits() < 16) { | |||
4297 | EltVT = MVT::i16; | |||
4298 | NeedTrunc = true; | |||
4299 | } | |||
4300 | ||||
4301 | unsigned Opcode = 0; | |||
4302 | SDVTList LdResVTs; | |||
4303 | ||||
4304 | switch (NumElts) { | |||
4305 | default: | |||
4306 | return; | |||
4307 | case 2: | |||
4308 | Opcode = NVPTXISD::LoadV2; | |||
4309 | LdResVTs = DAG.getVTList(EltVT, EltVT, MVT::Other); | |||
4310 | break; | |||
4311 | case 4: { | |||
4312 | Opcode = NVPTXISD::LoadV4; | |||
4313 | EVT ListVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other }; | |||
4314 | LdResVTs = DAG.getVTList(ListVTs); | |||
4315 | break; | |||
4316 | } | |||
4317 | } | |||
4318 | ||||
4319 | // Copy regular operands | |||
4320 | SmallVector<SDValue, 8> OtherOps(N->op_begin(), N->op_end()); | |||
4321 | ||||
4322 | // The select routine does not have access to the LoadSDNode instance, so | |||
4323 | // pass along the extension information | |||
4324 | OtherOps.push_back(DAG.getIntPtrConstant(LD->getExtensionType(), DL)); | |||
4325 | ||||
4326 | SDValue NewLD = DAG.getMemIntrinsicNode(Opcode, DL, LdResVTs, OtherOps, | |||
4327 | LD->getMemoryVT(), | |||
4328 | LD->getMemOperand()); | |||
4329 | ||||
4330 | SmallVector<SDValue, 4> ScalarRes; | |||
4331 | ||||
4332 | for (unsigned i = 0; i < NumElts; ++i) { | |||
4333 | SDValue Res = NewLD.getValue(i); | |||
4334 | if (NeedTrunc) | |||
4335 | Res = DAG.getNode(ISD::TRUNCATE, DL, ResVT.getVectorElementType(), Res); | |||
4336 | ScalarRes.push_back(Res); | |||
4337 | } | |||
4338 | ||||
4339 | SDValue LoadChain = NewLD.getValue(NumElts); | |||
4340 | ||||
4341 | SDValue BuildVec = DAG.getNode(ISD::BUILD_VECTOR, DL, ResVT, ScalarRes); | |||
4342 | ||||
4343 | Results.push_back(BuildVec); | |||
4344 | Results.push_back(LoadChain); | |||
4345 | } | |||
4346 | ||||
4347 | static void ReplaceINTRINSIC_W_CHAIN(SDNode *N, SelectionDAG &DAG, | |||
4348 | SmallVectorImpl<SDValue> &Results) { | |||
4349 | SDValue Chain = N->getOperand(0); | |||
4350 | SDValue Intrin = N->getOperand(1); | |||
4351 | SDLoc DL(N); | |||
4352 | ||||
4353 | // Get the intrinsic ID | |||
4354 | unsigned IntrinNo = cast<ConstantSDNode>(Intrin.getNode())->getZExtValue(); | |||
4355 | switch (IntrinNo) { | |||
4356 | default: | |||
4357 | return; | |||
4358 | case Intrinsic::nvvm_ldg_global_i: | |||
4359 | case Intrinsic::nvvm_ldg_global_f: | |||
4360 | case Intrinsic::nvvm_ldg_global_p: | |||
4361 | case Intrinsic::nvvm_ldu_global_i: | |||
4362 | case Intrinsic::nvvm_ldu_global_f: | |||
4363 | case Intrinsic::nvvm_ldu_global_p: { | |||
4364 | EVT ResVT = N->getValueType(0); | |||
4365 | ||||
4366 | if (ResVT.isVector()) { | |||
4367 | // Vector LDG/LDU | |||
4368 | ||||
4369 | unsigned NumElts = ResVT.getVectorNumElements(); | |||
4370 | EVT EltVT = ResVT.getVectorElementType(); | |||
4371 | ||||
4372 | // Since LDU/LDG are target nodes, we cannot rely on DAG type | |||
4373 | // legalization. | |||
4374 | // Therefore, we must ensure the type is legal. For i1 and i8, we set the | |||
4375 | // loaded type to i16 and propagate the "real" type as the memory type. | |||
4376 | bool NeedTrunc = false; | |||
4377 | if (EltVT.getSizeInBits() < 16) { | |||
4378 | EltVT = MVT::i16; | |||
4379 | NeedTrunc = true; | |||
4380 | } | |||
4381 | ||||
4382 | unsigned Opcode = 0; | |||
4383 | SDVTList LdResVTs; | |||
4384 | ||||
4385 | switch (NumElts) { | |||
4386 | default: | |||
4387 | return; | |||
4388 | case 2: | |||
4389 | switch (IntrinNo) { | |||
4390 | default: | |||
4391 | return; | |||
4392 | case Intrinsic::nvvm_ldg_global_i: | |||
4393 | case Intrinsic::nvvm_ldg_global_f: | |||
4394 | case Intrinsic::nvvm_ldg_global_p: | |||
4395 | Opcode = NVPTXISD::LDGV2; | |||
4396 | break; | |||
4397 | case Intrinsic::nvvm_ldu_global_i: | |||
4398 | case Intrinsic::nvvm_ldu_global_f: | |||
4399 | case Intrinsic::nvvm_ldu_global_p: | |||
4400 | Opcode = NVPTXISD::LDUV2; | |||
4401 | break; | |||
4402 | } | |||
4403 | LdResVTs = DAG.getVTList(EltVT, EltVT, MVT::Other); | |||
4404 | break; | |||
4405 | case 4: { | |||
4406 | switch (IntrinNo) { | |||
4407 | default: | |||
4408 | return; | |||
4409 | case Intrinsic::nvvm_ldg_global_i: | |||
4410 | case Intrinsic::nvvm_ldg_global_f: | |||
4411 | case Intrinsic::nvvm_ldg_global_p: | |||
4412 | Opcode = NVPTXISD::LDGV4; | |||
4413 | break; | |||
4414 | case Intrinsic::nvvm_ldu_global_i: | |||
4415 | case Intrinsic::nvvm_ldu_global_f: | |||
4416 | case Intrinsic::nvvm_ldu_global_p: | |||
4417 | Opcode = NVPTXISD::LDUV4; | |||
4418 | break; | |||
4419 | } | |||
4420 | EVT ListVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other }; | |||
4421 | LdResVTs = DAG.getVTList(ListVTs); | |||
4422 | break; | |||
4423 | } | |||
4424 | } | |||
4425 | ||||
4426 | SmallVector<SDValue, 8> OtherOps; | |||
4427 | ||||
4428 | // Copy regular operands | |||
4429 | ||||
4430 | OtherOps.push_back(Chain); // Chain | |||
4431 | // Skip operand 1 (intrinsic ID) | |||
4432 | // Others | |||
4433 | OtherOps.append(N->op_begin() + 2, N->op_end()); | |||
4434 | ||||
4435 | MemIntrinsicSDNode *MemSD = cast<MemIntrinsicSDNode>(N); | |||
4436 | ||||
4437 | SDValue NewLD = DAG.getMemIntrinsicNode(Opcode, DL, LdResVTs, OtherOps, | |||
4438 | MemSD->getMemoryVT(), | |||
4439 | MemSD->getMemOperand()); | |||
4440 | ||||
4441 | SmallVector<SDValue, 4> ScalarRes; | |||
4442 | ||||
4443 | for (unsigned i = 0; i < NumElts; ++i) { | |||
4444 | SDValue Res = NewLD.getValue(i); | |||
4445 | if (NeedTrunc) | |||
4446 | Res = | |||
4447 | DAG.getNode(ISD::TRUNCATE, DL, ResVT.getVectorElementType(), Res); | |||
4448 | ScalarRes.push_back(Res); | |||
4449 | } | |||
4450 | ||||
4451 | SDValue LoadChain = NewLD.getValue(NumElts); | |||
4452 | ||||
4453 | SDValue BuildVec = | |||
4454 | DAG.getNode(ISD::BUILD_VECTOR, DL, ResVT, ScalarRes); | |||
4455 | ||||
4456 | Results.push_back(BuildVec); | |||
4457 | Results.push_back(LoadChain); | |||
4458 | } else { | |||
4459 | // i8 LDG/LDU | |||
4460 | assert(ResVT.isSimple() && ResVT.getSimpleVT().SimpleTy == MVT::i8 &&((ResVT.isSimple() && ResVT.getSimpleVT().SimpleTy == MVT::i8 && "Custom handling of non-i8 ldu/ldg?") ? static_cast <void> (0) : __assert_fail ("ResVT.isSimple() && ResVT.getSimpleVT().SimpleTy == MVT::i8 && \"Custom handling of non-i8 ldu/ldg?\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 4461, __PRETTY_FUNCTION__)) | |||
4461 | "Custom handling of non-i8 ldu/ldg?")((ResVT.isSimple() && ResVT.getSimpleVT().SimpleTy == MVT::i8 && "Custom handling of non-i8 ldu/ldg?") ? static_cast <void> (0) : __assert_fail ("ResVT.isSimple() && ResVT.getSimpleVT().SimpleTy == MVT::i8 && \"Custom handling of non-i8 ldu/ldg?\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 4461, __PRETTY_FUNCTION__)); | |||
4462 | ||||
4463 | // Just copy all operands as-is | |||
4464 | SmallVector<SDValue, 4> Ops(N->op_begin(), N->op_end()); | |||
4465 | ||||
4466 | // Force output to i16 | |||
4467 | SDVTList LdResVTs = DAG.getVTList(MVT::i16, MVT::Other); | |||
4468 | ||||
4469 | MemIntrinsicSDNode *MemSD = cast<MemIntrinsicSDNode>(N); | |||
4470 | ||||
4471 | // We make sure the memory type is i8, which will be used during isel | |||
4472 | // to select the proper instruction. | |||
4473 | SDValue NewLD = | |||
4474 | DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, LdResVTs, Ops, | |||
4475 | MVT::i8, MemSD->getMemOperand()); | |||
4476 | ||||
4477 | Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, | |||
4478 | NewLD.getValue(0))); | |||
4479 | Results.push_back(NewLD.getValue(1)); | |||
4480 | } | |||
4481 | } | |||
4482 | } | |||
4483 | } | |||
4484 | ||||
4485 | void NVPTXTargetLowering::ReplaceNodeResults( | |||
4486 | SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const { | |||
4487 | switch (N->getOpcode()) { | |||
4488 | default: | |||
4489 | report_fatal_error("Unhandled custom legalization"); | |||
4490 | case ISD::LOAD: | |||
4491 | ReplaceLoadVector(N, DAG, getDataLayout(), Results); | |||
4492 | return; | |||
4493 | case ISD::INTRINSIC_W_CHAIN: | |||
4494 | ReplaceINTRINSIC_W_CHAIN(N, DAG, Results); | |||
4495 | return; | |||
4496 | } | |||
4497 | } | |||
4498 | ||||
4499 | // Pin NVPTXSection's and NVPTXTargetObjectFile's vtables to this file. | |||
4500 | void NVPTXSection::anchor() {} | |||
4501 | ||||
4502 | NVPTXTargetObjectFile::~NVPTXTargetObjectFile() { | |||
4503 | delete TextSection; | |||
4504 | delete DataSection; | |||
4505 | delete BSSSection; | |||
4506 | delete ReadOnlySection; | |||
4507 | ||||
4508 | delete StaticCtorSection; | |||
4509 | delete StaticDtorSection; | |||
4510 | delete LSDASection; | |||
4511 | delete EHFrameSection; | |||
4512 | delete DwarfAbbrevSection; | |||
4513 | delete DwarfInfoSection; | |||
4514 | delete DwarfLineSection; | |||
4515 | delete DwarfFrameSection; | |||
4516 | delete DwarfPubTypesSection; | |||
4517 | delete DwarfDebugInlineSection; | |||
4518 | delete DwarfStrSection; | |||
4519 | delete DwarfLocSection; | |||
4520 | delete DwarfARangesSection; | |||
4521 | delete DwarfRangesSection; | |||
4522 | } | |||
4523 | ||||
4524 | MCSection * | |||
4525 | NVPTXTargetObjectFile::SelectSectionForGlobal(const GlobalValue *GV, | |||
4526 | SectionKind Kind, Mangler &Mang, | |||
4527 | const TargetMachine &TM) const { | |||
4528 | return getDataSection(); | |||
4529 | } |