File: | lib/Target/NVPTX/NVPTXISelLowering.cpp |
Warning: | line 1658, column 13 Called C++ object pointer is null |
1 | // | |||
2 | // The LLVM Compiler Infrastructure | |||
3 | // | |||
4 | // This file is distributed under the University of Illinois Open Source | |||
5 | // License. See LICENSE.TXT for details. | |||
6 | // | |||
7 | //===----------------------------------------------------------------------===// | |||
8 | // | |||
9 | // This file defines the interfaces that NVPTX uses to lower LLVM code into a | |||
10 | // selection DAG. | |||
11 | // | |||
12 | //===----------------------------------------------------------------------===// | |||
13 | ||||
14 | #include "NVPTXISelLowering.h" | |||
15 | #include "NVPTX.h" | |||
16 | #include "NVPTXTargetMachine.h" | |||
17 | #include "NVPTXTargetObjectFile.h" | |||
18 | #include "NVPTXUtilities.h" | |||
19 | #include "llvm/CodeGen/Analysis.h" | |||
20 | #include "llvm/CodeGen/MachineFrameInfo.h" | |||
21 | #include "llvm/CodeGen/MachineFunction.h" | |||
22 | #include "llvm/CodeGen/MachineInstrBuilder.h" | |||
23 | #include "llvm/CodeGen/MachineRegisterInfo.h" | |||
24 | #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" | |||
25 | #include "llvm/IR/CallSite.h" | |||
26 | #include "llvm/IR/DerivedTypes.h" | |||
27 | #include "llvm/IR/Function.h" | |||
28 | #include "llvm/IR/GlobalValue.h" | |||
29 | #include "llvm/IR/IntrinsicInst.h" | |||
30 | #include "llvm/IR/Intrinsics.h" | |||
31 | #include "llvm/IR/Module.h" | |||
32 | #include "llvm/MC/MCSectionELF.h" | |||
33 | #include "llvm/Support/CommandLine.h" | |||
34 | #include "llvm/Support/Debug.h" | |||
35 | #include "llvm/Support/ErrorHandling.h" | |||
36 | #include "llvm/Support/MathExtras.h" | |||
37 | #include "llvm/Support/raw_ostream.h" | |||
38 | #include <sstream> | |||
39 | ||||
// Debug category for this file. Any earlier definition is dropped first so
// that the macro expands to exactly one string literal, "nvptx-lower".
// NOTE(review): presumably consumed by the debug-output macros declared in
// llvm/Support/Debug.h (included above) -- no use is visible in this excerpt.
#undef DEBUG_TYPE
#define DEBUG_TYPE "nvptx-lower"
42 | ||||
43 | using namespace llvm; | |||
44 | ||||
// File-local counter, starting at zero.
// NOTE(review): presumably bumped once per lowered call to keep per-call
// names unique -- its uses are outside this excerpt, confirm there.
static unsigned uniqueCallSite = 0;
46 | ||||
47 | static cl::opt<bool> sched4reg( | |||
48 | "nvptx-sched4reg", | |||
49 | cl::desc("NVPTX Specific: schedule for register pressue"), cl::init(false)); | |||
50 | ||||
51 | static cl::opt<unsigned> | |||
52 | FMAContractLevelOpt("nvptx-fma-level", cl::ZeroOrMore, cl::Hidden, | |||
53 | cl::desc("NVPTX Specific: FMA contraction (0: don't do it" | |||
54 | " 1: do it 2: do it aggressively"), | |||
55 | cl::init(2)); | |||
56 | ||||
57 | static bool IsPTXVectorType(MVT VT) { | |||
58 | switch (VT.SimpleTy) { | |||
59 | default: | |||
60 | return false; | |||
61 | case MVT::v2i1: | |||
62 | case MVT::v4i1: | |||
63 | case MVT::v2i8: | |||
64 | case MVT::v4i8: | |||
65 | case MVT::v2i16: | |||
66 | case MVT::v4i16: | |||
67 | case MVT::v2i32: | |||
68 | case MVT::v4i32: | |||
69 | case MVT::v2i64: | |||
70 | case MVT::v2f32: | |||
71 | case MVT::v4f32: | |||
72 | case MVT::v2f64: | |||
73 | return true; | |||
74 | } | |||
75 | } | |||
76 | ||||
77 | /// ComputePTXValueVTs - For the given Type \p Ty, returns the set of primitive | |||
78 | /// EVTs that compose it. Unlike ComputeValueVTs, this will break apart vectors | |||
79 | /// into their primitive components. | |||
80 | /// NOTE: This is a band-aid for code that expects ComputeValueVTs to return the | |||
81 | /// same number of types as the Ins/Outs arrays in LowerFormalArguments, | |||
82 | /// LowerCall, and LowerReturn. | |||
83 | static void ComputePTXValueVTs(const TargetLowering &TLI, const DataLayout &DL, | |||
84 | Type *Ty, SmallVectorImpl<EVT> &ValueVTs, | |||
85 | SmallVectorImpl<uint64_t> *Offsets = nullptr, | |||
86 | uint64_t StartingOffset = 0) { | |||
87 | SmallVector<EVT, 16> TempVTs; | |||
88 | SmallVector<uint64_t, 16> TempOffsets; | |||
89 | ||||
90 | ComputeValueVTs(TLI, DL, Ty, TempVTs, &TempOffsets, StartingOffset); | |||
91 | for (unsigned i = 0, e = TempVTs.size(); i != e; ++i) { | |||
92 | EVT VT = TempVTs[i]; | |||
93 | uint64_t Off = TempOffsets[i]; | |||
94 | if (VT.isVector()) | |||
95 | for (unsigned j = 0, je = VT.getVectorNumElements(); j != je; ++j) { | |||
96 | ValueVTs.push_back(VT.getVectorElementType()); | |||
97 | if (Offsets) | |||
98 | Offsets->push_back(Off+j*VT.getVectorElementType().getStoreSize()); | |||
99 | } | |||
100 | else { | |||
101 | ValueVTs.push_back(VT); | |||
102 | if (Offsets) | |||
103 | Offsets->push_back(Off); | |||
104 | } | |||
105 | } | |||
106 | } | |||
107 | ||||
108 | // NVPTXTargetLowering Constructor. | |||
109 | NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM, | |||
110 | const NVPTXSubtarget &STI) | |||
111 | : TargetLowering(TM), nvTM(&TM), STI(STI) { | |||
112 | ||||
113 | // always lower memset, memcpy, and memmove intrinsics to load/store | |||
114 | // instructions, rather | |||
115 | // then generating calls to memset, mempcy or memmove. | |||
116 | MaxStoresPerMemset = (unsigned) 0xFFFFFFFF; | |||
117 | MaxStoresPerMemcpy = (unsigned) 0xFFFFFFFF; | |||
118 | MaxStoresPerMemmove = (unsigned) 0xFFFFFFFF; | |||
119 | ||||
120 | setBooleanContents(ZeroOrNegativeOneBooleanContent); | |||
121 | setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); | |||
122 | ||||
123 | // Jump is Expensive. Don't create extra control flow for 'and', 'or' | |||
124 | // condition branches. | |||
125 | setJumpIsExpensive(true); | |||
126 | ||||
127 | // Wide divides are _very_ slow. Try to reduce the width of the divide if | |||
128 | // possible. | |||
129 | addBypassSlowDiv(64, 32); | |||
130 | ||||
131 | // By default, use the Source scheduling | |||
132 | if (sched4reg) | |||
133 | setSchedulingPreference(Sched::RegPressure); | |||
134 | else | |||
135 | setSchedulingPreference(Sched::Source); | |||
136 | ||||
137 | addRegisterClass(MVT::i1, &NVPTX::Int1RegsRegClass); | |||
138 | addRegisterClass(MVT::i16, &NVPTX::Int16RegsRegClass); | |||
139 | addRegisterClass(MVT::i32, &NVPTX::Int32RegsRegClass); | |||
140 | addRegisterClass(MVT::i64, &NVPTX::Int64RegsRegClass); | |||
141 | addRegisterClass(MVT::f32, &NVPTX::Float32RegsRegClass); | |||
142 | addRegisterClass(MVT::f64, &NVPTX::Float64RegsRegClass); | |||
143 | ||||
144 | // Operations not directly supported by NVPTX. | |||
145 | setOperationAction(ISD::SELECT_CC, MVT::f32, Expand); | |||
146 | setOperationAction(ISD::SELECT_CC, MVT::f64, Expand); | |||
147 | setOperationAction(ISD::SELECT_CC, MVT::i1, Expand); | |||
148 | setOperationAction(ISD::SELECT_CC, MVT::i8, Expand); | |||
149 | setOperationAction(ISD::SELECT_CC, MVT::i16, Expand); | |||
150 | setOperationAction(ISD::SELECT_CC, MVT::i32, Expand); | |||
151 | setOperationAction(ISD::SELECT_CC, MVT::i64, Expand); | |||
152 | setOperationAction(ISD::BR_CC, MVT::f32, Expand); | |||
153 | setOperationAction(ISD::BR_CC, MVT::f64, Expand); | |||
154 | setOperationAction(ISD::BR_CC, MVT::i1, Expand); | |||
155 | setOperationAction(ISD::BR_CC, MVT::i8, Expand); | |||
156 | setOperationAction(ISD::BR_CC, MVT::i16, Expand); | |||
157 | setOperationAction(ISD::BR_CC, MVT::i32, Expand); | |||
158 | setOperationAction(ISD::BR_CC, MVT::i64, Expand); | |||
159 | // Some SIGN_EXTEND_INREG can be done using cvt instruction. | |||
160 | // For others we will expand to a SHL/SRA pair. | |||
161 | setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i64, Legal); | |||
162 | setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal); | |||
163 | setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Legal); | |||
164 | setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal); | |||
165 | setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); | |||
166 | ||||
167 | setOperationAction(ISD::SHL_PARTS, MVT::i32 , Custom); | |||
168 | setOperationAction(ISD::SRA_PARTS, MVT::i32 , Custom); | |||
169 | setOperationAction(ISD::SRL_PARTS, MVT::i32 , Custom); | |||
170 | setOperationAction(ISD::SHL_PARTS, MVT::i64 , Custom); | |||
171 | setOperationAction(ISD::SRA_PARTS, MVT::i64 , Custom); | |||
172 | setOperationAction(ISD::SRL_PARTS, MVT::i64 , Custom); | |||
173 | ||||
174 | if (STI.hasROT64()) { | |||
175 | setOperationAction(ISD::ROTL, MVT::i64, Legal); | |||
176 | setOperationAction(ISD::ROTR, MVT::i64, Legal); | |||
177 | } else { | |||
178 | setOperationAction(ISD::ROTL, MVT::i64, Expand); | |||
179 | setOperationAction(ISD::ROTR, MVT::i64, Expand); | |||
180 | } | |||
181 | if (STI.hasROT32()) { | |||
182 | setOperationAction(ISD::ROTL, MVT::i32, Legal); | |||
183 | setOperationAction(ISD::ROTR, MVT::i32, Legal); | |||
184 | } else { | |||
185 | setOperationAction(ISD::ROTL, MVT::i32, Expand); | |||
186 | setOperationAction(ISD::ROTR, MVT::i32, Expand); | |||
187 | } | |||
188 | ||||
189 | setOperationAction(ISD::ROTL, MVT::i16, Expand); | |||
190 | setOperationAction(ISD::ROTR, MVT::i16, Expand); | |||
191 | setOperationAction(ISD::ROTL, MVT::i8, Expand); | |||
192 | setOperationAction(ISD::ROTR, MVT::i8, Expand); | |||
193 | setOperationAction(ISD::BSWAP, MVT::i16, Expand); | |||
194 | setOperationAction(ISD::BSWAP, MVT::i32, Expand); | |||
195 | setOperationAction(ISD::BSWAP, MVT::i64, Expand); | |||
196 | ||||
197 | // Indirect branch is not supported. | |||
198 | // This also disables Jump Table creation. | |||
199 | setOperationAction(ISD::BR_JT, MVT::Other, Expand); | |||
200 | setOperationAction(ISD::BRIND, MVT::Other, Expand); | |||
201 | ||||
202 | setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); | |||
203 | setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); | |||
204 | ||||
205 | // We want to legalize constant related memmove and memcopy | |||
206 | // intrinsics. | |||
207 | setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); | |||
208 | ||||
209 | // Turn FP extload into load/fpextend | |||
210 | setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand); | |||
211 | setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand); | |||
212 | setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand); | |||
213 | setLoadExtAction(ISD::EXTLOAD, MVT::v2f32, MVT::v2f16, Expand); | |||
214 | setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f16, Expand); | |||
215 | setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f32, Expand); | |||
216 | setLoadExtAction(ISD::EXTLOAD, MVT::v4f32, MVT::v4f16, Expand); | |||
217 | setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f16, Expand); | |||
218 | setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f32, Expand); | |||
219 | // Turn FP truncstore into trunc + store. | |||
220 | // FIXME: vector types should also be expanded | |||
221 | setTruncStoreAction(MVT::f32, MVT::f16, Expand); | |||
222 | setTruncStoreAction(MVT::f64, MVT::f16, Expand); | |||
223 | setTruncStoreAction(MVT::f64, MVT::f32, Expand); | |||
224 | ||||
225 | // PTX does not support load / store predicate registers | |||
226 | setOperationAction(ISD::LOAD, MVT::i1, Custom); | |||
227 | setOperationAction(ISD::STORE, MVT::i1, Custom); | |||
228 | ||||
229 | for (MVT VT : MVT::integer_valuetypes()) { | |||
230 | setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); | |||
231 | setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote); | |||
232 | setTruncStoreAction(VT, MVT::i1, Expand); | |||
233 | } | |||
234 | ||||
235 | // This is legal in NVPTX | |||
236 | setOperationAction(ISD::ConstantFP, MVT::f64, Legal); | |||
237 | setOperationAction(ISD::ConstantFP, MVT::f32, Legal); | |||
238 | ||||
239 | // TRAP can be lowered to PTX trap | |||
240 | setOperationAction(ISD::TRAP, MVT::Other, Legal); | |||
241 | ||||
242 | setOperationAction(ISD::ADDC, MVT::i64, Expand); | |||
243 | setOperationAction(ISD::ADDE, MVT::i64, Expand); | |||
244 | ||||
245 | // Register custom handling for vector loads/stores | |||
246 | for (MVT VT : MVT::vector_valuetypes()) { | |||
247 | if (IsPTXVectorType(VT)) { | |||
248 | setOperationAction(ISD::LOAD, VT, Custom); | |||
249 | setOperationAction(ISD::STORE, VT, Custom); | |||
250 | setOperationAction(ISD::INTRINSIC_W_CHAIN, VT, Custom); | |||
251 | } | |||
252 | } | |||
253 | ||||
254 | // Custom handling for i8 intrinsics | |||
255 | setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom); | |||
256 | ||||
257 | setOperationAction(ISD::CTLZ, MVT::i16, Legal); | |||
258 | setOperationAction(ISD::CTLZ, MVT::i32, Legal); | |||
259 | setOperationAction(ISD::CTLZ, MVT::i64, Legal); | |||
260 | setOperationAction(ISD::CTTZ, MVT::i16, Expand); | |||
261 | setOperationAction(ISD::CTTZ, MVT::i32, Expand); | |||
262 | setOperationAction(ISD::CTTZ, MVT::i64, Expand); | |||
263 | setOperationAction(ISD::CTPOP, MVT::i16, Legal); | |||
264 | setOperationAction(ISD::CTPOP, MVT::i32, Legal); | |||
265 | setOperationAction(ISD::CTPOP, MVT::i64, Legal); | |||
266 | ||||
267 | // PTX does not directly support SELP of i1, so promote to i32 first | |||
268 | setOperationAction(ISD::SELECT, MVT::i1, Custom); | |||
269 | ||||
270 | // PTX cannot multiply two i64s in a single instruction. | |||
271 | setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand); | |||
272 | setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand); | |||
273 | ||||
274 | // We have some custom DAG combine patterns for these nodes | |||
275 | setTargetDAGCombine(ISD::ADD); | |||
276 | setTargetDAGCombine(ISD::AND); | |||
277 | setTargetDAGCombine(ISD::FADD); | |||
278 | setTargetDAGCombine(ISD::MUL); | |||
279 | setTargetDAGCombine(ISD::SHL); | |||
280 | setTargetDAGCombine(ISD::SELECT); | |||
281 | setTargetDAGCombine(ISD::SREM); | |||
282 | setTargetDAGCombine(ISD::UREM); | |||
283 | ||||
284 | // Library functions. These default to Expand, but we have instructions | |||
285 | // for them. | |||
286 | setOperationAction(ISD::FCEIL, MVT::f32, Legal); | |||
287 | setOperationAction(ISD::FCEIL, MVT::f64, Legal); | |||
288 | setOperationAction(ISD::FFLOOR, MVT::f32, Legal); | |||
289 | setOperationAction(ISD::FFLOOR, MVT::f64, Legal); | |||
290 | setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal); | |||
291 | setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal); | |||
292 | setOperationAction(ISD::FRINT, MVT::f32, Legal); | |||
293 | setOperationAction(ISD::FRINT, MVT::f64, Legal); | |||
294 | setOperationAction(ISD::FROUND, MVT::f32, Legal); | |||
295 | setOperationAction(ISD::FROUND, MVT::f64, Legal); | |||
296 | setOperationAction(ISD::FTRUNC, MVT::f32, Legal); | |||
297 | setOperationAction(ISD::FTRUNC, MVT::f64, Legal); | |||
298 | setOperationAction(ISD::FMINNUM, MVT::f32, Legal); | |||
299 | setOperationAction(ISD::FMINNUM, MVT::f64, Legal); | |||
300 | setOperationAction(ISD::FMAXNUM, MVT::f32, Legal); | |||
301 | setOperationAction(ISD::FMAXNUM, MVT::f64, Legal); | |||
302 | ||||
303 | // No FEXP2, FLOG2. The PTX ex2 and log2 functions are always approximate. | |||
304 | // No FPOW or FREM in PTX. | |||
305 | ||||
306 | // Now deduce the information based on the above mentioned | |||
307 | // actions | |||
308 | computeRegisterProperties(STI.getRegisterInfo()); | |||
309 | } | |||
310 | ||||
311 | const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const { | |||
312 | switch ((NVPTXISD::NodeType)Opcode) { | |||
313 | case NVPTXISD::FIRST_NUMBER: | |||
314 | break; | |||
315 | case NVPTXISD::CALL: | |||
316 | return "NVPTXISD::CALL"; | |||
317 | case NVPTXISD::RET_FLAG: | |||
318 | return "NVPTXISD::RET_FLAG"; | |||
319 | case NVPTXISD::LOAD_PARAM: | |||
320 | return "NVPTXISD::LOAD_PARAM"; | |||
321 | case NVPTXISD::Wrapper: | |||
322 | return "NVPTXISD::Wrapper"; | |||
323 | case NVPTXISD::DeclareParam: | |||
324 | return "NVPTXISD::DeclareParam"; | |||
325 | case NVPTXISD::DeclareScalarParam: | |||
326 | return "NVPTXISD::DeclareScalarParam"; | |||
327 | case NVPTXISD::DeclareRet: | |||
328 | return "NVPTXISD::DeclareRet"; | |||
329 | case NVPTXISD::DeclareScalarRet: | |||
330 | return "NVPTXISD::DeclareScalarRet"; | |||
331 | case NVPTXISD::DeclareRetParam: | |||
332 | return "NVPTXISD::DeclareRetParam"; | |||
333 | case NVPTXISD::PrintCall: | |||
334 | return "NVPTXISD::PrintCall"; | |||
335 | case NVPTXISD::PrintConvergentCall: | |||
336 | return "NVPTXISD::PrintConvergentCall"; | |||
337 | case NVPTXISD::PrintCallUni: | |||
338 | return "NVPTXISD::PrintCallUni"; | |||
339 | case NVPTXISD::PrintConvergentCallUni: | |||
340 | return "NVPTXISD::PrintConvergentCallUni"; | |||
341 | case NVPTXISD::LoadParam: | |||
342 | return "NVPTXISD::LoadParam"; | |||
343 | case NVPTXISD::LoadParamV2: | |||
344 | return "NVPTXISD::LoadParamV2"; | |||
345 | case NVPTXISD::LoadParamV4: | |||
346 | return "NVPTXISD::LoadParamV4"; | |||
347 | case NVPTXISD::StoreParam: | |||
348 | return "NVPTXISD::StoreParam"; | |||
349 | case NVPTXISD::StoreParamV2: | |||
350 | return "NVPTXISD::StoreParamV2"; | |||
351 | case NVPTXISD::StoreParamV4: | |||
352 | return "NVPTXISD::StoreParamV4"; | |||
353 | case NVPTXISD::StoreParamS32: | |||
354 | return "NVPTXISD::StoreParamS32"; | |||
355 | case NVPTXISD::StoreParamU32: | |||
356 | return "NVPTXISD::StoreParamU32"; | |||
357 | case NVPTXISD::CallArgBegin: | |||
358 | return "NVPTXISD::CallArgBegin"; | |||
359 | case NVPTXISD::CallArg: | |||
360 | return "NVPTXISD::CallArg"; | |||
361 | case NVPTXISD::LastCallArg: | |||
362 | return "NVPTXISD::LastCallArg"; | |||
363 | case NVPTXISD::CallArgEnd: | |||
364 | return "NVPTXISD::CallArgEnd"; | |||
365 | case NVPTXISD::CallVoid: | |||
366 | return "NVPTXISD::CallVoid"; | |||
367 | case NVPTXISD::CallVal: | |||
368 | return "NVPTXISD::CallVal"; | |||
369 | case NVPTXISD::CallSymbol: | |||
370 | return "NVPTXISD::CallSymbol"; | |||
371 | case NVPTXISD::Prototype: | |||
372 | return "NVPTXISD::Prototype"; | |||
373 | case NVPTXISD::MoveParam: | |||
374 | return "NVPTXISD::MoveParam"; | |||
375 | case NVPTXISD::StoreRetval: | |||
376 | return "NVPTXISD::StoreRetval"; | |||
377 | case NVPTXISD::StoreRetvalV2: | |||
378 | return "NVPTXISD::StoreRetvalV2"; | |||
379 | case NVPTXISD::StoreRetvalV4: | |||
380 | return "NVPTXISD::StoreRetvalV4"; | |||
381 | case NVPTXISD::PseudoUseParam: | |||
382 | return "NVPTXISD::PseudoUseParam"; | |||
383 | case NVPTXISD::RETURN: | |||
384 | return "NVPTXISD::RETURN"; | |||
385 | case NVPTXISD::CallSeqBegin: | |||
386 | return "NVPTXISD::CallSeqBegin"; | |||
387 | case NVPTXISD::CallSeqEnd: | |||
388 | return "NVPTXISD::CallSeqEnd"; | |||
389 | case NVPTXISD::CallPrototype: | |||
390 | return "NVPTXISD::CallPrototype"; | |||
391 | case NVPTXISD::LoadV2: | |||
392 | return "NVPTXISD::LoadV2"; | |||
393 | case NVPTXISD::LoadV4: | |||
394 | return "NVPTXISD::LoadV4"; | |||
395 | case NVPTXISD::LDGV2: | |||
396 | return "NVPTXISD::LDGV2"; | |||
397 | case NVPTXISD::LDGV4: | |||
398 | return "NVPTXISD::LDGV4"; | |||
399 | case NVPTXISD::LDUV2: | |||
400 | return "NVPTXISD::LDUV2"; | |||
401 | case NVPTXISD::LDUV4: | |||
402 | return "NVPTXISD::LDUV4"; | |||
403 | case NVPTXISD::StoreV2: | |||
404 | return "NVPTXISD::StoreV2"; | |||
405 | case NVPTXISD::StoreV4: | |||
406 | return "NVPTXISD::StoreV4"; | |||
407 | case NVPTXISD::FUN_SHFL_CLAMP: | |||
408 | return "NVPTXISD::FUN_SHFL_CLAMP"; | |||
409 | case NVPTXISD::FUN_SHFR_CLAMP: | |||
410 | return "NVPTXISD::FUN_SHFR_CLAMP"; | |||
411 | case NVPTXISD::IMAD: | |||
412 | return "NVPTXISD::IMAD"; | |||
413 | case NVPTXISD::Dummy: | |||
414 | return "NVPTXISD::Dummy"; | |||
415 | case NVPTXISD::MUL_WIDE_SIGNED: | |||
416 | return "NVPTXISD::MUL_WIDE_SIGNED"; | |||
417 | case NVPTXISD::MUL_WIDE_UNSIGNED: | |||
418 | return "NVPTXISD::MUL_WIDE_UNSIGNED"; | |||
419 | case NVPTXISD::Tex1DFloatS32: return "NVPTXISD::Tex1DFloatS32"; | |||
420 | case NVPTXISD::Tex1DFloatFloat: return "NVPTXISD::Tex1DFloatFloat"; | |||
421 | case NVPTXISD::Tex1DFloatFloatLevel: | |||
422 | return "NVPTXISD::Tex1DFloatFloatLevel"; | |||
423 | case NVPTXISD::Tex1DFloatFloatGrad: | |||
424 | return "NVPTXISD::Tex1DFloatFloatGrad"; | |||
425 | case NVPTXISD::Tex1DS32S32: return "NVPTXISD::Tex1DS32S32"; | |||
426 | case NVPTXISD::Tex1DS32Float: return "NVPTXISD::Tex1DS32Float"; | |||
427 | case NVPTXISD::Tex1DS32FloatLevel: | |||
428 | return "NVPTXISD::Tex1DS32FloatLevel"; | |||
429 | case NVPTXISD::Tex1DS32FloatGrad: | |||
430 | return "NVPTXISD::Tex1DS32FloatGrad"; | |||
431 | case NVPTXISD::Tex1DU32S32: return "NVPTXISD::Tex1DU32S32"; | |||
432 | case NVPTXISD::Tex1DU32Float: return "NVPTXISD::Tex1DU32Float"; | |||
433 | case NVPTXISD::Tex1DU32FloatLevel: | |||
434 | return "NVPTXISD::Tex1DU32FloatLevel"; | |||
435 | case NVPTXISD::Tex1DU32FloatGrad: | |||
436 | return "NVPTXISD::Tex1DU32FloatGrad"; | |||
437 | case NVPTXISD::Tex1DArrayFloatS32: return "NVPTXISD::Tex1DArrayFloatS32"; | |||
438 | case NVPTXISD::Tex1DArrayFloatFloat: return "NVPTXISD::Tex1DArrayFloatFloat"; | |||
439 | case NVPTXISD::Tex1DArrayFloatFloatLevel: | |||
440 | return "NVPTXISD::Tex1DArrayFloatFloatLevel"; | |||
441 | case NVPTXISD::Tex1DArrayFloatFloatGrad: | |||
442 | return "NVPTXISD::Tex1DArrayFloatFloatGrad"; | |||
443 | case NVPTXISD::Tex1DArrayS32S32: return "NVPTXISD::Tex1DArrayS32S32"; | |||
444 | case NVPTXISD::Tex1DArrayS32Float: return "NVPTXISD::Tex1DArrayS32Float"; | |||
445 | case NVPTXISD::Tex1DArrayS32FloatLevel: | |||
446 | return "NVPTXISD::Tex1DArrayS32FloatLevel"; | |||
447 | case NVPTXISD::Tex1DArrayS32FloatGrad: | |||
448 | return "NVPTXISD::Tex1DArrayS32FloatGrad"; | |||
449 | case NVPTXISD::Tex1DArrayU32S32: return "NVPTXISD::Tex1DArrayU32S32"; | |||
450 | case NVPTXISD::Tex1DArrayU32Float: return "NVPTXISD::Tex1DArrayU32Float"; | |||
451 | case NVPTXISD::Tex1DArrayU32FloatLevel: | |||
452 | return "NVPTXISD::Tex1DArrayU32FloatLevel"; | |||
453 | case NVPTXISD::Tex1DArrayU32FloatGrad: | |||
454 | return "NVPTXISD::Tex1DArrayU32FloatGrad"; | |||
455 | case NVPTXISD::Tex2DFloatS32: return "NVPTXISD::Tex2DFloatS32"; | |||
456 | case NVPTXISD::Tex2DFloatFloat: return "NVPTXISD::Tex2DFloatFloat"; | |||
457 | case NVPTXISD::Tex2DFloatFloatLevel: | |||
458 | return "NVPTXISD::Tex2DFloatFloatLevel"; | |||
459 | case NVPTXISD::Tex2DFloatFloatGrad: | |||
460 | return "NVPTXISD::Tex2DFloatFloatGrad"; | |||
461 | case NVPTXISD::Tex2DS32S32: return "NVPTXISD::Tex2DS32S32"; | |||
462 | case NVPTXISD::Tex2DS32Float: return "NVPTXISD::Tex2DS32Float"; | |||
463 | case NVPTXISD::Tex2DS32FloatLevel: | |||
464 | return "NVPTXISD::Tex2DS32FloatLevel"; | |||
465 | case NVPTXISD::Tex2DS32FloatGrad: | |||
466 | return "NVPTXISD::Tex2DS32FloatGrad"; | |||
467 | case NVPTXISD::Tex2DU32S32: return "NVPTXISD::Tex2DU32S32"; | |||
468 | case NVPTXISD::Tex2DU32Float: return "NVPTXISD::Tex2DU32Float"; | |||
469 | case NVPTXISD::Tex2DU32FloatLevel: | |||
470 | return "NVPTXISD::Tex2DU32FloatLevel"; | |||
471 | case NVPTXISD::Tex2DU32FloatGrad: | |||
472 | return "NVPTXISD::Tex2DU32FloatGrad"; | |||
473 | case NVPTXISD::Tex2DArrayFloatS32: return "NVPTXISD::Tex2DArrayFloatS32"; | |||
474 | case NVPTXISD::Tex2DArrayFloatFloat: return "NVPTXISD::Tex2DArrayFloatFloat"; | |||
475 | case NVPTXISD::Tex2DArrayFloatFloatLevel: | |||
476 | return "NVPTXISD::Tex2DArrayFloatFloatLevel"; | |||
477 | case NVPTXISD::Tex2DArrayFloatFloatGrad: | |||
478 | return "NVPTXISD::Tex2DArrayFloatFloatGrad"; | |||
479 | case NVPTXISD::Tex2DArrayS32S32: return "NVPTXISD::Tex2DArrayS32S32"; | |||
480 | case NVPTXISD::Tex2DArrayS32Float: return "NVPTXISD::Tex2DArrayS32Float"; | |||
481 | case NVPTXISD::Tex2DArrayS32FloatLevel: | |||
482 | return "NVPTXISD::Tex2DArrayS32FloatLevel"; | |||
483 | case NVPTXISD::Tex2DArrayS32FloatGrad: | |||
484 | return "NVPTXISD::Tex2DArrayS32FloatGrad"; | |||
485 | case NVPTXISD::Tex2DArrayU32S32: return "NVPTXISD::Tex2DArrayU32S32"; | |||
486 | case NVPTXISD::Tex2DArrayU32Float: return "NVPTXISD::Tex2DArrayU32Float"; | |||
487 | case NVPTXISD::Tex2DArrayU32FloatLevel: | |||
488 | return "NVPTXISD::Tex2DArrayU32FloatLevel"; | |||
489 | case NVPTXISD::Tex2DArrayU32FloatGrad: | |||
490 | return "NVPTXISD::Tex2DArrayU32FloatGrad"; | |||
491 | case NVPTXISD::Tex3DFloatS32: return "NVPTXISD::Tex3DFloatS32"; | |||
492 | case NVPTXISD::Tex3DFloatFloat: return "NVPTXISD::Tex3DFloatFloat"; | |||
493 | case NVPTXISD::Tex3DFloatFloatLevel: | |||
494 | return "NVPTXISD::Tex3DFloatFloatLevel"; | |||
495 | case NVPTXISD::Tex3DFloatFloatGrad: | |||
496 | return "NVPTXISD::Tex3DFloatFloatGrad"; | |||
497 | case NVPTXISD::Tex3DS32S32: return "NVPTXISD::Tex3DS32S32"; | |||
498 | case NVPTXISD::Tex3DS32Float: return "NVPTXISD::Tex3DS32Float"; | |||
499 | case NVPTXISD::Tex3DS32FloatLevel: | |||
500 | return "NVPTXISD::Tex3DS32FloatLevel"; | |||
501 | case NVPTXISD::Tex3DS32FloatGrad: | |||
502 | return "NVPTXISD::Tex3DS32FloatGrad"; | |||
503 | case NVPTXISD::Tex3DU32S32: return "NVPTXISD::Tex3DU32S32"; | |||
504 | case NVPTXISD::Tex3DU32Float: return "NVPTXISD::Tex3DU32Float"; | |||
505 | case NVPTXISD::Tex3DU32FloatLevel: | |||
506 | return "NVPTXISD::Tex3DU32FloatLevel"; | |||
507 | case NVPTXISD::Tex3DU32FloatGrad: | |||
508 | return "NVPTXISD::Tex3DU32FloatGrad"; | |||
509 | case NVPTXISD::TexCubeFloatFloat: return "NVPTXISD::TexCubeFloatFloat"; | |||
510 | case NVPTXISD::TexCubeFloatFloatLevel: | |||
511 | return "NVPTXISD::TexCubeFloatFloatLevel"; | |||
512 | case NVPTXISD::TexCubeS32Float: return "NVPTXISD::TexCubeS32Float"; | |||
513 | case NVPTXISD::TexCubeS32FloatLevel: | |||
514 | return "NVPTXISD::TexCubeS32FloatLevel"; | |||
515 | case NVPTXISD::TexCubeU32Float: return "NVPTXISD::TexCubeU32Float"; | |||
516 | case NVPTXISD::TexCubeU32FloatLevel: | |||
517 | return "NVPTXISD::TexCubeU32FloatLevel"; | |||
518 | case NVPTXISD::TexCubeArrayFloatFloat: | |||
519 | return "NVPTXISD::TexCubeArrayFloatFloat"; | |||
520 | case NVPTXISD::TexCubeArrayFloatFloatLevel: | |||
521 | return "NVPTXISD::TexCubeArrayFloatFloatLevel"; | |||
522 | case NVPTXISD::TexCubeArrayS32Float: | |||
523 | return "NVPTXISD::TexCubeArrayS32Float"; | |||
524 | case NVPTXISD::TexCubeArrayS32FloatLevel: | |||
525 | return "NVPTXISD::TexCubeArrayS32FloatLevel"; | |||
526 | case NVPTXISD::TexCubeArrayU32Float: | |||
527 | return "NVPTXISD::TexCubeArrayU32Float"; | |||
528 | case NVPTXISD::TexCubeArrayU32FloatLevel: | |||
529 | return "NVPTXISD::TexCubeArrayU32FloatLevel"; | |||
530 | case NVPTXISD::Tld4R2DFloatFloat: | |||
531 | return "NVPTXISD::Tld4R2DFloatFloat"; | |||
532 | case NVPTXISD::Tld4G2DFloatFloat: | |||
533 | return "NVPTXISD::Tld4G2DFloatFloat"; | |||
534 | case NVPTXISD::Tld4B2DFloatFloat: | |||
535 | return "NVPTXISD::Tld4B2DFloatFloat"; | |||
536 | case NVPTXISD::Tld4A2DFloatFloat: | |||
537 | return "NVPTXISD::Tld4A2DFloatFloat"; | |||
538 | case NVPTXISD::Tld4R2DS64Float: | |||
539 | return "NVPTXISD::Tld4R2DS64Float"; | |||
540 | case NVPTXISD::Tld4G2DS64Float: | |||
541 | return "NVPTXISD::Tld4G2DS64Float"; | |||
542 | case NVPTXISD::Tld4B2DS64Float: | |||
543 | return "NVPTXISD::Tld4B2DS64Float"; | |||
544 | case NVPTXISD::Tld4A2DS64Float: | |||
545 | return "NVPTXISD::Tld4A2DS64Float"; | |||
546 | case NVPTXISD::Tld4R2DU64Float: | |||
547 | return "NVPTXISD::Tld4R2DU64Float"; | |||
548 | case NVPTXISD::Tld4G2DU64Float: | |||
549 | return "NVPTXISD::Tld4G2DU64Float"; | |||
550 | case NVPTXISD::Tld4B2DU64Float: | |||
551 | return "NVPTXISD::Tld4B2DU64Float"; | |||
552 | case NVPTXISD::Tld4A2DU64Float: | |||
553 | return "NVPTXISD::Tld4A2DU64Float"; | |||
554 | ||||
555 | case NVPTXISD::TexUnified1DFloatS32: | |||
556 | return "NVPTXISD::TexUnified1DFloatS32"; | |||
557 | case NVPTXISD::TexUnified1DFloatFloat: | |||
558 | return "NVPTXISD::TexUnified1DFloatFloat"; | |||
559 | case NVPTXISD::TexUnified1DFloatFloatLevel: | |||
560 | return "NVPTXISD::TexUnified1DFloatFloatLevel"; | |||
561 | case NVPTXISD::TexUnified1DFloatFloatGrad: | |||
562 | return "NVPTXISD::TexUnified1DFloatFloatGrad"; | |||
563 | case NVPTXISD::TexUnified1DS32S32: | |||
564 | return "NVPTXISD::TexUnified1DS32S32"; | |||
565 | case NVPTXISD::TexUnified1DS32Float: | |||
566 | return "NVPTXISD::TexUnified1DS32Float"; | |||
567 | case NVPTXISD::TexUnified1DS32FloatLevel: | |||
568 | return "NVPTXISD::TexUnified1DS32FloatLevel"; | |||
569 | case NVPTXISD::TexUnified1DS32FloatGrad: | |||
570 | return "NVPTXISD::TexUnified1DS32FloatGrad"; | |||
571 | case NVPTXISD::TexUnified1DU32S32: | |||
572 | return "NVPTXISD::TexUnified1DU32S32"; | |||
573 | case NVPTXISD::TexUnified1DU32Float: | |||
574 | return "NVPTXISD::TexUnified1DU32Float"; | |||
575 | case NVPTXISD::TexUnified1DU32FloatLevel: | |||
576 | return "NVPTXISD::TexUnified1DU32FloatLevel"; | |||
577 | case NVPTXISD::TexUnified1DU32FloatGrad: | |||
578 | return "NVPTXISD::TexUnified1DU32FloatGrad"; | |||
579 | case NVPTXISD::TexUnified1DArrayFloatS32: | |||
580 | return "NVPTXISD::TexUnified1DArrayFloatS32"; | |||
581 | case NVPTXISD::TexUnified1DArrayFloatFloat: | |||
582 | return "NVPTXISD::TexUnified1DArrayFloatFloat"; | |||
583 | case NVPTXISD::TexUnified1DArrayFloatFloatLevel: | |||
584 | return "NVPTXISD::TexUnified1DArrayFloatFloatLevel"; | |||
585 | case NVPTXISD::TexUnified1DArrayFloatFloatGrad: | |||
586 | return "NVPTXISD::TexUnified1DArrayFloatFloatGrad"; | |||
587 | case NVPTXISD::TexUnified1DArrayS32S32: | |||
588 | return "NVPTXISD::TexUnified1DArrayS32S32"; | |||
589 | case NVPTXISD::TexUnified1DArrayS32Float: | |||
590 | return "NVPTXISD::TexUnified1DArrayS32Float"; | |||
591 | case NVPTXISD::TexUnified1DArrayS32FloatLevel: | |||
592 | return "NVPTXISD::TexUnified1DArrayS32FloatLevel"; | |||
593 | case NVPTXISD::TexUnified1DArrayS32FloatGrad: | |||
594 | return "NVPTXISD::TexUnified1DArrayS32FloatGrad"; | |||
595 | case NVPTXISD::TexUnified1DArrayU32S32: | |||
596 | return "NVPTXISD::TexUnified1DArrayU32S32"; | |||
597 | case NVPTXISD::TexUnified1DArrayU32Float: | |||
598 | return "NVPTXISD::TexUnified1DArrayU32Float"; | |||
599 | case NVPTXISD::TexUnified1DArrayU32FloatLevel: | |||
600 | return "NVPTXISD::TexUnified1DArrayU32FloatLevel"; | |||
601 | case NVPTXISD::TexUnified1DArrayU32FloatGrad: | |||
602 | return "NVPTXISD::TexUnified1DArrayU32FloatGrad"; | |||
603 | case NVPTXISD::TexUnified2DFloatS32: | |||
604 | return "NVPTXISD::TexUnified2DFloatS32"; | |||
605 | case NVPTXISD::TexUnified2DFloatFloat: | |||
606 | return "NVPTXISD::TexUnified2DFloatFloat"; | |||
607 | case NVPTXISD::TexUnified2DFloatFloatLevel: | |||
608 | return "NVPTXISD::TexUnified2DFloatFloatLevel"; | |||
609 | case NVPTXISD::TexUnified2DFloatFloatGrad: | |||
610 | return "NVPTXISD::TexUnified2DFloatFloatGrad"; | |||
611 | case NVPTXISD::TexUnified2DS32S32: | |||
612 | return "NVPTXISD::TexUnified2DS32S32"; | |||
613 | case NVPTXISD::TexUnified2DS32Float: | |||
614 | return "NVPTXISD::TexUnified2DS32Float"; | |||
615 | case NVPTXISD::TexUnified2DS32FloatLevel: | |||
616 | return "NVPTXISD::TexUnified2DS32FloatLevel"; | |||
617 | case NVPTXISD::TexUnified2DS32FloatGrad: | |||
618 | return "NVPTXISD::TexUnified2DS32FloatGrad"; | |||
619 | case NVPTXISD::TexUnified2DU32S32: | |||
620 | return "NVPTXISD::TexUnified2DU32S32"; | |||
621 | case NVPTXISD::TexUnified2DU32Float: | |||
622 | return "NVPTXISD::TexUnified2DU32Float"; | |||
623 | case NVPTXISD::TexUnified2DU32FloatLevel: | |||
624 | return "NVPTXISD::TexUnified2DU32FloatLevel"; | |||
625 | case NVPTXISD::TexUnified2DU32FloatGrad: | |||
626 | return "NVPTXISD::TexUnified2DU32FloatGrad"; | |||
627 | case NVPTXISD::TexUnified2DArrayFloatS32: | |||
628 | return "NVPTXISD::TexUnified2DArrayFloatS32"; | |||
629 | case NVPTXISD::TexUnified2DArrayFloatFloat: | |||
630 | return "NVPTXISD::TexUnified2DArrayFloatFloat"; | |||
631 | case NVPTXISD::TexUnified2DArrayFloatFloatLevel: | |||
632 | return "NVPTXISD::TexUnified2DArrayFloatFloatLevel"; | |||
633 | case NVPTXISD::TexUnified2DArrayFloatFloatGrad: | |||
634 | return "NVPTXISD::TexUnified2DArrayFloatFloatGrad"; | |||
635 | case NVPTXISD::TexUnified2DArrayS32S32: | |||
636 | return "NVPTXISD::TexUnified2DArrayS32S32"; | |||
637 | case NVPTXISD::TexUnified2DArrayS32Float: | |||
638 | return "NVPTXISD::TexUnified2DArrayS32Float"; | |||
639 | case NVPTXISD::TexUnified2DArrayS32FloatLevel: | |||
640 | return "NVPTXISD::TexUnified2DArrayS32FloatLevel"; | |||
641 | case NVPTXISD::TexUnified2DArrayS32FloatGrad: | |||
642 | return "NVPTXISD::TexUnified2DArrayS32FloatGrad"; | |||
643 | case NVPTXISD::TexUnified2DArrayU32S32: | |||
644 | return "NVPTXISD::TexUnified2DArrayU32S32"; | |||
645 | case NVPTXISD::TexUnified2DArrayU32Float: | |||
646 | return "NVPTXISD::TexUnified2DArrayU32Float"; | |||
647 | case NVPTXISD::TexUnified2DArrayU32FloatLevel: | |||
648 | return "NVPTXISD::TexUnified2DArrayU32FloatLevel"; | |||
649 | case NVPTXISD::TexUnified2DArrayU32FloatGrad: | |||
650 | return "NVPTXISD::TexUnified2DArrayU32FloatGrad"; | |||
651 | case NVPTXISD::TexUnified3DFloatS32: | |||
652 | return "NVPTXISD::TexUnified3DFloatS32"; | |||
653 | case NVPTXISD::TexUnified3DFloatFloat: | |||
654 | return "NVPTXISD::TexUnified3DFloatFloat"; | |||
655 | case NVPTXISD::TexUnified3DFloatFloatLevel: | |||
656 | return "NVPTXISD::TexUnified3DFloatFloatLevel"; | |||
657 | case NVPTXISD::TexUnified3DFloatFloatGrad: | |||
658 | return "NVPTXISD::TexUnified3DFloatFloatGrad"; | |||
659 | case NVPTXISD::TexUnified3DS32S32: | |||
660 | return "NVPTXISD::TexUnified3DS32S32"; | |||
661 | case NVPTXISD::TexUnified3DS32Float: | |||
662 | return "NVPTXISD::TexUnified3DS32Float"; | |||
663 | case NVPTXISD::TexUnified3DS32FloatLevel: | |||
664 | return "NVPTXISD::TexUnified3DS32FloatLevel"; | |||
665 | case NVPTXISD::TexUnified3DS32FloatGrad: | |||
666 | return "NVPTXISD::TexUnified3DS32FloatGrad"; | |||
667 | case NVPTXISD::TexUnified3DU32S32: | |||
668 | return "NVPTXISD::TexUnified3DU32S32"; | |||
669 | case NVPTXISD::TexUnified3DU32Float: | |||
670 | return "NVPTXISD::TexUnified3DU32Float"; | |||
671 | case NVPTXISD::TexUnified3DU32FloatLevel: | |||
672 | return "NVPTXISD::TexUnified3DU32FloatLevel"; | |||
673 | case NVPTXISD::TexUnified3DU32FloatGrad: | |||
674 | return "NVPTXISD::TexUnified3DU32FloatGrad"; | |||
675 | case NVPTXISD::TexUnifiedCubeFloatFloat: | |||
676 | return "NVPTXISD::TexUnifiedCubeFloatFloat"; | |||
677 | case NVPTXISD::TexUnifiedCubeFloatFloatLevel: | |||
678 | return "NVPTXISD::TexUnifiedCubeFloatFloatLevel"; | |||
679 | case NVPTXISD::TexUnifiedCubeS32Float: | |||
680 | return "NVPTXISD::TexUnifiedCubeS32Float"; | |||
681 | case NVPTXISD::TexUnifiedCubeS32FloatLevel: | |||
682 | return "NVPTXISD::TexUnifiedCubeS32FloatLevel"; | |||
683 | case NVPTXISD::TexUnifiedCubeU32Float: | |||
684 | return "NVPTXISD::TexUnifiedCubeU32Float"; | |||
685 | case NVPTXISD::TexUnifiedCubeU32FloatLevel: | |||
686 | return "NVPTXISD::TexUnifiedCubeU32FloatLevel"; | |||
687 | case NVPTXISD::TexUnifiedCubeArrayFloatFloat: | |||
688 | return "NVPTXISD::TexUnifiedCubeArrayFloatFloat"; | |||
689 | case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel: | |||
690 | return "NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel"; | |||
691 | case NVPTXISD::TexUnifiedCubeArrayS32Float: | |||
692 | return "NVPTXISD::TexUnifiedCubeArrayS32Float"; | |||
693 | case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel: | |||
694 | return "NVPTXISD::TexUnifiedCubeArrayS32FloatLevel"; | |||
695 | case NVPTXISD::TexUnifiedCubeArrayU32Float: | |||
696 | return "NVPTXISD::TexUnifiedCubeArrayU32Float"; | |||
697 | case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel: | |||
698 | return "NVPTXISD::TexUnifiedCubeArrayU32FloatLevel"; | |||
699 | case NVPTXISD::Tld4UnifiedR2DFloatFloat: | |||
700 | return "NVPTXISD::Tld4UnifiedR2DFloatFloat"; | |||
701 | case NVPTXISD::Tld4UnifiedG2DFloatFloat: | |||
702 | return "NVPTXISD::Tld4UnifiedG2DFloatFloat"; | |||
703 | case NVPTXISD::Tld4UnifiedB2DFloatFloat: | |||
704 | return "NVPTXISD::Tld4UnifiedB2DFloatFloat"; | |||
705 | case NVPTXISD::Tld4UnifiedA2DFloatFloat: | |||
706 | return "NVPTXISD::Tld4UnifiedA2DFloatFloat"; | |||
707 | case NVPTXISD::Tld4UnifiedR2DS64Float: | |||
708 | return "NVPTXISD::Tld4UnifiedR2DS64Float"; | |||
709 | case NVPTXISD::Tld4UnifiedG2DS64Float: | |||
710 | return "NVPTXISD::Tld4UnifiedG2DS64Float"; | |||
711 | case NVPTXISD::Tld4UnifiedB2DS64Float: | |||
712 | return "NVPTXISD::Tld4UnifiedB2DS64Float"; | |||
713 | case NVPTXISD::Tld4UnifiedA2DS64Float: | |||
714 | return "NVPTXISD::Tld4UnifiedA2DS64Float"; | |||
715 | case NVPTXISD::Tld4UnifiedR2DU64Float: | |||
716 | return "NVPTXISD::Tld4UnifiedR2DU64Float"; | |||
717 | case NVPTXISD::Tld4UnifiedG2DU64Float: | |||
718 | return "NVPTXISD::Tld4UnifiedG2DU64Float"; | |||
719 | case NVPTXISD::Tld4UnifiedB2DU64Float: | |||
720 | return "NVPTXISD::Tld4UnifiedB2DU64Float"; | |||
721 | case NVPTXISD::Tld4UnifiedA2DU64Float: | |||
722 | return "NVPTXISD::Tld4UnifiedA2DU64Float"; | |||
723 | ||||
724 | case NVPTXISD::Suld1DI8Clamp: return "NVPTXISD::Suld1DI8Clamp"; | |||
725 | case NVPTXISD::Suld1DI16Clamp: return "NVPTXISD::Suld1DI16Clamp"; | |||
726 | case NVPTXISD::Suld1DI32Clamp: return "NVPTXISD::Suld1DI32Clamp"; | |||
727 | case NVPTXISD::Suld1DI64Clamp: return "NVPTXISD::Suld1DI64Clamp"; | |||
728 | case NVPTXISD::Suld1DV2I8Clamp: return "NVPTXISD::Suld1DV2I8Clamp"; | |||
729 | case NVPTXISD::Suld1DV2I16Clamp: return "NVPTXISD::Suld1DV2I16Clamp"; | |||
730 | case NVPTXISD::Suld1DV2I32Clamp: return "NVPTXISD::Suld1DV2I32Clamp"; | |||
731 | case NVPTXISD::Suld1DV2I64Clamp: return "NVPTXISD::Suld1DV2I64Clamp"; | |||
732 | case NVPTXISD::Suld1DV4I8Clamp: return "NVPTXISD::Suld1DV4I8Clamp"; | |||
733 | case NVPTXISD::Suld1DV4I16Clamp: return "NVPTXISD::Suld1DV4I16Clamp"; | |||
734 | case NVPTXISD::Suld1DV4I32Clamp: return "NVPTXISD::Suld1DV4I32Clamp"; | |||
735 | ||||
736 | case NVPTXISD::Suld1DArrayI8Clamp: return "NVPTXISD::Suld1DArrayI8Clamp"; | |||
737 | case NVPTXISD::Suld1DArrayI16Clamp: return "NVPTXISD::Suld1DArrayI16Clamp"; | |||
738 | case NVPTXISD::Suld1DArrayI32Clamp: return "NVPTXISD::Suld1DArrayI32Clamp"; | |||
739 | case NVPTXISD::Suld1DArrayI64Clamp: return "NVPTXISD::Suld1DArrayI64Clamp"; | |||
740 | case NVPTXISD::Suld1DArrayV2I8Clamp: return "NVPTXISD::Suld1DArrayV2I8Clamp"; | |||
741 | case NVPTXISD::Suld1DArrayV2I16Clamp:return "NVPTXISD::Suld1DArrayV2I16Clamp"; | |||
742 | case NVPTXISD::Suld1DArrayV2I32Clamp:return "NVPTXISD::Suld1DArrayV2I32Clamp"; | |||
743 | case NVPTXISD::Suld1DArrayV2I64Clamp:return "NVPTXISD::Suld1DArrayV2I64Clamp"; | |||
744 | case NVPTXISD::Suld1DArrayV4I8Clamp: return "NVPTXISD::Suld1DArrayV4I8Clamp"; | |||
745 | case NVPTXISD::Suld1DArrayV4I16Clamp:return "NVPTXISD::Suld1DArrayV4I16Clamp"; | |||
746 | case NVPTXISD::Suld1DArrayV4I32Clamp:return "NVPTXISD::Suld1DArrayV4I32Clamp"; | |||
747 | ||||
748 | case NVPTXISD::Suld2DI8Clamp: return "NVPTXISD::Suld2DI8Clamp"; | |||
749 | case NVPTXISD::Suld2DI16Clamp: return "NVPTXISD::Suld2DI16Clamp"; | |||
750 | case NVPTXISD::Suld2DI32Clamp: return "NVPTXISD::Suld2DI32Clamp"; | |||
751 | case NVPTXISD::Suld2DI64Clamp: return "NVPTXISD::Suld2DI64Clamp"; | |||
752 | case NVPTXISD::Suld2DV2I8Clamp: return "NVPTXISD::Suld2DV2I8Clamp"; | |||
753 | case NVPTXISD::Suld2DV2I16Clamp: return "NVPTXISD::Suld2DV2I16Clamp"; | |||
754 | case NVPTXISD::Suld2DV2I32Clamp: return "NVPTXISD::Suld2DV2I32Clamp"; | |||
755 | case NVPTXISD::Suld2DV2I64Clamp: return "NVPTXISD::Suld2DV2I64Clamp"; | |||
756 | case NVPTXISD::Suld2DV4I8Clamp: return "NVPTXISD::Suld2DV4I8Clamp"; | |||
757 | case NVPTXISD::Suld2DV4I16Clamp: return "NVPTXISD::Suld2DV4I16Clamp"; | |||
758 | case NVPTXISD::Suld2DV4I32Clamp: return "NVPTXISD::Suld2DV4I32Clamp"; | |||
759 | ||||
760 | case NVPTXISD::Suld2DArrayI8Clamp: return "NVPTXISD::Suld2DArrayI8Clamp"; | |||
761 | case NVPTXISD::Suld2DArrayI16Clamp: return "NVPTXISD::Suld2DArrayI16Clamp"; | |||
762 | case NVPTXISD::Suld2DArrayI32Clamp: return "NVPTXISD::Suld2DArrayI32Clamp"; | |||
763 | case NVPTXISD::Suld2DArrayI64Clamp: return "NVPTXISD::Suld2DArrayI64Clamp"; | |||
764 | case NVPTXISD::Suld2DArrayV2I8Clamp: return "NVPTXISD::Suld2DArrayV2I8Clamp"; | |||
765 | case NVPTXISD::Suld2DArrayV2I16Clamp:return "NVPTXISD::Suld2DArrayV2I16Clamp"; | |||
766 | case NVPTXISD::Suld2DArrayV2I32Clamp:return "NVPTXISD::Suld2DArrayV2I32Clamp"; | |||
767 | case NVPTXISD::Suld2DArrayV2I64Clamp:return "NVPTXISD::Suld2DArrayV2I64Clamp"; | |||
768 | case NVPTXISD::Suld2DArrayV4I8Clamp: return "NVPTXISD::Suld2DArrayV4I8Clamp"; | |||
769 | case NVPTXISD::Suld2DArrayV4I16Clamp:return "NVPTXISD::Suld2DArrayV4I16Clamp"; | |||
770 | case NVPTXISD::Suld2DArrayV4I32Clamp:return "NVPTXISD::Suld2DArrayV4I32Clamp"; | |||
771 | ||||
772 | case NVPTXISD::Suld3DI8Clamp: return "NVPTXISD::Suld3DI8Clamp"; | |||
773 | case NVPTXISD::Suld3DI16Clamp: return "NVPTXISD::Suld3DI16Clamp"; | |||
774 | case NVPTXISD::Suld3DI32Clamp: return "NVPTXISD::Suld3DI32Clamp"; | |||
775 | case NVPTXISD::Suld3DI64Clamp: return "NVPTXISD::Suld3DI64Clamp"; | |||
776 | case NVPTXISD::Suld3DV2I8Clamp: return "NVPTXISD::Suld3DV2I8Clamp"; | |||
777 | case NVPTXISD::Suld3DV2I16Clamp: return "NVPTXISD::Suld3DV2I16Clamp"; | |||
778 | case NVPTXISD::Suld3DV2I32Clamp: return "NVPTXISD::Suld3DV2I32Clamp"; | |||
779 | case NVPTXISD::Suld3DV2I64Clamp: return "NVPTXISD::Suld3DV2I64Clamp"; | |||
780 | case NVPTXISD::Suld3DV4I8Clamp: return "NVPTXISD::Suld3DV4I8Clamp"; | |||
781 | case NVPTXISD::Suld3DV4I16Clamp: return "NVPTXISD::Suld3DV4I16Clamp"; | |||
782 | case NVPTXISD::Suld3DV4I32Clamp: return "NVPTXISD::Suld3DV4I32Clamp"; | |||
783 | ||||
784 | case NVPTXISD::Suld1DI8Trap: return "NVPTXISD::Suld1DI8Trap"; | |||
785 | case NVPTXISD::Suld1DI16Trap: return "NVPTXISD::Suld1DI16Trap"; | |||
786 | case NVPTXISD::Suld1DI32Trap: return "NVPTXISD::Suld1DI32Trap"; | |||
787 | case NVPTXISD::Suld1DI64Trap: return "NVPTXISD::Suld1DI64Trap"; | |||
788 | case NVPTXISD::Suld1DV2I8Trap: return "NVPTXISD::Suld1DV2I8Trap"; | |||
789 | case NVPTXISD::Suld1DV2I16Trap: return "NVPTXISD::Suld1DV2I16Trap"; | |||
790 | case NVPTXISD::Suld1DV2I32Trap: return "NVPTXISD::Suld1DV2I32Trap"; | |||
791 | case NVPTXISD::Suld1DV2I64Trap: return "NVPTXISD::Suld1DV2I64Trap"; | |||
792 | case NVPTXISD::Suld1DV4I8Trap: return "NVPTXISD::Suld1DV4I8Trap"; | |||
793 | case NVPTXISD::Suld1DV4I16Trap: return "NVPTXISD::Suld1DV4I16Trap"; | |||
794 | case NVPTXISD::Suld1DV4I32Trap: return "NVPTXISD::Suld1DV4I32Trap"; | |||
795 | ||||
796 | case NVPTXISD::Suld1DArrayI8Trap: return "NVPTXISD::Suld1DArrayI8Trap"; | |||
797 | case NVPTXISD::Suld1DArrayI16Trap: return "NVPTXISD::Suld1DArrayI16Trap"; | |||
798 | case NVPTXISD::Suld1DArrayI32Trap: return "NVPTXISD::Suld1DArrayI32Trap"; | |||
799 | case NVPTXISD::Suld1DArrayI64Trap: return "NVPTXISD::Suld1DArrayI64Trap"; | |||
800 | case NVPTXISD::Suld1DArrayV2I8Trap: return "NVPTXISD::Suld1DArrayV2I8Trap"; | |||
801 | case NVPTXISD::Suld1DArrayV2I16Trap: return "NVPTXISD::Suld1DArrayV2I16Trap"; | |||
802 | case NVPTXISD::Suld1DArrayV2I32Trap: return "NVPTXISD::Suld1DArrayV2I32Trap"; | |||
803 | case NVPTXISD::Suld1DArrayV2I64Trap: return "NVPTXISD::Suld1DArrayV2I64Trap"; | |||
804 | case NVPTXISD::Suld1DArrayV4I8Trap: return "NVPTXISD::Suld1DArrayV4I8Trap"; | |||
805 | case NVPTXISD::Suld1DArrayV4I16Trap: return "NVPTXISD::Suld1DArrayV4I16Trap"; | |||
806 | case NVPTXISD::Suld1DArrayV4I32Trap: return "NVPTXISD::Suld1DArrayV4I32Trap"; | |||
807 | ||||
808 | case NVPTXISD::Suld2DI8Trap: return "NVPTXISD::Suld2DI8Trap"; | |||
809 | case NVPTXISD::Suld2DI16Trap: return "NVPTXISD::Suld2DI16Trap"; | |||
810 | case NVPTXISD::Suld2DI32Trap: return "NVPTXISD::Suld2DI32Trap"; | |||
811 | case NVPTXISD::Suld2DI64Trap: return "NVPTXISD::Suld2DI64Trap"; | |||
812 | case NVPTXISD::Suld2DV2I8Trap: return "NVPTXISD::Suld2DV2I8Trap"; | |||
813 | case NVPTXISD::Suld2DV2I16Trap: return "NVPTXISD::Suld2DV2I16Trap"; | |||
814 | case NVPTXISD::Suld2DV2I32Trap: return "NVPTXISD::Suld2DV2I32Trap"; | |||
815 | case NVPTXISD::Suld2DV2I64Trap: return "NVPTXISD::Suld2DV2I64Trap"; | |||
816 | case NVPTXISD::Suld2DV4I8Trap: return "NVPTXISD::Suld2DV4I8Trap"; | |||
817 | case NVPTXISD::Suld2DV4I16Trap: return "NVPTXISD::Suld2DV4I16Trap"; | |||
818 | case NVPTXISD::Suld2DV4I32Trap: return "NVPTXISD::Suld2DV4I32Trap"; | |||
819 | ||||
820 | case NVPTXISD::Suld2DArrayI8Trap: return "NVPTXISD::Suld2DArrayI8Trap"; | |||
821 | case NVPTXISD::Suld2DArrayI16Trap: return "NVPTXISD::Suld2DArrayI16Trap"; | |||
822 | case NVPTXISD::Suld2DArrayI32Trap: return "NVPTXISD::Suld2DArrayI32Trap"; | |||
823 | case NVPTXISD::Suld2DArrayI64Trap: return "NVPTXISD::Suld2DArrayI64Trap"; | |||
824 | case NVPTXISD::Suld2DArrayV2I8Trap: return "NVPTXISD::Suld2DArrayV2I8Trap"; | |||
825 | case NVPTXISD::Suld2DArrayV2I16Trap: return "NVPTXISD::Suld2DArrayV2I16Trap"; | |||
826 | case NVPTXISD::Suld2DArrayV2I32Trap: return "NVPTXISD::Suld2DArrayV2I32Trap"; | |||
827 | case NVPTXISD::Suld2DArrayV2I64Trap: return "NVPTXISD::Suld2DArrayV2I64Trap"; | |||
828 | case NVPTXISD::Suld2DArrayV4I8Trap: return "NVPTXISD::Suld2DArrayV4I8Trap"; | |||
829 | case NVPTXISD::Suld2DArrayV4I16Trap: return "NVPTXISD::Suld2DArrayV4I16Trap"; | |||
830 | case NVPTXISD::Suld2DArrayV4I32Trap: return "NVPTXISD::Suld2DArrayV4I32Trap"; | |||
831 | ||||
832 | case NVPTXISD::Suld3DI8Trap: return "NVPTXISD::Suld3DI8Trap"; | |||
833 | case NVPTXISD::Suld3DI16Trap: return "NVPTXISD::Suld3DI16Trap"; | |||
834 | case NVPTXISD::Suld3DI32Trap: return "NVPTXISD::Suld3DI32Trap"; | |||
835 | case NVPTXISD::Suld3DI64Trap: return "NVPTXISD::Suld3DI64Trap"; | |||
836 | case NVPTXISD::Suld3DV2I8Trap: return "NVPTXISD::Suld3DV2I8Trap"; | |||
837 | case NVPTXISD::Suld3DV2I16Trap: return "NVPTXISD::Suld3DV2I16Trap"; | |||
838 | case NVPTXISD::Suld3DV2I32Trap: return "NVPTXISD::Suld3DV2I32Trap"; | |||
839 | case NVPTXISD::Suld3DV2I64Trap: return "NVPTXISD::Suld3DV2I64Trap"; | |||
840 | case NVPTXISD::Suld3DV4I8Trap: return "NVPTXISD::Suld3DV4I8Trap"; | |||
841 | case NVPTXISD::Suld3DV4I16Trap: return "NVPTXISD::Suld3DV4I16Trap"; | |||
842 | case NVPTXISD::Suld3DV4I32Trap: return "NVPTXISD::Suld3DV4I32Trap"; | |||
843 | ||||
844 | case NVPTXISD::Suld1DI8Zero: return "NVPTXISD::Suld1DI8Zero"; | |||
845 | case NVPTXISD::Suld1DI16Zero: return "NVPTXISD::Suld1DI16Zero"; | |||
846 | case NVPTXISD::Suld1DI32Zero: return "NVPTXISD::Suld1DI32Zero"; | |||
847 | case NVPTXISD::Suld1DI64Zero: return "NVPTXISD::Suld1DI64Zero"; | |||
848 | case NVPTXISD::Suld1DV2I8Zero: return "NVPTXISD::Suld1DV2I8Zero"; | |||
849 | case NVPTXISD::Suld1DV2I16Zero: return "NVPTXISD::Suld1DV2I16Zero"; | |||
850 | case NVPTXISD::Suld1DV2I32Zero: return "NVPTXISD::Suld1DV2I32Zero"; | |||
851 | case NVPTXISD::Suld1DV2I64Zero: return "NVPTXISD::Suld1DV2I64Zero"; | |||
852 | case NVPTXISD::Suld1DV4I8Zero: return "NVPTXISD::Suld1DV4I8Zero"; | |||
853 | case NVPTXISD::Suld1DV4I16Zero: return "NVPTXISD::Suld1DV4I16Zero"; | |||
854 | case NVPTXISD::Suld1DV4I32Zero: return "NVPTXISD::Suld1DV4I32Zero"; | |||
855 | ||||
856 | case NVPTXISD::Suld1DArrayI8Zero: return "NVPTXISD::Suld1DArrayI8Zero"; | |||
857 | case NVPTXISD::Suld1DArrayI16Zero: return "NVPTXISD::Suld1DArrayI16Zero"; | |||
858 | case NVPTXISD::Suld1DArrayI32Zero: return "NVPTXISD::Suld1DArrayI32Zero"; | |||
859 | case NVPTXISD::Suld1DArrayI64Zero: return "NVPTXISD::Suld1DArrayI64Zero"; | |||
860 | case NVPTXISD::Suld1DArrayV2I8Zero: return "NVPTXISD::Suld1DArrayV2I8Zero"; | |||
861 | case NVPTXISD::Suld1DArrayV2I16Zero: return "NVPTXISD::Suld1DArrayV2I16Zero"; | |||
862 | case NVPTXISD::Suld1DArrayV2I32Zero: return "NVPTXISD::Suld1DArrayV2I32Zero"; | |||
863 | case NVPTXISD::Suld1DArrayV2I64Zero: return "NVPTXISD::Suld1DArrayV2I64Zero"; | |||
864 | case NVPTXISD::Suld1DArrayV4I8Zero: return "NVPTXISD::Suld1DArrayV4I8Zero"; | |||
865 | case NVPTXISD::Suld1DArrayV4I16Zero: return "NVPTXISD::Suld1DArrayV4I16Zero"; | |||
866 | case NVPTXISD::Suld1DArrayV4I32Zero: return "NVPTXISD::Suld1DArrayV4I32Zero"; | |||
867 | ||||
868 | case NVPTXISD::Suld2DI8Zero: return "NVPTXISD::Suld2DI8Zero"; | |||
869 | case NVPTXISD::Suld2DI16Zero: return "NVPTXISD::Suld2DI16Zero"; | |||
870 | case NVPTXISD::Suld2DI32Zero: return "NVPTXISD::Suld2DI32Zero"; | |||
871 | case NVPTXISD::Suld2DI64Zero: return "NVPTXISD::Suld2DI64Zero"; | |||
872 | case NVPTXISD::Suld2DV2I8Zero: return "NVPTXISD::Suld2DV2I8Zero"; | |||
873 | case NVPTXISD::Suld2DV2I16Zero: return "NVPTXISD::Suld2DV2I16Zero"; | |||
874 | case NVPTXISD::Suld2DV2I32Zero: return "NVPTXISD::Suld2DV2I32Zero"; | |||
875 | case NVPTXISD::Suld2DV2I64Zero: return "NVPTXISD::Suld2DV2I64Zero"; | |||
876 | case NVPTXISD::Suld2DV4I8Zero: return "NVPTXISD::Suld2DV4I8Zero"; | |||
877 | case NVPTXISD::Suld2DV4I16Zero: return "NVPTXISD::Suld2DV4I16Zero"; | |||
878 | case NVPTXISD::Suld2DV4I32Zero: return "NVPTXISD::Suld2DV4I32Zero"; | |||
879 | ||||
880 | case NVPTXISD::Suld2DArrayI8Zero: return "NVPTXISD::Suld2DArrayI8Zero"; | |||
881 | case NVPTXISD::Suld2DArrayI16Zero: return "NVPTXISD::Suld2DArrayI16Zero"; | |||
882 | case NVPTXISD::Suld2DArrayI32Zero: return "NVPTXISD::Suld2DArrayI32Zero"; | |||
883 | case NVPTXISD::Suld2DArrayI64Zero: return "NVPTXISD::Suld2DArrayI64Zero"; | |||
884 | case NVPTXISD::Suld2DArrayV2I8Zero: return "NVPTXISD::Suld2DArrayV2I8Zero"; | |||
885 | case NVPTXISD::Suld2DArrayV2I16Zero: return "NVPTXISD::Suld2DArrayV2I16Zero"; | |||
886 | case NVPTXISD::Suld2DArrayV2I32Zero: return "NVPTXISD::Suld2DArrayV2I32Zero"; | |||
887 | case NVPTXISD::Suld2DArrayV2I64Zero: return "NVPTXISD::Suld2DArrayV2I64Zero"; | |||
888 | case NVPTXISD::Suld2DArrayV4I8Zero: return "NVPTXISD::Suld2DArrayV4I8Zero"; | |||
889 | case NVPTXISD::Suld2DArrayV4I16Zero: return "NVPTXISD::Suld2DArrayV4I16Zero"; | |||
890 | case NVPTXISD::Suld2DArrayV4I32Zero: return "NVPTXISD::Suld2DArrayV4I32Zero"; | |||
891 | ||||
892 | case NVPTXISD::Suld3DI8Zero: return "NVPTXISD::Suld3DI8Zero"; | |||
893 | case NVPTXISD::Suld3DI16Zero: return "NVPTXISD::Suld3DI16Zero"; | |||
894 | case NVPTXISD::Suld3DI32Zero: return "NVPTXISD::Suld3DI32Zero"; | |||
895 | case NVPTXISD::Suld3DI64Zero: return "NVPTXISD::Suld3DI64Zero"; | |||
896 | case NVPTXISD::Suld3DV2I8Zero: return "NVPTXISD::Suld3DV2I8Zero"; | |||
897 | case NVPTXISD::Suld3DV2I16Zero: return "NVPTXISD::Suld3DV2I16Zero"; | |||
898 | case NVPTXISD::Suld3DV2I32Zero: return "NVPTXISD::Suld3DV2I32Zero"; | |||
899 | case NVPTXISD::Suld3DV2I64Zero: return "NVPTXISD::Suld3DV2I64Zero"; | |||
900 | case NVPTXISD::Suld3DV4I8Zero: return "NVPTXISD::Suld3DV4I8Zero"; | |||
901 | case NVPTXISD::Suld3DV4I16Zero: return "NVPTXISD::Suld3DV4I16Zero"; | |||
902 | case NVPTXISD::Suld3DV4I32Zero: return "NVPTXISD::Suld3DV4I32Zero"; | |||
903 | } | |||
904 | return nullptr; | |||
905 | } | |||
906 | ||||
907 | TargetLoweringBase::LegalizeTypeAction | |||
908 | NVPTXTargetLowering::getPreferredVectorAction(EVT VT) const { | |||
909 | if (VT.getVectorNumElements() != 1 && VT.getScalarType() == MVT::i1) | |||
910 | return TypeSplitVector; | |||
911 | ||||
912 | return TargetLoweringBase::getPreferredVectorAction(VT); | |||
913 | } | |||
914 | ||||
915 | SDValue | |||
916 | NVPTXTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { | |||
917 | SDLoc dl(Op); | |||
918 | const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); | |||
919 | auto PtrVT = getPointerTy(DAG.getDataLayout()); | |||
920 | Op = DAG.getTargetGlobalAddress(GV, dl, PtrVT); | |||
921 | return DAG.getNode(NVPTXISD::Wrapper, dl, PtrVT, Op); | |||
922 | } | |||
923 | ||||
924 | std::string NVPTXTargetLowering::getPrototype( | |||
925 | const DataLayout &DL, Type *retTy, const ArgListTy &Args, | |||
926 | const SmallVectorImpl<ISD::OutputArg> &Outs, unsigned retAlignment, | |||
927 | const ImmutableCallSite *CS) const { | |||
928 | auto PtrVT = getPointerTy(DL); | |||
929 | ||||
930 | bool isABI = (STI.getSmVersion() >= 20); | |||
931 | assert(isABI && "Non-ABI compilation is not supported")((isABI && "Non-ABI compilation is not supported") ? static_cast <void> (0) : __assert_fail ("isABI && \"Non-ABI compilation is not supported\"" , "/tmp/buildd/llvm-toolchain-snapshot-4.0~svn290870/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 931, __PRETTY_FUNCTION__)); | |||
932 | if (!isABI) | |||
933 | return ""; | |||
934 | ||||
935 | std::stringstream O; | |||
936 | O << "prototype_" << uniqueCallSite << " : .callprototype "; | |||
937 | ||||
938 | if (retTy->getTypeID() == Type::VoidTyID) { | |||
939 | O << "()"; | |||
940 | } else { | |||
941 | O << "("; | |||
942 | if (retTy->isFloatingPointTy() || retTy->isIntegerTy()) { | |||
943 | unsigned size = 0; | |||
944 | if (auto *ITy = dyn_cast<IntegerType>(retTy)) { | |||
945 | size = ITy->getBitWidth(); | |||
946 | if (size < 32) | |||
947 | size = 32; | |||
948 | } else { | |||
949 | assert(retTy->isFloatingPointTy() &&((retTy->isFloatingPointTy() && "Floating point type expected here" ) ? static_cast<void> (0) : __assert_fail ("retTy->isFloatingPointTy() && \"Floating point type expected here\"" , "/tmp/buildd/llvm-toolchain-snapshot-4.0~svn290870/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 950, __PRETTY_FUNCTION__)) | |||
950 | "Floating point type expected here")((retTy->isFloatingPointTy() && "Floating point type expected here" ) ? static_cast<void> (0) : __assert_fail ("retTy->isFloatingPointTy() && \"Floating point type expected here\"" , "/tmp/buildd/llvm-toolchain-snapshot-4.0~svn290870/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 950, __PRETTY_FUNCTION__)); | |||
951 | size = retTy->getPrimitiveSizeInBits(); | |||
952 | } | |||
953 | ||||
954 | O << ".param .b" << size << " _"; | |||
955 | } else if (isa<PointerType>(retTy)) { | |||
956 | O << ".param .b" << PtrVT.getSizeInBits() << " _"; | |||
957 | } else if ((retTy->getTypeID() == Type::StructTyID) || | |||
958 | isa<VectorType>(retTy)) { | |||
959 | auto &DL = CS->getCalledFunction()->getParent()->getDataLayout(); | |||
960 | O << ".param .align " << retAlignment << " .b8 _[" | |||
961 | << DL.getTypeAllocSize(retTy) << "]"; | |||
962 | } else { | |||
963 | llvm_unreachable("Unknown return type")::llvm::llvm_unreachable_internal("Unknown return type", "/tmp/buildd/llvm-toolchain-snapshot-4.0~svn290870/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 963); | |||
964 | } | |||
965 | O << ") "; | |||
966 | } | |||
967 | O << "_ ("; | |||
968 | ||||
969 | bool first = true; | |||
970 | ||||
971 | unsigned OIdx = 0; | |||
972 | for (unsigned i = 0, e = Args.size(); i != e; ++i, ++OIdx) { | |||
973 | Type *Ty = Args[i].Ty; | |||
974 | if (!first) { | |||
975 | O << ", "; | |||
976 | } | |||
977 | first = false; | |||
978 | ||||
979 | if (!Outs[OIdx].Flags.isByVal()) { | |||
980 | if (Ty->isAggregateType() || Ty->isVectorTy()) { | |||
981 | unsigned align = 0; | |||
982 | const CallInst *CallI = cast<CallInst>(CS->getInstruction()); | |||
983 | // +1 because index 0 is reserved for return type alignment | |||
984 | if (!llvm::getAlign(*CallI, i + 1, align)) | |||
985 | align = DL.getABITypeAlignment(Ty); | |||
986 | unsigned sz = DL.getTypeAllocSize(Ty); | |||
987 | O << ".param .align " << align << " .b8 "; | |||
988 | O << "_"; | |||
989 | O << "[" << sz << "]"; | |||
990 | // update the index for Outs | |||
991 | SmallVector<EVT, 16> vtparts; | |||
992 | ComputeValueVTs(*this, DL, Ty, vtparts); | |||
993 | if (unsigned len = vtparts.size()) | |||
994 | OIdx += len - 1; | |||
995 | continue; | |||
996 | } | |||
997 | // i8 types in IR will be i16 types in SDAG | |||
998 | assert((getValueType(DL, Ty) == Outs[OIdx].VT ||(((getValueType(DL, Ty) == Outs[OIdx].VT || (getValueType(DL, Ty) == MVT::i8 && Outs[OIdx].VT == MVT::i16)) && "type mismatch between callee prototype and arguments") ? static_cast <void> (0) : __assert_fail ("(getValueType(DL, Ty) == Outs[OIdx].VT || (getValueType(DL, Ty) == MVT::i8 && Outs[OIdx].VT == MVT::i16)) && \"type mismatch between callee prototype and arguments\"" , "/tmp/buildd/llvm-toolchain-snapshot-4.0~svn290870/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 1000, __PRETTY_FUNCTION__)) | |||
999 | (getValueType(DL, Ty) == MVT::i8 && Outs[OIdx].VT == MVT::i16)) &&(((getValueType(DL, Ty) == Outs[OIdx].VT || (getValueType(DL, Ty) == MVT::i8 && Outs[OIdx].VT == MVT::i16)) && "type mismatch between callee prototype and arguments") ? static_cast <void> (0) : __assert_fail ("(getValueType(DL, Ty) == Outs[OIdx].VT || (getValueType(DL, Ty) == MVT::i8 && Outs[OIdx].VT == MVT::i16)) && \"type mismatch between callee prototype and arguments\"" , "/tmp/buildd/llvm-toolchain-snapshot-4.0~svn290870/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 1000, __PRETTY_FUNCTION__)) | |||
1000 | "type mismatch between callee prototype and arguments")(((getValueType(DL, Ty) == Outs[OIdx].VT || (getValueType(DL, Ty) == MVT::i8 && Outs[OIdx].VT == MVT::i16)) && "type mismatch between callee prototype and arguments") ? static_cast <void> (0) : __assert_fail ("(getValueType(DL, Ty) == Outs[OIdx].VT || (getValueType(DL, Ty) == MVT::i8 && Outs[OIdx].VT == MVT::i16)) && \"type mismatch between callee prototype and arguments\"" , "/tmp/buildd/llvm-toolchain-snapshot-4.0~svn290870/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 1000, __PRETTY_FUNCTION__)); | |||
1001 | // scalar type | |||
1002 | unsigned sz = 0; | |||
1003 | if (isa<IntegerType>(Ty)) { | |||
1004 | sz = cast<IntegerType>(Ty)->getBitWidth(); | |||
1005 | if (sz < 32) | |||
1006 | sz = 32; | |||
1007 | } else if (isa<PointerType>(Ty)) | |||
1008 | sz = PtrVT.getSizeInBits(); | |||
1009 | else | |||
1010 | sz = Ty->getPrimitiveSizeInBits(); | |||
1011 | O << ".param .b" << sz << " "; | |||
1012 | O << "_"; | |||
1013 | continue; | |||
1014 | } | |||
1015 | auto *PTy = dyn_cast<PointerType>(Ty); | |||
1016 | assert(PTy && "Param with byval attribute should be a pointer type")((PTy && "Param with byval attribute should be a pointer type" ) ? static_cast<void> (0) : __assert_fail ("PTy && \"Param with byval attribute should be a pointer type\"" , "/tmp/buildd/llvm-toolchain-snapshot-4.0~svn290870/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 1016, __PRETTY_FUNCTION__)); | |||
1017 | Type *ETy = PTy->getElementType(); | |||
1018 | ||||
1019 | unsigned align = Outs[OIdx].Flags.getByValAlign(); | |||
1020 | unsigned sz = DL.getTypeAllocSize(ETy); | |||
1021 | O << ".param .align " << align << " .b8 "; | |||
1022 | O << "_"; | |||
1023 | O << "[" << sz << "]"; | |||
1024 | } | |||
1025 | O << ");"; | |||
1026 | return O.str(); | |||
1027 | } | |||
1028 | ||||
1029 | unsigned NVPTXTargetLowering::getArgumentAlignment(SDValue Callee, | |||
1030 | const ImmutableCallSite *CS, | |||
1031 | Type *Ty, unsigned Idx, | |||
1032 | const DataLayout &DL) const { | |||
1033 | if (!CS) { | |||
1034 | // CallSite is zero, fallback to ABI type alignment | |||
1035 | return DL.getABITypeAlignment(Ty); | |||
1036 | } | |||
1037 | ||||
1038 | unsigned Align = 0; | |||
1039 | const Value *DirectCallee = CS->getCalledFunction(); | |||
1040 | ||||
1041 | if (!DirectCallee) { | |||
1042 | // We don't have a direct function symbol, but that may be because of | |||
1043 | // constant cast instructions in the call. | |||
1044 | const Instruction *CalleeI = CS->getInstruction(); | |||
1045 | assert(CalleeI && "Call target is not a function or derived value?")((CalleeI && "Call target is not a function or derived value?" ) ? static_cast<void> (0) : __assert_fail ("CalleeI && \"Call target is not a function or derived value?\"" , "/tmp/buildd/llvm-toolchain-snapshot-4.0~svn290870/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 1045, __PRETTY_FUNCTION__)); | |||
1046 | ||||
1047 | // With bitcast'd call targets, the instruction will be the call | |||
1048 | if (isa<CallInst>(CalleeI)) { | |||
1049 | // Check if we have call alignment metadata | |||
1050 | if (llvm::getAlign(*cast<CallInst>(CalleeI), Idx, Align)) | |||
1051 | return Align; | |||
1052 | ||||
1053 | const Value *CalleeV = cast<CallInst>(CalleeI)->getCalledValue(); | |||
1054 | // Ignore any bitcast instructions | |||
1055 | while (isa<ConstantExpr>(CalleeV)) { | |||
1056 | const ConstantExpr *CE = cast<ConstantExpr>(CalleeV); | |||
1057 | if (!CE->isCast()) | |||
1058 | break; | |||
1059 | // Look through the bitcast | |||
1060 | CalleeV = cast<ConstantExpr>(CalleeV)->getOperand(0); | |||
1061 | } | |||
1062 | ||||
1063 | // We have now looked past all of the bitcasts. Do we finally have a | |||
1064 | // Function? | |||
1065 | if (isa<Function>(CalleeV)) | |||
1066 | DirectCallee = CalleeV; | |||
1067 | } | |||
1068 | } | |||
1069 | ||||
1070 | // Check for function alignment information if we found that the | |||
1071 | // ultimate target is a Function | |||
1072 | if (DirectCallee) | |||
1073 | if (llvm::getAlign(*cast<Function>(DirectCallee), Idx, Align)) | |||
1074 | return Align; | |||
1075 | ||||
1076 | // Call is indirect or alignment information is not available, fall back to | |||
1077 | // the ABI type alignment | |||
1078 | return DL.getABITypeAlignment(Ty); | |||
1079 | } | |||
1080 | ||||
1081 | SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, | |||
1082 | SmallVectorImpl<SDValue> &InVals) const { | |||
1083 | SelectionDAG &DAG = CLI.DAG; | |||
1084 | SDLoc dl = CLI.DL; | |||
1085 | SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; | |||
1086 | SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; | |||
1087 | SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; | |||
1088 | SDValue Chain = CLI.Chain; | |||
1089 | SDValue Callee = CLI.Callee; | |||
1090 | bool &isTailCall = CLI.IsTailCall; | |||
1091 | ArgListTy &Args = CLI.getArgs(); | |||
1092 | Type *retTy = CLI.RetTy; | |||
| ||||
1093 | ImmutableCallSite *CS = CLI.CS; | |||
1094 | ||||
1095 | bool isABI = (STI.getSmVersion() >= 20); | |||
1096 | assert(isABI && "Non-ABI compilation is not supported")((isABI && "Non-ABI compilation is not supported") ? static_cast <void> (0) : __assert_fail ("isABI && \"Non-ABI compilation is not supported\"" , "/tmp/buildd/llvm-toolchain-snapshot-4.0~svn290870/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 1096, __PRETTY_FUNCTION__)); | |||
1097 | if (!isABI) | |||
1098 | return Chain; | |||
1099 | MachineFunction &MF = DAG.getMachineFunction(); | |||
1100 | const Function *F = MF.getFunction(); | |||
1101 | auto &DL = MF.getDataLayout(); | |||
1102 | ||||
1103 | SDValue tempChain = Chain; | |||
1104 | Chain = DAG.getCALLSEQ_START(Chain, | |||
1105 | DAG.getIntPtrConstant(uniqueCallSite, dl, true), | |||
1106 | dl); | |||
1107 | SDValue InFlag = Chain.getValue(1); | |||
1108 | ||||
1109 | unsigned paramCount = 0; | |||
1110 | // Args.size() and Outs.size() need not match. | |||
1111 | // Outs.size() will be larger | |||
1112 | // * if there is an aggregate argument with multiple fields (each field | |||
1113 | // showing up separately in Outs) | |||
1114 | // * if there is a vector argument with more than typical vector-length | |||
1115 | // elements (generally if more than 4) where each vector element is | |||
1116 | // individually present in Outs. | |||
1117 | // So a different index should be used for indexing into Outs/OutVals. | |||
1118 | // See similar issue in LowerFormalArguments. | |||
1119 | unsigned OIdx = 0; | |||
1120 | // Declare the .params or .reg need to pass values | |||
1121 | // to the function | |||
1122 | for (unsigned i = 0, e = Args.size(); i != e; ++i, ++OIdx) { | |||
1123 | EVT VT = Outs[OIdx].VT; | |||
1124 | Type *Ty = Args[i].Ty; | |||
1125 | ||||
1126 | if (!Outs[OIdx].Flags.isByVal()) { | |||
1127 | if (Ty->isAggregateType()) { | |||
1128 | // aggregate | |||
1129 | SmallVector<EVT, 16> vtparts; | |||
1130 | SmallVector<uint64_t, 16> Offsets; | |||
1131 | ComputePTXValueVTs(*this, DAG.getDataLayout(), Ty, vtparts, &Offsets, | |||
1132 | 0); | |||
1133 | ||||
1134 | unsigned align = | |||
1135 | getArgumentAlignment(Callee, CS, Ty, paramCount + 1, DL); | |||
1136 | // declare .param .align <align> .b8 .param<n>[<size>]; | |||
1137 | unsigned sz = DL.getTypeAllocSize(Ty); | |||
1138 | SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); | |||
1139 | SDValue DeclareParamOps[] = { Chain, DAG.getConstant(align, dl, | |||
1140 | MVT::i32), | |||
1141 | DAG.getConstant(paramCount, dl, MVT::i32), | |||
1142 | DAG.getConstant(sz, dl, MVT::i32), | |||
1143 | InFlag }; | |||
1144 | Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs, | |||
1145 | DeclareParamOps); | |||
1146 | InFlag = Chain.getValue(1); | |||
1147 | for (unsigned j = 0, je = vtparts.size(); j != je; ++j) { | |||
1148 | EVT elemtype = vtparts[j]; | |||
1149 | unsigned ArgAlign = GreatestCommonDivisor64(align, Offsets[j]); | |||
1150 | if (elemtype.isInteger() && (sz < 8)) | |||
1151 | sz = 8; | |||
1152 | SDValue StVal = OutVals[OIdx]; | |||
1153 | if (elemtype.getSizeInBits() < 16) { | |||
1154 | StVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, StVal); | |||
1155 | } | |||
1156 | SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); | |||
1157 | SDValue CopyParamOps[] = { Chain, | |||
1158 | DAG.getConstant(paramCount, dl, MVT::i32), | |||
1159 | DAG.getConstant(Offsets[j], dl, MVT::i32), | |||
1160 | StVal, InFlag }; | |||
1161 | Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl, | |||
1162 | CopyParamVTs, CopyParamOps, | |||
1163 | elemtype, MachinePointerInfo(), | |||
1164 | ArgAlign); | |||
1165 | InFlag = Chain.getValue(1); | |||
1166 | ++OIdx; | |||
1167 | } | |||
1168 | if (vtparts.size() > 0) | |||
1169 | --OIdx; | |||
1170 | ++paramCount; | |||
1171 | continue; | |||
1172 | } | |||
1173 | if (Ty->isVectorTy()) { | |||
1174 | EVT ObjectVT = getValueType(DL, Ty); | |||
1175 | unsigned align = | |||
1176 | getArgumentAlignment(Callee, CS, Ty, paramCount + 1, DL); | |||
1177 | // declare .param .align <align> .b8 .param<n>[<size>]; | |||
1178 | unsigned sz = DL.getTypeAllocSize(Ty); | |||
1179 | SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); | |||
1180 | SDValue DeclareParamOps[] = { Chain, | |||
1181 | DAG.getConstant(align, dl, MVT::i32), | |||
1182 | DAG.getConstant(paramCount, dl, MVT::i32), | |||
1183 | DAG.getConstant(sz, dl, MVT::i32), | |||
1184 | InFlag }; | |||
1185 | Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs, | |||
1186 | DeclareParamOps); | |||
1187 | InFlag = Chain.getValue(1); | |||
1188 | unsigned NumElts = ObjectVT.getVectorNumElements(); | |||
1189 | EVT EltVT = ObjectVT.getVectorElementType(); | |||
1190 | EVT MemVT = EltVT; | |||
1191 | bool NeedExtend = false; | |||
1192 | if (EltVT.getSizeInBits() < 16) { | |||
1193 | NeedExtend = true; | |||
1194 | EltVT = MVT::i16; | |||
1195 | } | |||
1196 | ||||
1197 | // V1 store | |||
1198 | if (NumElts == 1) { | |||
1199 | SDValue Elt = OutVals[OIdx++]; | |||
1200 | if (NeedExtend) | |||
1201 | Elt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Elt); | |||
1202 | ||||
1203 | SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); | |||
1204 | SDValue CopyParamOps[] = { Chain, | |||
1205 | DAG.getConstant(paramCount, dl, MVT::i32), | |||
1206 | DAG.getConstant(0, dl, MVT::i32), Elt, | |||
1207 | InFlag }; | |||
1208 | Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl, | |||
1209 | CopyParamVTs, CopyParamOps, | |||
1210 | MemVT, MachinePointerInfo()); | |||
1211 | InFlag = Chain.getValue(1); | |||
1212 | } else if (NumElts == 2) { | |||
1213 | SDValue Elt0 = OutVals[OIdx++]; | |||
1214 | SDValue Elt1 = OutVals[OIdx++]; | |||
1215 | if (NeedExtend) { | |||
1216 | Elt0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Elt0); | |||
1217 | Elt1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Elt1); | |||
1218 | } | |||
1219 | ||||
1220 | SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); | |||
1221 | SDValue CopyParamOps[] = { Chain, | |||
1222 | DAG.getConstant(paramCount, dl, MVT::i32), | |||
1223 | DAG.getConstant(0, dl, MVT::i32), Elt0, | |||
1224 | Elt1, InFlag }; | |||
1225 | Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParamV2, dl, | |||
1226 | CopyParamVTs, CopyParamOps, | |||
1227 | MemVT, MachinePointerInfo()); | |||
1228 | InFlag = Chain.getValue(1); | |||
1229 | } else { | |||
1230 | unsigned curOffset = 0; | |||
1231 | // V4 stores | |||
1232 | // We have at least 4 elements (<3 x Ty> expands to 4 elements) and | |||
1233 | // the | |||
1234 | // vector will be expanded to a power of 2 elements, so we know we can | |||
1235 | // always round up to the next multiple of 4 when creating the vector | |||
1236 | // stores. | |||
1237 | // e.g. 4 elem => 1 st.v4 | |||
1238 | // 6 elem => 2 st.v4 | |||
1239 | // 8 elem => 2 st.v4 | |||
1240 | // 11 elem => 3 st.v4 | |||
1241 | unsigned VecSize = 4; | |||
1242 | if (EltVT.getSizeInBits() == 64) | |||
1243 | VecSize = 2; | |||
1244 | ||||
1245 | // This is potentially only part of a vector, so assume all elements | |||
1246 | // are packed together. | |||
1247 | unsigned PerStoreOffset = MemVT.getStoreSizeInBits() / 8 * VecSize; | |||
1248 | ||||
1249 | for (unsigned i = 0; i < NumElts; i += VecSize) { | |||
1250 | // Get values | |||
1251 | SDValue StoreVal; | |||
1252 | SmallVector<SDValue, 8> Ops; | |||
1253 | Ops.push_back(Chain); | |||
1254 | Ops.push_back(DAG.getConstant(paramCount, dl, MVT::i32)); | |||
1255 | Ops.push_back(DAG.getConstant(curOffset, dl, MVT::i32)); | |||
1256 | ||||
1257 | unsigned Opc = NVPTXISD::StoreParamV2; | |||
1258 | ||||
1259 | StoreVal = OutVals[OIdx++]; | |||
1260 | if (NeedExtend) | |||
1261 | StoreVal = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal); | |||
1262 | Ops.push_back(StoreVal); | |||
1263 | ||||
1264 | if (i + 1 < NumElts) { | |||
1265 | StoreVal = OutVals[OIdx++]; | |||
1266 | if (NeedExtend) | |||
1267 | StoreVal = | |||
1268 | DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal); | |||
1269 | } else { | |||
1270 | StoreVal = DAG.getUNDEF(EltVT); | |||
1271 | } | |||
1272 | Ops.push_back(StoreVal); | |||
1273 | ||||
1274 | if (VecSize == 4) { | |||
1275 | Opc = NVPTXISD::StoreParamV4; | |||
1276 | if (i + 2 < NumElts) { | |||
1277 | StoreVal = OutVals[OIdx++]; | |||
1278 | if (NeedExtend) | |||
1279 | StoreVal = | |||
1280 | DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal); | |||
1281 | } else { | |||
1282 | StoreVal = DAG.getUNDEF(EltVT); | |||
1283 | } | |||
1284 | Ops.push_back(StoreVal); | |||
1285 | ||||
1286 | if (i + 3 < NumElts) { | |||
1287 | StoreVal = OutVals[OIdx++]; | |||
1288 | if (NeedExtend) | |||
1289 | StoreVal = | |||
1290 | DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal); | |||
1291 | } else { | |||
1292 | StoreVal = DAG.getUNDEF(EltVT); | |||
1293 | } | |||
1294 | Ops.push_back(StoreVal); | |||
1295 | } | |||
1296 | ||||
1297 | Ops.push_back(InFlag); | |||
1298 | ||||
1299 | SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); | |||
1300 | Chain = DAG.getMemIntrinsicNode(Opc, dl, CopyParamVTs, Ops, | |||
1301 | MemVT, MachinePointerInfo()); | |||
1302 | InFlag = Chain.getValue(1); | |||
1303 | curOffset += PerStoreOffset; | |||
1304 | } | |||
1305 | } | |||
1306 | ++paramCount; | |||
1307 | --OIdx; | |||
1308 | continue; | |||
1309 | } | |||
1310 | // Plain scalar | |||
1311 | // for ABI, declare .param .b<size> .param<n>; | |||
1312 | unsigned sz = VT.getSizeInBits(); | |||
1313 | bool needExtend = false; | |||
1314 | if (VT.isInteger()) { | |||
1315 | if (sz < 16) | |||
1316 | needExtend = true; | |||
1317 | if (sz < 32) | |||
1318 | sz = 32; | |||
1319 | } | |||
1320 | SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); | |||
1321 | SDValue DeclareParamOps[] = { Chain, | |||
1322 | DAG.getConstant(paramCount, dl, MVT::i32), | |||
1323 | DAG.getConstant(sz, dl, MVT::i32), | |||
1324 | DAG.getConstant(0, dl, MVT::i32), InFlag }; | |||
1325 | Chain = DAG.getNode(NVPTXISD::DeclareScalarParam, dl, DeclareParamVTs, | |||
1326 | DeclareParamOps); | |||
1327 | InFlag = Chain.getValue(1); | |||
1328 | SDValue OutV = OutVals[OIdx]; | |||
1329 | if (needExtend) { | |||
1330 | // zext/sext i1 to i16 | |||
1331 | unsigned opc = ISD::ZERO_EXTEND; | |||
1332 | if (Outs[OIdx].Flags.isSExt()) | |||
1333 | opc = ISD::SIGN_EXTEND; | |||
1334 | OutV = DAG.getNode(opc, dl, MVT::i16, OutV); | |||
1335 | } | |||
1336 | SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); | |||
1337 | SDValue CopyParamOps[] = { Chain, | |||
1338 | DAG.getConstant(paramCount, dl, MVT::i32), | |||
1339 | DAG.getConstant(0, dl, MVT::i32), OutV, | |||
1340 | InFlag }; | |||
1341 | ||||
1342 | unsigned opcode = NVPTXISD::StoreParam; | |||
1343 | if (Outs[OIdx].Flags.isZExt() && VT.getSizeInBits() < 32) | |||
1344 | opcode = NVPTXISD::StoreParamU32; | |||
1345 | else if (Outs[OIdx].Flags.isSExt() && VT.getSizeInBits() < 32) | |||
1346 | opcode = NVPTXISD::StoreParamS32; | |||
1347 | Chain = DAG.getMemIntrinsicNode(opcode, dl, CopyParamVTs, CopyParamOps, | |||
1348 | VT, MachinePointerInfo()); | |||
1349 | ||||
1350 | InFlag = Chain.getValue(1); | |||
1351 | ++paramCount; | |||
1352 | continue; | |||
1353 | } | |||
1354 | // struct or vector | |||
1355 | SmallVector<EVT, 16> vtparts; | |||
1356 | SmallVector<uint64_t, 16> Offsets; | |||
1357 | auto *PTy = dyn_cast<PointerType>(Args[i].Ty); | |||
1358 | assert(PTy && "Type of a byval parameter should be pointer")((PTy && "Type of a byval parameter should be pointer" ) ? static_cast<void> (0) : __assert_fail ("PTy && \"Type of a byval parameter should be pointer\"" , "/tmp/buildd/llvm-toolchain-snapshot-4.0~svn290870/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 1358, __PRETTY_FUNCTION__)); | |||
1359 | ComputePTXValueVTs(*this, DAG.getDataLayout(), PTy->getElementType(), | |||
1360 | vtparts, &Offsets, 0); | |||
1361 | ||||
1362 | // declare .param .align <align> .b8 .param<n>[<size>]; | |||
1363 | unsigned sz = Outs[OIdx].Flags.getByValSize(); | |||
1364 | SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); | |||
1365 | unsigned ArgAlign = Outs[OIdx].Flags.getByValAlign(); | |||
1366 | // The ByValAlign in the Outs[OIdx].Flags is alway set at this point, | |||
1367 | // so we don't need to worry about natural alignment or not. | |||
1368 | // See TargetLowering::LowerCallTo(). | |||
1369 | ||||
1370 | // Enforce minumum alignment of 4 to work around ptxas miscompile | |||
1371 | // for sm_50+. See corresponding alignment adjustment in | |||
1372 | // emitFunctionParamList() for details. | |||
1373 | if (ArgAlign < 4) | |||
1374 | ArgAlign = 4; | |||
1375 | SDValue DeclareParamOps[] = {Chain, DAG.getConstant(ArgAlign, dl, MVT::i32), | |||
1376 | DAG.getConstant(paramCount, dl, MVT::i32), | |||
1377 | DAG.getConstant(sz, dl, MVT::i32), InFlag}; | |||
1378 | Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs, | |||
1379 | DeclareParamOps); | |||
1380 | InFlag = Chain.getValue(1); | |||
1381 | for (unsigned j = 0, je = vtparts.size(); j != je; ++j) { | |||
1382 | EVT elemtype = vtparts[j]; | |||
1383 | int curOffset = Offsets[j]; | |||
1384 | unsigned PartAlign = GreatestCommonDivisor64(ArgAlign, curOffset); | |||
1385 | auto PtrVT = getPointerTy(DAG.getDataLayout()); | |||
1386 | SDValue srcAddr = DAG.getNode(ISD::ADD, dl, PtrVT, OutVals[OIdx], | |||
1387 | DAG.getConstant(curOffset, dl, PtrVT)); | |||
1388 | SDValue theVal = DAG.getLoad(elemtype, dl, tempChain, srcAddr, | |||
1389 | MachinePointerInfo(), PartAlign); | |||
1390 | if (elemtype.getSizeInBits() < 16) { | |||
1391 | theVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, theVal); | |||
1392 | } | |||
1393 | SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); | |||
1394 | SDValue CopyParamOps[] = { Chain, | |||
1395 | DAG.getConstant(paramCount, dl, MVT::i32), | |||
1396 | DAG.getConstant(curOffset, dl, MVT::i32), | |||
1397 | theVal, InFlag }; | |||
1398 | Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl, CopyParamVTs, | |||
1399 | CopyParamOps, elemtype, | |||
1400 | MachinePointerInfo()); | |||
1401 | ||||
1402 | InFlag = Chain.getValue(1); | |||
1403 | } | |||
1404 | ++paramCount; | |||
1405 | } | |||
1406 | ||||
1407 | GlobalAddressSDNode *Func = dyn_cast<GlobalAddressSDNode>(Callee.getNode()); | |||
1408 | unsigned retAlignment = 0; | |||
1409 | ||||
1410 | // Handle Result | |||
1411 | if (Ins.size() > 0) { | |||
1412 | SmallVector<EVT, 16> resvtparts; | |||
1413 | ComputeValueVTs(*this, DL, retTy, resvtparts); | |||
1414 | ||||
1415 | // Declare | |||
1416 | // .param .align 16 .b8 retval0[<size-in-bytes>], or | |||
1417 | // .param .b<size-in-bits> retval0 | |||
1418 | unsigned resultsz = DL.getTypeAllocSizeInBits(retTy); | |||
1419 | // Emit ".param .b<size-in-bits> retval0" instead of byte arrays only for | |||
1420 | // these three types to match the logic in | |||
1421 | // NVPTXAsmPrinter::printReturnValStr and NVPTXTargetLowering::getPrototype. | |||
1422 | // Plus, this behavior is consistent with nvcc's. | |||
1423 | if (retTy->isFloatingPointTy() || retTy->isIntegerTy() || | |||
1424 | retTy->isPointerTy()) { | |||
1425 | // Scalar needs to be at least 32bit wide | |||
1426 | if (resultsz < 32) | |||
1427 | resultsz = 32; | |||
1428 | SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue); | |||
1429 | SDValue DeclareRetOps[] = { Chain, DAG.getConstant(1, dl, MVT::i32), | |||
1430 | DAG.getConstant(resultsz, dl, MVT::i32), | |||
1431 | DAG.getConstant(0, dl, MVT::i32), InFlag }; | |||
1432 | Chain = DAG.getNode(NVPTXISD::DeclareRet, dl, DeclareRetVTs, | |||
1433 | DeclareRetOps); | |||
1434 | InFlag = Chain.getValue(1); | |||
1435 | } else { | |||
1436 | retAlignment = getArgumentAlignment(Callee, CS, retTy, 0, DL); | |||
1437 | SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue); | |||
1438 | SDValue DeclareRetOps[] = { Chain, | |||
1439 | DAG.getConstant(retAlignment, dl, MVT::i32), | |||
1440 | DAG.getConstant(resultsz / 8, dl, MVT::i32), | |||
1441 | DAG.getConstant(0, dl, MVT::i32), InFlag }; | |||
1442 | Chain = DAG.getNode(NVPTXISD::DeclareRetParam, dl, DeclareRetVTs, | |||
1443 | DeclareRetOps); | |||
1444 | InFlag = Chain.getValue(1); | |||
1445 | } | |||
1446 | } | |||
1447 | ||||
1448 | if (!Func) { | |||
1449 | // This is indirect function call case : PTX requires a prototype of the | |||
1450 | // form | |||
1451 | // proto_0 : .callprototype(.param .b32 _) _ (.param .b32 _); | |||
1452 | // to be emitted, and the label has to used as the last arg of call | |||
1453 | // instruction. | |||
1454 | // The prototype is embedded in a string and put as the operand for a | |||
1455 | // CallPrototype SDNode which will print out to the value of the string. | |||
1456 | SDVTList ProtoVTs = DAG.getVTList(MVT::Other, MVT::Glue); | |||
1457 | std::string Proto = | |||
1458 | getPrototype(DAG.getDataLayout(), retTy, Args, Outs, retAlignment, CS); | |||
1459 | const char *ProtoStr = | |||
1460 | nvTM->getManagedStrPool()->getManagedString(Proto.c_str())->c_str(); | |||
1461 | SDValue ProtoOps[] = { | |||
1462 | Chain, DAG.getTargetExternalSymbol(ProtoStr, MVT::i32), InFlag, | |||
1463 | }; | |||
1464 | Chain = DAG.getNode(NVPTXISD::CallPrototype, dl, ProtoVTs, ProtoOps); | |||
1465 | InFlag = Chain.getValue(1); | |||
1466 | } | |||
1467 | // Op to just print "call" | |||
1468 | SDVTList PrintCallVTs = DAG.getVTList(MVT::Other, MVT::Glue); | |||
1469 | SDValue PrintCallOps[] = { | |||
1470 | Chain, DAG.getConstant((Ins.size() == 0) ? 0 : 1, dl, MVT::i32), InFlag | |||
1471 | }; | |||
1472 | // We model convergent calls as separate opcodes. | |||
1473 | unsigned Opcode = Func ? NVPTXISD::PrintCallUni : NVPTXISD::PrintCall; | |||
1474 | if (CLI.IsConvergent) | |||
1475 | Opcode = Opcode == NVPTXISD::PrintCallUni ? NVPTXISD::PrintConvergentCallUni | |||
1476 | : NVPTXISD::PrintConvergentCall; | |||
1477 | Chain = DAG.getNode(Opcode, dl, PrintCallVTs, PrintCallOps); | |||
1478 | InFlag = Chain.getValue(1); | |||
1479 | ||||
1480 | // Ops to print out the function name | |||
1481 | SDVTList CallVoidVTs = DAG.getVTList(MVT::Other, MVT::Glue); | |||
1482 | SDValue CallVoidOps[] = { Chain, Callee, InFlag }; | |||
1483 | Chain = DAG.getNode(NVPTXISD::CallVoid, dl, CallVoidVTs, CallVoidOps); | |||
1484 | InFlag = Chain.getValue(1); | |||
1485 | ||||
1486 | // Ops to print out the param list | |||
1487 | SDVTList CallArgBeginVTs = DAG.getVTList(MVT::Other, MVT::Glue); | |||
1488 | SDValue CallArgBeginOps[] = { Chain, InFlag }; | |||
1489 | Chain = DAG.getNode(NVPTXISD::CallArgBegin, dl, CallArgBeginVTs, | |||
1490 | CallArgBeginOps); | |||
1491 | InFlag = Chain.getValue(1); | |||
1492 | ||||
1493 | for (unsigned i = 0, e = paramCount; i != e; ++i) { | |||
1494 | unsigned opcode; | |||
1495 | if (i == (e - 1)) | |||
1496 | opcode = NVPTXISD::LastCallArg; | |||
1497 | else | |||
1498 | opcode = NVPTXISD::CallArg; | |||
1499 | SDVTList CallArgVTs = DAG.getVTList(MVT::Other, MVT::Glue); | |||
1500 | SDValue CallArgOps[] = { Chain, DAG.getConstant(1, dl, MVT::i32), | |||
1501 | DAG.getConstant(i, dl, MVT::i32), InFlag }; | |||
1502 | Chain = DAG.getNode(opcode, dl, CallArgVTs, CallArgOps); | |||
1503 | InFlag = Chain.getValue(1); | |||
1504 | } | |||
1505 | SDVTList CallArgEndVTs = DAG.getVTList(MVT::Other, MVT::Glue); | |||
1506 | SDValue CallArgEndOps[] = { Chain, | |||
1507 | DAG.getConstant(Func ? 1 : 0, dl, MVT::i32), | |||
1508 | InFlag }; | |||
1509 | Chain = DAG.getNode(NVPTXISD::CallArgEnd, dl, CallArgEndVTs, CallArgEndOps); | |||
1510 | InFlag = Chain.getValue(1); | |||
1511 | ||||
1512 | if (!Func) { | |||
1513 | SDVTList PrototypeVTs = DAG.getVTList(MVT::Other, MVT::Glue); | |||
1514 | SDValue PrototypeOps[] = { Chain, | |||
1515 | DAG.getConstant(uniqueCallSite, dl, MVT::i32), | |||
1516 | InFlag }; | |||
1517 | Chain = DAG.getNode(NVPTXISD::Prototype, dl, PrototypeVTs, PrototypeOps); | |||
1518 | InFlag = Chain.getValue(1); | |||
1519 | } | |||
1520 | ||||
1521 | // Generate loads from param memory/moves from registers for result | |||
1522 | if (Ins.size() > 0) { | |||
1523 | if (retTy && retTy->isVectorTy()) { | |||
1524 | EVT ObjectVT = getValueType(DL, retTy); | |||
1525 | unsigned NumElts = ObjectVT.getVectorNumElements(); | |||
1526 | EVT EltVT = ObjectVT.getVectorElementType(); | |||
1527 | assert(STI.getTargetLowering()->getNumRegisters(F->getContext(),((STI.getTargetLowering()->getNumRegisters(F->getContext (), ObjectVT) == NumElts && "Vector was not scalarized" ) ? static_cast<void> (0) : __assert_fail ("STI.getTargetLowering()->getNumRegisters(F->getContext(), ObjectVT) == NumElts && \"Vector was not scalarized\"" , "/tmp/buildd/llvm-toolchain-snapshot-4.0~svn290870/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 1529, __PRETTY_FUNCTION__)) | |||
1528 | ObjectVT) == NumElts &&((STI.getTargetLowering()->getNumRegisters(F->getContext (), ObjectVT) == NumElts && "Vector was not scalarized" ) ? static_cast<void> (0) : __assert_fail ("STI.getTargetLowering()->getNumRegisters(F->getContext(), ObjectVT) == NumElts && \"Vector was not scalarized\"" , "/tmp/buildd/llvm-toolchain-snapshot-4.0~svn290870/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 1529, __PRETTY_FUNCTION__)) | |||
1529 | "Vector was not scalarized")((STI.getTargetLowering()->getNumRegisters(F->getContext (), ObjectVT) == NumElts && "Vector was not scalarized" ) ? static_cast<void> (0) : __assert_fail ("STI.getTargetLowering()->getNumRegisters(F->getContext(), ObjectVT) == NumElts && \"Vector was not scalarized\"" , "/tmp/buildd/llvm-toolchain-snapshot-4.0~svn290870/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 1529, __PRETTY_FUNCTION__)); | |||
1530 | unsigned sz = EltVT.getSizeInBits(); | |||
1531 | bool needTruncate = sz < 8; | |||
1532 | ||||
1533 | if (NumElts == 1) { | |||
1534 | // Just a simple load | |||
1535 | SmallVector<EVT, 4> LoadRetVTs; | |||
1536 | if (EltVT == MVT::i1 || EltVT == MVT::i8) { | |||
1537 | // If loading i1/i8 result, generate | |||
1538 | // load.b8 i16 | |||
1539 | // if i1 | |||
1540 | // trunc i16 to i1 | |||
1541 | LoadRetVTs.push_back(MVT::i16); | |||
1542 | } else | |||
1543 | LoadRetVTs.push_back(EltVT); | |||
1544 | LoadRetVTs.push_back(MVT::Other); | |||
1545 | LoadRetVTs.push_back(MVT::Glue); | |||
1546 | SDValue LoadRetOps[] = {Chain, DAG.getConstant(1, dl, MVT::i32), | |||
1547 | DAG.getConstant(0, dl, MVT::i32), InFlag}; | |||
1548 | SDValue retval = DAG.getMemIntrinsicNode( | |||
1549 | NVPTXISD::LoadParam, dl, | |||
1550 | DAG.getVTList(LoadRetVTs), LoadRetOps, EltVT, MachinePointerInfo()); | |||
1551 | Chain = retval.getValue(1); | |||
1552 | InFlag = retval.getValue(2); | |||
1553 | SDValue Ret0 = retval; | |||
1554 | if (needTruncate) | |||
1555 | Ret0 = DAG.getNode(ISD::TRUNCATE, dl, EltVT, Ret0); | |||
1556 | InVals.push_back(Ret0); | |||
1557 | } else if (NumElts == 2) { | |||
1558 | // LoadV2 | |||
1559 | SmallVector<EVT, 4> LoadRetVTs; | |||
1560 | if (EltVT == MVT::i1 || EltVT == MVT::i8) { | |||
1561 | // If loading i1/i8 result, generate | |||
1562 | // load.b8 i16 | |||
1563 | // if i1 | |||
1564 | // trunc i16 to i1 | |||
1565 | LoadRetVTs.push_back(MVT::i16); | |||
1566 | LoadRetVTs.push_back(MVT::i16); | |||
1567 | } else { | |||
1568 | LoadRetVTs.push_back(EltVT); | |||
1569 | LoadRetVTs.push_back(EltVT); | |||
1570 | } | |||
1571 | LoadRetVTs.push_back(MVT::Other); | |||
1572 | LoadRetVTs.push_back(MVT::Glue); | |||
1573 | SDValue LoadRetOps[] = {Chain, DAG.getConstant(1, dl, MVT::i32), | |||
1574 | DAG.getConstant(0, dl, MVT::i32), InFlag}; | |||
1575 | SDValue retval = DAG.getMemIntrinsicNode( | |||
1576 | NVPTXISD::LoadParamV2, dl, | |||
1577 | DAG.getVTList(LoadRetVTs), LoadRetOps, EltVT, MachinePointerInfo()); | |||
1578 | Chain = retval.getValue(2); | |||
1579 | InFlag = retval.getValue(3); | |||
1580 | SDValue Ret0 = retval.getValue(0); | |||
1581 | SDValue Ret1 = retval.getValue(1); | |||
1582 | if (needTruncate) { | |||
1583 | Ret0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ret0); | |||
1584 | InVals.push_back(Ret0); | |||
1585 | Ret1 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ret1); | |||
1586 | InVals.push_back(Ret1); | |||
1587 | } else { | |||
1588 | InVals.push_back(Ret0); | |||
1589 | InVals.push_back(Ret1); | |||
1590 | } | |||
1591 | } else { | |||
1592 | // Split into N LoadV4 | |||
1593 | unsigned Ofst = 0; | |||
1594 | unsigned VecSize = 4; | |||
1595 | unsigned Opc = NVPTXISD::LoadParamV4; | |||
1596 | if (EltVT.getSizeInBits() == 64) { | |||
1597 | VecSize = 2; | |||
1598 | Opc = NVPTXISD::LoadParamV2; | |||
1599 | } | |||
1600 | EVT VecVT = EVT::getVectorVT(F->getContext(), EltVT, VecSize); | |||
1601 | for (unsigned i = 0; i < NumElts; i += VecSize) { | |||
1602 | SmallVector<EVT, 8> LoadRetVTs; | |||
1603 | if (EltVT == MVT::i1 || EltVT == MVT::i8) { | |||
1604 | // If loading i1/i8 result, generate | |||
1605 | // load.b8 i16 | |||
1606 | // if i1 | |||
1607 | // trunc i16 to i1 | |||
1608 | for (unsigned j = 0; j < VecSize; ++j) | |||
1609 | LoadRetVTs.push_back(MVT::i16); | |||
1610 | } else { | |||
1611 | for (unsigned j = 0; j < VecSize; ++j) | |||
1612 | LoadRetVTs.push_back(EltVT); | |||
1613 | } | |||
1614 | LoadRetVTs.push_back(MVT::Other); | |||
1615 | LoadRetVTs.push_back(MVT::Glue); | |||
1616 | SDValue LoadRetOps[] = {Chain, DAG.getConstant(1, dl, MVT::i32), | |||
1617 | DAG.getConstant(Ofst, dl, MVT::i32), InFlag}; | |||
1618 | SDValue retval = DAG.getMemIntrinsicNode( | |||
1619 | Opc, dl, DAG.getVTList(LoadRetVTs), | |||
1620 | LoadRetOps, EltVT, MachinePointerInfo()); | |||
1621 | if (VecSize == 2) { | |||
1622 | Chain = retval.getValue(2); | |||
1623 | InFlag = retval.getValue(3); | |||
1624 | } else { | |||
1625 | Chain = retval.getValue(4); | |||
1626 | InFlag = retval.getValue(5); | |||
1627 | } | |||
1628 | ||||
1629 | for (unsigned j = 0; j < VecSize; ++j) { | |||
1630 | if (i + j >= NumElts) | |||
1631 | break; | |||
1632 | SDValue Elt = retval.getValue(j); | |||
1633 | if (needTruncate) | |||
1634 | Elt = DAG.getNode(ISD::TRUNCATE, dl, EltVT, Elt); | |||
1635 | InVals.push_back(Elt); | |||
1636 | } | |||
1637 | Ofst += DL.getTypeAllocSize(VecVT.getTypeForEVT(F->getContext())); | |||
1638 | } | |||
1639 | } | |||
1640 | } else { | |||
1641 | SmallVector<EVT, 16> VTs; | |||
1642 | SmallVector<uint64_t, 16> Offsets; | |||
1643 | auto &DL = DAG.getDataLayout(); | |||
1644 | ComputePTXValueVTs(*this, DL, retTy, VTs, &Offsets, 0); | |||
1645 | assert(VTs.size() == Ins.size() && "Bad value decomposition")((VTs.size() == Ins.size() && "Bad value decomposition" ) ? static_cast<void> (0) : __assert_fail ("VTs.size() == Ins.size() && \"Bad value decomposition\"" , "/tmp/buildd/llvm-toolchain-snapshot-4.0~svn290870/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 1645, __PRETTY_FUNCTION__)); | |||
1646 | unsigned RetAlign = getArgumentAlignment(Callee, CS, retTy, 0, DL); | |||
1647 | for (unsigned i = 0, e = Ins.size(); i != e; ++i) { | |||
1648 | unsigned sz = VTs[i].getSizeInBits(); | |||
1649 | unsigned AlignI = GreatestCommonDivisor64(RetAlign, Offsets[i]); | |||
1650 | bool needTruncate = false; | |||
1651 | if (VTs[i].isInteger() && sz < 8) { | |||
1652 | sz = 8; | |||
1653 | needTruncate = true; | |||
1654 | } | |||
1655 | ||||
1656 | SmallVector<EVT, 4> LoadRetVTs; | |||
1657 | EVT TheLoadType = VTs[i]; | |||
1658 | if (retTy->isIntegerTy() && DL.getTypeAllocSizeInBits(retTy) < 32) { | |||
| ||||
1659 | // This is for integer types only, and specifically not for | |||
1660 | // aggregates. | |||
1661 | LoadRetVTs.push_back(MVT::i32); | |||
1662 | TheLoadType = MVT::i32; | |||
1663 | needTruncate = true; | |||
1664 | } else if (sz < 16) { | |||
1665 | // If loading i1/i8 result, generate | |||
1666 | // load i8 (-> i16) | |||
1667 | // trunc i16 to i1/i8 | |||
1668 | ||||
1669 | // FIXME: Do we need to set needTruncate to true here, too? We could | |||
1670 | // not figure out what this branch is for in D17872, so we left it | |||
1671 | // alone. The comment above about loading i1/i8 may be wrong, as the | |||
1672 | // branch above seems to cover integers of size < 32. | |||
1673 | LoadRetVTs.push_back(MVT::i16); | |||
1674 | } else | |||
1675 | LoadRetVTs.push_back(Ins[i].VT); | |||
1676 | LoadRetVTs.push_back(MVT::Other); | |||
1677 | LoadRetVTs.push_back(MVT::Glue); | |||
1678 | ||||
1679 | SDValue LoadRetOps[] = {Chain, DAG.getConstant(1, dl, MVT::i32), | |||
1680 | DAG.getConstant(Offsets[i], dl, MVT::i32), | |||
1681 | InFlag}; | |||
1682 | SDValue retval = DAG.getMemIntrinsicNode( | |||
1683 | NVPTXISD::LoadParam, dl, | |||
1684 | DAG.getVTList(LoadRetVTs), LoadRetOps, | |||
1685 | TheLoadType, MachinePointerInfo(), AlignI); | |||
1686 | Chain = retval.getValue(1); | |||
1687 | InFlag = retval.getValue(2); | |||
1688 | SDValue Ret0 = retval.getValue(0); | |||
1689 | if (needTruncate) | |||
1690 | Ret0 = DAG.getNode(ISD::TRUNCATE, dl, Ins[i].VT, Ret0); | |||
1691 | InVals.push_back(Ret0); | |||
1692 | } | |||
1693 | } | |||
1694 | } | |||
1695 | ||||
1696 | Chain = DAG.getCALLSEQ_END(Chain, | |||
1697 | DAG.getIntPtrConstant(uniqueCallSite, dl, true), | |||
1698 | DAG.getIntPtrConstant(uniqueCallSite + 1, dl, | |||
1699 | true), | |||
1700 | InFlag, dl); | |||
1701 | uniqueCallSite++; | |||
1702 | ||||
1703 | // set isTailCall to false for now, until we figure out how to express | |||
1704 | // tail call optimization in PTX | |||
1705 | isTailCall = false; | |||
1706 | return Chain; | |||
1707 | } | |||
1708 | ||||
1709 | // By default CONCAT_VECTORS is lowered by ExpandVectorBuildThroughStack() | |||
1710 | // (see LegalizeDAG.cpp). This is slow and uses local memory. | |||
1711 | // We use extract/insert/build vector just as what LegalizeOp() does in llvm 2.5 | |||
1712 | SDValue | |||
1713 | NVPTXTargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const { | |||
1714 | SDNode *Node = Op.getNode(); | |||
1715 | SDLoc dl(Node); | |||
1716 | SmallVector<SDValue, 8> Ops; | |||
1717 | unsigned NumOperands = Node->getNumOperands(); | |||
1718 | for (unsigned i = 0; i < NumOperands; ++i) { | |||
1719 | SDValue SubOp = Node->getOperand(i); | |||
1720 | EVT VVT = SubOp.getNode()->getValueType(0); | |||
1721 | EVT EltVT = VVT.getVectorElementType(); | |||
1722 | unsigned NumSubElem = VVT.getVectorNumElements(); | |||
1723 | for (unsigned j = 0; j < NumSubElem; ++j) { | |||
1724 | Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, SubOp, | |||
1725 | DAG.getIntPtrConstant(j, dl))); | |||
1726 | } | |||
1727 | } | |||
1728 | return DAG.getBuildVector(Node->getValueType(0), dl, Ops); | |||
1729 | } | |||
1730 | ||||
1731 | /// LowerShiftRightParts - Lower SRL_PARTS, SRA_PARTS, which | |||
1732 | /// 1) returns two i32 values and take a 2 x i32 value to shift plus a shift | |||
1733 | /// amount, or | |||
1734 | /// 2) returns two i64 values and take a 2 x i64 value to shift plus a shift | |||
1735 | /// amount. | |||
1736 | SDValue NVPTXTargetLowering::LowerShiftRightParts(SDValue Op, | |||
1737 | SelectionDAG &DAG) const { | |||
1738 | assert(Op.getNumOperands() == 3 && "Not a double-shift!")((Op.getNumOperands() == 3 && "Not a double-shift!") ? static_cast<void> (0) : __assert_fail ("Op.getNumOperands() == 3 && \"Not a double-shift!\"" , "/tmp/buildd/llvm-toolchain-snapshot-4.0~svn290870/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 1738, __PRETTY_FUNCTION__)); | |||
1739 | assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS)((Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD:: SRL_PARTS) ? static_cast<void> (0) : __assert_fail ("Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS" , "/tmp/buildd/llvm-toolchain-snapshot-4.0~svn290870/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 1739, __PRETTY_FUNCTION__)); | |||
1740 | ||||
1741 | EVT VT = Op.getValueType(); | |||
1742 | unsigned VTBits = VT.getSizeInBits(); | |||
1743 | SDLoc dl(Op); | |||
1744 | SDValue ShOpLo = Op.getOperand(0); | |||
1745 | SDValue ShOpHi = Op.getOperand(1); | |||
1746 | SDValue ShAmt = Op.getOperand(2); | |||
1747 | unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL; | |||
1748 | ||||
1749 | if (VTBits == 32 && STI.getSmVersion() >= 35) { | |||
1750 | ||||
1751 | // For 32bit and sm35, we can use the funnel shift 'shf' instruction. | |||
1752 | // {dHi, dLo} = {aHi, aLo} >> Amt | |||
1753 | // dHi = aHi >> Amt | |||
1754 | // dLo = shf.r.clamp aLo, aHi, Amt | |||
1755 | ||||
1756 | SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt); | |||
1757 | SDValue Lo = DAG.getNode(NVPTXISD::FUN_SHFR_CLAMP, dl, VT, ShOpLo, ShOpHi, | |||
1758 | ShAmt); | |||
1759 | ||||
1760 | SDValue Ops[2] = { Lo, Hi }; | |||
1761 | return DAG.getMergeValues(Ops, dl); | |||
1762 | } | |||
1763 | else { | |||
1764 | ||||
1765 | // {dHi, dLo} = {aHi, aLo} >> Amt | |||
1766 | // - if (Amt>=size) then | |||
1767 | // dLo = aHi >> (Amt-size) | |||
1768 | // dHi = aHi >> Amt (this is either all 0 or all 1) | |||
1769 | // else | |||
1770 | // dLo = (aLo >>logic Amt) | (aHi << (size-Amt)) | |||
1771 | // dHi = aHi >> Amt | |||
1772 | ||||
1773 | SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, | |||
1774 | DAG.getConstant(VTBits, dl, MVT::i32), | |||
1775 | ShAmt); | |||
1776 | SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt); | |||
1777 | SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt, | |||
1778 | DAG.getConstant(VTBits, dl, MVT::i32)); | |||
1779 | SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt); | |||
1780 | SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); | |||
1781 | SDValue TrueVal = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt); | |||
1782 | ||||
1783 | SDValue Cmp = DAG.getSetCC(dl, MVT::i1, ShAmt, | |||
1784 | DAG.getConstant(VTBits, dl, MVT::i32), | |||
1785 | ISD::SETGE); | |||
1786 | SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt); | |||
1787 | SDValue Lo = DAG.getNode(ISD::SELECT, dl, VT, Cmp, TrueVal, FalseVal); | |||
1788 | ||||
1789 | SDValue Ops[2] = { Lo, Hi }; | |||
1790 | return DAG.getMergeValues(Ops, dl); | |||
1791 | } | |||
1792 | } | |||
1793 | ||||
1794 | /// LowerShiftLeftParts - Lower SHL_PARTS, which | |||
1795 | /// 1) returns two i32 values and take a 2 x i32 value to shift plus a shift | |||
1796 | /// amount, or | |||
1797 | /// 2) returns two i64 values and take a 2 x i64 value to shift plus a shift | |||
1798 | /// amount. | |||
1799 | SDValue NVPTXTargetLowering::LowerShiftLeftParts(SDValue Op, | |||
1800 | SelectionDAG &DAG) const { | |||
1801 | assert(Op.getNumOperands() == 3 && "Not a double-shift!")((Op.getNumOperands() == 3 && "Not a double-shift!") ? static_cast<void> (0) : __assert_fail ("Op.getNumOperands() == 3 && \"Not a double-shift!\"" , "/tmp/buildd/llvm-toolchain-snapshot-4.0~svn290870/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 1801, __PRETTY_FUNCTION__)); | |||
1802 | assert(Op.getOpcode() == ISD::SHL_PARTS)((Op.getOpcode() == ISD::SHL_PARTS) ? static_cast<void> (0) : __assert_fail ("Op.getOpcode() == ISD::SHL_PARTS", "/tmp/buildd/llvm-toolchain-snapshot-4.0~svn290870/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 1802, __PRETTY_FUNCTION__)); | |||
1803 | ||||
1804 | EVT VT = Op.getValueType(); | |||
1805 | unsigned VTBits = VT.getSizeInBits(); | |||
1806 | SDLoc dl(Op); | |||
1807 | SDValue ShOpLo = Op.getOperand(0); | |||
1808 | SDValue ShOpHi = Op.getOperand(1); | |||
1809 | SDValue ShAmt = Op.getOperand(2); | |||
1810 | ||||
1811 | if (VTBits == 32 && STI.getSmVersion() >= 35) { | |||
1812 | ||||
1813 | // For 32bit and sm35, we can use the funnel shift 'shf' instruction. | |||
1814 | // {dHi, dLo} = {aHi, aLo} << Amt | |||
1815 | // dHi = shf.l.clamp aLo, aHi, Amt | |||
1816 | // dLo = aLo << Amt | |||
1817 | ||||
1818 | SDValue Hi = DAG.getNode(NVPTXISD::FUN_SHFL_CLAMP, dl, VT, ShOpLo, ShOpHi, | |||
1819 | ShAmt); | |||
1820 | SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt); | |||
1821 | ||||
1822 | SDValue Ops[2] = { Lo, Hi }; | |||
1823 | return DAG.getMergeValues(Ops, dl); | |||
1824 | } | |||
1825 | else { | |||
1826 | ||||
1827 | // {dHi, dLo} = {aHi, aLo} << Amt | |||
1828 | // - if (Amt>=size) then | |||
1829 | // dLo = aLo << Amt (all 0) | |||
1830 | // dLo = aLo << (Amt-size) | |||
1831 | // else | |||
1832 | // dLo = aLo << Amt | |||
1833 | // dHi = (aHi << Amt) | (aLo >> (size-Amt)) | |||
1834 | ||||
1835 | SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, | |||
1836 | DAG.getConstant(VTBits, dl, MVT::i32), | |||
1837 | ShAmt); | |||
1838 | SDValue Tmp1 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt); | |||
1839 | SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt, | |||
1840 | DAG.getConstant(VTBits, dl, MVT::i32)); | |||
1841 | SDValue Tmp2 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt); | |||
1842 | SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); | |||
1843 | SDValue TrueVal = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt); | |||
1844 | ||||
1845 | SDValue Cmp = DAG.getSetCC(dl, MVT::i1, ShAmt, | |||
1846 | DAG.getConstant(VTBits, dl, MVT::i32), | |||
1847 | ISD::SETGE); | |||
1848 | SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt); | |||
1849 | SDValue Hi = DAG.getNode(ISD::SELECT, dl, VT, Cmp, TrueVal, FalseVal); | |||
1850 | ||||
1851 | SDValue Ops[2] = { Lo, Hi }; | |||
1852 | return DAG.getMergeValues(Ops, dl); | |||
1853 | } | |||
1854 | } | |||
1855 | ||||
1856 | SDValue | |||
1857 | NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { | |||
1858 | switch (Op.getOpcode()) { | |||
1859 | case ISD::RETURNADDR: | |||
1860 | return SDValue(); | |||
1861 | case ISD::FRAMEADDR: | |||
1862 | return SDValue(); | |||
1863 | case ISD::GlobalAddress: | |||
1864 | return LowerGlobalAddress(Op, DAG); | |||
1865 | case ISD::INTRINSIC_W_CHAIN: | |||
1866 | return Op; | |||
1867 | case ISD::BUILD_VECTOR: | |||
1868 | case ISD::EXTRACT_SUBVECTOR: | |||
1869 | return Op; | |||
1870 | case ISD::CONCAT_VECTORS: | |||
1871 | return LowerCONCAT_VECTORS(Op, DAG); | |||
1872 | case ISD::STORE: | |||
1873 | return LowerSTORE(Op, DAG); | |||
1874 | case ISD::LOAD: | |||
1875 | return LowerLOAD(Op, DAG); | |||
1876 | case ISD::SHL_PARTS: | |||
1877 | return LowerShiftLeftParts(Op, DAG); | |||
1878 | case ISD::SRA_PARTS: | |||
1879 | case ISD::SRL_PARTS: | |||
1880 | return LowerShiftRightParts(Op, DAG); | |||
1881 | case ISD::SELECT: | |||
1882 | return LowerSelect(Op, DAG); | |||
1883 | default: | |||
1884 | llvm_unreachable("Custom lowering not defined for operation")::llvm::llvm_unreachable_internal("Custom lowering not defined for operation" , "/tmp/buildd/llvm-toolchain-snapshot-4.0~svn290870/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 1884); | |||
1885 | } | |||
1886 | } | |||
1887 | ||||
1888 | SDValue NVPTXTargetLowering::LowerSelect(SDValue Op, SelectionDAG &DAG) const { | |||
1889 | SDValue Op0 = Op->getOperand(0); | |||
1890 | SDValue Op1 = Op->getOperand(1); | |||
1891 | SDValue Op2 = Op->getOperand(2); | |||
1892 | SDLoc DL(Op.getNode()); | |||
1893 | ||||
1894 | assert(Op.getValueType() == MVT::i1 && "Custom lowering enabled only for i1")((Op.getValueType() == MVT::i1 && "Custom lowering enabled only for i1" ) ? static_cast<void> (0) : __assert_fail ("Op.getValueType() == MVT::i1 && \"Custom lowering enabled only for i1\"" , "/tmp/buildd/llvm-toolchain-snapshot-4.0~svn290870/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 1894, __PRETTY_FUNCTION__)); | |||
1895 | ||||
1896 | Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op1); | |||
1897 | Op2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op2); | |||
1898 | SDValue Select = DAG.getNode(ISD::SELECT, DL, MVT::i32, Op0, Op1, Op2); | |||
1899 | SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Select); | |||
1900 | ||||
1901 | return Trunc; | |||
1902 | } | |||
1903 | ||||
1904 | SDValue NVPTXTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { | |||
1905 | if (Op.getValueType() == MVT::i1) | |||
1906 | return LowerLOADi1(Op, DAG); | |||
1907 | else | |||
1908 | return SDValue(); | |||
1909 | } | |||
1910 | ||||
1911 | // v = ld i1* addr | |||
1912 | // => | |||
1913 | // v1 = ld i8* addr (-> i16) | |||
1914 | // v = trunc i16 to i1 | |||
1915 | SDValue NVPTXTargetLowering::LowerLOADi1(SDValue Op, SelectionDAG &DAG) const { | |||
1916 | SDNode *Node = Op.getNode(); | |||
1917 | LoadSDNode *LD = cast<LoadSDNode>(Node); | |||
1918 | SDLoc dl(Node); | |||
1919 | assert(LD->getExtensionType() == ISD::NON_EXTLOAD)((LD->getExtensionType() == ISD::NON_EXTLOAD) ? static_cast <void> (0) : __assert_fail ("LD->getExtensionType() == ISD::NON_EXTLOAD" , "/tmp/buildd/llvm-toolchain-snapshot-4.0~svn290870/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 1919, __PRETTY_FUNCTION__)); | |||
1920 | assert(Node->getValueType(0) == MVT::i1 &&((Node->getValueType(0) == MVT::i1 && "Custom lowering for i1 load only" ) ? static_cast<void> (0) : __assert_fail ("Node->getValueType(0) == MVT::i1 && \"Custom lowering for i1 load only\"" , "/tmp/buildd/llvm-toolchain-snapshot-4.0~svn290870/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 1921, __PRETTY_FUNCTION__)) | |||
1921 | "Custom lowering for i1 load only")((Node->getValueType(0) == MVT::i1 && "Custom lowering for i1 load only" ) ? static_cast<void> (0) : __assert_fail ("Node->getValueType(0) == MVT::i1 && \"Custom lowering for i1 load only\"" , "/tmp/buildd/llvm-toolchain-snapshot-4.0~svn290870/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 1921, __PRETTY_FUNCTION__)); | |||
1922 | SDValue newLD = DAG.getLoad(MVT::i16, dl, LD->getChain(), LD->getBasePtr(), | |||
1923 | LD->getPointerInfo(), LD->getAlignment(), | |||
1924 | LD->getMemOperand()->getFlags()); | |||
1925 | SDValue result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, newLD); | |||
1926 | // The legalizer (the caller) is expecting two values from the legalized | |||
1927 | // load, so we build a MergeValues node for it. See ExpandUnalignedLoad() | |||
1928 | // in LegalizeDAG.cpp which also uses MergeValues. | |||
1929 | SDValue Ops[] = { result, LD->getChain() }; | |||
1930 | return DAG.getMergeValues(Ops, dl); | |||
1931 | } | |||
1932 | ||||
1933 | SDValue NVPTXTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { | |||
1934 | EVT ValVT = Op.getOperand(1).getValueType(); | |||
1935 | if (ValVT == MVT::i1) | |||
1936 | return LowerSTOREi1(Op, DAG); | |||
1937 | else if (ValVT.isVector()) | |||
1938 | return LowerSTOREVector(Op, DAG); | |||
1939 | else | |||
1940 | return SDValue(); | |||
1941 | } | |||
1942 | ||||
1943 | SDValue | |||
1944 | NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const { | |||
1945 | SDNode *N = Op.getNode(); | |||
1946 | SDValue Val = N->getOperand(1); | |||
1947 | SDLoc DL(N); | |||
1948 | EVT ValVT = Val.getValueType(); | |||
1949 | ||||
1950 | if (ValVT.isVector()) { | |||
1951 | // We only handle "native" vector sizes for now, e.g. <4 x double> is not | |||
1952 | // legal. We can (and should) split that into 2 stores of <2 x double> here | |||
1953 | // but I'm leaving that as a TODO for now. | |||
1954 | if (!ValVT.isSimple()) | |||
1955 | return SDValue(); | |||
1956 | switch (ValVT.getSimpleVT().SimpleTy) { | |||
1957 | default: | |||
1958 | return SDValue(); | |||
1959 | case MVT::v2i8: | |||
1960 | case MVT::v2i16: | |||
1961 | case MVT::v2i32: | |||
1962 | case MVT::v2i64: | |||
1963 | case MVT::v2f32: | |||
1964 | case MVT::v2f64: | |||
1965 | case MVT::v4i8: | |||
1966 | case MVT::v4i16: | |||
1967 | case MVT::v4i32: | |||
1968 | case MVT::v4f32: | |||
1969 | // This is a "native" vector type | |||
1970 | break; | |||
1971 | } | |||
1972 | ||||
1973 | MemSDNode *MemSD = cast<MemSDNode>(N); | |||
1974 | const DataLayout &TD = DAG.getDataLayout(); | |||
1975 | ||||
1976 | unsigned Align = MemSD->getAlignment(); | |||
1977 | unsigned PrefAlign = | |||
1978 | TD.getPrefTypeAlignment(ValVT.getTypeForEVT(*DAG.getContext())); | |||
1979 | if (Align < PrefAlign) { | |||
1980 | // This store is not sufficiently aligned, so bail out and let this vector | |||
1981 | // store be scalarized. Note that we may still be able to emit smaller | |||
1982 | // vector stores. For example, if we are storing a <4 x float> with an | |||
1983 | // alignment of 8, this check will fail but the legalizer will try again | |||
1984 | // with 2 x <2 x float>, which will succeed with an alignment of 8. | |||
1985 | return SDValue(); | |||
1986 | } | |||
1987 | ||||
1988 | unsigned Opcode = 0; | |||
1989 | EVT EltVT = ValVT.getVectorElementType(); | |||
1990 | unsigned NumElts = ValVT.getVectorNumElements(); | |||
1991 | ||||
1992 | // Since StoreV2 is a target node, we cannot rely on DAG type legalization. | |||
1993 | // Therefore, we must ensure the type is legal. For i1 and i8, we set the | |||
1994 | // stored type to i16 and propagate the "real" type as the memory type. | |||
1995 | bool NeedExt = false; | |||
1996 | if (EltVT.getSizeInBits() < 16) | |||
1997 | NeedExt = true; | |||
1998 | ||||
1999 | switch (NumElts) { | |||
2000 | default: | |||
2001 | return SDValue(); | |||
2002 | case 2: | |||
2003 | Opcode = NVPTXISD::StoreV2; | |||
2004 | break; | |||
2005 | case 4: { | |||
2006 | Opcode = NVPTXISD::StoreV4; | |||
2007 | break; | |||
2008 | } | |||
2009 | } | |||
2010 | ||||
2011 | SmallVector<SDValue, 8> Ops; | |||
2012 | ||||
2013 | // First is the chain | |||
2014 | Ops.push_back(N->getOperand(0)); | |||
2015 | ||||
2016 | // Then the split values | |||
2017 | for (unsigned i = 0; i < NumElts; ++i) { | |||
2018 | SDValue ExtVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Val, | |||
2019 | DAG.getIntPtrConstant(i, DL)); | |||
2020 | if (NeedExt) | |||
2021 | ExtVal = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i16, ExtVal); | |||
2022 | Ops.push_back(ExtVal); | |||
2023 | } | |||
2024 | ||||
2025 | // Then any remaining arguments | |||
2026 | Ops.append(N->op_begin() + 2, N->op_end()); | |||
2027 | ||||
2028 | SDValue NewSt = DAG.getMemIntrinsicNode( | |||
2029 | Opcode, DL, DAG.getVTList(MVT::Other), Ops, | |||
2030 | MemSD->getMemoryVT(), MemSD->getMemOperand()); | |||
2031 | ||||
2032 | //return DCI.CombineTo(N, NewSt, true); | |||
2033 | return NewSt; | |||
2034 | } | |||
2035 | ||||
2036 | return SDValue(); | |||
2037 | } | |||
2038 | ||||
2039 | // st i1 v, addr | |||
2040 | // => | |||
2041 | // v1 = zxt v to i16 | |||
2042 | // st.u8 i16, addr | |||
2043 | SDValue NVPTXTargetLowering::LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const { | |||
2044 | SDNode *Node = Op.getNode(); | |||
2045 | SDLoc dl(Node); | |||
2046 | StoreSDNode *ST = cast<StoreSDNode>(Node); | |||
2047 | SDValue Tmp1 = ST->getChain(); | |||
2048 | SDValue Tmp2 = ST->getBasePtr(); | |||
2049 | SDValue Tmp3 = ST->getValue(); | |||
2050 | assert(Tmp3.getValueType() == MVT::i1 && "Custom lowering for i1 store only")((Tmp3.getValueType() == MVT::i1 && "Custom lowering for i1 store only" ) ? static_cast<void> (0) : __assert_fail ("Tmp3.getValueType() == MVT::i1 && \"Custom lowering for i1 store only\"" , "/tmp/buildd/llvm-toolchain-snapshot-4.0~svn290870/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 2050, __PRETTY_FUNCTION__)); | |||
2051 | Tmp3 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Tmp3); | |||
2052 | SDValue Result = | |||
2053 | DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), MVT::i8, | |||
2054 | ST->getAlignment(), ST->getMemOperand()->getFlags()); | |||
2055 | return Result; | |||
2056 | } | |||
2057 | ||||
2058 | SDValue | |||
2059 | NVPTXTargetLowering::getParamSymbol(SelectionDAG &DAG, int idx, EVT v) const { | |||
2060 | std::string ParamSym; | |||
2061 | raw_string_ostream ParamStr(ParamSym); | |||
2062 | ||||
2063 | ParamStr << DAG.getMachineFunction().getName() << "_param_" << idx; | |||
2064 | ParamStr.flush(); | |||
2065 | ||||
2066 | std::string *SavedStr = | |||
2067 | nvTM->getManagedStrPool()->getManagedString(ParamSym.c_str()); | |||
2068 | return DAG.getTargetExternalSymbol(SavedStr->c_str(), v); | |||
2069 | } | |||
2070 | ||||
2071 | // Check to see if the kernel argument is image*_t or sampler_t | |||
2072 | ||||
2073 | static bool isImageOrSamplerVal(const Value *arg, const Module *context) { | |||
2074 | static const char *const specialTypes[] = { "struct._image2d_t", | |||
2075 | "struct._image3d_t", | |||
2076 | "struct._sampler_t" }; | |||
2077 | ||||
2078 | Type *Ty = arg->getType(); | |||
2079 | auto *PTy = dyn_cast<PointerType>(Ty); | |||
2080 | ||||
2081 | if (!PTy) | |||
2082 | return false; | |||
2083 | ||||
2084 | if (!context) | |||
2085 | return false; | |||
2086 | ||||
2087 | auto *STy = dyn_cast<StructType>(PTy->getElementType()); | |||
2088 | if (!STy || STy->isLiteral()) | |||
2089 | return false; | |||
2090 | ||||
2091 | return std::find(std::begin(specialTypes), std::end(specialTypes), | |||
2092 | STy->getName()) != std::end(specialTypes); | |||
2093 | } | |||
2094 | ||||
/// Lowers the formal arguments of the function being compiled: for every IR
/// argument one or more SDValues are appended to InVals, and the incoming
/// Chain is returned unchanged.
///
/// Per argument, in the order the checks are made:
///  - image/sampler pointer arguments become the i32 constant (i + 1);
///  - arguments without uses become UNDEF nodes typed after the Ins entries;
///  - non-byval aggregates are loaded part by part from the parameter symbol;
///  - non-byval vectors are loaded as one scalar, one 2-element vector, or a
///    sequence of fixed-size vector loads whose elements are then extracted;
///  - any other non-byval argument is read with one (possibly extending) load;
///  - byval arguments become an NVPTXISD::MoveParam of the parameter symbol.
///
/// InsIdx indexes Ins separately from the argument index i, because a single
/// IR argument may occupy several Ins entries.
2095 | SDValue NVPTXTargetLowering::LowerFormalArguments( | |||
2096 | SDValue Chain, CallingConv::ID CallConv, bool isVarArg, | |||
2097 | const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, | |||
2098 | SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { | |||
2099 | MachineFunction &MF = DAG.getMachineFunction(); | |||
2100 | const DataLayout &DL = DAG.getDataLayout(); | |||
2101 | auto PtrVT = getPointerTy(DAG.getDataLayout()); | |||
2102 | ||||
2103 | const Function *F = MF.getFunction(); | |||
2104 | const AttributeSet &PAL = F->getAttributes(); | |||
2105 | const TargetLowering *TLI = STI.getTargetLowering(); | |||
2106 | ||||
// Every parameter load below is chained to the DAG root captured here.
2107 | SDValue Root = DAG.getRoot(); | |||
// NOTE(review): nothing in this function appends to OutChains, so the
// TokenFactor/setRoot step at the end never executes as written.
2108 | std::vector<SDValue> OutChains; | |||
2109 | ||||
2110 | bool isABI = (STI.getSmVersion() >= 20); | |||
2111 | assert(isABI && "Non-ABI compilation is not supported")((isABI && "Non-ABI compilation is not supported") ? static_cast <void> (0) : __assert_fail ("isABI && \"Non-ABI compilation is not supported\"" , "/tmp/buildd/llvm-toolchain-snapshot-4.0~svn290870/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 2111, __PRETTY_FUNCTION__)); | |||
// Only reachable when the assert above is compiled out.
2112 | if (!isABI) | |||
2113 | return Chain; | |||
2114 | ||||
2115 | std::vector<Type *> argTypes; | |||
2116 | std::vector<const Argument *> theArgs; | |||
2117 | for (const Argument &I : F->args()) { | |||
2118 | theArgs.push_back(&I); | |||
2119 | argTypes.push_back(I.getType()); | |||
2120 | } | |||
2121 | // argTypes.size() (or theArgs.size()) and Ins.size() need not match. | |||
2122 | // Ins.size() will be larger | |||
2123 | // * if there is an aggregate argument with multiple fields (each field | |||
2124 | // showing up separately in Ins) | |||
2125 | // * if there is a vector argument with more than typical vector-length | |||
2126 | // elements (generally if more than 4) where each vector element is | |||
2127 | // individually present in Ins. | |||
2128 | // So a different index should be used for indexing into Ins. | |||
2129 | // See similar issue in LowerCall. | |||
2130 | unsigned InsIdx = 0; | |||
2131 | ||||
// idx is advanced in lock-step with i; it names the parameter symbol and
// (as idx + 1) the IR order assigned to the nodes created for it.
2132 | int idx = 0; | |||
2133 | for (unsigned i = 0, e = theArgs.size(); i != e; ++i, ++idx, ++InsIdx) { | |||
2134 | Type *Ty = argTypes[i]; | |||
2135 | ||||
2136 | // If the kernel argument is image*_t or sampler_t, convert it to | |||
2137 | // an i32 constant holding the parameter position. This can later be | |||
2138 | // matched in the AsmPrinter to output the correct mangled name. | |||
2139 | if (isImageOrSamplerVal( | |||
2140 | theArgs[i], | |||
2141 | (theArgs[i]->getParent() ? theArgs[i]->getParent()->getParent() | |||
2142 | : nullptr))) { | |||
2143 | assert(llvm::isKernelFunction(*F) &&((llvm::isKernelFunction(*F) && "Only kernels can have image/sampler params" ) ? static_cast<void> (0) : __assert_fail ("llvm::isKernelFunction(*F) && \"Only kernels can have image/sampler params\"" , "/tmp/buildd/llvm-toolchain-snapshot-4.0~svn290870/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 2144, __PRETTY_FUNCTION__)) | |||
2144 | "Only kernels can have image/sampler params")((llvm::isKernelFunction(*F) && "Only kernels can have image/sampler params" ) ? static_cast<void> (0) : __assert_fail ("llvm::isKernelFunction(*F) && \"Only kernels can have image/sampler params\"" , "/tmp/buildd/llvm-toolchain-snapshot-4.0~svn290870/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 2144, __PRETTY_FUNCTION__)); | |||
2145 | InVals.push_back(DAG.getConstant(i + 1, dl, MVT::i32)); | |||
2146 | continue; | |||
2147 | } | |||
2148 | ||||
2149 | if (theArgs[i]->use_empty()) { | |||
2150 | // argument is dead | |||
2151 | if (Ty->isAggregateType()) { | |||
2152 | SmallVector<EVT, 16> vtparts; | |||
2153 | ||||
2154 | ComputePTXValueVTs(*this, DAG.getDataLayout(), Ty, vtparts); | |||
2155 | assert(vtparts.size() > 0 && "empty aggregate type not expected")((vtparts.size() > 0 && "empty aggregate type not expected" ) ? static_cast<void> (0) : __assert_fail ("vtparts.size() > 0 && \"empty aggregate type not expected\"" , "/tmp/buildd/llvm-toolchain-snapshot-4.0~svn290870/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 2155, __PRETTY_FUNCTION__)); | |||
2156 | for (unsigned parti = 0, parte = vtparts.size(); parti != parte; | |||
2157 | ++parti) { | |||
2158 | InVals.push_back(DAG.getNode(ISD::UNDEF, dl, Ins[InsIdx].VT)); | |||
2159 | ++InsIdx; | |||
2160 | } | |||
// The loop header increments InsIdx once more on 'continue', so step back
// by one here.
2161 | if (vtparts.size() > 0) | |||
2162 | --InsIdx; | |||
2163 | continue; | |||
2164 | } | |||
2165 | if (Ty->isVectorTy()) { | |||
2166 | EVT ObjectVT = getValueType(DL, Ty); | |||
2167 | unsigned NumRegs = TLI->getNumRegisters(F->getContext(), ObjectVT); | |||
2168 | for (unsigned parti = 0; parti < NumRegs; ++parti) { | |||
2169 | InVals.push_back(DAG.getNode(ISD::UNDEF, dl, Ins[InsIdx].VT)); | |||
2170 | ++InsIdx; | |||
2171 | } | |||
// Same compensation for the loop header's ++InsIdx as above.
2172 | if (NumRegs > 0) | |||
2173 | --InsIdx; | |||
2174 | continue; | |||
2175 | } | |||
2176 | InVals.push_back(DAG.getNode(ISD::UNDEF, dl, Ins[InsIdx].VT)); | |||
2177 | continue; | |||
2178 | } | |||
2179 | ||||
2180 | // In the following cases, assign a node order of "idx+1" | |||
2181 | // to newly created nodes. The SDNodes for params have to | |||
2182 | // appear in the same order as their order of appearance | |||
2183 | // in the original function. "idx+1" holds that order. | |||
2184 | if (!PAL.hasAttribute(i + 1, Attribute::ByVal)) { | |||
2185 | if (Ty->isAggregateType()) { | |||
2186 | SmallVector<EVT, 16> vtparts; | |||
2187 | SmallVector<uint64_t, 16> offsets; | |||
2188 | ||||
2189 | // NOTE: Here, we lose the ability to issue vector loads for vectors | |||
2190 | // that are a part of a struct. This should be investigated in the | |||
2191 | // future. | |||
2192 | ComputePTXValueVTs(*this, DAG.getDataLayout(), Ty, vtparts, &offsets, | |||
2193 | 0); | |||
2194 | assert(vtparts.size() > 0 && "empty aggregate type not expected")((vtparts.size() > 0 && "empty aggregate type not expected" ) ? static_cast<void> (0) : __assert_fail ("vtparts.size() > 0 && \"empty aggregate type not expected\"" , "/tmp/buildd/llvm-toolchain-snapshot-4.0~svn290870/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 2194, __PRETTY_FUNCTION__)); | |||
2195 | bool aggregateIsPacked = false; | |||
2196 | if (StructType *STy = llvm::dyn_cast<StructType>(Ty)) | |||
2197 | aggregateIsPacked = STy->isPacked(); | |||
2198 | ||||
2199 | SDValue Arg = getParamSymbol(DAG, idx, PtrVT); | |||
// One load per part, addressed at the param symbol plus that part's offset.
2200 | for (unsigned parti = 0, parte = vtparts.size(); parti != parte; | |||
2201 | ++parti) { | |||
2202 | EVT partVT = vtparts[parti]; | |||
2203 | Value *srcValue = Constant::getNullValue( | |||
2204 | PointerType::get(partVT.getTypeForEVT(F->getContext()), | |||
2205 | llvm::ADDRESS_SPACE_PARAM)); | |||
2206 | SDValue srcAddr = | |||
2207 | DAG.getNode(ISD::ADD, dl, PtrVT, Arg, | |||
2208 | DAG.getConstant(offsets[parti], dl, PtrVT)); | |||
// Parts of a packed struct are loaded with alignment 1; otherwise the ABI
// alignment of the part's type is used.
2209 | unsigned partAlign = aggregateIsPacked | |||
2210 | ? 1 | |||
2211 | : DL.getABITypeAlignment( | |||
2212 | partVT.getTypeForEVT(F->getContext())); | |||
2213 | SDValue p; | |||
// When the Ins type is wider than the part, use an extending load whose
// signedness follows the SExt flag of the Ins entry.
2214 | if (Ins[InsIdx].VT.getSizeInBits() > partVT.getSizeInBits()) { | |||
2215 | ISD::LoadExtType ExtOp = Ins[InsIdx].Flags.isSExt() ? | |||
2216 | ISD::SEXTLOAD : ISD::ZEXTLOAD; | |||
2217 | p = DAG.getExtLoad(ExtOp, dl, Ins[InsIdx].VT, Root, srcAddr, | |||
2218 | MachinePointerInfo(srcValue), partVT, partAlign); | |||
2219 | } else { | |||
2220 | p = DAG.getLoad(partVT, dl, Root, srcAddr, | |||
2221 | MachinePointerInfo(srcValue), partAlign); | |||
2222 | } | |||
2223 | if (p.getNode()) | |||
2224 | p.getNode()->setIROrder(idx + 1); | |||
2225 | InVals.push_back(p); | |||
2226 | ++InsIdx; | |||
2227 | } | |||
// Compensate for the loop header's ++InsIdx.
2228 | if (vtparts.size() > 0) | |||
2229 | --InsIdx; | |||
2230 | continue; | |||
2231 | } | |||
2232 | if (Ty->isVectorTy()) { | |||
2233 | EVT ObjectVT = getValueType(DL, Ty); | |||
2234 | SDValue Arg = getParamSymbol(DAG, idx, PtrVT); | |||
2235 | unsigned NumElts = ObjectVT.getVectorNumElements(); | |||
2236 | assert(TLI->getNumRegisters(F->getContext(), ObjectVT) == NumElts &&((TLI->getNumRegisters(F->getContext(), ObjectVT) == NumElts && "Vector was not scalarized") ? static_cast<void > (0) : __assert_fail ("TLI->getNumRegisters(F->getContext(), ObjectVT) == NumElts && \"Vector was not scalarized\"" , "/tmp/buildd/llvm-toolchain-snapshot-4.0~svn290870/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 2237, __PRETTY_FUNCTION__)) | |||
2237 | "Vector was not scalarized")((TLI->getNumRegisters(F->getContext(), ObjectVT) == NumElts && "Vector was not scalarized") ? static_cast<void > (0) : __assert_fail ("TLI->getNumRegisters(F->getContext(), ObjectVT) == NumElts && \"Vector was not scalarized\"" , "/tmp/buildd/llvm-toolchain-snapshot-4.0~svn290870/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 2237, __PRETTY_FUNCTION__)); | |||
2238 | EVT EltVT = ObjectVT.getVectorElementType(); | |||
2239 | ||||
2240 | // V1 load | |||
2241 | // f32 = load ... | |||
2242 | if (NumElts == 1) { | |||
2243 | // We only have one element, so just directly load it | |||
2244 | Value *SrcValue = Constant::getNullValue(PointerType::get( | |||
2245 | EltVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM)); | |||
2246 | SDValue P = DAG.getLoad( | |||
2247 | EltVT, dl, Root, Arg, MachinePointerInfo(SrcValue), | |||
2248 | DL.getABITypeAlignment(EltVT.getTypeForEVT(F->getContext())), | |||
2249 | MachineMemOperand::MODereferenceable | | |||
2250 | MachineMemOperand::MOInvariant); | |||
2251 | if (P.getNode()) | |||
2252 | P.getNode()->setIROrder(idx + 1); | |||
2253 | ||||
2254 | if (Ins[InsIdx].VT.getSizeInBits() > EltVT.getSizeInBits()) | |||
2255 | P = DAG.getNode(ISD::ANY_EXTEND, dl, Ins[InsIdx].VT, P); | |||
2256 | InVals.push_back(P); | |||
2257 | ++InsIdx; | |||
2258 | } else if (NumElts == 2) { | |||
2259 | // V2 load | |||
2260 | // f32,f32 = load ... | |||
2261 | EVT VecVT = EVT::getVectorVT(F->getContext(), EltVT, 2); | |||
2262 | Value *SrcValue = Constant::getNullValue(PointerType::get( | |||
2263 | VecVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM)); | |||
2264 | SDValue P = DAG.getLoad( | |||
2265 | VecVT, dl, Root, Arg, MachinePointerInfo(SrcValue), | |||
2266 | DL.getABITypeAlignment(VecVT.getTypeForEVT(F->getContext())), | |||
2267 | MachineMemOperand::MODereferenceable | | |||
2268 | MachineMemOperand::MOInvariant); | |||
2269 | if (P.getNode()) | |||
2270 | P.getNode()->setIROrder(idx + 1); | |||
2271 | ||||
2272 | SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, P, | |||
2273 | DAG.getIntPtrConstant(0, dl)); | |||
2274 | SDValue Elt1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, P, | |||
2275 | DAG.getIntPtrConstant(1, dl)); | |||
2276 | ||||
2277 | if (Ins[InsIdx].VT.getSizeInBits() > EltVT.getSizeInBits()) { | |||
2278 | Elt0 = DAG.getNode(ISD::ANY_EXTEND, dl, Ins[InsIdx].VT, Elt0); | |||
2279 | Elt1 = DAG.getNode(ISD::ANY_EXTEND, dl, Ins[InsIdx].VT, Elt1); | |||
2280 | } | |||
2281 | ||||
2282 | InVals.push_back(Elt0); | |||
2283 | InVals.push_back(Elt1); | |||
2284 | InsIdx += 2; | |||
2285 | } else { | |||
2286 | // V4 loads | |||
2287 | // We have at least 4 elements (<3 x Ty> expands to 4 elements) and | |||
2288 | // the vector will be expanded to a power of 2 elements, so we know we | |||
2289 | // can always round up to the next multiple of 4 when creating the | |||
2290 | // vector loads. | |||
2291 | // e.g. 4 elem => 1 ld.v4 | |||
2292 | // 6 elem => 2 ld.v4 | |||
2293 | // 8 elem => 2 ld.v4 | |||
2294 | // 11 elem => 3 ld.v4 | |||
// 64-bit elements are loaded two at a time instead of four.
2295 | unsigned VecSize = 4; | |||
2296 | if (EltVT.getSizeInBits() == 64) { | |||
2297 | VecSize = 2; | |||
2298 | } | |||
2299 | EVT VecVT = EVT::getVectorVT(F->getContext(), EltVT, VecSize); | |||
2300 | unsigned Ofst = 0; | |||
// NOTE: this 'i' shadows the argument index 'i' of the enclosing loop.
2301 | for (unsigned i = 0; i < NumElts; i += VecSize) { | |||
2302 | Value *SrcValue = Constant::getNullValue( | |||
2303 | PointerType::get(VecVT.getTypeForEVT(F->getContext()), | |||
2304 | llvm::ADDRESS_SPACE_PARAM)); | |||
2305 | SDValue SrcAddr = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, | |||
2306 | DAG.getConstant(Ofst, dl, PtrVT)); | |||
2307 | SDValue P = DAG.getLoad( | |||
2308 | VecVT, dl, Root, SrcAddr, MachinePointerInfo(SrcValue), | |||
2309 | DL.getABITypeAlignment(VecVT.getTypeForEVT(F->getContext())), | |||
2310 | MachineMemOperand::MODereferenceable | | |||
2311 | MachineMemOperand::MOInvariant); | |||
2312 | if (P.getNode()) | |||
2313 | P.getNode()->setIROrder(idx + 1); | |||
2314 | ||||
// InsIdx is not advanced inside this loop: every element is compared against
// the same Ins entry, and InsIdx is bumped by NumElts once all loads are done.
2315 | for (unsigned j = 0; j < VecSize; ++j) { | |||
2316 | if (i + j >= NumElts) | |||
2317 | break; | |||
2318 | SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, P, | |||
2319 | DAG.getIntPtrConstant(j, dl)); | |||
2320 | if (Ins[InsIdx].VT.getSizeInBits() > EltVT.getSizeInBits()) | |||
2321 | Elt = DAG.getNode(ISD::ANY_EXTEND, dl, Ins[InsIdx].VT, Elt); | |||
2322 | InVals.push_back(Elt); | |||
2323 | } | |||
2324 | Ofst += DL.getTypeAllocSize(VecVT.getTypeForEVT(F->getContext())); | |||
2325 | } | |||
2326 | InsIdx += NumElts; | |||
2327 | } | |||
2328 | ||||
// Compensate for the loop header's ++InsIdx.
2329 | if (NumElts > 0) | |||
2330 | --InsIdx; | |||
2331 | continue; | |||
2332 | } | |||
2333 | // A plain scalar. | |||
2334 | EVT ObjectVT = getValueType(DL, Ty); | |||
2335 | // If ABI, load from the param symbol | |||
2336 | SDValue Arg = getParamSymbol(DAG, idx, PtrVT); | |||
2337 | Value *srcValue = Constant::getNullValue(PointerType::get( | |||
2338 | ObjectVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM)); | |||
2339 | SDValue p; | |||
// A scalar narrower than its Ins type is read with a sign- or zero-extending
// load, chosen by the SExt flag of the Ins entry.
2340 | if (ObjectVT.getSizeInBits() < Ins[InsIdx].VT.getSizeInBits()) { | |||
2341 | ISD::LoadExtType ExtOp = Ins[InsIdx].Flags.isSExt() ? | |||
2342 | ISD::SEXTLOAD : ISD::ZEXTLOAD; | |||
2343 | p = DAG.getExtLoad( | |||
2344 | ExtOp, dl, Ins[InsIdx].VT, Root, Arg, MachinePointerInfo(srcValue), | |||
2345 | ObjectVT, | |||
2346 | DL.getABITypeAlignment(ObjectVT.getTypeForEVT(F->getContext()))); | |||
2347 | } else { | |||
2348 | p = DAG.getLoad( | |||
2349 | Ins[InsIdx].VT, dl, Root, Arg, MachinePointerInfo(srcValue), | |||
2350 | DL.getABITypeAlignment(ObjectVT.getTypeForEVT(F->getContext()))); | |||
2351 | } | |||
2352 | if (p.getNode()) | |||
2353 | p.getNode()->setIROrder(idx + 1); | |||
2354 | InVals.push_back(p); | |||
2355 | continue; | |||
2356 | } | |||
2357 | ||||
2358 | // Param has ByVal attribute | |||
2359 | // Return MoveParam(param symbol). | |||
2360 | // Ideally, the param symbol can be returned directly, | |||
2361 | // but when SDNode builder decides to use it in a CopyToReg(), | |||
2362 | // machine instruction fails because TargetExternalSymbol | |||
2363 | // (not lowered) is target dependent, and CopyToReg assumes | |||
2364 | // the source is lowered. | |||
2365 | EVT ObjectVT = getValueType(DL, Ty); | |||
2366 | assert(ObjectVT == Ins[InsIdx].VT &&((ObjectVT == Ins[InsIdx].VT && "Ins type did not match function type" ) ? static_cast<void> (0) : __assert_fail ("ObjectVT == Ins[InsIdx].VT && \"Ins type did not match function type\"" , "/tmp/buildd/llvm-toolchain-snapshot-4.0~svn290870/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 2367, __PRETTY_FUNCTION__)) | |||
2367 | "Ins type did not match function type")((ObjectVT == Ins[InsIdx].VT && "Ins type did not match function type" ) ? static_cast<void> (0) : __assert_fail ("ObjectVT == Ins[InsIdx].VT && \"Ins type did not match function type\"" , "/tmp/buildd/llvm-toolchain-snapshot-4.0~svn290870/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 2367, __PRETTY_FUNCTION__)); | |||
2368 | SDValue Arg = getParamSymbol(DAG, idx, PtrVT); | |||
2369 | SDValue p = DAG.getNode(NVPTXISD::MoveParam, dl, ObjectVT, Arg); | |||
2370 | if (p.getNode()) | |||
2371 | p.getNode()->setIROrder(idx + 1); | |||
2372 | InVals.push_back(p); | |||
2373 | } | |||
2374 | ||||
2375 | // Clang will check explicit VarArg and issue error if any. However, Clang | |||
2376 | // will let code with | |||
2377 | // implicit var arg like f() pass. See bug 617733. | |||
2378 | // We treat this case as if the arg list is empty. | |||
2379 | // if (F.isVarArg()) { | |||
2380 | // assert(0 && "VarArg not supported yet!"); | |||
2381 | //} | |||
2382 | ||||
2383 | if (!OutChains.empty()) | |||
2384 | DAG.setRoot(DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains)); | |||
2385 | ||||
2386 | return Chain; | |||
2387 | } | |||
2388 | ||||
2389 | SDValue | |||
2390 | NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, | |||
2391 | bool isVarArg, | |||
2392 | const SmallVectorImpl<ISD::OutputArg> &Outs, | |||
2393 | const SmallVectorImpl<SDValue> &OutVals, | |||
2394 | const SDLoc &dl, SelectionDAG &DAG) const { | |||
2395 | MachineFunction &MF = DAG.getMachineFunction(); | |||
2396 | const Function *F = MF.getFunction(); | |||
2397 | Type *RetTy = F->getReturnType(); | |||
2398 | const DataLayout &TD = DAG.getDataLayout(); | |||
2399 | ||||
2400 | bool isABI = (STI.getSmVersion() >= 20); | |||
2401 | assert(isABI && "Non-ABI compilation is not supported")((isABI && "Non-ABI compilation is not supported") ? static_cast <void> (0) : __assert_fail ("isABI && \"Non-ABI compilation is not supported\"" , "/tmp/buildd/llvm-toolchain-snapshot-4.0~svn290870/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 2401, __PRETTY_FUNCTION__)); | |||
2402 | if (!isABI) | |||
2403 | return Chain; | |||
2404 | ||||
2405 | if (VectorType *VTy = dyn_cast<VectorType>(RetTy)) { | |||
2406 | // If we have a vector type, the OutVals array will be the scalarized | |||
2407 | // components and we have combine them into 1 or more vector stores. | |||
2408 | unsigned NumElts = VTy->getNumElements(); | |||
2409 | assert(NumElts == Outs.size() && "Bad scalarization of return value")((NumElts == Outs.size() && "Bad scalarization of return value" ) ? static_cast<void> (0) : __assert_fail ("NumElts == Outs.size() && \"Bad scalarization of return value\"" , "/tmp/buildd/llvm-toolchain-snapshot-4.0~svn290870/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 2409, __PRETTY_FUNCTION__)); | |||
2410 | ||||
2411 | // const_cast can be removed in later LLVM versions | |||
2412 | EVT EltVT = getValueType(TD, RetTy).getVectorElementType(); | |||
2413 | bool NeedExtend = false; | |||
2414 | if (EltVT.getSizeInBits() < 16) | |||
2415 | NeedExtend = true; | |||
2416 | ||||
2417 | // V1 store | |||
2418 | if (NumElts == 1) { | |||
2419 | SDValue StoreVal = OutVals[0]; | |||
2420 | // We only have one element, so just directly store it | |||
2421 | if (NeedExtend) | |||
2422 | StoreVal = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal); | |||
2423 | SDValue Ops[] = { Chain, DAG.getConstant(0, dl, MVT::i32), StoreVal }; | |||
2424 | Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreRetval, dl, | |||
2425 | DAG.getVTList(MVT::Other), Ops, | |||
2426 | EltVT, MachinePointerInfo()); | |||
2427 | ||||
2428 | } else if (NumElts == 2) { | |||
2429 | // V2 store | |||
2430 | SDValue StoreVal0 = OutVals[0]; | |||
2431 | SDValue StoreVal1 = OutVals[1]; | |||
2432 | ||||
2433 | if (NeedExtend) { | |||
2434 | StoreVal0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal0); | |||
2435 | StoreVal1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal1); | |||
2436 | } | |||
2437 | ||||
2438 | SDValue Ops[] = { Chain, DAG.getConstant(0, dl, MVT::i32), StoreVal0, | |||
2439 | StoreVal1 }; | |||
2440 | Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreRetvalV2, dl, | |||
2441 | DAG.getVTList(MVT::Other), Ops, | |||
2442 | EltVT, MachinePointerInfo()); | |||
2443 | } else { | |||
2444 | // V4 stores | |||
2445 | // We have at least 4 elements (<3 x Ty> expands to 4 elements) and the | |||
2446 | // vector will be expanded to a power of 2 elements, so we know we can | |||
2447 | // always round up to the next multiple of 4 when creating the vector | |||
2448 | // stores. | |||
2449 | // e.g. 4 elem => 1 st.v4 | |||
2450 | // 6 elem => 2 st.v4 | |||
2451 | // 8 elem => 2 st.v4 | |||
2452 | // 11 elem => 3 st.v4 | |||
2453 | ||||
2454 | unsigned VecSize = 4; | |||
2455 | if (OutVals[0].getValueSizeInBits() == 64) | |||
2456 | VecSize = 2; | |||
2457 | ||||
2458 | unsigned Offset = 0; | |||
2459 | ||||
2460 | EVT VecVT = | |||
2461 | EVT::getVectorVT(F->getContext(), EltVT, VecSize); | |||
2462 | unsigned PerStoreOffset = | |||
2463 | TD.getTypeAllocSize(VecVT.getTypeForEVT(F->getContext())); | |||
2464 | ||||
2465 | for (unsigned i = 0; i < NumElts; i += VecSize) { | |||
2466 | // Get values | |||
2467 | SDValue StoreVal; | |||
2468 | SmallVector<SDValue, 8> Ops; | |||
2469 | Ops.push_back(Chain); | |||
2470 | Ops.push_back(DAG.getConstant(Offset, dl, MVT::i32)); | |||
2471 | unsigned Opc = NVPTXISD::StoreRetvalV2; | |||
2472 | EVT ExtendedVT = (NeedExtend) ? MVT::i16 : OutVals[0].getValueType(); | |||
2473 | ||||
2474 | StoreVal = OutVals[i]; | |||
2475 | if (NeedExtend) | |||
2476 | StoreVal = DAG.getNode(ISD::ZERO_EXTEND, dl, ExtendedVT, StoreVal); | |||
2477 | Ops.push_back(StoreVal); | |||
2478 | ||||
2479 | if (i + 1 < NumElts) { | |||
2480 | StoreVal = OutVals[i + 1]; | |||
2481 | if (NeedExtend) | |||
2482 | StoreVal = DAG.getNode(ISD::ZERO_EXTEND, dl, ExtendedVT, StoreVal); | |||
2483 | } else { | |||
2484 | StoreVal = DAG.getUNDEF(ExtendedVT); | |||
2485 | } | |||
2486 | Ops.push_back(StoreVal); | |||
2487 | ||||
2488 | if (VecSize == 4) { | |||
2489 | Opc = NVPTXISD::StoreRetvalV4; | |||
2490 | if (i + 2 < NumElts) { | |||
2491 | StoreVal = OutVals[i + 2]; | |||
2492 | if (NeedExtend) | |||
2493 | StoreVal = | |||
2494 | DAG.getNode(ISD::ZERO_EXTEND, dl, ExtendedVT, StoreVal); | |||
2495 | } else { | |||
2496 | StoreVal = DAG.getUNDEF(ExtendedVT); | |||
2497 | } | |||
2498 | Ops.push_back(StoreVal); | |||
2499 | ||||
2500 | if (i + 3 < NumElts) { | |||
2501 | StoreVal = OutVals[i + 3]; | |||
2502 | if (NeedExtend) | |||
2503 | StoreVal = | |||
2504 | DAG.getNode(ISD::ZERO_EXTEND, dl, ExtendedVT, StoreVal); | |||
2505 | } else { | |||
2506 | StoreVal = DAG.getUNDEF(ExtendedVT); | |||
2507 | } | |||
2508 | Ops.push_back(StoreVal); | |||
2509 | } | |||
2510 | ||||
2511 | // Chain = DAG.getNode(Opc, dl, MVT::Other, &Ops[0], Ops.size()); | |||
2512 | Chain = | |||
2513 | DAG.getMemIntrinsicNode(Opc, dl, DAG.getVTList(MVT::Other), Ops, | |||
2514 | EltVT, MachinePointerInfo()); | |||
2515 | Offset += PerStoreOffset; | |||
2516 | } | |||
2517 | } | |||
2518 | } else { | |||
2519 | SmallVector<EVT, 16> ValVTs; | |||
2520 | SmallVector<uint64_t, 16> Offsets; | |||
2521 | ComputePTXValueVTs(*this, DAG.getDataLayout(), RetTy, ValVTs, &Offsets, 0); | |||
2522 | assert(ValVTs.size() == OutVals.size() && "Bad return value decomposition")((ValVTs.size() == OutVals.size() && "Bad return value decomposition" ) ? static_cast<void> (0) : __assert_fail ("ValVTs.size() == OutVals.size() && \"Bad return value decomposition\"" , "/tmp/buildd/llvm-toolchain-snapshot-4.0~svn290870/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 2522, __PRETTY_FUNCTION__)); | |||
2523 | ||||
2524 | for (unsigned i = 0, e = Outs.size(); i != e; ++i) { | |||
2525 | SDValue theVal = OutVals[i]; | |||
2526 | EVT TheValType = theVal.getValueType(); | |||
2527 | unsigned numElems = 1; | |||
2528 | if (TheValType.isVector()) | |||
2529 | numElems = TheValType.getVectorNumElements(); | |||
2530 | for (unsigned j = 0, je = numElems; j != je; ++j) { | |||
2531 | SDValue TmpVal = theVal; | |||
2532 | if (TheValType.isVector()) | |||
2533 | TmpVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, | |||
2534 | TheValType.getVectorElementType(), TmpVal, | |||
2535 | DAG.getIntPtrConstant(j, dl)); | |||
2536 | EVT TheStoreType = ValVTs[i]; | |||
2537 | if (RetTy->isIntegerTy() && TD.getTypeAllocSizeInBits(RetTy) < 32) { | |||
2538 | // The following zero-extension is for integer types only, and | |||
2539 | // specifically not for aggregates. | |||
2540 | TmpVal = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, TmpVal); | |||
2541 | TheStoreType = MVT::i32; | |||
2542 | } | |||
2543 | else if (TmpVal.getValueSizeInBits() < 16) | |||
2544 | TmpVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, TmpVal); | |||
2545 | ||||
2546 | SDValue Ops[] = { | |||
2547 | Chain, | |||
2548 | DAG.getConstant(Offsets[i], dl, MVT::i32), | |||
2549 | TmpVal }; | |||
2550 | Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreRetval, dl, | |||
2551 | DAG.getVTList(MVT::Other), Ops, | |||
2552 | TheStoreType, | |||
2553 | MachinePointerInfo()); | |||
2554 | } | |||
2555 | } | |||
2556 | } | |||
2557 | ||||
2558 | return DAG.getNode(NVPTXISD::RET_FLAG, dl, MVT::Other, Chain); | |||
2559 | } | |||
2560 | ||||
2561 | ||||
2562 | void NVPTXTargetLowering::LowerAsmOperandForConstraint( | |||
2563 | SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops, | |||
2564 | SelectionDAG &DAG) const { | |||
2565 | if (Constraint.length() > 1) | |||
2566 | return; | |||
2567 | else | |||
2568 | TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); | |||
2569 | } | |||
2570 | ||||
/// Translate an NVVM texture-fetch (nvvm_tex_*) or texture-gather
/// (nvvm_tld4_*) intrinsic ID into the matching NVPTXISD opcode.
///
/// \param Intrinsic  the intrinsic ID to translate.
/// \returns the NVPTXISD opcode for every intrinsic listed in the switch, or 0
///          for any other value.
2571 | static unsigned getOpcForTextureInstr(unsigned Intrinsic) { | |||
2572 | switch (Intrinsic) { | |||
// Any intrinsic not listed below yields 0.
2573 | default: | |||
2574 | return 0; | |||
2575 | ||||
// nvvm_tex_1d_* intrinsics.
2576 | case Intrinsic::nvvm_tex_1d_v4f32_s32: | |||
2577 | return NVPTXISD::Tex1DFloatS32; | |||
2578 | case Intrinsic::nvvm_tex_1d_v4f32_f32: | |||
2579 | return NVPTXISD::Tex1DFloatFloat; | |||
2580 | case Intrinsic::nvvm_tex_1d_level_v4f32_f32: | |||
2581 | return NVPTXISD::Tex1DFloatFloatLevel; | |||
2582 | case Intrinsic::nvvm_tex_1d_grad_v4f32_f32: | |||
2583 | return NVPTXISD::Tex1DFloatFloatGrad; | |||
2584 | case Intrinsic::nvvm_tex_1d_v4s32_s32: | |||
2585 | return NVPTXISD::Tex1DS32S32; | |||
2586 | case Intrinsic::nvvm_tex_1d_v4s32_f32: | |||
2587 | return NVPTXISD::Tex1DS32Float; | |||
2588 | case Intrinsic::nvvm_tex_1d_level_v4s32_f32: | |||
2589 | return NVPTXISD::Tex1DS32FloatLevel; | |||
2590 | case Intrinsic::nvvm_tex_1d_grad_v4s32_f32: | |||
2591 | return NVPTXISD::Tex1DS32FloatGrad; | |||
2592 | case Intrinsic::nvvm_tex_1d_v4u32_s32: | |||
2593 | return NVPTXISD::Tex1DU32S32; | |||
2594 | case Intrinsic::nvvm_tex_1d_v4u32_f32: | |||
2595 | return NVPTXISD::Tex1DU32Float; | |||
2596 | case Intrinsic::nvvm_tex_1d_level_v4u32_f32: | |||
2597 | return NVPTXISD::Tex1DU32FloatLevel; | |||
2598 | case Intrinsic::nvvm_tex_1d_grad_v4u32_f32: | |||
2599 | return NVPTXISD::Tex1DU32FloatGrad; | |||
2600 | ||||
// nvvm_tex_1d_array_* intrinsics.
2601 | case Intrinsic::nvvm_tex_1d_array_v4f32_s32: | |||
2602 | return NVPTXISD::Tex1DArrayFloatS32; | |||
2603 | case Intrinsic::nvvm_tex_1d_array_v4f32_f32: | |||
2604 | return NVPTXISD::Tex1DArrayFloatFloat; | |||
2605 | case Intrinsic::nvvm_tex_1d_array_level_v4f32_f32: | |||
2606 | return NVPTXISD::Tex1DArrayFloatFloatLevel; | |||
2607 | case Intrinsic::nvvm_tex_1d_array_grad_v4f32_f32: | |||
2608 | return NVPTXISD::Tex1DArrayFloatFloatGrad; | |||
2609 | case Intrinsic::nvvm_tex_1d_array_v4s32_s32: | |||
2610 | return NVPTXISD::Tex1DArrayS32S32; | |||
2611 | case Intrinsic::nvvm_tex_1d_array_v4s32_f32: | |||
2612 | return NVPTXISD::Tex1DArrayS32Float; | |||
2613 | case Intrinsic::nvvm_tex_1d_array_level_v4s32_f32: | |||
2614 | return NVPTXISD::Tex1DArrayS32FloatLevel; | |||
2615 | case Intrinsic::nvvm_tex_1d_array_grad_v4s32_f32: | |||
2616 | return NVPTXISD::Tex1DArrayS32FloatGrad; | |||
2617 | case Intrinsic::nvvm_tex_1d_array_v4u32_s32: | |||
2618 | return NVPTXISD::Tex1DArrayU32S32; | |||
2619 | case Intrinsic::nvvm_tex_1d_array_v4u32_f32: | |||
2620 | return NVPTXISD::Tex1DArrayU32Float; | |||
2621 | case Intrinsic::nvvm_tex_1d_array_level_v4u32_f32: | |||
2622 | return NVPTXISD::Tex1DArrayU32FloatLevel; | |||
2623 | case Intrinsic::nvvm_tex_1d_array_grad_v4u32_f32: | |||
2624 | return NVPTXISD::Tex1DArrayU32FloatGrad; | |||
2625 | ||||
// nvvm_tex_2d_* intrinsics.
2626 | case Intrinsic::nvvm_tex_2d_v4f32_s32: | |||
2627 | return NVPTXISD::Tex2DFloatS32; | |||
2628 | case Intrinsic::nvvm_tex_2d_v4f32_f32: | |||
2629 | return NVPTXISD::Tex2DFloatFloat; | |||
2630 | case Intrinsic::nvvm_tex_2d_level_v4f32_f32: | |||
2631 | return NVPTXISD::Tex2DFloatFloatLevel; | |||
2632 | case Intrinsic::nvvm_tex_2d_grad_v4f32_f32: | |||
2633 | return NVPTXISD::Tex2DFloatFloatGrad; | |||
2634 | case Intrinsic::nvvm_tex_2d_v4s32_s32: | |||
2635 | return NVPTXISD::Tex2DS32S32; | |||
2636 | case Intrinsic::nvvm_tex_2d_v4s32_f32: | |||
2637 | return NVPTXISD::Tex2DS32Float; | |||
2638 | case Intrinsic::nvvm_tex_2d_level_v4s32_f32: | |||
2639 | return NVPTXISD::Tex2DS32FloatLevel; | |||
2640 | case Intrinsic::nvvm_tex_2d_grad_v4s32_f32: | |||
2641 | return NVPTXISD::Tex2DS32FloatGrad; | |||
2642 | case Intrinsic::nvvm_tex_2d_v4u32_s32: | |||
2643 | return NVPTXISD::Tex2DU32S32; | |||
2644 | case Intrinsic::nvvm_tex_2d_v4u32_f32: | |||
2645 | return NVPTXISD::Tex2DU32Float; | |||
2646 | case Intrinsic::nvvm_tex_2d_level_v4u32_f32: | |||
2647 | return NVPTXISD::Tex2DU32FloatLevel; | |||
2648 | case Intrinsic::nvvm_tex_2d_grad_v4u32_f32: | |||
2649 | return NVPTXISD::Tex2DU32FloatGrad; | |||
2650 | ||||
// nvvm_tex_2d_array_* intrinsics.
2651 | case Intrinsic::nvvm_tex_2d_array_v4f32_s32: | |||
2652 | return NVPTXISD::Tex2DArrayFloatS32; | |||
2653 | case Intrinsic::nvvm_tex_2d_array_v4f32_f32: | |||
2654 | return NVPTXISD::Tex2DArrayFloatFloat; | |||
2655 | case Intrinsic::nvvm_tex_2d_array_level_v4f32_f32: | |||
2656 | return NVPTXISD::Tex2DArrayFloatFloatLevel; | |||
2657 | case Intrinsic::nvvm_tex_2d_array_grad_v4f32_f32: | |||
2658 | return NVPTXISD::Tex2DArrayFloatFloatGrad; | |||
2659 | case Intrinsic::nvvm_tex_2d_array_v4s32_s32: | |||
2660 | return NVPTXISD::Tex2DArrayS32S32; | |||
2661 | case Intrinsic::nvvm_tex_2d_array_v4s32_f32: | |||
2662 | return NVPTXISD::Tex2DArrayS32Float; | |||
2663 | case Intrinsic::nvvm_tex_2d_array_level_v4s32_f32: | |||
2664 | return NVPTXISD::Tex2DArrayS32FloatLevel; | |||
2665 | case Intrinsic::nvvm_tex_2d_array_grad_v4s32_f32: | |||
2666 | return NVPTXISD::Tex2DArrayS32FloatGrad; | |||
2667 | case Intrinsic::nvvm_tex_2d_array_v4u32_s32: | |||
2668 | return NVPTXISD::Tex2DArrayU32S32; | |||
2669 | case Intrinsic::nvvm_tex_2d_array_v4u32_f32: | |||
2670 | return NVPTXISD::Tex2DArrayU32Float; | |||
2671 | case Intrinsic::nvvm_tex_2d_array_level_v4u32_f32: | |||
2672 | return NVPTXISD::Tex2DArrayU32FloatLevel; | |||
2673 | case Intrinsic::nvvm_tex_2d_array_grad_v4u32_f32: | |||
2674 | return NVPTXISD::Tex2DArrayU32FloatGrad; | |||
2675 | ||||
// nvvm_tex_3d_* intrinsics.
2676 | case Intrinsic::nvvm_tex_3d_v4f32_s32: | |||
2677 | return NVPTXISD::Tex3DFloatS32; | |||
2678 | case Intrinsic::nvvm_tex_3d_v4f32_f32: | |||
2679 | return NVPTXISD::Tex3DFloatFloat; | |||
2680 | case Intrinsic::nvvm_tex_3d_level_v4f32_f32: | |||
2681 | return NVPTXISD::Tex3DFloatFloatLevel; | |||
2682 | case Intrinsic::nvvm_tex_3d_grad_v4f32_f32: | |||
2683 | return NVPTXISD::Tex3DFloatFloatGrad; | |||
2684 | case Intrinsic::nvvm_tex_3d_v4s32_s32: | |||
2685 | return NVPTXISD::Tex3DS32S32; | |||
2686 | case Intrinsic::nvvm_tex_3d_v4s32_f32: | |||
2687 | return NVPTXISD::Tex3DS32Float; | |||
2688 | case Intrinsic::nvvm_tex_3d_level_v4s32_f32: | |||
2689 | return NVPTXISD::Tex3DS32FloatLevel; | |||
2690 | case Intrinsic::nvvm_tex_3d_grad_v4s32_f32: | |||
2691 | return NVPTXISD::Tex3DS32FloatGrad; | |||
2692 | case Intrinsic::nvvm_tex_3d_v4u32_s32: | |||
2693 | return NVPTXISD::Tex3DU32S32; | |||
2694 | case Intrinsic::nvvm_tex_3d_v4u32_f32: | |||
2695 | return NVPTXISD::Tex3DU32Float; | |||
2696 | case Intrinsic::nvvm_tex_3d_level_v4u32_f32: | |||
2697 | return NVPTXISD::Tex3DU32FloatLevel; | |||
2698 | case Intrinsic::nvvm_tex_3d_grad_v4u32_f32: | |||
2699 | return NVPTXISD::Tex3DU32FloatGrad; | |||
2700 | ||||
// nvvm_tex_cube_* intrinsics.
2701 | case Intrinsic::nvvm_tex_cube_v4f32_f32: | |||
2702 | return NVPTXISD::TexCubeFloatFloat; | |||
2703 | case Intrinsic::nvvm_tex_cube_level_v4f32_f32: | |||
2704 | return NVPTXISD::TexCubeFloatFloatLevel; | |||
2705 | case Intrinsic::nvvm_tex_cube_v4s32_f32: | |||
2706 | return NVPTXISD::TexCubeS32Float; | |||
2707 | case Intrinsic::nvvm_tex_cube_level_v4s32_f32: | |||
2708 | return NVPTXISD::TexCubeS32FloatLevel; | |||
2709 | case Intrinsic::nvvm_tex_cube_v4u32_f32: | |||
2710 | return NVPTXISD::TexCubeU32Float; | |||
2711 | case Intrinsic::nvvm_tex_cube_level_v4u32_f32: | |||
2712 | return NVPTXISD::TexCubeU32FloatLevel; | |||
2713 | ||||
// nvvm_tex_cube_array_* intrinsics.
2714 | case Intrinsic::nvvm_tex_cube_array_v4f32_f32: | |||
2715 | return NVPTXISD::TexCubeArrayFloatFloat; | |||
2716 | case Intrinsic::nvvm_tex_cube_array_level_v4f32_f32: | |||
2717 | return NVPTXISD::TexCubeArrayFloatFloatLevel; | |||
2718 | case Intrinsic::nvvm_tex_cube_array_v4s32_f32: | |||
2719 | return NVPTXISD::TexCubeArrayS32Float; | |||
2720 | case Intrinsic::nvvm_tex_cube_array_level_v4s32_f32: | |||
2721 | return NVPTXISD::TexCubeArrayS32FloatLevel; | |||
2722 | case Intrinsic::nvvm_tex_cube_array_v4u32_f32: | |||
2723 | return NVPTXISD::TexCubeArrayU32Float; | |||
2724 | case Intrinsic::nvvm_tex_cube_array_level_v4u32_f32: | |||
2725 | return NVPTXISD::TexCubeArrayU32FloatLevel; | |||
2726 | ||||
// nvvm_tld4_{r,g,b,a}_2d_* intrinsics.
// NOTE(review): the v4s32 / v4u32 variants map to opcodes spelled
// ...S64Float / ...U64Float; the width in the opcode name does not match the
// intrinsic name -- confirm this is just the enum's naming.
2727 | case Intrinsic::nvvm_tld4_r_2d_v4f32_f32: | |||
2728 | return NVPTXISD::Tld4R2DFloatFloat; | |||
2729 | case Intrinsic::nvvm_tld4_g_2d_v4f32_f32: | |||
2730 | return NVPTXISD::Tld4G2DFloatFloat; | |||
2731 | case Intrinsic::nvvm_tld4_b_2d_v4f32_f32: | |||
2732 | return NVPTXISD::Tld4B2DFloatFloat; | |||
2733 | case Intrinsic::nvvm_tld4_a_2d_v4f32_f32: | |||
2734 | return NVPTXISD::Tld4A2DFloatFloat; | |||
2735 | case Intrinsic::nvvm_tld4_r_2d_v4s32_f32: | |||
2736 | return NVPTXISD::Tld4R2DS64Float; | |||
2737 | case Intrinsic::nvvm_tld4_g_2d_v4s32_f32: | |||
2738 | return NVPTXISD::Tld4G2DS64Float; | |||
2739 | case Intrinsic::nvvm_tld4_b_2d_v4s32_f32: | |||
2740 | return NVPTXISD::Tld4B2DS64Float; | |||
2741 | case Intrinsic::nvvm_tld4_a_2d_v4s32_f32: | |||
2742 | return NVPTXISD::Tld4A2DS64Float; | |||
2743 | case Intrinsic::nvvm_tld4_r_2d_v4u32_f32: | |||
2744 | return NVPTXISD::Tld4R2DU64Float; | |||
2745 | case Intrinsic::nvvm_tld4_g_2d_v4u32_f32: | |||
2746 | return NVPTXISD::Tld4G2DU64Float; | |||
2747 | case Intrinsic::nvvm_tld4_b_2d_v4u32_f32: | |||
2748 | return NVPTXISD::Tld4B2DU64Float; | |||
2749 | case Intrinsic::nvvm_tld4_a_2d_v4u32_f32: | |||
2750 | return NVPTXISD::Tld4A2DU64Float; | |||
2751 | ||||
// nvvm_tex_unified_1d_* intrinsics.
2752 | case Intrinsic::nvvm_tex_unified_1d_v4f32_s32: | |||
2753 | return NVPTXISD::TexUnified1DFloatS32; | |||
2754 | case Intrinsic::nvvm_tex_unified_1d_v4f32_f32: | |||
2755 | return NVPTXISD::TexUnified1DFloatFloat; | |||
2756 | case Intrinsic::nvvm_tex_unified_1d_level_v4f32_f32: | |||
2757 | return NVPTXISD::TexUnified1DFloatFloatLevel; | |||
2758 | case Intrinsic::nvvm_tex_unified_1d_grad_v4f32_f32: | |||
2759 | return NVPTXISD::TexUnified1DFloatFloatGrad; | |||
2760 | case Intrinsic::nvvm_tex_unified_1d_v4s32_s32: | |||
2761 | return NVPTXISD::TexUnified1DS32S32; | |||
2762 | case Intrinsic::nvvm_tex_unified_1d_v4s32_f32: | |||
2763 | return NVPTXISD::TexUnified1DS32Float; | |||
2764 | case Intrinsic::nvvm_tex_unified_1d_level_v4s32_f32: | |||
2765 | return NVPTXISD::TexUnified1DS32FloatLevel; | |||
2766 | case Intrinsic::nvvm_tex_unified_1d_grad_v4s32_f32: | |||
2767 | return NVPTXISD::TexUnified1DS32FloatGrad; | |||
2768 | case Intrinsic::nvvm_tex_unified_1d_v4u32_s32: | |||
2769 | return NVPTXISD::TexUnified1DU32S32; | |||
2770 | case Intrinsic::nvvm_tex_unified_1d_v4u32_f32: | |||
2771 | return NVPTXISD::TexUnified1DU32Float; | |||
2772 | case Intrinsic::nvvm_tex_unified_1d_level_v4u32_f32: | |||
2773 | return NVPTXISD::TexUnified1DU32FloatLevel; | |||
2774 | case Intrinsic::nvvm_tex_unified_1d_grad_v4u32_f32: | |||
2775 | return NVPTXISD::TexUnified1DU32FloatGrad; | |||
2776 | ||||
// nvvm_tex_unified_1d_array_* intrinsics.
2777 | case Intrinsic::nvvm_tex_unified_1d_array_v4f32_s32: | |||
2778 | return NVPTXISD::TexUnified1DArrayFloatS32; | |||
2779 | case Intrinsic::nvvm_tex_unified_1d_array_v4f32_f32: | |||
2780 | return NVPTXISD::TexUnified1DArrayFloatFloat; | |||
2781 | case Intrinsic::nvvm_tex_unified_1d_array_level_v4f32_f32: | |||
2782 | return NVPTXISD::TexUnified1DArrayFloatFloatLevel; | |||
2783 | case Intrinsic::nvvm_tex_unified_1d_array_grad_v4f32_f32: | |||
2784 | return NVPTXISD::TexUnified1DArrayFloatFloatGrad; | |||
2785 | case Intrinsic::nvvm_tex_unified_1d_array_v4s32_s32: | |||
2786 | return NVPTXISD::TexUnified1DArrayS32S32; | |||
2787 | case Intrinsic::nvvm_tex_unified_1d_array_v4s32_f32: | |||
2788 | return NVPTXISD::TexUnified1DArrayS32Float; | |||
2789 | case Intrinsic::nvvm_tex_unified_1d_array_level_v4s32_f32: | |||
2790 | return NVPTXISD::TexUnified1DArrayS32FloatLevel; | |||
2791 | case Intrinsic::nvvm_tex_unified_1d_array_grad_v4s32_f32: | |||
2792 | return NVPTXISD::TexUnified1DArrayS32FloatGrad; | |||
2793 | case Intrinsic::nvvm_tex_unified_1d_array_v4u32_s32: | |||
2794 | return NVPTXISD::TexUnified1DArrayU32S32; | |||
2795 | case Intrinsic::nvvm_tex_unified_1d_array_v4u32_f32: | |||
2796 | return NVPTXISD::TexUnified1DArrayU32Float; | |||
2797 | case Intrinsic::nvvm_tex_unified_1d_array_level_v4u32_f32: | |||
2798 | return NVPTXISD::TexUnified1DArrayU32FloatLevel; | |||
2799 | case Intrinsic::nvvm_tex_unified_1d_array_grad_v4u32_f32: | |||
2800 | return NVPTXISD::TexUnified1DArrayU32FloatGrad; | |||
2801 | ||||
// nvvm_tex_unified_2d_* intrinsics.
2802 | case Intrinsic::nvvm_tex_unified_2d_v4f32_s32: | |||
2803 | return NVPTXISD::TexUnified2DFloatS32; | |||
2804 | case Intrinsic::nvvm_tex_unified_2d_v4f32_f32: | |||
2805 | return NVPTXISD::TexUnified2DFloatFloat; | |||
2806 | case Intrinsic::nvvm_tex_unified_2d_level_v4f32_f32: | |||
2807 | return NVPTXISD::TexUnified2DFloatFloatLevel; | |||
2808 | case Intrinsic::nvvm_tex_unified_2d_grad_v4f32_f32: | |||
2809 | return NVPTXISD::TexUnified2DFloatFloatGrad; | |||
2810 | case Intrinsic::nvvm_tex_unified_2d_v4s32_s32: | |||
2811 | return NVPTXISD::TexUnified2DS32S32; | |||
2812 | case Intrinsic::nvvm_tex_unified_2d_v4s32_f32: | |||
2813 | return NVPTXISD::TexUnified2DS32Float; | |||
2814 | case Intrinsic::nvvm_tex_unified_2d_level_v4s32_f32: | |||
2815 | return NVPTXISD::TexUnified2DS32FloatLevel; | |||
2816 | case Intrinsic::nvvm_tex_unified_2d_grad_v4s32_f32: | |||
2817 | return NVPTXISD::TexUnified2DS32FloatGrad; | |||
2818 | case Intrinsic::nvvm_tex_unified_2d_v4u32_s32: | |||
2819 | return NVPTXISD::TexUnified2DU32S32; | |||
2820 | case Intrinsic::nvvm_tex_unified_2d_v4u32_f32: | |||
2821 | return NVPTXISD::TexUnified2DU32Float; | |||
2822 | case Intrinsic::nvvm_tex_unified_2d_level_v4u32_f32: | |||
2823 | return NVPTXISD::TexUnified2DU32FloatLevel; | |||
2824 | case Intrinsic::nvvm_tex_unified_2d_grad_v4u32_f32: | |||
2825 | return NVPTXISD::TexUnified2DU32FloatGrad; | |||
2826 | ||||
// nvvm_tex_unified_2d_array_* intrinsics.
2827 | case Intrinsic::nvvm_tex_unified_2d_array_v4f32_s32: | |||
2828 | return NVPTXISD::TexUnified2DArrayFloatS32; | |||
2829 | case Intrinsic::nvvm_tex_unified_2d_array_v4f32_f32: | |||
2830 | return NVPTXISD::TexUnified2DArrayFloatFloat; | |||
2831 | case Intrinsic::nvvm_tex_unified_2d_array_level_v4f32_f32: | |||
2832 | return NVPTXISD::TexUnified2DArrayFloatFloatLevel; | |||
2833 | case Intrinsic::nvvm_tex_unified_2d_array_grad_v4f32_f32: | |||
2834 | return NVPTXISD::TexUnified2DArrayFloatFloatGrad; | |||
2835 | case Intrinsic::nvvm_tex_unified_2d_array_v4s32_s32: | |||
2836 | return NVPTXISD::TexUnified2DArrayS32S32; | |||
2837 | case Intrinsic::nvvm_tex_unified_2d_array_v4s32_f32: | |||
2838 | return NVPTXISD::TexUnified2DArrayS32Float; | |||
2839 | case Intrinsic::nvvm_tex_unified_2d_array_level_v4s32_f32: | |||
2840 | return NVPTXISD::TexUnified2DArrayS32FloatLevel; | |||
2841 | case Intrinsic::nvvm_tex_unified_2d_array_grad_v4s32_f32: | |||
2842 | return NVPTXISD::TexUnified2DArrayS32FloatGrad; | |||
2843 | case Intrinsic::nvvm_tex_unified_2d_array_v4u32_s32: | |||
2844 | return NVPTXISD::TexUnified2DArrayU32S32; | |||
2845 | case Intrinsic::nvvm_tex_unified_2d_array_v4u32_f32: | |||
2846 | return NVPTXISD::TexUnified2DArrayU32Float; | |||
2847 | case Intrinsic::nvvm_tex_unified_2d_array_level_v4u32_f32: | |||
2848 | return NVPTXISD::TexUnified2DArrayU32FloatLevel; | |||
2849 | case Intrinsic::nvvm_tex_unified_2d_array_grad_v4u32_f32: | |||
2850 | return NVPTXISD::TexUnified2DArrayU32FloatGrad; | |||
2851 | ||||
// nvvm_tex_unified_3d_* intrinsics.
2852 | case Intrinsic::nvvm_tex_unified_3d_v4f32_s32: | |||
2853 | return NVPTXISD::TexUnified3DFloatS32; | |||
2854 | case Intrinsic::nvvm_tex_unified_3d_v4f32_f32: | |||
2855 | return NVPTXISD::TexUnified3DFloatFloat; | |||
2856 | case Intrinsic::nvvm_tex_unified_3d_level_v4f32_f32: | |||
2857 | return NVPTXISD::TexUnified3DFloatFloatLevel; | |||
2858 | case Intrinsic::nvvm_tex_unified_3d_grad_v4f32_f32: | |||
2859 | return NVPTXISD::TexUnified3DFloatFloatGrad; | |||
2860 | case Intrinsic::nvvm_tex_unified_3d_v4s32_s32: | |||
2861 | return NVPTXISD::TexUnified3DS32S32; | |||
2862 | case Intrinsic::nvvm_tex_unified_3d_v4s32_f32: | |||
2863 | return NVPTXISD::TexUnified3DS32Float; | |||
2864 | case Intrinsic::nvvm_tex_unified_3d_level_v4s32_f32: | |||
2865 | return NVPTXISD::TexUnified3DS32FloatLevel; | |||
2866 | case Intrinsic::nvvm_tex_unified_3d_grad_v4s32_f32: | |||
2867 | return NVPTXISD::TexUnified3DS32FloatGrad; | |||
2868 | case Intrinsic::nvvm_tex_unified_3d_v4u32_s32: | |||
2869 | return NVPTXISD::TexUnified3DU32S32; | |||
2870 | case Intrinsic::nvvm_tex_unified_3d_v4u32_f32: | |||
2871 | return NVPTXISD::TexUnified3DU32Float; | |||
2872 | case Intrinsic::nvvm_tex_unified_3d_level_v4u32_f32: | |||
2873 | return NVPTXISD::TexUnified3DU32FloatLevel; | |||
2874 | case Intrinsic::nvvm_tex_unified_3d_grad_v4u32_f32: | |||
2875 | return NVPTXISD::TexUnified3DU32FloatGrad; | |||
2876 | ||||
// nvvm_tex_unified_cube_* intrinsics.
2877 | case Intrinsic::nvvm_tex_unified_cube_v4f32_f32: | |||
2878 | return NVPTXISD::TexUnifiedCubeFloatFloat; | |||
2879 | case Intrinsic::nvvm_tex_unified_cube_level_v4f32_f32: | |||
2880 | return NVPTXISD::TexUnifiedCubeFloatFloatLevel; | |||
2881 | case Intrinsic::nvvm_tex_unified_cube_v4s32_f32: | |||
2882 | return NVPTXISD::TexUnifiedCubeS32Float; | |||
2883 | case Intrinsic::nvvm_tex_unified_cube_level_v4s32_f32: | |||
2884 | return NVPTXISD::TexUnifiedCubeS32FloatLevel; | |||
2885 | case Intrinsic::nvvm_tex_unified_cube_v4u32_f32: | |||
2886 | return NVPTXISD::TexUnifiedCubeU32Float; | |||
2887 | case Intrinsic::nvvm_tex_unified_cube_level_v4u32_f32: | |||
2888 | return NVPTXISD::TexUnifiedCubeU32FloatLevel; | |||
2889 | ||||
// nvvm_tex_unified_cube_array_* intrinsics.
2890 | case Intrinsic::nvvm_tex_unified_cube_array_v4f32_f32: | |||
2891 | return NVPTXISD::TexUnifiedCubeArrayFloatFloat; | |||
2892 | case Intrinsic::nvvm_tex_unified_cube_array_level_v4f32_f32: | |||
2893 | return NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel; | |||
2894 | case Intrinsic::nvvm_tex_unified_cube_array_v4s32_f32: | |||
2895 | return NVPTXISD::TexUnifiedCubeArrayS32Float; | |||
2896 | case Intrinsic::nvvm_tex_unified_cube_array_level_v4s32_f32: | |||
2897 | return NVPTXISD::TexUnifiedCubeArrayS32FloatLevel; | |||
2898 | case Intrinsic::nvvm_tex_unified_cube_array_v4u32_f32: | |||
2899 | return NVPTXISD::TexUnifiedCubeArrayU32Float; | |||
2900 | case Intrinsic::nvvm_tex_unified_cube_array_level_v4u32_f32: | |||
2901 | return NVPTXISD::TexUnifiedCubeArrayU32FloatLevel; | |||
2902 | ||||
// nvvm_tld4_unified_{r,g,b,a}_2d_* intrinsics.
// NOTE(review): as with the non-unified tld4 cases, the v4s32 / v4u32 variants
// map to opcodes spelled ...S64Float / ...U64Float -- confirm intentional.
2903 | case Intrinsic::nvvm_tld4_unified_r_2d_v4f32_f32: | |||
2904 | return NVPTXISD::Tld4UnifiedR2DFloatFloat; | |||
2905 | case Intrinsic::nvvm_tld4_unified_g_2d_v4f32_f32: | |||
2906 | return NVPTXISD::Tld4UnifiedG2DFloatFloat; | |||
2907 | case Intrinsic::nvvm_tld4_unified_b_2d_v4f32_f32: | |||
2908 | return NVPTXISD::Tld4UnifiedB2DFloatFloat; | |||
2909 | case Intrinsic::nvvm_tld4_unified_a_2d_v4f32_f32: | |||
2910 | return NVPTXISD::Tld4UnifiedA2DFloatFloat; | |||
2911 | case Intrinsic::nvvm_tld4_unified_r_2d_v4s32_f32: | |||
2912 | return NVPTXISD::Tld4UnifiedR2DS64Float; | |||
2913 | case Intrinsic::nvvm_tld4_unified_g_2d_v4s32_f32: | |||
2914 | return NVPTXISD::Tld4UnifiedG2DS64Float; | |||
2915 | case Intrinsic::nvvm_tld4_unified_b_2d_v4s32_f32: | |||
2916 | return NVPTXISD::Tld4UnifiedB2DS64Float; | |||
2917 | case Intrinsic::nvvm_tld4_unified_a_2d_v4s32_f32: | |||
2918 | return NVPTXISD::Tld4UnifiedA2DS64Float; | |||
2919 | case Intrinsic::nvvm_tld4_unified_r_2d_v4u32_f32: | |||
2920 | return NVPTXISD::Tld4UnifiedR2DU64Float; | |||
2921 | case Intrinsic::nvvm_tld4_unified_g_2d_v4u32_f32: | |||
2922 | return NVPTXISD::Tld4UnifiedG2DU64Float; | |||
2923 | case Intrinsic::nvvm_tld4_unified_b_2d_v4u32_f32: | |||
2924 | return NVPTXISD::Tld4UnifiedB2DU64Float; | |||
2925 | case Intrinsic::nvvm_tld4_unified_a_2d_v4u32_f32: | |||
2926 | return NVPTXISD::Tld4UnifiedA2DU64Float; | |||
2927 | } | |||
2928 | } | |||
2929 | ||||
2930 | static unsigned getOpcForSurfaceInstr(unsigned Intrinsic) { | |||
2931 | switch (Intrinsic) { | |||
2932 | default: | |||
2933 | return 0; | |||
2934 | case Intrinsic::nvvm_suld_1d_i8_clamp: | |||
2935 | return NVPTXISD::Suld1DI8Clamp; | |||
2936 | case Intrinsic::nvvm_suld_1d_i16_clamp: | |||
2937 | return NVPTXISD::Suld1DI16Clamp; | |||
2938 | case Intrinsic::nvvm_suld_1d_i32_clamp: | |||
2939 | return NVPTXISD::Suld1DI32Clamp; | |||
2940 | case Intrinsic::nvvm_suld_1d_i64_clamp: | |||
2941 | return NVPTXISD::Suld1DI64Clamp; | |||
2942 | case Intrinsic::nvvm_suld_1d_v2i8_clamp: | |||
2943 | return NVPTXISD::Suld1DV2I8Clamp; | |||
2944 | case Intrinsic::nvvm_suld_1d_v2i16_clamp: | |||
2945 | return NVPTXISD::Suld1DV2I16Clamp; | |||
2946 | case Intrinsic::nvvm_suld_1d_v2i32_clamp: | |||
2947 | return NVPTXISD::Suld1DV2I32Clamp; | |||
2948 | case Intrinsic::nvvm_suld_1d_v2i64_clamp: | |||
2949 | return NVPTXISD::Suld1DV2I64Clamp; | |||
2950 | case Intrinsic::nvvm_suld_1d_v4i8_clamp: | |||
2951 | return NVPTXISD::Suld1DV4I8Clamp; | |||
2952 | case Intrinsic::nvvm_suld_1d_v4i16_clamp: | |||
2953 | return NVPTXISD::Suld1DV4I16Clamp; | |||
2954 | case Intrinsic::nvvm_suld_1d_v4i32_clamp: | |||
2955 | return NVPTXISD::Suld1DV4I32Clamp; | |||
2956 | case Intrinsic::nvvm_suld_1d_array_i8_clamp: | |||
2957 | return NVPTXISD::Suld1DArrayI8Clamp; | |||
2958 | case Intrinsic::nvvm_suld_1d_array_i16_clamp: | |||
2959 | return NVPTXISD::Suld1DArrayI16Clamp; | |||
2960 | case Intrinsic::nvvm_suld_1d_array_i32_clamp: | |||
2961 | return NVPTXISD::Suld1DArrayI32Clamp; | |||
2962 | case Intrinsic::nvvm_suld_1d_array_i64_clamp: | |||
2963 | return NVPTXISD::Suld1DArrayI64Clamp; | |||
2964 | case Intrinsic::nvvm_suld_1d_array_v2i8_clamp: | |||
2965 | return NVPTXISD::Suld1DArrayV2I8Clamp; | |||
2966 | case Intrinsic::nvvm_suld_1d_array_v2i16_clamp: | |||
2967 | return NVPTXISD::Suld1DArrayV2I16Clamp; | |||
2968 | case Intrinsic::nvvm_suld_1d_array_v2i32_clamp: | |||
2969 | return NVPTXISD::Suld1DArrayV2I32Clamp; | |||
2970 | case Intrinsic::nvvm_suld_1d_array_v2i64_clamp: | |||
2971 | return NVPTXISD::Suld1DArrayV2I64Clamp; | |||
2972 | case Intrinsic::nvvm_suld_1d_array_v4i8_clamp: | |||
2973 | return NVPTXISD::Suld1DArrayV4I8Clamp; | |||
2974 | case Intrinsic::nvvm_suld_1d_array_v4i16_clamp: | |||
2975 | return NVPTXISD::Suld1DArrayV4I16Clamp; | |||
2976 | case Intrinsic::nvvm_suld_1d_array_v4i32_clamp: | |||
2977 | return NVPTXISD::Suld1DArrayV4I32Clamp; | |||
2978 | case Intrinsic::nvvm_suld_2d_i8_clamp: | |||
2979 | return NVPTXISD::Suld2DI8Clamp; | |||
2980 | case Intrinsic::nvvm_suld_2d_i16_clamp: | |||
2981 | return NVPTXISD::Suld2DI16Clamp; | |||
2982 | case Intrinsic::nvvm_suld_2d_i32_clamp: | |||
2983 | return NVPTXISD::Suld2DI32Clamp; | |||
2984 | case Intrinsic::nvvm_suld_2d_i64_clamp: | |||
2985 | return NVPTXISD::Suld2DI64Clamp; | |||
2986 | case Intrinsic::nvvm_suld_2d_v2i8_clamp: | |||
2987 | return NVPTXISD::Suld2DV2I8Clamp; | |||
2988 | case Intrinsic::nvvm_suld_2d_v2i16_clamp: | |||
2989 | return NVPTXISD::Suld2DV2I16Clamp; | |||
2990 | case Intrinsic::nvvm_suld_2d_v2i32_clamp: | |||
2991 | return NVPTXISD::Suld2DV2I32Clamp; | |||
2992 | case Intrinsic::nvvm_suld_2d_v2i64_clamp: | |||
2993 | return NVPTXISD::Suld2DV2I64Clamp; | |||
2994 | case Intrinsic::nvvm_suld_2d_v4i8_clamp: | |||
2995 | return NVPTXISD::Suld2DV4I8Clamp; | |||
2996 | case Intrinsic::nvvm_suld_2d_v4i16_clamp: | |||
2997 | return NVPTXISD::Suld2DV4I16Clamp; | |||
2998 | case Intrinsic::nvvm_suld_2d_v4i32_clamp: | |||
2999 | return NVPTXISD::Suld2DV4I32Clamp; | |||
3000 | case Intrinsic::nvvm_suld_2d_array_i8_clamp: | |||
3001 | return NVPTXISD::Suld2DArrayI8Clamp; | |||
3002 | case Intrinsic::nvvm_suld_2d_array_i16_clamp: | |||
3003 | return NVPTXISD::Suld2DArrayI16Clamp; | |||
3004 | case Intrinsic::nvvm_suld_2d_array_i32_clamp: | |||
3005 | return NVPTXISD::Suld2DArrayI32Clamp; | |||
3006 | case Intrinsic::nvvm_suld_2d_array_i64_clamp: | |||
3007 | return NVPTXISD::Suld2DArrayI64Clamp; | |||
3008 | case Intrinsic::nvvm_suld_2d_array_v2i8_clamp: | |||
3009 | return NVPTXISD::Suld2DArrayV2I8Clamp; | |||
3010 | case Intrinsic::nvvm_suld_2d_array_v2i16_clamp: | |||
3011 | return NVPTXISD::Suld2DArrayV2I16Clamp; | |||
3012 | case Intrinsic::nvvm_suld_2d_array_v2i32_clamp: | |||
3013 | return NVPTXISD::Suld2DArrayV2I32Clamp; | |||
3014 | case Intrinsic::nvvm_suld_2d_array_v2i64_clamp: | |||
3015 | return NVPTXISD::Suld2DArrayV2I64Clamp; | |||
3016 | case Intrinsic::nvvm_suld_2d_array_v4i8_clamp: | |||
3017 | return NVPTXISD::Suld2DArrayV4I8Clamp; | |||
3018 | case Intrinsic::nvvm_suld_2d_array_v4i16_clamp: | |||
3019 | return NVPTXISD::Suld2DArrayV4I16Clamp; | |||
3020 | case Intrinsic::nvvm_suld_2d_array_v4i32_clamp: | |||
3021 | return NVPTXISD::Suld2DArrayV4I32Clamp; | |||
3022 | case Intrinsic::nvvm_suld_3d_i8_clamp: | |||
3023 | return NVPTXISD::Suld3DI8Clamp; | |||
3024 | case Intrinsic::nvvm_suld_3d_i16_clamp: | |||
3025 | return NVPTXISD::Suld3DI16Clamp; | |||
3026 | case Intrinsic::nvvm_suld_3d_i32_clamp: | |||
3027 | return NVPTXISD::Suld3DI32Clamp; | |||
3028 | case Intrinsic::nvvm_suld_3d_i64_clamp: | |||
3029 | return NVPTXISD::Suld3DI64Clamp; | |||
3030 | case Intrinsic::nvvm_suld_3d_v2i8_clamp: | |||
3031 | return NVPTXISD::Suld3DV2I8Clamp; | |||
3032 | case Intrinsic::nvvm_suld_3d_v2i16_clamp: | |||
3033 | return NVPTXISD::Suld3DV2I16Clamp; | |||
3034 | case Intrinsic::nvvm_suld_3d_v2i32_clamp: | |||
3035 | return NVPTXISD::Suld3DV2I32Clamp; | |||
3036 | case Intrinsic::nvvm_suld_3d_v2i64_clamp: | |||
3037 | return NVPTXISD::Suld3DV2I64Clamp; | |||
3038 | case Intrinsic::nvvm_suld_3d_v4i8_clamp: | |||
3039 | return NVPTXISD::Suld3DV4I8Clamp; | |||
3040 | case Intrinsic::nvvm_suld_3d_v4i16_clamp: | |||
3041 | return NVPTXISD::Suld3DV4I16Clamp; | |||
3042 | case Intrinsic::nvvm_suld_3d_v4i32_clamp: | |||
3043 | return NVPTXISD::Suld3DV4I32Clamp; | |||
3044 | case Intrinsic::nvvm_suld_1d_i8_trap: | |||
3045 | return NVPTXISD::Suld1DI8Trap; | |||
3046 | case Intrinsic::nvvm_suld_1d_i16_trap: | |||
3047 | return NVPTXISD::Suld1DI16Trap; | |||
3048 | case Intrinsic::nvvm_suld_1d_i32_trap: | |||
3049 | return NVPTXISD::Suld1DI32Trap; | |||
3050 | case Intrinsic::nvvm_suld_1d_i64_trap: | |||
3051 | return NVPTXISD::Suld1DI64Trap; | |||
3052 | case Intrinsic::nvvm_suld_1d_v2i8_trap: | |||
3053 | return NVPTXISD::Suld1DV2I8Trap; | |||
3054 | case Intrinsic::nvvm_suld_1d_v2i16_trap: | |||
3055 | return NVPTXISD::Suld1DV2I16Trap; | |||
3056 | case Intrinsic::nvvm_suld_1d_v2i32_trap: | |||
3057 | return NVPTXISD::Suld1DV2I32Trap; | |||
3058 | case Intrinsic::nvvm_suld_1d_v2i64_trap: | |||
3059 | return NVPTXISD::Suld1DV2I64Trap; | |||
3060 | case Intrinsic::nvvm_suld_1d_v4i8_trap: | |||
3061 | return NVPTXISD::Suld1DV4I8Trap; | |||
3062 | case Intrinsic::nvvm_suld_1d_v4i16_trap: | |||
3063 | return NVPTXISD::Suld1DV4I16Trap; | |||
3064 | case Intrinsic::nvvm_suld_1d_v4i32_trap: | |||
3065 | return NVPTXISD::Suld1DV4I32Trap; | |||
3066 | case Intrinsic::nvvm_suld_1d_array_i8_trap: | |||
3067 | return NVPTXISD::Suld1DArrayI8Trap; | |||
3068 | case Intrinsic::nvvm_suld_1d_array_i16_trap: | |||
3069 | return NVPTXISD::Suld1DArrayI16Trap; | |||
3070 | case Intrinsic::nvvm_suld_1d_array_i32_trap: | |||
3071 | return NVPTXISD::Suld1DArrayI32Trap; | |||
3072 | case Intrinsic::nvvm_suld_1d_array_i64_trap: | |||
3073 | return NVPTXISD::Suld1DArrayI64Trap; | |||
3074 | case Intrinsic::nvvm_suld_1d_array_v2i8_trap: | |||
3075 | return NVPTXISD::Suld1DArrayV2I8Trap; | |||
3076 | case Intrinsic::nvvm_suld_1d_array_v2i16_trap: | |||
3077 | return NVPTXISD::Suld1DArrayV2I16Trap; | |||
3078 | case Intrinsic::nvvm_suld_1d_array_v2i32_trap: | |||
3079 | return NVPTXISD::Suld1DArrayV2I32Trap; | |||
3080 | case Intrinsic::nvvm_suld_1d_array_v2i64_trap: | |||
3081 | return NVPTXISD::Suld1DArrayV2I64Trap; | |||
3082 | case Intrinsic::nvvm_suld_1d_array_v4i8_trap: | |||
3083 | return NVPTXISD::Suld1DArrayV4I8Trap; | |||
3084 | case Intrinsic::nvvm_suld_1d_array_v4i16_trap: | |||
3085 | return NVPTXISD::Suld1DArrayV4I16Trap; | |||
3086 | case Intrinsic::nvvm_suld_1d_array_v4i32_trap: | |||
3087 | return NVPTXISD::Suld1DArrayV4I32Trap; | |||
3088 | case Intrinsic::nvvm_suld_2d_i8_trap: | |||
3089 | return NVPTXISD::Suld2DI8Trap; | |||
3090 | case Intrinsic::nvvm_suld_2d_i16_trap: | |||
3091 | return NVPTXISD::Suld2DI16Trap; | |||
3092 | case Intrinsic::nvvm_suld_2d_i32_trap: | |||
3093 | return NVPTXISD::Suld2DI32Trap; | |||
3094 | case Intrinsic::nvvm_suld_2d_i64_trap: | |||
3095 | return NVPTXISD::Suld2DI64Trap; | |||
3096 | case Intrinsic::nvvm_suld_2d_v2i8_trap: | |||
3097 | return NVPTXISD::Suld2DV2I8Trap; | |||
3098 | case Intrinsic::nvvm_suld_2d_v2i16_trap: | |||
3099 | return NVPTXISD::Suld2DV2I16Trap; | |||
3100 | case Intrinsic::nvvm_suld_2d_v2i32_trap: | |||
3101 | return NVPTXISD::Suld2DV2I32Trap; | |||
3102 | case Intrinsic::nvvm_suld_2d_v2i64_trap: | |||
3103 | return NVPTXISD::Suld2DV2I64Trap; | |||
3104 | case Intrinsic::nvvm_suld_2d_v4i8_trap: | |||
3105 | return NVPTXISD::Suld2DV4I8Trap; | |||
3106 | case Intrinsic::nvvm_suld_2d_v4i16_trap: | |||
3107 | return NVPTXISD::Suld2DV4I16Trap; | |||
3108 | case Intrinsic::nvvm_suld_2d_v4i32_trap: | |||
3109 | return NVPTXISD::Suld2DV4I32Trap; | |||
3110 | case Intrinsic::nvvm_suld_2d_array_i8_trap: | |||
3111 | return NVPTXISD::Suld2DArrayI8Trap; | |||
3112 | case Intrinsic::nvvm_suld_2d_array_i16_trap: | |||
3113 | return NVPTXISD::Suld2DArrayI16Trap; | |||
3114 | case Intrinsic::nvvm_suld_2d_array_i32_trap: | |||
3115 | return NVPTXISD::Suld2DArrayI32Trap; | |||
3116 | case Intrinsic::nvvm_suld_2d_array_i64_trap: | |||
3117 | return NVPTXISD::Suld2DArrayI64Trap; | |||
3118 | case Intrinsic::nvvm_suld_2d_array_v2i8_trap: | |||
3119 | return NVPTXISD::Suld2DArrayV2I8Trap; | |||
3120 | case Intrinsic::nvvm_suld_2d_array_v2i16_trap: | |||
3121 | return NVPTXISD::Suld2DArrayV2I16Trap; | |||
3122 | case Intrinsic::nvvm_suld_2d_array_v2i32_trap: | |||
3123 | return NVPTXISD::Suld2DArrayV2I32Trap; | |||
3124 | case Intrinsic::nvvm_suld_2d_array_v2i64_trap: | |||
3125 | return NVPTXISD::Suld2DArrayV2I64Trap; | |||
3126 | case Intrinsic::nvvm_suld_2d_array_v4i8_trap: | |||
3127 | return NVPTXISD::Suld2DArrayV4I8Trap; | |||
3128 | case Intrinsic::nvvm_suld_2d_array_v4i16_trap: | |||
3129 | return NVPTXISD::Suld2DArrayV4I16Trap; | |||
3130 | case Intrinsic::nvvm_suld_2d_array_v4i32_trap: | |||
3131 | return NVPTXISD::Suld2DArrayV4I32Trap; | |||
3132 | case Intrinsic::nvvm_suld_3d_i8_trap: | |||
3133 | return NVPTXISD::Suld3DI8Trap; | |||
3134 | case Intrinsic::nvvm_suld_3d_i16_trap: | |||
3135 | return NVPTXISD::Suld3DI16Trap; | |||
3136 | case Intrinsic::nvvm_suld_3d_i32_trap: | |||
3137 | return NVPTXISD::Suld3DI32Trap; | |||
3138 | case Intrinsic::nvvm_suld_3d_i64_trap: | |||
3139 | return NVPTXISD::Suld3DI64Trap; | |||
3140 | case Intrinsic::nvvm_suld_3d_v2i8_trap: | |||
3141 | return NVPTXISD::Suld3DV2I8Trap; | |||
3142 | case Intrinsic::nvvm_suld_3d_v2i16_trap: | |||
3143 | return NVPTXISD::Suld3DV2I16Trap; | |||
3144 | case Intrinsic::nvvm_suld_3d_v2i32_trap: | |||
3145 | return NVPTXISD::Suld3DV2I32Trap; | |||
3146 | case Intrinsic::nvvm_suld_3d_v2i64_trap: | |||
3147 | return NVPTXISD::Suld3DV2I64Trap; | |||
3148 | case Intrinsic::nvvm_suld_3d_v4i8_trap: | |||
3149 | return NVPTXISD::Suld3DV4I8Trap; | |||
3150 | case Intrinsic::nvvm_suld_3d_v4i16_trap: | |||
3151 | return NVPTXISD::Suld3DV4I16Trap; | |||
3152 | case Intrinsic::nvvm_suld_3d_v4i32_trap: | |||
3153 | return NVPTXISD::Suld3DV4I32Trap; | |||
3154 | case Intrinsic::nvvm_suld_1d_i8_zero: | |||
3155 | return NVPTXISD::Suld1DI8Zero; | |||
3156 | case Intrinsic::nvvm_suld_1d_i16_zero: | |||
3157 | return NVPTXISD::Suld1DI16Zero; | |||
3158 | case Intrinsic::nvvm_suld_1d_i32_zero: | |||
3159 | return NVPTXISD::Suld1DI32Zero; | |||
3160 | case Intrinsic::nvvm_suld_1d_i64_zero: | |||
3161 | return NVPTXISD::Suld1DI64Zero; | |||
3162 | case Intrinsic::nvvm_suld_1d_v2i8_zero: | |||
3163 | return NVPTXISD::Suld1DV2I8Zero; | |||
3164 | case Intrinsic::nvvm_suld_1d_v2i16_zero: | |||
3165 | return NVPTXISD::Suld1DV2I16Zero; | |||
3166 | case Intrinsic::nvvm_suld_1d_v2i32_zero: | |||
3167 | return NVPTXISD::Suld1DV2I32Zero; | |||
3168 | case Intrinsic::nvvm_suld_1d_v2i64_zero: | |||
3169 | return NVPTXISD::Suld1DV2I64Zero; | |||
3170 | case Intrinsic::nvvm_suld_1d_v4i8_zero: | |||
3171 | return NVPTXISD::Suld1DV4I8Zero; | |||
3172 | case Intrinsic::nvvm_suld_1d_v4i16_zero: | |||
3173 | return NVPTXISD::Suld1DV4I16Zero; | |||
3174 | case Intrinsic::nvvm_suld_1d_v4i32_zero: | |||
3175 | return NVPTXISD::Suld1DV4I32Zero; | |||
3176 | case Intrinsic::nvvm_suld_1d_array_i8_zero: | |||
3177 | return NVPTXISD::Suld1DArrayI8Zero; | |||
3178 | case Intrinsic::nvvm_suld_1d_array_i16_zero: | |||
3179 | return NVPTXISD::Suld1DArrayI16Zero; | |||
3180 | case Intrinsic::nvvm_suld_1d_array_i32_zero: | |||
3181 | return NVPTXISD::Suld1DArrayI32Zero; | |||
3182 | case Intrinsic::nvvm_suld_1d_array_i64_zero: | |||
3183 | return NVPTXISD::Suld1DArrayI64Zero; | |||
3184 | case Intrinsic::nvvm_suld_1d_array_v2i8_zero: | |||
3185 | return NVPTXISD::Suld1DArrayV2I8Zero; | |||
3186 | case Intrinsic::nvvm_suld_1d_array_v2i16_zero: | |||
3187 | return NVPTXISD::Suld1DArrayV2I16Zero; | |||
3188 | case Intrinsic::nvvm_suld_1d_array_v2i32_zero: | |||
3189 | return NVPTXISD::Suld1DArrayV2I32Zero; | |||
3190 | case Intrinsic::nvvm_suld_1d_array_v2i64_zero: | |||
3191 | return NVPTXISD::Suld1DArrayV2I64Zero; | |||
3192 | case Intrinsic::nvvm_suld_1d_array_v4i8_zero: | |||
3193 | return NVPTXISD::Suld1DArrayV4I8Zero; | |||
3194 | case Intrinsic::nvvm_suld_1d_array_v4i16_zero: | |||
3195 | return NVPTXISD::Suld1DArrayV4I16Zero; | |||
3196 | case Intrinsic::nvvm_suld_1d_array_v4i32_zero: | |||
3197 | return NVPTXISD::Suld1DArrayV4I32Zero; | |||
3198 | case Intrinsic::nvvm_suld_2d_i8_zero: | |||
3199 | return NVPTXISD::Suld2DI8Zero; | |||
3200 | case Intrinsic::nvvm_suld_2d_i16_zero: | |||
3201 | return NVPTXISD::Suld2DI16Zero; | |||
3202 | case Intrinsic::nvvm_suld_2d_i32_zero: | |||
3203 | return NVPTXISD::Suld2DI32Zero; | |||
3204 | case Intrinsic::nvvm_suld_2d_i64_zero: | |||
3205 | return NVPTXISD::Suld2DI64Zero; | |||
3206 | case Intrinsic::nvvm_suld_2d_v2i8_zero: | |||
3207 | return NVPTXISD::Suld2DV2I8Zero; | |||
3208 | case Intrinsic::nvvm_suld_2d_v2i16_zero: | |||
3209 | return NVPTXISD::Suld2DV2I16Zero; | |||
3210 | case Intrinsic::nvvm_suld_2d_v2i32_zero: | |||
3211 | return NVPTXISD::Suld2DV2I32Zero; | |||
3212 | case Intrinsic::nvvm_suld_2d_v2i64_zero: | |||
3213 | return NVPTXISD::Suld2DV2I64Zero; | |||
3214 | case Intrinsic::nvvm_suld_2d_v4i8_zero: | |||
3215 | return NVPTXISD::Suld2DV4I8Zero; | |||
3216 | case Intrinsic::nvvm_suld_2d_v4i16_zero: | |||
3217 | return NVPTXISD::Suld2DV4I16Zero; | |||
3218 | case Intrinsic::nvvm_suld_2d_v4i32_zero: | |||
3219 | return NVPTXISD::Suld2DV4I32Zero; | |||
3220 | case Intrinsic::nvvm_suld_2d_array_i8_zero: | |||
3221 | return NVPTXISD::Suld2DArrayI8Zero; | |||
3222 | case Intrinsic::nvvm_suld_2d_array_i16_zero: | |||
3223 | return NVPTXISD::Suld2DArrayI16Zero; | |||
3224 | case Intrinsic::nvvm_suld_2d_array_i32_zero: | |||
3225 | return NVPTXISD::Suld2DArrayI32Zero; | |||
3226 | case Intrinsic::nvvm_suld_2d_array_i64_zero: | |||
3227 | return NVPTXISD::Suld2DArrayI64Zero; | |||
3228 | case Intrinsic::nvvm_suld_2d_array_v2i8_zero: | |||
3229 | return NVPTXISD::Suld2DArrayV2I8Zero; | |||
3230 | case Intrinsic::nvvm_suld_2d_array_v2i16_zero: | |||
3231 | return NVPTXISD::Suld2DArrayV2I16Zero; | |||
3232 | case Intrinsic::nvvm_suld_2d_array_v2i32_zero: | |||
3233 | return NVPTXISD::Suld2DArrayV2I32Zero; | |||
3234 | case Intrinsic::nvvm_suld_2d_array_v2i64_zero: | |||
3235 | return NVPTXISD::Suld2DArrayV2I64Zero; | |||
3236 | case Intrinsic::nvvm_suld_2d_array_v4i8_zero: | |||
3237 | return NVPTXISD::Suld2DArrayV4I8Zero; | |||
3238 | case Intrinsic::nvvm_suld_2d_array_v4i16_zero: | |||
3239 | return NVPTXISD::Suld2DArrayV4I16Zero; | |||
3240 | case Intrinsic::nvvm_suld_2d_array_v4i32_zero: | |||
3241 | return NVPTXISD::Suld2DArrayV4I32Zero; | |||
3242 | case Intrinsic::nvvm_suld_3d_i8_zero: | |||
3243 | return NVPTXISD::Suld3DI8Zero; | |||
3244 | case Intrinsic::nvvm_suld_3d_i16_zero: | |||
3245 | return NVPTXISD::Suld3DI16Zero; | |||
3246 | case Intrinsic::nvvm_suld_3d_i32_zero: | |||
3247 | return NVPTXISD::Suld3DI32Zero; | |||
3248 | case Intrinsic::nvvm_suld_3d_i64_zero: | |||
3249 | return NVPTXISD::Suld3DI64Zero; | |||
3250 | case Intrinsic::nvvm_suld_3d_v2i8_zero: | |||
3251 | return NVPTXISD::Suld3DV2I8Zero; | |||
3252 | case Intrinsic::nvvm_suld_3d_v2i16_zero: | |||
3253 | return NVPTXISD::Suld3DV2I16Zero; | |||
3254 | case Intrinsic::nvvm_suld_3d_v2i32_zero: | |||
3255 | return NVPTXISD::Suld3DV2I32Zero; | |||
3256 | case Intrinsic::nvvm_suld_3d_v2i64_zero: | |||
3257 | return NVPTXISD::Suld3DV2I64Zero; | |||
3258 | case Intrinsic::nvvm_suld_3d_v4i8_zero: | |||
3259 | return NVPTXISD::Suld3DV4I8Zero; | |||
3260 | case Intrinsic::nvvm_suld_3d_v4i16_zero: | |||
3261 | return NVPTXISD::Suld3DV4I16Zero; | |||
3262 | case Intrinsic::nvvm_suld_3d_v4i32_zero: | |||
3263 | return NVPTXISD::Suld3DV4I32Zero; | |||
3264 | } | |||
3265 | } | |||
3266 | ||||
3267 | // llvm.ptx.memcpy.const and llvm.ptx.memmove.const need to be modeled as | |||
3268 | // TgtMemIntrinsic | |||
3269 | // because we need the information that is only available in the "Value" type | |||
3270 | // of the destination | |||
3271 | // pointer, in particular the address space information. | |||
3272 | bool NVPTXTargetLowering::getTgtMemIntrinsic( | |||
3273 | IntrinsicInfo &Info, const CallInst &I, unsigned Intrinsic) const { | |||
3274 | switch (Intrinsic) { | |||
3275 | default: | |||
3276 | return false; | |||
3277 | ||||
3278 | case Intrinsic::nvvm_atomic_load_add_f32: | |||
3279 | case Intrinsic::nvvm_atomic_load_inc_32: | |||
3280 | case Intrinsic::nvvm_atomic_load_dec_32: | |||
3281 | ||||
3282 | case Intrinsic::nvvm_atomic_add_gen_f_cta: | |||
3283 | case Intrinsic::nvvm_atomic_add_gen_f_sys: | |||
3284 | case Intrinsic::nvvm_atomic_add_gen_i_cta: | |||
3285 | case Intrinsic::nvvm_atomic_add_gen_i_sys: | |||
3286 | case Intrinsic::nvvm_atomic_and_gen_i_cta: | |||
3287 | case Intrinsic::nvvm_atomic_and_gen_i_sys: | |||
3288 | case Intrinsic::nvvm_atomic_cas_gen_i_cta: | |||
3289 | case Intrinsic::nvvm_atomic_cas_gen_i_sys: | |||
3290 | case Intrinsic::nvvm_atomic_dec_gen_i_cta: | |||
3291 | case Intrinsic::nvvm_atomic_dec_gen_i_sys: | |||
3292 | case Intrinsic::nvvm_atomic_inc_gen_i_cta: | |||
3293 | case Intrinsic::nvvm_atomic_inc_gen_i_sys: | |||
3294 | case Intrinsic::nvvm_atomic_max_gen_i_cta: | |||
3295 | case Intrinsic::nvvm_atomic_max_gen_i_sys: | |||
3296 | case Intrinsic::nvvm_atomic_min_gen_i_cta: | |||
3297 | case Intrinsic::nvvm_atomic_min_gen_i_sys: | |||
3298 | case Intrinsic::nvvm_atomic_or_gen_i_cta: | |||
3299 | case Intrinsic::nvvm_atomic_or_gen_i_sys: | |||
3300 | case Intrinsic::nvvm_atomic_exch_gen_i_cta: | |||
3301 | case Intrinsic::nvvm_atomic_exch_gen_i_sys: | |||
3302 | case Intrinsic::nvvm_atomic_xor_gen_i_cta: | |||
3303 | case Intrinsic::nvvm_atomic_xor_gen_i_sys: { | |||
3304 | auto &DL = I.getModule()->getDataLayout(); | |||
3305 | Info.opc = ISD::INTRINSIC_W_CHAIN; | |||
3306 | Info.memVT = getValueType(DL, I.getType()); | |||
3307 | Info.ptrVal = I.getArgOperand(0); | |||
3308 | Info.offset = 0; | |||
3309 | Info.vol = 0; | |||
3310 | Info.readMem = true; | |||
3311 | Info.writeMem = true; | |||
3312 | Info.align = 0; | |||
3313 | return true; | |||
3314 | } | |||
3315 | ||||
3316 | case Intrinsic::nvvm_ldu_global_i: | |||
3317 | case Intrinsic::nvvm_ldu_global_f: | |||
3318 | case Intrinsic::nvvm_ldu_global_p: { | |||
3319 | auto &DL = I.getModule()->getDataLayout(); | |||
3320 | Info.opc = ISD::INTRINSIC_W_CHAIN; | |||
3321 | if (Intrinsic == Intrinsic::nvvm_ldu_global_i) | |||
3322 | Info.memVT = getValueType(DL, I.getType()); | |||
3323 | else if(Intrinsic == Intrinsic::nvvm_ldu_global_p) | |||
3324 | Info.memVT = getPointerTy(DL); | |||
3325 | else | |||
3326 | Info.memVT = getValueType(DL, I.getType()); | |||
3327 | Info.ptrVal = I.getArgOperand(0); | |||
3328 | Info.offset = 0; | |||
3329 | Info.vol = 0; | |||
3330 | Info.readMem = true; | |||
3331 | Info.writeMem = false; | |||
3332 | Info.align = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue(); | |||
3333 | ||||
3334 | return true; | |||
3335 | } | |||
3336 | case Intrinsic::nvvm_ldg_global_i: | |||
3337 | case Intrinsic::nvvm_ldg_global_f: | |||
3338 | case Intrinsic::nvvm_ldg_global_p: { | |||
3339 | auto &DL = I.getModule()->getDataLayout(); | |||
3340 | ||||
3341 | Info.opc = ISD::INTRINSIC_W_CHAIN; | |||
3342 | if (Intrinsic == Intrinsic::nvvm_ldg_global_i) | |||
3343 | Info.memVT = getValueType(DL, I.getType()); | |||
3344 | else if(Intrinsic == Intrinsic::nvvm_ldg_global_p) | |||
3345 | Info.memVT = getPointerTy(DL); | |||
3346 | else | |||
3347 | Info.memVT = getValueType(DL, I.getType()); | |||
3348 | Info.ptrVal = I.getArgOperand(0); | |||
3349 | Info.offset = 0; | |||
3350 | Info.vol = 0; | |||
3351 | Info.readMem = true; | |||
3352 | Info.writeMem = false; | |||
3353 | Info.align = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue(); | |||
3354 | ||||
3355 | return true; | |||
3356 | } | |||
3357 | ||||
3358 | case Intrinsic::nvvm_tex_1d_v4f32_s32: | |||
3359 | case Intrinsic::nvvm_tex_1d_v4f32_f32: | |||
3360 | case Intrinsic::nvvm_tex_1d_level_v4f32_f32: | |||
3361 | case Intrinsic::nvvm_tex_1d_grad_v4f32_f32: | |||
3362 | case Intrinsic::nvvm_tex_1d_array_v4f32_s32: | |||
3363 | case Intrinsic::nvvm_tex_1d_array_v4f32_f32: | |||
3364 | case Intrinsic::nvvm_tex_1d_array_level_v4f32_f32: | |||
3365 | case Intrinsic::nvvm_tex_1d_array_grad_v4f32_f32: | |||
3366 | case Intrinsic::nvvm_tex_2d_v4f32_s32: | |||
3367 | case Intrinsic::nvvm_tex_2d_v4f32_f32: | |||
3368 | case Intrinsic::nvvm_tex_2d_level_v4f32_f32: | |||
3369 | case Intrinsic::nvvm_tex_2d_grad_v4f32_f32: | |||
3370 | case Intrinsic::nvvm_tex_2d_array_v4f32_s32: | |||
3371 | case Intrinsic::nvvm_tex_2d_array_v4f32_f32: | |||
3372 | case Intrinsic::nvvm_tex_2d_array_level_v4f32_f32: | |||
3373 | case Intrinsic::nvvm_tex_2d_array_grad_v4f32_f32: | |||
3374 | case Intrinsic::nvvm_tex_3d_v4f32_s32: | |||
3375 | case Intrinsic::nvvm_tex_3d_v4f32_f32: | |||
3376 | case Intrinsic::nvvm_tex_3d_level_v4f32_f32: | |||
3377 | case Intrinsic::nvvm_tex_3d_grad_v4f32_f32: | |||
3378 | case Intrinsic::nvvm_tex_cube_v4f32_f32: | |||
3379 | case Intrinsic::nvvm_tex_cube_level_v4f32_f32: | |||
3380 | case Intrinsic::nvvm_tex_cube_array_v4f32_f32: | |||
3381 | case Intrinsic::nvvm_tex_cube_array_level_v4f32_f32: | |||
3382 | case Intrinsic::nvvm_tld4_r_2d_v4f32_f32: | |||
3383 | case Intrinsic::nvvm_tld4_g_2d_v4f32_f32: | |||
3384 | case Intrinsic::nvvm_tld4_b_2d_v4f32_f32: | |||
3385 | case Intrinsic::nvvm_tld4_a_2d_v4f32_f32: | |||
3386 | case Intrinsic::nvvm_tex_unified_1d_v4f32_s32: | |||
3387 | case Intrinsic::nvvm_tex_unified_1d_v4f32_f32: | |||
3388 | case Intrinsic::nvvm_tex_unified_1d_level_v4f32_f32: | |||
3389 | case Intrinsic::nvvm_tex_unified_1d_grad_v4f32_f32: | |||
3390 | case Intrinsic::nvvm_tex_unified_1d_array_v4f32_s32: | |||
3391 | case Intrinsic::nvvm_tex_unified_1d_array_v4f32_f32: | |||
3392 | case Intrinsic::nvvm_tex_unified_1d_array_level_v4f32_f32: | |||
3393 | case Intrinsic::nvvm_tex_unified_1d_array_grad_v4f32_f32: | |||
3394 | case Intrinsic::nvvm_tex_unified_2d_v4f32_s32: | |||
3395 | case Intrinsic::nvvm_tex_unified_2d_v4f32_f32: | |||
3396 | case Intrinsic::nvvm_tex_unified_2d_level_v4f32_f32: | |||
3397 | case Intrinsic::nvvm_tex_unified_2d_grad_v4f32_f32: | |||
3398 | case Intrinsic::nvvm_tex_unified_2d_array_v4f32_s32: | |||
3399 | case Intrinsic::nvvm_tex_unified_2d_array_v4f32_f32: | |||
3400 | case Intrinsic::nvvm_tex_unified_2d_array_level_v4f32_f32: | |||
3401 | case Intrinsic::nvvm_tex_unified_2d_array_grad_v4f32_f32: | |||
3402 | case Intrinsic::nvvm_tex_unified_3d_v4f32_s32: | |||
3403 | case Intrinsic::nvvm_tex_unified_3d_v4f32_f32: | |||
3404 | case Intrinsic::nvvm_tex_unified_3d_level_v4f32_f32: | |||
3405 | case Intrinsic::nvvm_tex_unified_3d_grad_v4f32_f32: | |||
3406 | case Intrinsic::nvvm_tex_unified_cube_v4f32_f32: | |||
3407 | case Intrinsic::nvvm_tex_unified_cube_level_v4f32_f32: | |||
3408 | case Intrinsic::nvvm_tex_unified_cube_array_v4f32_f32: | |||
3409 | case Intrinsic::nvvm_tex_unified_cube_array_level_v4f32_f32: | |||
3410 | case Intrinsic::nvvm_tld4_unified_r_2d_v4f32_f32: | |||
3411 | case Intrinsic::nvvm_tld4_unified_g_2d_v4f32_f32: | |||
3412 | case Intrinsic::nvvm_tld4_unified_b_2d_v4f32_f32: | |||
3413 | case Intrinsic::nvvm_tld4_unified_a_2d_v4f32_f32: { | |||
3414 | Info.opc = getOpcForTextureInstr(Intrinsic); | |||
3415 | Info.memVT = MVT::v4f32; | |||
3416 | Info.ptrVal = nullptr; | |||
3417 | Info.offset = 0; | |||
3418 | Info.vol = 0; | |||
3419 | Info.readMem = true; | |||
3420 | Info.writeMem = false; | |||
3421 | Info.align = 16; | |||
3422 | return true; | |||
3423 | } | |||
3424 | case Intrinsic::nvvm_tex_1d_v4s32_s32: | |||
3425 | case Intrinsic::nvvm_tex_1d_v4s32_f32: | |||
3426 | case Intrinsic::nvvm_tex_1d_level_v4s32_f32: | |||
3427 | case Intrinsic::nvvm_tex_1d_grad_v4s32_f32: | |||
3428 | case Intrinsic::nvvm_tex_1d_array_v4s32_s32: | |||
3429 | case Intrinsic::nvvm_tex_1d_array_v4s32_f32: | |||
3430 | case Intrinsic::nvvm_tex_1d_array_level_v4s32_f32: | |||
3431 | case Intrinsic::nvvm_tex_1d_array_grad_v4s32_f32: | |||
3432 | case Intrinsic::nvvm_tex_2d_v4s32_s32: | |||
3433 | case Intrinsic::nvvm_tex_2d_v4s32_f32: | |||
3434 | case Intrinsic::nvvm_tex_2d_level_v4s32_f32: | |||
3435 | case Intrinsic::nvvm_tex_2d_grad_v4s32_f32: | |||
3436 | case Intrinsic::nvvm_tex_2d_array_v4s32_s32: | |||
3437 | case Intrinsic::nvvm_tex_2d_array_v4s32_f32: | |||
3438 | case Intrinsic::nvvm_tex_2d_array_level_v4s32_f32: | |||
3439 | case Intrinsic::nvvm_tex_2d_array_grad_v4s32_f32: | |||
3440 | case Intrinsic::nvvm_tex_3d_v4s32_s32: | |||
3441 | case Intrinsic::nvvm_tex_3d_v4s32_f32: | |||
3442 | case Intrinsic::nvvm_tex_3d_level_v4s32_f32: | |||
3443 | case Intrinsic::nvvm_tex_3d_grad_v4s32_f32: | |||
3444 | case Intrinsic::nvvm_tex_cube_v4s32_f32: | |||
3445 | case Intrinsic::nvvm_tex_cube_level_v4s32_f32: | |||
3446 | case Intrinsic::nvvm_tex_cube_array_v4s32_f32: | |||
3447 | case Intrinsic::nvvm_tex_cube_array_level_v4s32_f32: | |||
3448 | case Intrinsic::nvvm_tex_cube_v4u32_f32: | |||
3449 | case Intrinsic::nvvm_tex_cube_level_v4u32_f32: | |||
3450 | case Intrinsic::nvvm_tex_cube_array_v4u32_f32: | |||
3451 | case Intrinsic::nvvm_tex_cube_array_level_v4u32_f32: | |||
3452 | case Intrinsic::nvvm_tex_1d_v4u32_s32: | |||
3453 | case Intrinsic::nvvm_tex_1d_v4u32_f32: | |||
3454 | case Intrinsic::nvvm_tex_1d_level_v4u32_f32: | |||
3455 | case Intrinsic::nvvm_tex_1d_grad_v4u32_f32: | |||
3456 | case Intrinsic::nvvm_tex_1d_array_v4u32_s32: | |||
3457 | case Intrinsic::nvvm_tex_1d_array_v4u32_f32: | |||
3458 | case Intrinsic::nvvm_tex_1d_array_level_v4u32_f32: | |||
3459 | case Intrinsic::nvvm_tex_1d_array_grad_v4u32_f32: | |||
3460 | case Intrinsic::nvvm_tex_2d_v4u32_s32: | |||
3461 | case Intrinsic::nvvm_tex_2d_v4u32_f32: | |||
3462 | case Intrinsic::nvvm_tex_2d_level_v4u32_f32: | |||
3463 | case Intrinsic::nvvm_tex_2d_grad_v4u32_f32: | |||
3464 | case Intrinsic::nvvm_tex_2d_array_v4u32_s32: | |||
3465 | case Intrinsic::nvvm_tex_2d_array_v4u32_f32: | |||
3466 | case Intrinsic::nvvm_tex_2d_array_level_v4u32_f32: | |||
3467 | case Intrinsic::nvvm_tex_2d_array_grad_v4u32_f32: | |||
3468 | case Intrinsic::nvvm_tex_3d_v4u32_s32: | |||
3469 | case Intrinsic::nvvm_tex_3d_v4u32_f32: | |||
3470 | case Intrinsic::nvvm_tex_3d_level_v4u32_f32: | |||
3471 | case Intrinsic::nvvm_tex_3d_grad_v4u32_f32: | |||
3472 | case Intrinsic::nvvm_tld4_r_2d_v4s32_f32: | |||
3473 | case Intrinsic::nvvm_tld4_g_2d_v4s32_f32: | |||
3474 | case Intrinsic::nvvm_tld4_b_2d_v4s32_f32: | |||
3475 | case Intrinsic::nvvm_tld4_a_2d_v4s32_f32: | |||
3476 | case Intrinsic::nvvm_tld4_r_2d_v4u32_f32: | |||
3477 | case Intrinsic::nvvm_tld4_g_2d_v4u32_f32: | |||
3478 | case Intrinsic::nvvm_tld4_b_2d_v4u32_f32: | |||
3479 | case Intrinsic::nvvm_tld4_a_2d_v4u32_f32: | |||
3480 | case Intrinsic::nvvm_tex_unified_1d_v4s32_s32: | |||
3481 | case Intrinsic::nvvm_tex_unified_1d_v4s32_f32: | |||
3482 | case Intrinsic::nvvm_tex_unified_1d_level_v4s32_f32: | |||
3483 | case Intrinsic::nvvm_tex_unified_1d_grad_v4s32_f32: | |||
3484 | case Intrinsic::nvvm_tex_unified_1d_array_v4s32_s32: | |||
3485 | case Intrinsic::nvvm_tex_unified_1d_array_v4s32_f32: | |||
3486 | case Intrinsic::nvvm_tex_unified_1d_array_level_v4s32_f32: | |||
3487 | case Intrinsic::nvvm_tex_unified_1d_array_grad_v4s32_f32: | |||
3488 | case Intrinsic::nvvm_tex_unified_2d_v4s32_s32: | |||
3489 | case Intrinsic::nvvm_tex_unified_2d_v4s32_f32: | |||
3490 | case Intrinsic::nvvm_tex_unified_2d_level_v4s32_f32: | |||
3491 | case Intrinsic::nvvm_tex_unified_2d_grad_v4s32_f32: | |||
3492 | case Intrinsic::nvvm_tex_unified_2d_array_v4s32_s32: | |||
3493 | case Intrinsic::nvvm_tex_unified_2d_array_v4s32_f32: | |||
3494 | case Intrinsic::nvvm_tex_unified_2d_array_level_v4s32_f32: | |||
3495 | case Intrinsic::nvvm_tex_unified_2d_array_grad_v4s32_f32: | |||
3496 | case Intrinsic::nvvm_tex_unified_3d_v4s32_s32: | |||
3497 | case Intrinsic::nvvm_tex_unified_3d_v4s32_f32: | |||
3498 | case Intrinsic::nvvm_tex_unified_3d_level_v4s32_f32: | |||
3499 | case Intrinsic::nvvm_tex_unified_3d_grad_v4s32_f32: | |||
3500 | case Intrinsic::nvvm_tex_unified_1d_v4u32_s32: | |||
3501 | case Intrinsic::nvvm_tex_unified_1d_v4u32_f32: | |||
3502 | case Intrinsic::nvvm_tex_unified_1d_level_v4u32_f32: | |||
3503 | case Intrinsic::nvvm_tex_unified_1d_grad_v4u32_f32: | |||
3504 | case Intrinsic::nvvm_tex_unified_1d_array_v4u32_s32: | |||
3505 | case Intrinsic::nvvm_tex_unified_1d_array_v4u32_f32: | |||
3506 | case Intrinsic::nvvm_tex_unified_1d_array_level_v4u32_f32: | |||
3507 | case Intrinsic::nvvm_tex_unified_1d_array_grad_v4u32_f32: | |||
3508 | case Intrinsic::nvvm_tex_unified_2d_v4u32_s32: | |||
3509 | case Intrinsic::nvvm_tex_unified_2d_v4u32_f32: | |||
3510 | case Intrinsic::nvvm_tex_unified_2d_level_v4u32_f32: | |||
3511 | case Intrinsic::nvvm_tex_unified_2d_grad_v4u32_f32: | |||
3512 | case Intrinsic::nvvm_tex_unified_2d_array_v4u32_s32: | |||
3513 | case Intrinsic::nvvm_tex_unified_2d_array_v4u32_f32: | |||
3514 | case Intrinsic::nvvm_tex_unified_2d_array_level_v4u32_f32: | |||
3515 | case Intrinsic::nvvm_tex_unified_2d_array_grad_v4u32_f32: | |||
3516 | case Intrinsic::nvvm_tex_unified_3d_v4u32_s32: | |||
3517 | case Intrinsic::nvvm_tex_unified_3d_v4u32_f32: | |||
3518 | case Intrinsic::nvvm_tex_unified_3d_level_v4u32_f32: | |||
3519 | case Intrinsic::nvvm_tex_unified_3d_grad_v4u32_f32: | |||
3520 | case Intrinsic::nvvm_tex_unified_cube_v4s32_f32: | |||
3521 | case Intrinsic::nvvm_tex_unified_cube_level_v4s32_f32: | |||
3522 | case Intrinsic::nvvm_tex_unified_cube_array_v4s32_f32: | |||
3523 | case Intrinsic::nvvm_tex_unified_cube_array_level_v4s32_f32: | |||
3524 | case Intrinsic::nvvm_tex_unified_cube_v4u32_f32: | |||
3525 | case Intrinsic::nvvm_tex_unified_cube_level_v4u32_f32: | |||
3526 | case Intrinsic::nvvm_tex_unified_cube_array_v4u32_f32: | |||
3527 | case Intrinsic::nvvm_tex_unified_cube_array_level_v4u32_f32: | |||
3528 | case Intrinsic::nvvm_tld4_unified_r_2d_v4s32_f32: | |||
3529 | case Intrinsic::nvvm_tld4_unified_g_2d_v4s32_f32: | |||
3530 | case Intrinsic::nvvm_tld4_unified_b_2d_v4s32_f32: | |||
3531 | case Intrinsic::nvvm_tld4_unified_a_2d_v4s32_f32: | |||
3532 | case Intrinsic::nvvm_tld4_unified_r_2d_v4u32_f32: | |||
3533 | case Intrinsic::nvvm_tld4_unified_g_2d_v4u32_f32: | |||
3534 | case Intrinsic::nvvm_tld4_unified_b_2d_v4u32_f32: | |||
3535 | case Intrinsic::nvvm_tld4_unified_a_2d_v4u32_f32: { | |||
3536 | Info.opc = getOpcForTextureInstr(Intrinsic); | |||
3537 | Info.memVT = MVT::v4i32; | |||
3538 | Info.ptrVal = nullptr; | |||
3539 | Info.offset = 0; | |||
3540 | Info.vol = 0; | |||
3541 | Info.readMem = true; | |||
3542 | Info.writeMem = false; | |||
3543 | Info.align = 16; | |||
3544 | return true; | |||
3545 | } | |||
3546 | case Intrinsic::nvvm_suld_1d_i8_clamp: | |||
3547 | case Intrinsic::nvvm_suld_1d_v2i8_clamp: | |||
3548 | case Intrinsic::nvvm_suld_1d_v4i8_clamp: | |||
3549 | case Intrinsic::nvvm_suld_1d_array_i8_clamp: | |||
3550 | case Intrinsic::nvvm_suld_1d_array_v2i8_clamp: | |||
3551 | case Intrinsic::nvvm_suld_1d_array_v4i8_clamp: | |||
3552 | case Intrinsic::nvvm_suld_2d_i8_clamp: | |||
3553 | case Intrinsic::nvvm_suld_2d_v2i8_clamp: | |||
3554 | case Intrinsic::nvvm_suld_2d_v4i8_clamp: | |||
3555 | case Intrinsic::nvvm_suld_2d_array_i8_clamp: | |||
3556 | case Intrinsic::nvvm_suld_2d_array_v2i8_clamp: | |||
3557 | case Intrinsic::nvvm_suld_2d_array_v4i8_clamp: | |||
3558 | case Intrinsic::nvvm_suld_3d_i8_clamp: | |||
3559 | case Intrinsic::nvvm_suld_3d_v2i8_clamp: | |||
3560 | case Intrinsic::nvvm_suld_3d_v4i8_clamp: | |||
3561 | case Intrinsic::nvvm_suld_1d_i8_trap: | |||
3562 | case Intrinsic::nvvm_suld_1d_v2i8_trap: | |||
3563 | case Intrinsic::nvvm_suld_1d_v4i8_trap: | |||
3564 | case Intrinsic::nvvm_suld_1d_array_i8_trap: | |||
3565 | case Intrinsic::nvvm_suld_1d_array_v2i8_trap: | |||
3566 | case Intrinsic::nvvm_suld_1d_array_v4i8_trap: | |||
3567 | case Intrinsic::nvvm_suld_2d_i8_trap: | |||
3568 | case Intrinsic::nvvm_suld_2d_v2i8_trap: | |||
3569 | case Intrinsic::nvvm_suld_2d_v4i8_trap: | |||
3570 | case Intrinsic::nvvm_suld_2d_array_i8_trap: | |||
3571 | case Intrinsic::nvvm_suld_2d_array_v2i8_trap: | |||
3572 | case Intrinsic::nvvm_suld_2d_array_v4i8_trap: | |||
3573 | case Intrinsic::nvvm_suld_3d_i8_trap: | |||
3574 | case Intrinsic::nvvm_suld_3d_v2i8_trap: | |||
3575 | case Intrinsic::nvvm_suld_3d_v4i8_trap: | |||
3576 | case Intrinsic::nvvm_suld_1d_i8_zero: | |||
3577 | case Intrinsic::nvvm_suld_1d_v2i8_zero: | |||
3578 | case Intrinsic::nvvm_suld_1d_v4i8_zero: | |||
3579 | case Intrinsic::nvvm_suld_1d_array_i8_zero: | |||
3580 | case Intrinsic::nvvm_suld_1d_array_v2i8_zero: | |||
3581 | case Intrinsic::nvvm_suld_1d_array_v4i8_zero: | |||
3582 | case Intrinsic::nvvm_suld_2d_i8_zero: | |||
3583 | case Intrinsic::nvvm_suld_2d_v2i8_zero: | |||
3584 | case Intrinsic::nvvm_suld_2d_v4i8_zero: | |||
3585 | case Intrinsic::nvvm_suld_2d_array_i8_zero: | |||
3586 | case Intrinsic::nvvm_suld_2d_array_v2i8_zero: | |||
3587 | case Intrinsic::nvvm_suld_2d_array_v4i8_zero: | |||
3588 | case Intrinsic::nvvm_suld_3d_i8_zero: | |||
3589 | case Intrinsic::nvvm_suld_3d_v2i8_zero: | |||
3590 | case Intrinsic::nvvm_suld_3d_v4i8_zero: { | |||
3591 | Info.opc = getOpcForSurfaceInstr(Intrinsic); | |||
3592 | Info.memVT = MVT::i8; | |||
3593 | Info.ptrVal = nullptr; | |||
3594 | Info.offset = 0; | |||
3595 | Info.vol = 0; | |||
3596 | Info.readMem = true; | |||
3597 | Info.writeMem = false; | |||
3598 | Info.align = 16; | |||
3599 | return true; | |||
3600 | } | |||
3601 | case Intrinsic::nvvm_suld_1d_i16_clamp: | |||
3602 | case Intrinsic::nvvm_suld_1d_v2i16_clamp: | |||
3603 | case Intrinsic::nvvm_suld_1d_v4i16_clamp: | |||
3604 | case Intrinsic::nvvm_suld_1d_array_i16_clamp: | |||
3605 | case Intrinsic::nvvm_suld_1d_array_v2i16_clamp: | |||
3606 | case Intrinsic::nvvm_suld_1d_array_v4i16_clamp: | |||
3607 | case Intrinsic::nvvm_suld_2d_i16_clamp: | |||
3608 | case Intrinsic::nvvm_suld_2d_v2i16_clamp: | |||
3609 | case Intrinsic::nvvm_suld_2d_v4i16_clamp: | |||
3610 | case Intrinsic::nvvm_suld_2d_array_i16_clamp: | |||
3611 | case Intrinsic::nvvm_suld_2d_array_v2i16_clamp: | |||
3612 | case Intrinsic::nvvm_suld_2d_array_v4i16_clamp: | |||
3613 | case Intrinsic::nvvm_suld_3d_i16_clamp: | |||
3614 | case Intrinsic::nvvm_suld_3d_v2i16_clamp: | |||
3615 | case Intrinsic::nvvm_suld_3d_v4i16_clamp: | |||
3616 | case Intrinsic::nvvm_suld_1d_i16_trap: | |||
3617 | case Intrinsic::nvvm_suld_1d_v2i16_trap: | |||
3618 | case Intrinsic::nvvm_suld_1d_v4i16_trap: | |||
3619 | case Intrinsic::nvvm_suld_1d_array_i16_trap: | |||
3620 | case Intrinsic::nvvm_suld_1d_array_v2i16_trap: | |||
3621 | case Intrinsic::nvvm_suld_1d_array_v4i16_trap: | |||
3622 | case Intrinsic::nvvm_suld_2d_i16_trap: | |||
3623 | case Intrinsic::nvvm_suld_2d_v2i16_trap: | |||
3624 | case Intrinsic::nvvm_suld_2d_v4i16_trap: | |||
3625 | case Intrinsic::nvvm_suld_2d_array_i16_trap: | |||
3626 | case Intrinsic::nvvm_suld_2d_array_v2i16_trap: | |||
3627 | case Intrinsic::nvvm_suld_2d_array_v4i16_trap: | |||
3628 | case Intrinsic::nvvm_suld_3d_i16_trap: | |||
3629 | case Intrinsic::nvvm_suld_3d_v2i16_trap: | |||
3630 | case Intrinsic::nvvm_suld_3d_v4i16_trap: | |||
3631 | case Intrinsic::nvvm_suld_1d_i16_zero: | |||
3632 | case Intrinsic::nvvm_suld_1d_v2i16_zero: | |||
3633 | case Intrinsic::nvvm_suld_1d_v4i16_zero: | |||
3634 | case Intrinsic::nvvm_suld_1d_array_i16_zero: | |||
3635 | case Intrinsic::nvvm_suld_1d_array_v2i16_zero: | |||
3636 | case Intrinsic::nvvm_suld_1d_array_v4i16_zero: | |||
3637 | case Intrinsic::nvvm_suld_2d_i16_zero: | |||
3638 | case Intrinsic::nvvm_suld_2d_v2i16_zero: | |||
3639 | case Intrinsic::nvvm_suld_2d_v4i16_zero: | |||
3640 | case Intrinsic::nvvm_suld_2d_array_i16_zero: | |||
3641 | case Intrinsic::nvvm_suld_2d_array_v2i16_zero: | |||
3642 | case Intrinsic::nvvm_suld_2d_array_v4i16_zero: | |||
3643 | case Intrinsic::nvvm_suld_3d_i16_zero: | |||
3644 | case Intrinsic::nvvm_suld_3d_v2i16_zero: | |||
3645 | case Intrinsic::nvvm_suld_3d_v4i16_zero: { | |||
3646 | Info.opc = getOpcForSurfaceInstr(Intrinsic); | |||
3647 | Info.memVT = MVT::i16; | |||
3648 | Info.ptrVal = nullptr; | |||
3649 | Info.offset = 0; | |||
3650 | Info.vol = 0; | |||
3651 | Info.readMem = true; | |||
3652 | Info.writeMem = false; | |||
3653 | Info.align = 16; | |||
3654 | return true; | |||
3655 | } | |||
3656 | case Intrinsic::nvvm_suld_1d_i32_clamp: | |||
3657 | case Intrinsic::nvvm_suld_1d_v2i32_clamp: | |||
3658 | case Intrinsic::nvvm_suld_1d_v4i32_clamp: | |||
3659 | case Intrinsic::nvvm_suld_1d_array_i32_clamp: | |||
3660 | case Intrinsic::nvvm_suld_1d_array_v2i32_clamp: | |||
3661 | case Intrinsic::nvvm_suld_1d_array_v4i32_clamp: | |||
3662 | case Intrinsic::nvvm_suld_2d_i32_clamp: | |||
3663 | case Intrinsic::nvvm_suld_2d_v2i32_clamp: | |||
3664 | case Intrinsic::nvvm_suld_2d_v4i32_clamp: | |||
3665 | case Intrinsic::nvvm_suld_2d_array_i32_clamp: | |||
3666 | case Intrinsic::nvvm_suld_2d_array_v2i32_clamp: | |||
3667 | case Intrinsic::nvvm_suld_2d_array_v4i32_clamp: | |||
3668 | case Intrinsic::nvvm_suld_3d_i32_clamp: | |||
3669 | case Intrinsic::nvvm_suld_3d_v2i32_clamp: | |||
3670 | case Intrinsic::nvvm_suld_3d_v4i32_clamp: | |||
3671 | case Intrinsic::nvvm_suld_1d_i32_trap: | |||
3672 | case Intrinsic::nvvm_suld_1d_v2i32_trap: | |||
3673 | case Intrinsic::nvvm_suld_1d_v4i32_trap: | |||
3674 | case Intrinsic::nvvm_suld_1d_array_i32_trap: | |||
3675 | case Intrinsic::nvvm_suld_1d_array_v2i32_trap: | |||
3676 | case Intrinsic::nvvm_suld_1d_array_v4i32_trap: | |||
3677 | case Intrinsic::nvvm_suld_2d_i32_trap: | |||
3678 | case Intrinsic::nvvm_suld_2d_v2i32_trap: | |||
3679 | case Intrinsic::nvvm_suld_2d_v4i32_trap: | |||
3680 | case Intrinsic::nvvm_suld_2d_array_i32_trap: | |||
3681 | case Intrinsic::nvvm_suld_2d_array_v2i32_trap: | |||
3682 | case Intrinsic::nvvm_suld_2d_array_v4i32_trap: | |||
3683 | case Intrinsic::nvvm_suld_3d_i32_trap: | |||
3684 | case Intrinsic::nvvm_suld_3d_v2i32_trap: | |||
3685 | case Intrinsic::nvvm_suld_3d_v4i32_trap: | |||
3686 | case Intrinsic::nvvm_suld_1d_i32_zero: | |||
3687 | case Intrinsic::nvvm_suld_1d_v2i32_zero: | |||
3688 | case Intrinsic::nvvm_suld_1d_v4i32_zero: | |||
3689 | case Intrinsic::nvvm_suld_1d_array_i32_zero: | |||
3690 | case Intrinsic::nvvm_suld_1d_array_v2i32_zero: | |||
3691 | case Intrinsic::nvvm_suld_1d_array_v4i32_zero: | |||
3692 | case Intrinsic::nvvm_suld_2d_i32_zero: | |||
3693 | case Intrinsic::nvvm_suld_2d_v2i32_zero: | |||
3694 | case Intrinsic::nvvm_suld_2d_v4i32_zero: | |||
3695 | case Intrinsic::nvvm_suld_2d_array_i32_zero: | |||
3696 | case Intrinsic::nvvm_suld_2d_array_v2i32_zero: | |||
3697 | case Intrinsic::nvvm_suld_2d_array_v4i32_zero: | |||
3698 | case Intrinsic::nvvm_suld_3d_i32_zero: | |||
3699 | case Intrinsic::nvvm_suld_3d_v2i32_zero: | |||
3700 | case Intrinsic::nvvm_suld_3d_v4i32_zero: { | |||
3701 | Info.opc = getOpcForSurfaceInstr(Intrinsic); | |||
3702 | Info.memVT = MVT::i32; | |||
3703 | Info.ptrVal = nullptr; | |||
3704 | Info.offset = 0; | |||
3705 | Info.vol = 0; | |||
3706 | Info.readMem = true; | |||
3707 | Info.writeMem = false; | |||
3708 | Info.align = 16; | |||
3709 | return true; | |||
3710 | } | |||
3711 | case Intrinsic::nvvm_suld_1d_i64_clamp: | |||
3712 | case Intrinsic::nvvm_suld_1d_v2i64_clamp: | |||
3713 | case Intrinsic::nvvm_suld_1d_array_i64_clamp: | |||
3714 | case Intrinsic::nvvm_suld_1d_array_v2i64_clamp: | |||
3715 | case Intrinsic::nvvm_suld_2d_i64_clamp: | |||
3716 | case Intrinsic::nvvm_suld_2d_v2i64_clamp: | |||
3717 | case Intrinsic::nvvm_suld_2d_array_i64_clamp: | |||
3718 | case Intrinsic::nvvm_suld_2d_array_v2i64_clamp: | |||
3719 | case Intrinsic::nvvm_suld_3d_i64_clamp: | |||
3720 | case Intrinsic::nvvm_suld_3d_v2i64_clamp: | |||
3721 | case Intrinsic::nvvm_suld_1d_i64_trap: | |||
3722 | case Intrinsic::nvvm_suld_1d_v2i64_trap: | |||
3723 | case Intrinsic::nvvm_suld_1d_array_i64_trap: | |||
3724 | case Intrinsic::nvvm_suld_1d_array_v2i64_trap: | |||
3725 | case Intrinsic::nvvm_suld_2d_i64_trap: | |||
3726 | case Intrinsic::nvvm_suld_2d_v2i64_trap: | |||
3727 | case Intrinsic::nvvm_suld_2d_array_i64_trap: | |||
3728 | case Intrinsic::nvvm_suld_2d_array_v2i64_trap: | |||
3729 | case Intrinsic::nvvm_suld_3d_i64_trap: | |||
3730 | case Intrinsic::nvvm_suld_3d_v2i64_trap: | |||
3731 | case Intrinsic::nvvm_suld_1d_i64_zero: | |||
3732 | case Intrinsic::nvvm_suld_1d_v2i64_zero: | |||
3733 | case Intrinsic::nvvm_suld_1d_array_i64_zero: | |||
3734 | case Intrinsic::nvvm_suld_1d_array_v2i64_zero: | |||
3735 | case Intrinsic::nvvm_suld_2d_i64_zero: | |||
3736 | case Intrinsic::nvvm_suld_2d_v2i64_zero: | |||
3737 | case Intrinsic::nvvm_suld_2d_array_i64_zero: | |||
3738 | case Intrinsic::nvvm_suld_2d_array_v2i64_zero: | |||
3739 | case Intrinsic::nvvm_suld_3d_i64_zero: | |||
3740 | case Intrinsic::nvvm_suld_3d_v2i64_zero: { | |||
3741 | Info.opc = getOpcForSurfaceInstr(Intrinsic); | |||
3742 | Info.memVT = MVT::i64; | |||
3743 | Info.ptrVal = nullptr; | |||
3744 | Info.offset = 0; | |||
3745 | Info.vol = 0; | |||
3746 | Info.readMem = true; | |||
3747 | Info.writeMem = false; | |||
3748 | Info.align = 16; | |||
3749 | return true; | |||
3750 | } | |||
3751 | } | |||
3752 | return false; | |||
3753 | } | |||
3754 | ||||
3755 | /// isLegalAddressingMode - Return true if the addressing mode represented | |||
3756 | /// by AM is legal for this target, for a load/store of the specified type. | |||
3757 | /// Used to guide target specific optimizations, like loop strength reduction | |||
3758 | /// (LoopStrengthReduce.cpp) and memory optimization for address mode | |||
3759 | /// (CodeGenPrepare.cpp) | |||
3760 | bool NVPTXTargetLowering::isLegalAddressingMode(const DataLayout &DL, | |||
3761 | const AddrMode &AM, Type *Ty, | |||
3762 | unsigned AS) const { | |||
3763 | ||||
3764 | // AddrMode - This represents an addressing mode of: | |||
3765 | // BaseGV + BaseOffs + BaseReg + Scale*ScaleReg | |||
3766 | // | |||
3767 | // The legal address modes are | |||
3768 | // - [avar] | |||
3769 | // - [areg] | |||
3770 | // - [areg+immoff] | |||
3771 | // - [immAddr] | |||
3772 | ||||
3773 | if (AM.BaseGV) { | |||
3774 | return !AM.BaseOffs && !AM.HasBaseReg && !AM.Scale; | |||
3775 | } | |||
3776 | ||||
3777 | switch (AM.Scale) { | |||
3778 | case 0: // "r", "r+i" or "i" is allowed | |||
3779 | break; | |||
3780 | case 1: | |||
3781 | if (AM.HasBaseReg) // "r+r+i" or "r+r" is not allowed. | |||
3782 | return false; | |||
3783 | // Otherwise we have r+i. | |||
3784 | break; | |||
3785 | default: | |||
3786 | // No scale > 1 is allowed | |||
3787 | return false; | |||
3788 | } | |||
3789 | return true; | |||
3790 | } | |||
3791 | ||||
3792 | //===----------------------------------------------------------------------===// | |||
3793 | // NVPTX Inline Assembly Support | |||
3794 | //===----------------------------------------------------------------------===// | |||
3795 | ||||
3796 | /// getConstraintType - Given a constraint letter, return the type of | |||
3797 | /// constraint it is for this target. | |||
3798 | NVPTXTargetLowering::ConstraintType | |||
3799 | NVPTXTargetLowering::getConstraintType(StringRef Constraint) const { | |||
3800 | if (Constraint.size() == 1) { | |||
3801 | switch (Constraint[0]) { | |||
3802 | default: | |||
3803 | break; | |||
3804 | case 'b': | |||
3805 | case 'r': | |||
3806 | case 'h': | |||
3807 | case 'c': | |||
3808 | case 'l': | |||
3809 | case 'f': | |||
3810 | case 'd': | |||
3811 | case '0': | |||
3812 | case 'N': | |||
3813 | return C_RegisterClass; | |||
3814 | } | |||
3815 | } | |||
3816 | return TargetLowering::getConstraintType(Constraint); | |||
3817 | } | |||
3818 | ||||
3819 | std::pair<unsigned, const TargetRegisterClass *> | |||
3820 | NVPTXTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, | |||
3821 | StringRef Constraint, | |||
3822 | MVT VT) const { | |||
3823 | if (Constraint.size() == 1) { | |||
3824 | switch (Constraint[0]) { | |||
3825 | case 'b': | |||
3826 | return std::make_pair(0U, &NVPTX::Int1RegsRegClass); | |||
3827 | case 'c': | |||
3828 | return std::make_pair(0U, &NVPTX::Int16RegsRegClass); | |||
3829 | case 'h': | |||
3830 | return std::make_pair(0U, &NVPTX::Int16RegsRegClass); | |||
3831 | case 'r': | |||
3832 | return std::make_pair(0U, &NVPTX::Int32RegsRegClass); | |||
3833 | case 'l': | |||
3834 | case 'N': | |||
3835 | return std::make_pair(0U, &NVPTX::Int64RegsRegClass); | |||
3836 | case 'f': | |||
3837 | return std::make_pair(0U, &NVPTX::Float32RegsRegClass); | |||
3838 | case 'd': | |||
3839 | return std::make_pair(0U, &NVPTX::Float64RegsRegClass); | |||
3840 | } | |||
3841 | } | |||
3842 | return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); | |||
3843 | } | |||
3844 | ||||
3845 | //===----------------------------------------------------------------------===// | |||
3846 | // NVPTX DAG Combining | |||
3847 | //===----------------------------------------------------------------------===// | |||
3848 | ||||
3849 | bool NVPTXTargetLowering::allowFMA(MachineFunction &MF, | |||
3850 | CodeGenOpt::Level OptLevel) const { | |||
3851 | const Function *F = MF.getFunction(); | |||
3852 | const TargetOptions &TO = MF.getTarget().Options; | |||
3853 | ||||
3854 | // Always honor command-line argument | |||
3855 | if (FMAContractLevelOpt.getNumOccurrences() > 0) { | |||
3856 | return FMAContractLevelOpt > 0; | |||
3857 | } else if (OptLevel == 0) { | |||
3858 | // Do not contract if we're not optimizing the code | |||
3859 | return false; | |||
3860 | } else if (TO.AllowFPOpFusion == FPOpFusion::Fast || TO.UnsafeFPMath) { | |||
3861 | // Honor TargetOptions flags that explicitly say fusion is okay | |||
3862 | return true; | |||
3863 | } else if (F->hasFnAttribute("unsafe-fp-math")) { | |||
3864 | // Check for unsafe-fp-math=true coming from Clang | |||
3865 | Attribute Attr = F->getFnAttribute("unsafe-fp-math"); | |||
3866 | StringRef Val = Attr.getValueAsString(); | |||
3867 | if (Val == "true") | |||
3868 | return true; | |||
3869 | } | |||
3870 | ||||
3871 | // We did not have a clear indication that fusion is allowed, so assume not | |||
3872 | return false; | |||
3873 | } | |||
3874 | ||||
3875 | /// PerformADDCombineWithOperands - Try DAG combinations for an ADD with | |||
3876 | /// operands N0 and N1. This is a helper for PerformADDCombine that is | |||
3877 | /// called with the default operands, and if that fails, with commuted | |||
3878 | /// operands. | |||
3879 | static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1, | |||
3880 | TargetLowering::DAGCombinerInfo &DCI, | |||
3881 | const NVPTXSubtarget &Subtarget, | |||
3882 | CodeGenOpt::Level OptLevel) { | |||
3883 | SelectionDAG &DAG = DCI.DAG; | |||
3884 | // Skip non-integer, non-scalar case | |||
3885 | EVT VT=N0.getValueType(); | |||
3886 | if (VT.isVector()) | |||
3887 | return SDValue(); | |||
3888 | ||||
3889 | // fold (add (mul a, b), c) -> (mad a, b, c) | |||
3890 | // | |||
3891 | if (N0.getOpcode() == ISD::MUL) { | |||
3892 | assert (VT.isInteger())((VT.isInteger()) ? static_cast<void> (0) : __assert_fail ("VT.isInteger()", "/tmp/buildd/llvm-toolchain-snapshot-4.0~svn290870/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 3892, __PRETTY_FUNCTION__)); | |||
3893 | // For integer: | |||
3894 | // Since integer multiply-add costs the same as integer multiply | |||
3895 | // but is more costly than integer add, do the fusion only when | |||
3896 | // the mul is only used in the add. | |||
3897 | if (OptLevel==CodeGenOpt::None || VT != MVT::i32 || | |||
3898 | !N0.getNode()->hasOneUse()) | |||
3899 | return SDValue(); | |||
3900 | ||||
3901 | // Do the folding | |||
3902 | return DAG.getNode(NVPTXISD::IMAD, SDLoc(N), VT, | |||
3903 | N0.getOperand(0), N0.getOperand(1), N1); | |||
3904 | } | |||
3905 | else if (N0.getOpcode() == ISD::FMUL) { | |||
3906 | if (VT == MVT::f32 || VT == MVT::f64) { | |||
3907 | const auto *TLI = static_cast<const NVPTXTargetLowering *>( | |||
3908 | &DAG.getTargetLoweringInfo()); | |||
3909 | if (!TLI->allowFMA(DAG.getMachineFunction(), OptLevel)) | |||
3910 | return SDValue(); | |||
3911 | ||||
3912 | // For floating point: | |||
3913 | // Do the fusion only when the mul has less than 5 uses and all | |||
3914 | // are add. | |||
3915 | // The heuristic is that if a use is not an add, then that use | |||
3916 | // cannot be fused into fma, therefore mul is still needed anyway. | |||
3917 | // If there are more than 4 uses, even if they are all add, fusing | |||
3918 | // them will increase register pressue. | |||
3919 | // | |||
3920 | int numUses = 0; | |||
3921 | int nonAddCount = 0; | |||
3922 | for (SDNode::use_iterator UI = N0.getNode()->use_begin(), | |||
3923 | UE = N0.getNode()->use_end(); | |||
3924 | UI != UE; ++UI) { | |||
3925 | numUses++; | |||
3926 | SDNode *User = *UI; | |||
3927 | if (User->getOpcode() != ISD::FADD) | |||
3928 | ++nonAddCount; | |||
3929 | } | |||
3930 | if (numUses >= 5) | |||
3931 | return SDValue(); | |||
3932 | if (nonAddCount) { | |||
3933 | int orderNo = N->getIROrder(); | |||
3934 | int orderNo2 = N0.getNode()->getIROrder(); | |||
3935 | // simple heuristics here for considering potential register | |||
3936 | // pressure, the logics here is that the differnce are used | |||
3937 | // to measure the distance between def and use, the longer distance | |||
3938 | // more likely cause register pressure. | |||
3939 | if (orderNo - orderNo2 < 500) | |||
3940 | return SDValue(); | |||
3941 | ||||
3942 | // Now, check if at least one of the FMUL's operands is live beyond the node N, | |||
3943 | // which guarantees that the FMA will not increase register pressure at node N. | |||
3944 | bool opIsLive = false; | |||
3945 | const SDNode *left = N0.getOperand(0).getNode(); | |||
3946 | const SDNode *right = N0.getOperand(1).getNode(); | |||
3947 | ||||
3948 | if (isa<ConstantSDNode>(left) || isa<ConstantSDNode>(right)) | |||
3949 | opIsLive = true; | |||
3950 | ||||
3951 | if (!opIsLive) | |||
3952 | for (SDNode::use_iterator UI = left->use_begin(), UE = left->use_end(); UI != UE; ++UI) { | |||
3953 | SDNode *User = *UI; | |||
3954 | int orderNo3 = User->getIROrder(); | |||
3955 | if (orderNo3 > orderNo) { | |||
3956 | opIsLive = true; | |||
3957 | break; | |||
3958 | } | |||
3959 | } | |||
3960 | ||||
3961 | if (!opIsLive) | |||
3962 | for (SDNode::use_iterator UI = right->use_begin(), UE = right->use_end(); UI != UE; ++UI) { | |||
3963 | SDNode *User = *UI; | |||
3964 | int orderNo3 = User->getIROrder(); | |||
3965 | if (orderNo3 > orderNo) { | |||
3966 | opIsLive = true; | |||
3967 | break; | |||
3968 | } | |||
3969 | } | |||
3970 | ||||
3971 | if (!opIsLive) | |||
3972 | return SDValue(); | |||
3973 | } | |||
3974 | ||||
3975 | return DAG.getNode(ISD::FMA, SDLoc(N), VT, | |||
3976 | N0.getOperand(0), N0.getOperand(1), N1); | |||
3977 | } | |||
3978 | } | |||
3979 | ||||
3980 | return SDValue(); | |||
3981 | } | |||
3982 | ||||
3983 | /// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD. | |||
3984 | /// | |||
3985 | static SDValue PerformADDCombine(SDNode *N, | |||
3986 | TargetLowering::DAGCombinerInfo &DCI, | |||
3987 | const NVPTXSubtarget &Subtarget, | |||
3988 | CodeGenOpt::Level OptLevel) { | |||
3989 | SDValue N0 = N->getOperand(0); | |||
3990 | SDValue N1 = N->getOperand(1); | |||
3991 | ||||
3992 | // First try with the default operand order. | |||
3993 | if (SDValue Result = | |||
3994 | PerformADDCombineWithOperands(N, N0, N1, DCI, Subtarget, OptLevel)) | |||
3995 | return Result; | |||
3996 | ||||
3997 | // If that didn't work, try again with the operands commuted. | |||
3998 | return PerformADDCombineWithOperands(N, N1, N0, DCI, Subtarget, OptLevel); | |||
3999 | } | |||
4000 | ||||
4001 | static SDValue PerformANDCombine(SDNode *N, | |||
4002 | TargetLowering::DAGCombinerInfo &DCI) { | |||
4003 | // The type legalizer turns a vector load of i8 values into a zextload to i16 | |||
4004 | // registers, optionally ANY_EXTENDs it (if target type is integer), | |||
4005 | // and ANDs off the high 8 bits. Since we turn this load into a | |||
4006 | // target-specific DAG node, the DAG combiner fails to eliminate these AND | |||
4007 | // nodes. Do that here. | |||
4008 | SDValue Val = N->getOperand(0); | |||
4009 | SDValue Mask = N->getOperand(1); | |||
4010 | ||||
4011 | if (isa<ConstantSDNode>(Val)) { | |||
4012 | std::swap(Val, Mask); | |||
4013 | } | |||
4014 | ||||
4015 | SDValue AExt; | |||
4016 | // Generally, we will see zextload -> IMOV16rr -> ANY_EXTEND -> and | |||
4017 | if (Val.getOpcode() == ISD::ANY_EXTEND) { | |||
4018 | AExt = Val; | |||
4019 | Val = Val->getOperand(0); | |||
4020 | } | |||
4021 | ||||
4022 | if (Val->isMachineOpcode() && Val->getMachineOpcode() == NVPTX::IMOV16rr) { | |||
4023 | Val = Val->getOperand(0); | |||
4024 | } | |||
4025 | ||||
4026 | if (Val->getOpcode() == NVPTXISD::LoadV2 || | |||
4027 | Val->getOpcode() == NVPTXISD::LoadV4) { | |||
4028 | ConstantSDNode *MaskCnst = dyn_cast<ConstantSDNode>(Mask); | |||
4029 | if (!MaskCnst) { | |||
4030 | // Not an AND with a constant | |||
4031 | return SDValue(); | |||
4032 | } | |||
4033 | ||||
4034 | uint64_t MaskVal = MaskCnst->getZExtValue(); | |||
4035 | if (MaskVal != 0xff) { | |||
4036 | // Not an AND that chops off top 8 bits | |||
4037 | return SDValue(); | |||
4038 | } | |||
4039 | ||||
4040 | MemSDNode *Mem = dyn_cast<MemSDNode>(Val); | |||
4041 | if (!Mem) { | |||
4042 | // Not a MemSDNode?!? | |||
4043 | return SDValue(); | |||
4044 | } | |||
4045 | ||||
4046 | EVT MemVT = Mem->getMemoryVT(); | |||
4047 | if (MemVT != MVT::v2i8 && MemVT != MVT::v4i8) { | |||
4048 | // We only handle the i8 case | |||
4049 | return SDValue(); | |||
4050 | } | |||
4051 | ||||
4052 | unsigned ExtType = | |||
4053 | cast<ConstantSDNode>(Val->getOperand(Val->getNumOperands()-1))-> | |||
4054 | getZExtValue(); | |||
4055 | if (ExtType == ISD::SEXTLOAD) { | |||
4056 | // If for some reason the load is a sextload, the and is needed to zero | |||
4057 | // out the high 8 bits | |||
4058 | return SDValue(); | |||
4059 | } | |||
4060 | ||||
4061 | bool AddTo = false; | |||
4062 | if (AExt.getNode() != 0) { | |||
4063 | // Re-insert the ext as a zext. | |||
4064 | Val = DCI.DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), | |||
4065 | AExt.getValueType(), Val); | |||
4066 | AddTo = true; | |||
4067 | } | |||
4068 | ||||
4069 | // If we get here, the AND is unnecessary. Just replace it with the load | |||
4070 | DCI.CombineTo(N, Val, AddTo); | |||
4071 | } | |||
4072 | ||||
4073 | return SDValue(); | |||
4074 | } | |||
4075 | ||||
4076 | static SDValue PerformSELECTCombine(SDNode *N, | |||
4077 | TargetLowering::DAGCombinerInfo &DCI) { | |||
4078 | // Currently this detects patterns for integer min and max and | |||
4079 | // lowers them to PTX-specific intrinsics that enable hardware | |||
4080 | // support. | |||
4081 | ||||
4082 | const SDValue Cond = N->getOperand(0); | |||
4083 | if (Cond.getOpcode() != ISD::SETCC) return SDValue(); | |||
4084 | ||||
4085 | const SDValue LHS = Cond.getOperand(0); | |||
4086 | const SDValue RHS = Cond.getOperand(1); | |||
4087 | const SDValue True = N->getOperand(1); | |||
4088 | const SDValue False = N->getOperand(2); | |||
4089 | if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True)) | |||
4090 | return SDValue(); | |||
4091 | ||||
4092 | const EVT VT = N->getValueType(0); | |||
4093 | if (VT != MVT::i32 && VT != MVT::i64) return SDValue(); | |||
4094 | ||||
4095 | const ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get(); | |||
4096 | SDValue Larger; // The larger of LHS and RHS when condition is true. | |||
4097 | switch (CC) { | |||
4098 | case ISD::SETULT: | |||
4099 | case ISD::SETULE: | |||
4100 | case ISD::SETLT: | |||
4101 | case ISD::SETLE: | |||
4102 | Larger = RHS; | |||
4103 | break; | |||
4104 | ||||
4105 | case ISD::SETGT: | |||
4106 | case ISD::SETGE: | |||
4107 | case ISD::SETUGT: | |||
4108 | case ISD::SETUGE: | |||
4109 | Larger = LHS; | |||
4110 | break; | |||
4111 | ||||
4112 | default: | |||
4113 | return SDValue(); | |||
4114 | } | |||
4115 | const bool IsMax = (Larger == True); | |||
4116 | const bool IsSigned = ISD::isSignedIntSetCC(CC); | |||
4117 | ||||
4118 | unsigned IntrinsicId; | |||
4119 | if (VT == MVT::i32) { | |||
4120 | if (IsSigned) | |||
4121 | IntrinsicId = IsMax ? Intrinsic::nvvm_max_i : Intrinsic::nvvm_min_i; | |||
4122 | else | |||
4123 | IntrinsicId = IsMax ? Intrinsic::nvvm_max_ui : Intrinsic::nvvm_min_ui; | |||
4124 | } else { | |||
4125 | assert(VT == MVT::i64)((VT == MVT::i64) ? static_cast<void> (0) : __assert_fail ("VT == MVT::i64", "/tmp/buildd/llvm-toolchain-snapshot-4.0~svn290870/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 4125, __PRETTY_FUNCTION__)); | |||
4126 | if (IsSigned) | |||
4127 | IntrinsicId = IsMax ? Intrinsic::nvvm_max_ll : Intrinsic::nvvm_min_ll; | |||
4128 | else | |||
4129 | IntrinsicId = IsMax ? Intrinsic::nvvm_max_ull : Intrinsic::nvvm_min_ull; | |||
4130 | } | |||
4131 | ||||
4132 | SDLoc DL(N); | |||
4133 | return DCI.DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT, | |||
4134 | DCI.DAG.getConstant(IntrinsicId, DL, VT), LHS, RHS); | |||
4135 | } | |||
4136 | ||||
4137 | static SDValue PerformREMCombine(SDNode *N, | |||
4138 | TargetLowering::DAGCombinerInfo &DCI, | |||
4139 | CodeGenOpt::Level OptLevel) { | |||
4140 | assert(N->getOpcode() == ISD::SREM || N->getOpcode() == ISD::UREM)((N->getOpcode() == ISD::SREM || N->getOpcode() == ISD:: UREM) ? static_cast<void> (0) : __assert_fail ("N->getOpcode() == ISD::SREM || N->getOpcode() == ISD::UREM" , "/tmp/buildd/llvm-toolchain-snapshot-4.0~svn290870/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 4140, __PRETTY_FUNCTION__)); | |||
4141 | ||||
4142 | // Don't do anything at less than -O2. | |||
4143 | if (OptLevel < CodeGenOpt::Default) | |||
4144 | return SDValue(); | |||
4145 | ||||
4146 | SelectionDAG &DAG = DCI.DAG; | |||
4147 | SDLoc DL(N); | |||
4148 | EVT VT = N->getValueType(0); | |||
4149 | bool IsSigned = N->getOpcode() == ISD::SREM; | |||
4150 | unsigned DivOpc = IsSigned ? ISD::SDIV : ISD::UDIV; | |||
4151 | ||||
4152 | const SDValue &Num = N->getOperand(0); | |||
4153 | const SDValue &Den = N->getOperand(1); | |||
4154 | ||||
4155 | for (const SDNode *U : Num->uses()) { | |||
4156 | if (U->getOpcode() == DivOpc && U->getOperand(0) == Num && | |||
4157 | U->getOperand(1) == Den) { | |||
4158 | // Num % Den -> Num - (Num / Den) * Den | |||
4159 | return DAG.getNode(ISD::SUB, DL, VT, Num, | |||
4160 | DAG.getNode(ISD::MUL, DL, VT, | |||
4161 | DAG.getNode(DivOpc, DL, VT, Num, Den), | |||
4162 | Den)); | |||
4163 | } | |||
4164 | } | |||
4165 | return SDValue(); | |||
4166 | } | |||
4167 | ||||
/// Signedness classification of a multiply operand, as reported by the
/// mul.wide demotion helpers.
enum OperandSignedness {
  Signed = 0,   ///< Operand is known to be sign-extended.
  Unsigned = 1, ///< Operand is known to be zero-extended.
  Unknown = 2   ///< Signedness could not be determined.
};
4173 | ||||
4174 | /// IsMulWideOperandDemotable - Checks if the provided DAG node is an operand | |||
4175 | /// that can be demoted to \p OptSize bits without loss of information. The | |||
4176 | /// signedness of the operand, if determinable, is placed in \p S. | |||
4177 | static bool IsMulWideOperandDemotable(SDValue Op, | |||
4178 | unsigned OptSize, | |||
4179 | OperandSignedness &S) { | |||
4180 | S = Unknown; | |||
4181 | ||||
4182 | if (Op.getOpcode() == ISD::SIGN_EXTEND || | |||
4183 | Op.getOpcode() == ISD::SIGN_EXTEND_INREG) { | |||
4184 | EVT OrigVT = Op.getOperand(0).getValueType(); | |||
4185 | if (OrigVT.getSizeInBits() <= OptSize) { | |||
4186 | S = Signed; | |||
4187 | return true; | |||
4188 | } | |||
4189 | } else if (Op.getOpcode() == ISD::ZERO_EXTEND) { | |||
4190 | EVT OrigVT = Op.getOperand(0).getValueType(); | |||
4191 | if (OrigVT.getSizeInBits() <= OptSize) { | |||
4192 | S = Unsigned; | |||
4193 | return true; | |||
4194 | } | |||
4195 | } | |||
4196 | ||||
4197 | return false; | |||
4198 | } | |||
4199 | ||||
4200 | /// AreMulWideOperandsDemotable - Checks if the given LHS and RHS operands can | |||
4201 | /// be demoted to \p OptSize bits without loss of information. If the operands | |||
4202 | /// contain a constant, it should appear as the RHS operand. The signedness of | |||
4203 | /// the operands is placed in \p IsSigned. | |||
4204 | static bool AreMulWideOperandsDemotable(SDValue LHS, SDValue RHS, | |||
4205 | unsigned OptSize, | |||
4206 | bool &IsSigned) { | |||
4207 | ||||
4208 | OperandSignedness LHSSign; | |||
4209 | ||||
4210 | // The LHS operand must be a demotable op | |||
4211 | if (!IsMulWideOperandDemotable(LHS, OptSize, LHSSign)) | |||
4212 | return false; | |||
4213 | ||||
4214 | // We should have been able to determine the signedness from the LHS | |||
4215 | if (LHSSign == Unknown) | |||
4216 | return false; | |||
4217 | ||||
4218 | IsSigned = (LHSSign == Signed); | |||
4219 | ||||
4220 | // The RHS can be a demotable op or a constant | |||
4221 | if (ConstantSDNode *CI = dyn_cast<ConstantSDNode>(RHS)) { | |||
4222 | const APInt &Val = CI->getAPIntValue(); | |||
4223 | if (LHSSign == Unsigned) { | |||
4224 | return Val.isIntN(OptSize); | |||
4225 | } else { | |||
4226 | return Val.isSignedIntN(OptSize); | |||
4227 | } | |||
4228 | } else { | |||
4229 | OperandSignedness RHSSign; | |||
4230 | if (!IsMulWideOperandDemotable(RHS, OptSize, RHSSign)) | |||
4231 | return false; | |||
4232 | ||||
4233 | return LHSSign == RHSSign; | |||
4234 | } | |||
4235 | } | |||
4236 | ||||
4237 | /// TryMULWIDECombine - Attempt to replace a multiply of M bits with a multiply | |||
4238 | /// of M/2 bits that produces an M-bit result (i.e. mul.wide). This transform | |||
4239 | /// works on both multiply DAG nodes and SHL DAG nodes with a constant shift | |||
4240 | /// amount. | |||
4241 | static SDValue TryMULWIDECombine(SDNode *N, | |||
4242 | TargetLowering::DAGCombinerInfo &DCI) { | |||
4243 | EVT MulType = N->getValueType(0); | |||
4244 | if (MulType != MVT::i32 && MulType != MVT::i64) { | |||
4245 | return SDValue(); | |||
4246 | } | |||
4247 | ||||
4248 | SDLoc DL(N); | |||
4249 | unsigned OptSize = MulType.getSizeInBits() >> 1; | |||
4250 | SDValue LHS = N->getOperand(0); | |||
4251 | SDValue RHS = N->getOperand(1); | |||
4252 | ||||
4253 | // Canonicalize the multiply so the constant (if any) is on the right | |||
4254 | if (N->getOpcode() == ISD::MUL) { | |||
4255 | if (isa<ConstantSDNode>(LHS)) { | |||
4256 | std::swap(LHS, RHS); | |||
4257 | } | |||
4258 | } | |||
4259 | ||||
4260 | // If we have a SHL, determine the actual multiply amount | |||
4261 | if (N->getOpcode() == ISD::SHL) { | |||
4262 | ConstantSDNode *ShlRHS = dyn_cast<ConstantSDNode>(RHS); | |||
4263 | if (!ShlRHS) { | |||
4264 | return SDValue(); | |||
4265 | } | |||
4266 | ||||
4267 | APInt ShiftAmt = ShlRHS->getAPIntValue(); | |||
4268 | unsigned BitWidth = MulType.getSizeInBits(); | |||
4269 | if (ShiftAmt.sge(0) && ShiftAmt.slt(BitWidth)) { | |||
4270 | APInt MulVal = APInt(BitWidth, 1) << ShiftAmt; | |||
4271 | RHS = DCI.DAG.getConstant(MulVal, DL, MulType); | |||
4272 | } else { | |||
4273 | return SDValue(); | |||
4274 | } | |||
4275 | } | |||
4276 | ||||
4277 | bool Signed; | |||
4278 | // Verify that our operands are demotable | |||
4279 | if (!AreMulWideOperandsDemotable(LHS, RHS, OptSize, Signed)) { | |||
4280 | return SDValue(); | |||
4281 | } | |||
4282 | ||||
4283 | EVT DemotedVT; | |||
4284 | if (MulType == MVT::i32) { | |||
4285 | DemotedVT = MVT::i16; | |||
4286 | } else { | |||
4287 | DemotedVT = MVT::i32; | |||
4288 | } | |||
4289 | ||||
4290 | // Truncate the operands to the correct size. Note that these are just for | |||
4291 | // type consistency and will (likely) be eliminated in later phases. | |||
4292 | SDValue TruncLHS = | |||
4293 | DCI.DAG.getNode(ISD::TRUNCATE, DL, DemotedVT, LHS); | |||
4294 | SDValue TruncRHS = | |||
4295 | DCI.DAG.getNode(ISD::TRUNCATE, DL, DemotedVT, RHS); | |||
4296 | ||||
4297 | unsigned Opc; | |||
4298 | if (Signed) { | |||
4299 | Opc = NVPTXISD::MUL_WIDE_SIGNED; | |||
4300 | } else { | |||
4301 | Opc = NVPTXISD::MUL_WIDE_UNSIGNED; | |||
4302 | } | |||
4303 | ||||
4304 | return DCI.DAG.getNode(Opc, DL, MulType, TruncLHS, TruncRHS); | |||
4305 | } | |||
4306 | ||||
4307 | /// PerformMULCombine - Runs PTX-specific DAG combine patterns on MUL nodes. | |||
4308 | static SDValue PerformMULCombine(SDNode *N, | |||
4309 | TargetLowering::DAGCombinerInfo &DCI, | |||
4310 | CodeGenOpt::Level OptLevel) { | |||
4311 | if (OptLevel > 0) { | |||
4312 | // Try mul.wide combining at OptLevel > 0 | |||
4313 | if (SDValue Ret = TryMULWIDECombine(N, DCI)) | |||
4314 | return Ret; | |||
4315 | } | |||
4316 | ||||
4317 | return SDValue(); | |||
4318 | } | |||
4319 | ||||
4320 | /// PerformSHLCombine - Runs PTX-specific DAG combine patterns on SHL nodes. | |||
4321 | static SDValue PerformSHLCombine(SDNode *N, | |||
4322 | TargetLowering::DAGCombinerInfo &DCI, | |||
4323 | CodeGenOpt::Level OptLevel) { | |||
4324 | if (OptLevel > 0) { | |||
4325 | // Try mul.wide combining at OptLevel > 0 | |||
4326 | if (SDValue Ret = TryMULWIDECombine(N, DCI)) | |||
4327 | return Ret; | |||
4328 | } | |||
4329 | ||||
4330 | return SDValue(); | |||
4331 | } | |||
4332 | ||||
4333 | SDValue NVPTXTargetLowering::PerformDAGCombine(SDNode *N, | |||
4334 | DAGCombinerInfo &DCI) const { | |||
4335 | CodeGenOpt::Level OptLevel = getTargetMachine().getOptLevel(); | |||
4336 | switch (N->getOpcode()) { | |||
4337 | default: break; | |||
4338 | case ISD::ADD: | |||
4339 | case ISD::FADD: | |||
4340 | return PerformADDCombine(N, DCI, STI, OptLevel); | |||
4341 | case ISD::MUL: | |||
4342 | return PerformMULCombine(N, DCI, OptLevel); | |||
4343 | case ISD::SHL: | |||
4344 | return PerformSHLCombine(N, DCI, OptLevel); | |||
4345 | case ISD::AND: | |||
4346 | return PerformANDCombine(N, DCI); | |||
4347 | case ISD::SELECT: | |||
4348 | return PerformSELECTCombine(N, DCI); | |||
4349 | case ISD::UREM: | |||
4350 | case ISD::SREM: | |||
4351 | return PerformREMCombine(N, DCI, OptLevel); | |||
4352 | } | |||
4353 | return SDValue(); | |||
4354 | } | |||
4355 | ||||
4356 | /// ReplaceVectorLoad - Convert vector loads into multi-output scalar loads. | |||
4357 | static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG, | |||
4358 | SmallVectorImpl<SDValue> &Results) { | |||
4359 | EVT ResVT = N->getValueType(0); | |||
4360 | SDLoc DL(N); | |||
4361 | ||||
4362 | assert(ResVT.isVector() && "Vector load must have vector type")((ResVT.isVector() && "Vector load must have vector type" ) ? static_cast<void> (0) : __assert_fail ("ResVT.isVector() && \"Vector load must have vector type\"" , "/tmp/buildd/llvm-toolchain-snapshot-4.0~svn290870/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 4362, __PRETTY_FUNCTION__)); | |||
4363 | ||||
4364 | // We only handle "native" vector sizes for now, e.g. <4 x double> is not | |||
4365 | // legal. We can (and should) split that into 2 loads of <2 x double> here | |||
4366 | // but I'm leaving that as a TODO for now. | |||
4367 | assert(ResVT.isSimple() && "Can only handle simple types")((ResVT.isSimple() && "Can only handle simple types") ? static_cast<void> (0) : __assert_fail ("ResVT.isSimple() && \"Can only handle simple types\"" , "/tmp/buildd/llvm-toolchain-snapshot-4.0~svn290870/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 4367, __PRETTY_FUNCTION__)); | |||
4368 | switch (ResVT.getSimpleVT().SimpleTy) { | |||
4369 | default: | |||
4370 | return; | |||
4371 | case MVT::v2i8: | |||
4372 | case MVT::v2i16: | |||
4373 | case MVT::v2i32: | |||
4374 | case MVT::v2i64: | |||
4375 | case MVT::v2f32: | |||
4376 | case MVT::v2f64: | |||
4377 | case MVT::v4i8: | |||
4378 | case MVT::v4i16: | |||
4379 | case MVT::v4i32: | |||
4380 | case MVT::v4f32: | |||
4381 | // This is a "native" vector type | |||
4382 | break; | |||
4383 | } | |||
4384 | ||||
4385 | LoadSDNode *LD = cast<LoadSDNode>(N); | |||
4386 | ||||
4387 | unsigned Align = LD->getAlignment(); | |||
4388 | auto &TD = DAG.getDataLayout(); | |||
4389 | unsigned PrefAlign = | |||
4390 | TD.getPrefTypeAlignment(ResVT.getTypeForEVT(*DAG.getContext())); | |||
4391 | if (Align < PrefAlign) { | |||
4392 | // This load is not sufficiently aligned, so bail out and let this vector | |||
4393 | // load be scalarized. Note that we may still be able to emit smaller | |||
4394 | // vector loads. For example, if we are loading a <4 x float> with an | |||
4395 | // alignment of 8, this check will fail but the legalizer will try again | |||
4396 | // with 2 x <2 x float>, which will succeed with an alignment of 8. | |||
4397 | return; | |||
4398 | } | |||
4399 | ||||
4400 | EVT EltVT = ResVT.getVectorElementType(); | |||
4401 | unsigned NumElts = ResVT.getVectorNumElements(); | |||
4402 | ||||
4403 | // Since LoadV2 is a target node, we cannot rely on DAG type legalization. | |||
4404 | // Therefore, we must ensure the type is legal. For i1 and i8, we set the | |||
4405 | // loaded type to i16 and propagate the "real" type as the memory type. | |||
4406 | bool NeedTrunc = false; | |||
4407 | if (EltVT.getSizeInBits() < 16) { | |||
4408 | EltVT = MVT::i16; | |||
4409 | NeedTrunc = true; | |||
4410 | } | |||
4411 | ||||
4412 | unsigned Opcode = 0; | |||
4413 | SDVTList LdResVTs; | |||
4414 | ||||
4415 | switch (NumElts) { | |||
4416 | default: | |||
4417 | return; | |||
4418 | case 2: | |||
4419 | Opcode = NVPTXISD::LoadV2; | |||
4420 | LdResVTs = DAG.getVTList(EltVT, EltVT, MVT::Other); | |||
4421 | break; | |||
4422 | case 4: { | |||
4423 | Opcode = NVPTXISD::LoadV4; | |||
4424 | EVT ListVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other }; | |||
4425 | LdResVTs = DAG.getVTList(ListVTs); | |||
4426 | break; | |||
4427 | } | |||
4428 | } | |||
4429 | ||||
4430 | // Copy regular operands | |||
4431 | SmallVector<SDValue, 8> OtherOps(N->op_begin(), N->op_end()); | |||
4432 | ||||
4433 | // The select routine does not have access to the LoadSDNode instance, so | |||
4434 | // pass along the extension information | |||
4435 | OtherOps.push_back(DAG.getIntPtrConstant(LD->getExtensionType(), DL)); | |||
4436 | ||||
4437 | SDValue NewLD = DAG.getMemIntrinsicNode(Opcode, DL, LdResVTs, OtherOps, | |||
4438 | LD->getMemoryVT(), | |||
4439 | LD->getMemOperand()); | |||
4440 | ||||
4441 | SmallVector<SDValue, 4> ScalarRes; | |||
4442 | ||||
4443 | for (unsigned i = 0; i < NumElts; ++i) { | |||
4444 | SDValue Res = NewLD.getValue(i); | |||
4445 | if (NeedTrunc) | |||
4446 | Res = DAG.getNode(ISD::TRUNCATE, DL, ResVT.getVectorElementType(), Res); | |||
4447 | ScalarRes.push_back(Res); | |||
4448 | } | |||
4449 | ||||
4450 | SDValue LoadChain = NewLD.getValue(NumElts); | |||
4451 | ||||
4452 | SDValue BuildVec = DAG.getBuildVector(ResVT, DL, ScalarRes); | |||
4453 | ||||
4454 | Results.push_back(BuildVec); | |||
4455 | Results.push_back(LoadChain); | |||
4456 | } | |||
4457 | ||||
4458 | static void ReplaceINTRINSIC_W_CHAIN(SDNode *N, SelectionDAG &DAG, | |||
4459 | SmallVectorImpl<SDValue> &Results) { | |||
4460 | SDValue Chain = N->getOperand(0); | |||
4461 | SDValue Intrin = N->getOperand(1); | |||
4462 | SDLoc DL(N); | |||
4463 | ||||
4464 | // Get the intrinsic ID | |||
4465 | unsigned IntrinNo = cast<ConstantSDNode>(Intrin.getNode())->getZExtValue(); | |||
4466 | switch (IntrinNo) { | |||
4467 | default: | |||
4468 | return; | |||
4469 | case Intrinsic::nvvm_ldg_global_i: | |||
4470 | case Intrinsic::nvvm_ldg_global_f: | |||
4471 | case Intrinsic::nvvm_ldg_global_p: | |||
4472 | case Intrinsic::nvvm_ldu_global_i: | |||
4473 | case Intrinsic::nvvm_ldu_global_f: | |||
4474 | case Intrinsic::nvvm_ldu_global_p: { | |||
4475 | EVT ResVT = N->getValueType(0); | |||
4476 | ||||
4477 | if (ResVT.isVector()) { | |||
4478 | // Vector LDG/LDU | |||
4479 | ||||
4480 | unsigned NumElts = ResVT.getVectorNumElements(); | |||
4481 | EVT EltVT = ResVT.getVectorElementType(); | |||
4482 | ||||
4483 | // Since LDU/LDG are target nodes, we cannot rely on DAG type | |||
4484 | // legalization. | |||
4485 | // Therefore, we must ensure the type is legal. For i1 and i8, we set the | |||
4486 | // loaded type to i16 and propagate the "real" type as the memory type. | |||
4487 | bool NeedTrunc = false; | |||
4488 | if (EltVT.getSizeInBits() < 16) { | |||
4489 | EltVT = MVT::i16; | |||
4490 | NeedTrunc = true; | |||
4491 | } | |||
4492 | ||||
4493 | unsigned Opcode = 0; | |||
4494 | SDVTList LdResVTs; | |||
4495 | ||||
4496 | switch (NumElts) { | |||
4497 | default: | |||
4498 | return; | |||
4499 | case 2: | |||
4500 | switch (IntrinNo) { | |||
4501 | default: | |||
4502 | return; | |||
4503 | case Intrinsic::nvvm_ldg_global_i: | |||
4504 | case Intrinsic::nvvm_ldg_global_f: | |||
4505 | case Intrinsic::nvvm_ldg_global_p: | |||
4506 | Opcode = NVPTXISD::LDGV2; | |||
4507 | break; | |||
4508 | case Intrinsic::nvvm_ldu_global_i: | |||
4509 | case Intrinsic::nvvm_ldu_global_f: | |||
4510 | case Intrinsic::nvvm_ldu_global_p: | |||
4511 | Opcode = NVPTXISD::LDUV2; | |||
4512 | break; | |||
4513 | } | |||
4514 | LdResVTs = DAG.getVTList(EltVT, EltVT, MVT::Other); | |||
4515 | break; | |||
4516 | case 4: { | |||
4517 | switch (IntrinNo) { | |||
4518 | default: | |||
4519 | return; | |||
4520 | case Intrinsic::nvvm_ldg_global_i: | |||
4521 | case Intrinsic::nvvm_ldg_global_f: | |||
4522 | case Intrinsic::nvvm_ldg_global_p: | |||
4523 | Opcode = NVPTXISD::LDGV4; | |||
4524 | break; | |||
4525 | case Intrinsic::nvvm_ldu_global_i: | |||
4526 | case Intrinsic::nvvm_ldu_global_f: | |||
4527 | case Intrinsic::nvvm_ldu_global_p: | |||
4528 | Opcode = NVPTXISD::LDUV4; | |||
4529 | break; | |||
4530 | } | |||
4531 | EVT ListVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other }; | |||
4532 | LdResVTs = DAG.getVTList(ListVTs); | |||
4533 | break; | |||
4534 | } | |||
4535 | } | |||
4536 | ||||
4537 | SmallVector<SDValue, 8> OtherOps; | |||
4538 | ||||
4539 | // Copy regular operands | |||
4540 | ||||
4541 | OtherOps.push_back(Chain); // Chain | |||
4542 | // Skip operand 1 (intrinsic ID) | |||
4543 | // Others | |||
4544 | OtherOps.append(N->op_begin() + 2, N->op_end()); | |||
4545 | ||||
4546 | MemIntrinsicSDNode *MemSD = cast<MemIntrinsicSDNode>(N); | |||
4547 | ||||
4548 | SDValue NewLD = DAG.getMemIntrinsicNode(Opcode, DL, LdResVTs, OtherOps, | |||
4549 | MemSD->getMemoryVT(), | |||
4550 | MemSD->getMemOperand()); | |||
4551 | ||||
4552 | SmallVector<SDValue, 4> ScalarRes; | |||
4553 | ||||
4554 | for (unsigned i = 0; i < NumElts; ++i) { | |||
4555 | SDValue Res = NewLD.getValue(i); | |||
4556 | if (NeedTrunc) | |||
4557 | Res = | |||
4558 | DAG.getNode(ISD::TRUNCATE, DL, ResVT.getVectorElementType(), Res); | |||
4559 | ScalarRes.push_back(Res); | |||
4560 | } | |||
4561 | ||||
4562 | SDValue LoadChain = NewLD.getValue(NumElts); | |||
4563 | ||||
4564 | SDValue BuildVec = | |||
4565 | DAG.getBuildVector(ResVT, DL, ScalarRes); | |||
4566 | ||||
4567 | Results.push_back(BuildVec); | |||
4568 | Results.push_back(LoadChain); | |||
4569 | } else { | |||
4570 | // i8 LDG/LDU | |||
4571 | assert(ResVT.isSimple() && ResVT.getSimpleVT().SimpleTy == MVT::i8 &&((ResVT.isSimple() && ResVT.getSimpleVT().SimpleTy == MVT::i8 && "Custom handling of non-i8 ldu/ldg?") ? static_cast <void> (0) : __assert_fail ("ResVT.isSimple() && ResVT.getSimpleVT().SimpleTy == MVT::i8 && \"Custom handling of non-i8 ldu/ldg?\"" , "/tmp/buildd/llvm-toolchain-snapshot-4.0~svn290870/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 4572, __PRETTY_FUNCTION__)) | |||
4572 | "Custom handling of non-i8 ldu/ldg?")((ResVT.isSimple() && ResVT.getSimpleVT().SimpleTy == MVT::i8 && "Custom handling of non-i8 ldu/ldg?") ? static_cast <void> (0) : __assert_fail ("ResVT.isSimple() && ResVT.getSimpleVT().SimpleTy == MVT::i8 && \"Custom handling of non-i8 ldu/ldg?\"" , "/tmp/buildd/llvm-toolchain-snapshot-4.0~svn290870/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 4572, __PRETTY_FUNCTION__)); | |||
4573 | ||||
4574 | // Just copy all operands as-is | |||
4575 | SmallVector<SDValue, 4> Ops(N->op_begin(), N->op_end()); | |||
4576 | ||||
4577 | // Force output to i16 | |||
4578 | SDVTList LdResVTs = DAG.getVTList(MVT::i16, MVT::Other); | |||
4579 | ||||
4580 | MemIntrinsicSDNode *MemSD = cast<MemIntrinsicSDNode>(N); | |||
4581 | ||||
4582 | // We make sure the memory type is i8, which will be used during isel | |||
4583 | // to select the proper instruction. | |||
4584 | SDValue NewLD = | |||
4585 | DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, LdResVTs, Ops, | |||
4586 | MVT::i8, MemSD->getMemOperand()); | |||
4587 | ||||
4588 | Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, | |||
4589 | NewLD.getValue(0))); | |||
4590 | Results.push_back(NewLD.getValue(1)); | |||
4591 | } | |||
4592 | } | |||
4593 | } | |||
4594 | } | |||
4595 | ||||
4596 | void NVPTXTargetLowering::ReplaceNodeResults( | |||
4597 | SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const { | |||
4598 | switch (N->getOpcode()) { | |||
4599 | default: | |||
4600 | report_fatal_error("Unhandled custom legalization"); | |||
4601 | case ISD::LOAD: | |||
4602 | ReplaceLoadVector(N, DAG, Results); | |||
4603 | return; | |||
4604 | case ISD::INTRINSIC_W_CHAIN: | |||
4605 | ReplaceINTRINSIC_W_CHAIN(N, DAG, Results); | |||
4606 | return; | |||
4607 | } | |||
4608 | } | |||
4609 | ||||
// Pin NVPTXSection's and NVPTXTargetObjectFile's vtables to this file.
// anchor() is intentionally empty: it exists only to be defined out of line
// in this translation unit.
void NVPTXSection::anchor() {}
4612 | ||||
4613 | NVPTXTargetObjectFile::~NVPTXTargetObjectFile() { | |||
4614 | delete static_cast<NVPTXSection *>(TextSection); | |||
4615 | delete static_cast<NVPTXSection *>(DataSection); | |||
4616 | delete static_cast<NVPTXSection *>(BSSSection); | |||
4617 | delete static_cast<NVPTXSection *>(ReadOnlySection); | |||
4618 | ||||
4619 | delete static_cast<NVPTXSection *>(StaticCtorSection); | |||
4620 | delete static_cast<NVPTXSection *>(StaticDtorSection); | |||
4621 | delete static_cast<NVPTXSection *>(LSDASection); | |||
4622 | delete static_cast<NVPTXSection *>(EHFrameSection); | |||
4623 | delete static_cast<NVPTXSection *>(DwarfAbbrevSection); | |||
4624 | delete static_cast<NVPTXSection *>(DwarfInfoSection); | |||
4625 | delete static_cast<NVPTXSection *>(DwarfLineSection); | |||
4626 | delete static_cast<NVPTXSection *>(DwarfFrameSection); | |||
4627 | delete static_cast<NVPTXSection *>(DwarfPubTypesSection); | |||
4628 | delete static_cast<const NVPTXSection *>(DwarfDebugInlineSection); | |||
4629 | delete static_cast<NVPTXSection *>(DwarfStrSection); | |||
4630 | delete static_cast<NVPTXSection *>(DwarfLocSection); | |||
4631 | delete static_cast<NVPTXSection *>(DwarfARangesSection); | |||
4632 | delete static_cast<NVPTXSection *>(DwarfRangesSection); | |||
4633 | delete static_cast<NVPTXSection *>(DwarfMacinfoSection); | |||
4634 | } | |||
4635 | ||||
4636 | MCSection *NVPTXTargetObjectFile::SelectSectionForGlobal( | |||
4637 | const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const { | |||
4638 | return getDataSection(); | |||
4639 | } |