File: | llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp |
Warning: | line 249, column 33 Division by zero |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | //===-- NVPTXISelLowering.cpp - NVPTX DAG Lowering Implementation ---------===// | ||||||
2 | // | ||||||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||||||
4 | // See https://llvm.org/LICENSE.txt for license information. | ||||||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||||||
6 | // | ||||||
7 | //===----------------------------------------------------------------------===// | ||||||
8 | // | ||||||
9 | // This file defines the interfaces that NVPTX uses to lower LLVM code into a | ||||||
10 | // selection DAG. | ||||||
11 | // | ||||||
12 | //===----------------------------------------------------------------------===// | ||||||
13 | |||||||
14 | #include "NVPTXISelLowering.h" | ||||||
15 | #include "MCTargetDesc/NVPTXBaseInfo.h" | ||||||
16 | #include "NVPTX.h" | ||||||
17 | #include "NVPTXSubtarget.h" | ||||||
18 | #include "NVPTXTargetMachine.h" | ||||||
19 | #include "NVPTXTargetObjectFile.h" | ||||||
20 | #include "NVPTXUtilities.h" | ||||||
21 | #include "llvm/ADT/APInt.h" | ||||||
22 | #include "llvm/ADT/SmallVector.h" | ||||||
23 | #include "llvm/ADT/StringRef.h" | ||||||
24 | #include "llvm/CodeGen/Analysis.h" | ||||||
25 | #include "llvm/CodeGen/MachineFunction.h" | ||||||
26 | #include "llvm/CodeGen/MachineMemOperand.h" | ||||||
27 | #include "llvm/CodeGen/SelectionDAG.h" | ||||||
28 | #include "llvm/CodeGen/SelectionDAGNodes.h" | ||||||
29 | #include "llvm/CodeGen/TargetCallingConv.h" | ||||||
30 | #include "llvm/CodeGen/TargetLowering.h" | ||||||
31 | #include "llvm/CodeGen/ValueTypes.h" | ||||||
32 | #include "llvm/IR/Argument.h" | ||||||
33 | #include "llvm/IR/Attributes.h" | ||||||
34 | #include "llvm/IR/CallSite.h" | ||||||
35 | #include "llvm/IR/Constants.h" | ||||||
36 | #include "llvm/IR/DataLayout.h" | ||||||
37 | #include "llvm/IR/DerivedTypes.h" | ||||||
38 | #include "llvm/IR/Function.h" | ||||||
39 | #include "llvm/IR/GlobalValue.h" | ||||||
40 | #include "llvm/IR/Instruction.h" | ||||||
41 | #include "llvm/IR/Instructions.h" | ||||||
42 | #include "llvm/IR/IntrinsicsNVPTX.h" | ||||||
43 | #include "llvm/IR/Module.h" | ||||||
44 | #include "llvm/IR/Type.h" | ||||||
45 | #include "llvm/IR/Value.h" | ||||||
46 | #include "llvm/Support/Casting.h" | ||||||
47 | #include "llvm/Support/CodeGen.h" | ||||||
48 | #include "llvm/Support/CommandLine.h" | ||||||
49 | #include "llvm/Support/ErrorHandling.h" | ||||||
50 | #include "llvm/Support/MachineValueType.h" | ||||||
51 | #include "llvm/Support/MathExtras.h" | ||||||
52 | #include "llvm/Support/raw_ostream.h" | ||||||
53 | #include "llvm/Target/TargetMachine.h" | ||||||
54 | #include "llvm/Target/TargetOptions.h" | ||||||
55 | #include <algorithm> | ||||||
56 | #include <cassert> | ||||||
57 | #include <cstdint> | ||||||
58 | #include <iterator> | ||||||
59 | #include <sstream> | ||||||
60 | #include <string> | ||||||
61 | #include <utility> | ||||||
62 | #include <vector> | ||||||
63 | |||||||
64 | #define DEBUG_TYPE"nvptx-lower" "nvptx-lower" | ||||||
65 | |||||||
66 | using namespace llvm; | ||||||
67 | |||||||
68 | static unsigned int uniqueCallSite = 0; | ||||||
69 | |||||||
70 | static cl::opt<bool> sched4reg( | ||||||
71 | "nvptx-sched4reg", | ||||||
72 | cl::desc("NVPTX Specific: schedule for register pressue"), cl::init(false)); | ||||||
73 | |||||||
74 | static cl::opt<unsigned> | ||||||
75 | FMAContractLevelOpt("nvptx-fma-level", cl::ZeroOrMore, cl::Hidden, | ||||||
76 | cl::desc("NVPTX Specific: FMA contraction (0: don't do it" | ||||||
77 | " 1: do it 2: do it aggressively"), | ||||||
78 | cl::init(2)); | ||||||
79 | |||||||
80 | static cl::opt<int> UsePrecDivF32( | ||||||
81 | "nvptx-prec-divf32", cl::ZeroOrMore, cl::Hidden, | ||||||
82 | cl::desc("NVPTX Specifies: 0 use div.approx, 1 use div.full, 2 use" | ||||||
83 | " IEEE Compliant F32 div.rnd if available."), | ||||||
84 | cl::init(2)); | ||||||
85 | |||||||
86 | static cl::opt<bool> UsePrecSqrtF32( | ||||||
87 | "nvptx-prec-sqrtf32", cl::Hidden, | ||||||
88 | cl::desc("NVPTX Specific: 0 use sqrt.approx, 1 use sqrt.rn."), | ||||||
89 | cl::init(true)); | ||||||
90 | |||||||
91 | static cl::opt<bool> FtzEnabled( | ||||||
92 | "nvptx-f32ftz", cl::ZeroOrMore, cl::Hidden, | ||||||
93 | cl::desc("NVPTX Specific: Flush f32 subnormals to sign-preserving zero."), | ||||||
94 | cl::init(false)); | ||||||
95 | |||||||
96 | int NVPTXTargetLowering::getDivF32Level() const { | ||||||
97 | if (UsePrecDivF32.getNumOccurrences() > 0) { | ||||||
98 | // If nvptx-prec-div32=N is used on the command-line, always honor it | ||||||
99 | return UsePrecDivF32; | ||||||
100 | } else { | ||||||
101 | // Otherwise, use div.approx if fast math is enabled | ||||||
102 | if (getTargetMachine().Options.UnsafeFPMath) | ||||||
103 | return 0; | ||||||
104 | else | ||||||
105 | return 2; | ||||||
106 | } | ||||||
107 | } | ||||||
108 | |||||||
109 | bool NVPTXTargetLowering::usePrecSqrtF32() const { | ||||||
110 | if (UsePrecSqrtF32.getNumOccurrences() > 0) { | ||||||
111 | // If nvptx-prec-sqrtf32 is used on the command-line, always honor it | ||||||
112 | return UsePrecSqrtF32; | ||||||
113 | } else { | ||||||
114 | // Otherwise, use sqrt.approx if fast math is enabled | ||||||
115 | return !getTargetMachine().Options.UnsafeFPMath; | ||||||
116 | } | ||||||
117 | } | ||||||
118 | |||||||
119 | bool NVPTXTargetLowering::useF32FTZ(const MachineFunction &MF) const { | ||||||
120 | // TODO: Get rid of this flag; there can be only one way to do this. | ||||||
121 | if (FtzEnabled.getNumOccurrences() > 0) { | ||||||
122 | // If nvptx-f32ftz is used on the command-line, always honor it | ||||||
123 | return FtzEnabled; | ||||||
124 | } | ||||||
125 | |||||||
126 | return MF.getDenormalMode(APFloat::IEEEsingle()).Output == | ||||||
127 | DenormalMode::PreserveSign; | ||||||
128 | } | ||||||
129 | |||||||
130 | static bool IsPTXVectorType(MVT VT) { | ||||||
131 | switch (VT.SimpleTy) { | ||||||
132 | default: | ||||||
133 | return false; | ||||||
134 | case MVT::v2i1: | ||||||
135 | case MVT::v4i1: | ||||||
136 | case MVT::v2i8: | ||||||
137 | case MVT::v4i8: | ||||||
138 | case MVT::v2i16: | ||||||
139 | case MVT::v4i16: | ||||||
140 | case MVT::v2i32: | ||||||
141 | case MVT::v4i32: | ||||||
142 | case MVT::v2i64: | ||||||
143 | case MVT::v2f16: | ||||||
144 | case MVT::v4f16: | ||||||
145 | case MVT::v8f16: // <4 x f16x2> | ||||||
146 | case MVT::v2f32: | ||||||
147 | case MVT::v4f32: | ||||||
148 | case MVT::v2f64: | ||||||
149 | return true; | ||||||
150 | } | ||||||
151 | } | ||||||
152 | |||||||
153 | /// ComputePTXValueVTs - For the given Type \p Ty, returns the set of primitive | ||||||
154 | /// EVTs that compose it. Unlike ComputeValueVTs, this will break apart vectors | ||||||
155 | /// into their primitive components. | ||||||
156 | /// NOTE: This is a band-aid for code that expects ComputeValueVTs to return the | ||||||
157 | /// same number of types as the Ins/Outs arrays in LowerFormalArguments, | ||||||
158 | /// LowerCall, and LowerReturn. | ||||||
159 | static void ComputePTXValueVTs(const TargetLowering &TLI, const DataLayout &DL, | ||||||
160 | Type *Ty, SmallVectorImpl<EVT> &ValueVTs, | ||||||
161 | SmallVectorImpl<uint64_t> *Offsets = nullptr, | ||||||
162 | uint64_t StartingOffset = 0) { | ||||||
163 | SmallVector<EVT, 16> TempVTs; | ||||||
164 | SmallVector<uint64_t, 16> TempOffsets; | ||||||
165 | |||||||
166 | // Special case for i128 - decompose to (i64, i64) | ||||||
167 | if (Ty->isIntegerTy(128)) { | ||||||
168 | ValueVTs.push_back(EVT(MVT::i64)); | ||||||
169 | ValueVTs.push_back(EVT(MVT::i64)); | ||||||
170 | |||||||
171 | if (Offsets) { | ||||||
172 | Offsets->push_back(StartingOffset + 0); | ||||||
173 | Offsets->push_back(StartingOffset + 8); | ||||||
174 | } | ||||||
175 | |||||||
176 | return; | ||||||
177 | } | ||||||
178 | |||||||
179 | // Given a struct type, recursively traverse the elements with custom ComputePTXValueVTs. | ||||||
180 | if (StructType *STy = dyn_cast<StructType>(Ty)) { | ||||||
181 | auto const *SL = DL.getStructLayout(STy); | ||||||
182 | auto ElementNum = 0; | ||||||
183 | for(auto *EI : STy->elements()) { | ||||||
184 | ComputePTXValueVTs(TLI, DL, EI, ValueVTs, Offsets, | ||||||
185 | StartingOffset + SL->getElementOffset(ElementNum)); | ||||||
186 | ++ElementNum; | ||||||
187 | } | ||||||
188 | return; | ||||||
189 | } | ||||||
190 | |||||||
191 | ComputeValueVTs(TLI, DL, Ty, TempVTs, &TempOffsets, StartingOffset); | ||||||
192 | for (unsigned i = 0, e = TempVTs.size(); i != e; ++i) { | ||||||
193 | EVT VT = TempVTs[i]; | ||||||
194 | uint64_t Off = TempOffsets[i]; | ||||||
195 | // Split vectors into individual elements, except for v2f16, which | ||||||
196 | // we will pass as a single scalar. | ||||||
197 | if (VT.isVector()) { | ||||||
198 | unsigned NumElts = VT.getVectorNumElements(); | ||||||
199 | EVT EltVT = VT.getVectorElementType(); | ||||||
200 | // Vectors with an even number of f16 elements will be passed to | ||||||
201 | // us as an array of v2f16 elements. We must match this so we | ||||||
202 | // stay in sync with Ins/Outs. | ||||||
203 | if (EltVT == MVT::f16 && NumElts % 2 == 0) { | ||||||
204 | EltVT = MVT::v2f16; | ||||||
205 | NumElts /= 2; | ||||||
206 | } | ||||||
207 | for (unsigned j = 0; j != NumElts; ++j) { | ||||||
208 | ValueVTs.push_back(EltVT); | ||||||
209 | if (Offsets) | ||||||
210 | Offsets->push_back(Off + j * EltVT.getStoreSize()); | ||||||
211 | } | ||||||
212 | } else { | ||||||
213 | ValueVTs.push_back(VT); | ||||||
214 | if (Offsets) | ||||||
215 | Offsets->push_back(Off); | ||||||
216 | } | ||||||
217 | } | ||||||
218 | } | ||||||
219 | |||||||
220 | // Check whether we can merge loads/stores of some of the pieces of a | ||||||
221 | // flattened function parameter or return value into a single vector | ||||||
222 | // load/store. | ||||||
223 | // | ||||||
224 | // The flattened parameter is represented as a list of EVTs and | ||||||
225 | // offsets, and the whole structure is aligned to ParamAlignment. This | ||||||
226 | // function determines whether we can load/store pieces of the | ||||||
227 | // parameter starting at index Idx using a single vectorized op of | ||||||
228 | // size AccessSize. If so, it returns the number of param pieces | ||||||
229 | // covered by the vector op. Otherwise, it returns 1. | ||||||
230 | static unsigned CanMergeParamLoadStoresStartingAt( | ||||||
231 | unsigned Idx, uint32_t AccessSize, const SmallVectorImpl<EVT> &ValueVTs, | ||||||
232 | const SmallVectorImpl<uint64_t> &Offsets, unsigned ParamAlignment) { | ||||||
233 | assert(isPowerOf2_32(AccessSize) && "must be a power of 2!")((isPowerOf2_32(AccessSize) && "must be a power of 2!" ) ? static_cast<void> (0) : __assert_fail ("isPowerOf2_32(AccessSize) && \"must be a power of 2!\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 233, __PRETTY_FUNCTION__)); | ||||||
234 | |||||||
235 | // Can't vectorize if param alignment is not sufficient. | ||||||
236 | if (AccessSize > ParamAlignment) | ||||||
237 | return 1; | ||||||
238 | // Can't vectorize if offset is not aligned. | ||||||
239 | if (Offsets[Idx] & (AccessSize - 1)) | ||||||
240 | return 1; | ||||||
241 | |||||||
242 | EVT EltVT = ValueVTs[Idx]; | ||||||
243 | unsigned EltSize = EltVT.getStoreSize(); | ||||||
244 | |||||||
245 | // Element is too large to vectorize. | ||||||
246 | if (EltSize >= AccessSize) | ||||||
247 | return 1; | ||||||
248 | |||||||
249 | unsigned NumElts = AccessSize / EltSize; | ||||||
| |||||||
250 | // Can't vectorize if AccessBytes if not a multiple of EltSize. | ||||||
251 | if (AccessSize != EltSize * NumElts) | ||||||
252 | return 1; | ||||||
253 | |||||||
254 | // We don't have enough elements to vectorize. | ||||||
255 | if (Idx + NumElts > ValueVTs.size()) | ||||||
256 | return 1; | ||||||
257 | |||||||
258 | // PTX ISA can only deal with 2- and 4-element vector ops. | ||||||
259 | if (NumElts != 4 && NumElts != 2) | ||||||
260 | return 1; | ||||||
261 | |||||||
262 | for (unsigned j = Idx + 1; j < Idx + NumElts; ++j) { | ||||||
263 | // Types do not match. | ||||||
264 | if (ValueVTs[j] != EltVT) | ||||||
265 | return 1; | ||||||
266 | |||||||
267 | // Elements are not contiguous. | ||||||
268 | if (Offsets[j] - Offsets[j - 1] != EltSize) | ||||||
269 | return 1; | ||||||
270 | } | ||||||
271 | // OK. We can vectorize ValueVTs[i..i+NumElts) | ||||||
272 | return NumElts; | ||||||
273 | } | ||||||
274 | |||||||
// Per-element vectorization state for the loads/stores of a flattened
// function parameter or return value. The two low bits say whether an
// element begins and/or ends a vector access.
enum ParamVectorizationFlags {
  // Neither bit set: an element in the middle of a vector.
  PVF_INNER = 0,
  // Bit 0: the element is the first one of a vector.
  PVF_FIRST = 1 << 0,
  // Bit 1: the element is the last one of a vector.
  PVF_LAST = 1 << 1,
  // Both bits: a scalar is effectively a 1-element vector.
  PVF_SCALAR = PVF_LAST | PVF_FIRST
};
284 | |||||||
285 | // Computes whether and how we can vectorize the loads/stores of a | ||||||
286 | // flattened function parameter or return value. | ||||||
287 | // | ||||||
288 | // The flattened parameter is represented as the list of ValueVTs and | ||||||
289 | // Offsets, and is aligned to ParamAlignment bytes. We return a vector | ||||||
290 | // of the same size as ValueVTs indicating how each piece should be | ||||||
291 | // loaded/stored (i.e. as a scalar, or as part of a vector | ||||||
292 | // load/store). | ||||||
293 | static SmallVector<ParamVectorizationFlags, 16> | ||||||
294 | VectorizePTXValueVTs(const SmallVectorImpl<EVT> &ValueVTs, | ||||||
295 | const SmallVectorImpl<uint64_t> &Offsets, | ||||||
296 | unsigned ParamAlignment) { | ||||||
297 | // Set vector size to match ValueVTs and mark all elements as | ||||||
298 | // scalars by default. | ||||||
299 | SmallVector<ParamVectorizationFlags, 16> VectorInfo; | ||||||
300 | VectorInfo.assign(ValueVTs.size(), PVF_SCALAR); | ||||||
301 | |||||||
302 | // Check what we can vectorize using 128/64/32-bit accesses. | ||||||
303 | for (int I = 0, E = ValueVTs.size(); I != E; ++I) { | ||||||
304 | // Skip elements we've already processed. | ||||||
305 | assert(VectorInfo[I] == PVF_SCALAR && "Unexpected vector info state.")((VectorInfo[I] == PVF_SCALAR && "Unexpected vector info state." ) ? static_cast<void> (0) : __assert_fail ("VectorInfo[I] == PVF_SCALAR && \"Unexpected vector info state.\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 305, __PRETTY_FUNCTION__)); | ||||||
306 | for (unsigned AccessSize : {16, 8, 4, 2}) { | ||||||
307 | unsigned NumElts = CanMergeParamLoadStoresStartingAt( | ||||||
308 | I, AccessSize, ValueVTs, Offsets, ParamAlignment); | ||||||
309 | // Mark vectorized elements. | ||||||
310 | switch (NumElts) { | ||||||
311 | default: | ||||||
312 | llvm_unreachable("Unexpected return value")::llvm::llvm_unreachable_internal("Unexpected return value", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 312); | ||||||
313 | case 1: | ||||||
314 | // Can't vectorize using this size, try next smaller size. | ||||||
315 | continue; | ||||||
316 | case 2: | ||||||
317 | assert(I + 1 < E && "Not enough elements.")((I + 1 < E && "Not enough elements.") ? static_cast <void> (0) : __assert_fail ("I + 1 < E && \"Not enough elements.\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 317, __PRETTY_FUNCTION__)); | ||||||
318 | VectorInfo[I] = PVF_FIRST; | ||||||
319 | VectorInfo[I + 1] = PVF_LAST; | ||||||
320 | I += 1; | ||||||
321 | break; | ||||||
322 | case 4: | ||||||
323 | assert(I + 3 < E && "Not enough elements.")((I + 3 < E && "Not enough elements.") ? static_cast <void> (0) : __assert_fail ("I + 3 < E && \"Not enough elements.\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 323, __PRETTY_FUNCTION__)); | ||||||
324 | VectorInfo[I] = PVF_FIRST; | ||||||
325 | VectorInfo[I + 1] = PVF_INNER; | ||||||
326 | VectorInfo[I + 2] = PVF_INNER; | ||||||
327 | VectorInfo[I + 3] = PVF_LAST; | ||||||
328 | I += 3; | ||||||
329 | break; | ||||||
330 | } | ||||||
331 | // Break out of the inner loop because we've already succeeded | ||||||
332 | // using largest possible AccessSize. | ||||||
333 | break; | ||||||
334 | } | ||||||
335 | } | ||||||
336 | return VectorInfo; | ||||||
337 | } | ||||||
338 | |||||||
339 | // NVPTXTargetLowering Constructor. | ||||||
340 | NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM, | ||||||
341 | const NVPTXSubtarget &STI) | ||||||
342 | : TargetLowering(TM), nvTM(&TM), STI(STI) { | ||||||
343 | // always lower memset, memcpy, and memmove intrinsics to load/store | ||||||
344 | // instructions, rather | ||||||
345 | // then generating calls to memset, mempcy or memmove. | ||||||
346 | MaxStoresPerMemset = (unsigned) 0xFFFFFFFF; | ||||||
347 | MaxStoresPerMemcpy = (unsigned) 0xFFFFFFFF; | ||||||
348 | MaxStoresPerMemmove = (unsigned) 0xFFFFFFFF; | ||||||
349 | |||||||
350 | setBooleanContents(ZeroOrNegativeOneBooleanContent); | ||||||
351 | setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); | ||||||
352 | |||||||
353 | // Jump is Expensive. Don't create extra control flow for 'and', 'or' | ||||||
354 | // condition branches. | ||||||
355 | setJumpIsExpensive(true); | ||||||
356 | |||||||
357 | // Wide divides are _very_ slow. Try to reduce the width of the divide if | ||||||
358 | // possible. | ||||||
359 | addBypassSlowDiv(64, 32); | ||||||
360 | |||||||
361 | // By default, use the Source scheduling | ||||||
362 | if (sched4reg) | ||||||
363 | setSchedulingPreference(Sched::RegPressure); | ||||||
364 | else | ||||||
365 | setSchedulingPreference(Sched::Source); | ||||||
366 | |||||||
367 | auto setFP16OperationAction = [&](unsigned Op, MVT VT, LegalizeAction Action, | ||||||
368 | LegalizeAction NoF16Action) { | ||||||
369 | setOperationAction(Op, VT, STI.allowFP16Math() ? Action : NoF16Action); | ||||||
370 | }; | ||||||
371 | |||||||
372 | addRegisterClass(MVT::i1, &NVPTX::Int1RegsRegClass); | ||||||
373 | addRegisterClass(MVT::i16, &NVPTX::Int16RegsRegClass); | ||||||
374 | addRegisterClass(MVT::i32, &NVPTX::Int32RegsRegClass); | ||||||
375 | addRegisterClass(MVT::i64, &NVPTX::Int64RegsRegClass); | ||||||
376 | addRegisterClass(MVT::f32, &NVPTX::Float32RegsRegClass); | ||||||
377 | addRegisterClass(MVT::f64, &NVPTX::Float64RegsRegClass); | ||||||
378 | addRegisterClass(MVT::f16, &NVPTX::Float16RegsRegClass); | ||||||
379 | addRegisterClass(MVT::v2f16, &NVPTX::Float16x2RegsRegClass); | ||||||
380 | |||||||
381 | // Conversion to/from FP16/FP16x2 is always legal. | ||||||
382 | setOperationAction(ISD::SINT_TO_FP, MVT::f16, Legal); | ||||||
383 | setOperationAction(ISD::FP_TO_SINT, MVT::f16, Legal); | ||||||
384 | setOperationAction(ISD::BUILD_VECTOR, MVT::v2f16, Custom); | ||||||
385 | setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f16, Custom); | ||||||
386 | setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f16, Expand); | ||||||
387 | setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f16, Expand); | ||||||
388 | |||||||
389 | setFP16OperationAction(ISD::SETCC, MVT::f16, Legal, Promote); | ||||||
390 | setFP16OperationAction(ISD::SETCC, MVT::v2f16, Legal, Expand); | ||||||
391 | |||||||
392 | // Operations not directly supported by NVPTX. | ||||||
393 | for (MVT VT : {MVT::f16, MVT::v2f16, MVT::f32, MVT::f64, MVT::i1, MVT::i8, | ||||||
394 | MVT::i16, MVT::i32, MVT::i64}) { | ||||||
395 | setOperationAction(ISD::SELECT_CC, VT, Expand); | ||||||
396 | setOperationAction(ISD::BR_CC, VT, Expand); | ||||||
397 | } | ||||||
398 | |||||||
399 | // Some SIGN_EXTEND_INREG can be done using cvt instruction. | ||||||
400 | // For others we will expand to a SHL/SRA pair. | ||||||
401 | setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i64, Legal); | ||||||
402 | setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal); | ||||||
403 | setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Legal); | ||||||
404 | setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal); | ||||||
405 | setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); | ||||||
406 | |||||||
407 | setOperationAction(ISD::SHL_PARTS, MVT::i32 , Custom); | ||||||
408 | setOperationAction(ISD::SRA_PARTS, MVT::i32 , Custom); | ||||||
409 | setOperationAction(ISD::SRL_PARTS, MVT::i32 , Custom); | ||||||
410 | setOperationAction(ISD::SHL_PARTS, MVT::i64 , Custom); | ||||||
411 | setOperationAction(ISD::SRA_PARTS, MVT::i64 , Custom); | ||||||
412 | setOperationAction(ISD::SRL_PARTS, MVT::i64 , Custom); | ||||||
413 | |||||||
414 | setOperationAction(ISD::BITREVERSE, MVT::i32, Legal); | ||||||
415 | setOperationAction(ISD::BITREVERSE, MVT::i64, Legal); | ||||||
416 | |||||||
417 | // TODO: we may consider expanding ROTL/ROTR on older GPUs. Currently on GPUs | ||||||
418 | // that don't have h/w rotation we lower them to multi-instruction assembly. | ||||||
419 | // See ROT*_sw in NVPTXIntrInfo.td | ||||||
420 | setOperationAction(ISD::ROTL, MVT::i64, Legal); | ||||||
421 | setOperationAction(ISD::ROTR, MVT::i64, Legal); | ||||||
422 | setOperationAction(ISD::ROTL, MVT::i32, Legal); | ||||||
423 | setOperationAction(ISD::ROTR, MVT::i32, Legal); | ||||||
424 | |||||||
425 | setOperationAction(ISD::ROTL, MVT::i16, Expand); | ||||||
426 | setOperationAction(ISD::ROTR, MVT::i16, Expand); | ||||||
427 | setOperationAction(ISD::ROTL, MVT::i8, Expand); | ||||||
428 | setOperationAction(ISD::ROTR, MVT::i8, Expand); | ||||||
429 | setOperationAction(ISD::BSWAP, MVT::i16, Expand); | ||||||
430 | setOperationAction(ISD::BSWAP, MVT::i32, Expand); | ||||||
431 | setOperationAction(ISD::BSWAP, MVT::i64, Expand); | ||||||
432 | |||||||
433 | // Indirect branch is not supported. | ||||||
434 | // This also disables Jump Table creation. | ||||||
435 | setOperationAction(ISD::BR_JT, MVT::Other, Expand); | ||||||
436 | setOperationAction(ISD::BRIND, MVT::Other, Expand); | ||||||
437 | |||||||
438 | setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); | ||||||
439 | setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); | ||||||
440 | |||||||
441 | // We want to legalize constant related memmove and memcopy | ||||||
442 | // intrinsics. | ||||||
443 | setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); | ||||||
444 | |||||||
445 | // Turn FP extload into load/fpextend | ||||||
446 | setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand); | ||||||
447 | setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand); | ||||||
448 | setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand); | ||||||
449 | setLoadExtAction(ISD::EXTLOAD, MVT::v2f32, MVT::v2f16, Expand); | ||||||
450 | setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f16, Expand); | ||||||
451 | setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f32, Expand); | ||||||
452 | setLoadExtAction(ISD::EXTLOAD, MVT::v4f32, MVT::v4f16, Expand); | ||||||
453 | setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f16, Expand); | ||||||
454 | setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f32, Expand); | ||||||
455 | // Turn FP truncstore into trunc + store. | ||||||
456 | // FIXME: vector types should also be expanded | ||||||
457 | setTruncStoreAction(MVT::f32, MVT::f16, Expand); | ||||||
458 | setTruncStoreAction(MVT::f64, MVT::f16, Expand); | ||||||
459 | setTruncStoreAction(MVT::f64, MVT::f32, Expand); | ||||||
460 | |||||||
461 | // PTX does not support load / store predicate registers | ||||||
462 | setOperationAction(ISD::LOAD, MVT::i1, Custom); | ||||||
463 | setOperationAction(ISD::STORE, MVT::i1, Custom); | ||||||
464 | |||||||
465 | for (MVT VT : MVT::integer_valuetypes()) { | ||||||
466 | setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); | ||||||
467 | setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote); | ||||||
468 | setTruncStoreAction(VT, MVT::i1, Expand); | ||||||
469 | } | ||||||
470 | |||||||
471 | // This is legal in NVPTX | ||||||
472 | setOperationAction(ISD::ConstantFP, MVT::f64, Legal); | ||||||
473 | setOperationAction(ISD::ConstantFP, MVT::f32, Legal); | ||||||
474 | setOperationAction(ISD::ConstantFP, MVT::f16, Legal); | ||||||
475 | |||||||
476 | // TRAP can be lowered to PTX trap | ||||||
477 | setOperationAction(ISD::TRAP, MVT::Other, Legal); | ||||||
478 | |||||||
479 | // Register custom handling for vector loads/stores | ||||||
480 | for (MVT VT : MVT::fixedlen_vector_valuetypes()) { | ||||||
481 | if (IsPTXVectorType(VT)) { | ||||||
482 | setOperationAction(ISD::LOAD, VT, Custom); | ||||||
483 | setOperationAction(ISD::STORE, VT, Custom); | ||||||
484 | setOperationAction(ISD::INTRINSIC_W_CHAIN, VT, Custom); | ||||||
485 | } | ||||||
486 | } | ||||||
487 | |||||||
488 | // Custom handling for i8 intrinsics | ||||||
489 | setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom); | ||||||
490 | |||||||
491 | for (const auto& Ty : {MVT::i16, MVT::i32, MVT::i64}) { | ||||||
492 | setOperationAction(ISD::ABS, Ty, Legal); | ||||||
493 | setOperationAction(ISD::SMIN, Ty, Legal); | ||||||
494 | setOperationAction(ISD::SMAX, Ty, Legal); | ||||||
495 | setOperationAction(ISD::UMIN, Ty, Legal); | ||||||
496 | setOperationAction(ISD::UMAX, Ty, Legal); | ||||||
497 | |||||||
498 | setOperationAction(ISD::CTPOP, Ty, Legal); | ||||||
499 | setOperationAction(ISD::CTLZ, Ty, Legal); | ||||||
500 | } | ||||||
501 | |||||||
502 | setOperationAction(ISD::CTTZ, MVT::i16, Expand); | ||||||
503 | setOperationAction(ISD::CTTZ, MVT::i32, Expand); | ||||||
504 | setOperationAction(ISD::CTTZ, MVT::i64, Expand); | ||||||
505 | |||||||
506 | // PTX does not directly support SELP of i1, so promote to i32 first | ||||||
507 | setOperationAction(ISD::SELECT, MVT::i1, Custom); | ||||||
508 | |||||||
509 | // PTX cannot multiply two i64s in a single instruction. | ||||||
510 | setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand); | ||||||
511 | setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand); | ||||||
512 | |||||||
513 | // We have some custom DAG combine patterns for these nodes | ||||||
514 | setTargetDAGCombine(ISD::ADD); | ||||||
515 | setTargetDAGCombine(ISD::AND); | ||||||
516 | setTargetDAGCombine(ISD::FADD); | ||||||
517 | setTargetDAGCombine(ISD::MUL); | ||||||
518 | setTargetDAGCombine(ISD::SHL); | ||||||
519 | setTargetDAGCombine(ISD::SREM); | ||||||
520 | setTargetDAGCombine(ISD::UREM); | ||||||
521 | |||||||
522 | // setcc for f16x2 needs special handling to prevent legalizer's | ||||||
523 | // attempt to scalarize it due to v2i1 not being legal. | ||||||
524 | if (STI.allowFP16Math()) | ||||||
525 | setTargetDAGCombine(ISD::SETCC); | ||||||
526 | |||||||
527 | // Promote fp16 arithmetic if fp16 hardware isn't available or the | ||||||
528 | // user passed --nvptx-no-fp16-math. The flag is useful because, | ||||||
529 | // although sm_53+ GPUs have some sort of FP16 support in | ||||||
530 | // hardware, only sm_53 and sm_60 have full implementation. Others | ||||||
531 | // only have token amount of hardware and are likely to run faster | ||||||
532 | // by using fp32 units instead. | ||||||
533 | for (const auto &Op : {ISD::FADD, ISD::FMUL, ISD::FSUB, ISD::FMA}) { | ||||||
534 | setFP16OperationAction(Op, MVT::f16, Legal, Promote); | ||||||
535 | setFP16OperationAction(Op, MVT::v2f16, Legal, Expand); | ||||||
536 | } | ||||||
537 | |||||||
538 | // There's no neg.f16 instruction. Expand to (0-x). | ||||||
539 | setOperationAction(ISD::FNEG, MVT::f16, Expand); | ||||||
540 | setOperationAction(ISD::FNEG, MVT::v2f16, Expand); | ||||||
541 | |||||||
542 | // (would be) Library functions. | ||||||
543 | |||||||
544 | // These map to conversion instructions for scalar FP types. | ||||||
545 | for (const auto &Op : {ISD::FCEIL, ISD::FFLOOR, ISD::FNEARBYINT, ISD::FRINT, | ||||||
546 | ISD::FTRUNC}) { | ||||||
547 | setOperationAction(Op, MVT::f16, Legal); | ||||||
548 | setOperationAction(Op, MVT::f32, Legal); | ||||||
549 | setOperationAction(Op, MVT::f64, Legal); | ||||||
550 | setOperationAction(Op, MVT::v2f16, Expand); | ||||||
551 | } | ||||||
552 | |||||||
553 | setOperationAction(ISD::FROUND, MVT::f16, Promote); | ||||||
554 | setOperationAction(ISD::FROUND, MVT::v2f16, Expand); | ||||||
555 | setOperationAction(ISD::FROUND, MVT::f32, Custom); | ||||||
556 | setOperationAction(ISD::FROUND, MVT::f64, Custom); | ||||||
557 | |||||||
558 | |||||||
559 | // 'Expand' implements FCOPYSIGN without calling an external library. | ||||||
560 | setOperationAction(ISD::FCOPYSIGN, MVT::f16, Expand); | ||||||
561 | setOperationAction(ISD::FCOPYSIGN, MVT::v2f16, Expand); | ||||||
562 | setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); | ||||||
563 | setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); | ||||||
564 | |||||||
565 | // These map to corresponding instructions for f32/f64. f16 must be | ||||||
566 | // promoted to f32. v2f16 is expanded to f16, which is then promoted | ||||||
567 | // to f32. | ||||||
568 | for (const auto &Op : {ISD::FDIV, ISD::FREM, ISD::FSQRT, ISD::FSIN, ISD::FCOS, | ||||||
569 | ISD::FABS, ISD::FMINNUM, ISD::FMAXNUM}) { | ||||||
570 | setOperationAction(Op, MVT::f16, Promote); | ||||||
571 | setOperationAction(Op, MVT::f32, Legal); | ||||||
572 | setOperationAction(Op, MVT::f64, Legal); | ||||||
573 | setOperationAction(Op, MVT::v2f16, Expand); | ||||||
574 | } | ||||||
575 | setOperationAction(ISD::FMINNUM, MVT::f16, Promote); | ||||||
576 | setOperationAction(ISD::FMAXNUM, MVT::f16, Promote); | ||||||
577 | setOperationAction(ISD::FMINIMUM, MVT::f16, Promote); | ||||||
578 | setOperationAction(ISD::FMAXIMUM, MVT::f16, Promote); | ||||||
579 | |||||||
580 | // No FEXP2, FLOG2. The PTX ex2 and log2 functions are always approximate. | ||||||
581 | // No FPOW or FREM in PTX. | ||||||
582 | |||||||
583 | // Now deduce the information based on the above mentioned | ||||||
584 | // actions | ||||||
585 | computeRegisterProperties(STI.getRegisterInfo()); | ||||||
586 | } | ||||||
587 | |||||||
588 | const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const { | ||||||
589 | switch ((NVPTXISD::NodeType)Opcode) { | ||||||
590 | case NVPTXISD::FIRST_NUMBER: | ||||||
591 | break; | ||||||
592 | case NVPTXISD::CALL: | ||||||
593 | return "NVPTXISD::CALL"; | ||||||
594 | case NVPTXISD::RET_FLAG: | ||||||
595 | return "NVPTXISD::RET_FLAG"; | ||||||
596 | case NVPTXISD::LOAD_PARAM: | ||||||
597 | return "NVPTXISD::LOAD_PARAM"; | ||||||
598 | case NVPTXISD::Wrapper: | ||||||
599 | return "NVPTXISD::Wrapper"; | ||||||
600 | case NVPTXISD::DeclareParam: | ||||||
601 | return "NVPTXISD::DeclareParam"; | ||||||
602 | case NVPTXISD::DeclareScalarParam: | ||||||
603 | return "NVPTXISD::DeclareScalarParam"; | ||||||
604 | case NVPTXISD::DeclareRet: | ||||||
605 | return "NVPTXISD::DeclareRet"; | ||||||
606 | case NVPTXISD::DeclareScalarRet: | ||||||
607 | return "NVPTXISD::DeclareScalarRet"; | ||||||
608 | case NVPTXISD::DeclareRetParam: | ||||||
609 | return "NVPTXISD::DeclareRetParam"; | ||||||
610 | case NVPTXISD::PrintCall: | ||||||
611 | return "NVPTXISD::PrintCall"; | ||||||
612 | case NVPTXISD::PrintConvergentCall: | ||||||
613 | return "NVPTXISD::PrintConvergentCall"; | ||||||
614 | case NVPTXISD::PrintCallUni: | ||||||
615 | return "NVPTXISD::PrintCallUni"; | ||||||
616 | case NVPTXISD::PrintConvergentCallUni: | ||||||
617 | return "NVPTXISD::PrintConvergentCallUni"; | ||||||
618 | case NVPTXISD::LoadParam: | ||||||
619 | return "NVPTXISD::LoadParam"; | ||||||
620 | case NVPTXISD::LoadParamV2: | ||||||
621 | return "NVPTXISD::LoadParamV2"; | ||||||
622 | case NVPTXISD::LoadParamV4: | ||||||
623 | return "NVPTXISD::LoadParamV4"; | ||||||
624 | case NVPTXISD::StoreParam: | ||||||
625 | return "NVPTXISD::StoreParam"; | ||||||
626 | case NVPTXISD::StoreParamV2: | ||||||
627 | return "NVPTXISD::StoreParamV2"; | ||||||
628 | case NVPTXISD::StoreParamV4: | ||||||
629 | return "NVPTXISD::StoreParamV4"; | ||||||
630 | case NVPTXISD::StoreParamS32: | ||||||
631 | return "NVPTXISD::StoreParamS32"; | ||||||
632 | case NVPTXISD::StoreParamU32: | ||||||
633 | return "NVPTXISD::StoreParamU32"; | ||||||
634 | case NVPTXISD::CallArgBegin: | ||||||
635 | return "NVPTXISD::CallArgBegin"; | ||||||
636 | case NVPTXISD::CallArg: | ||||||
637 | return "NVPTXISD::CallArg"; | ||||||
638 | case NVPTXISD::LastCallArg: | ||||||
639 | return "NVPTXISD::LastCallArg"; | ||||||
640 | case NVPTXISD::CallArgEnd: | ||||||
641 | return "NVPTXISD::CallArgEnd"; | ||||||
642 | case NVPTXISD::CallVoid: | ||||||
643 | return "NVPTXISD::CallVoid"; | ||||||
644 | case NVPTXISD::CallVal: | ||||||
645 | return "NVPTXISD::CallVal"; | ||||||
646 | case NVPTXISD::CallSymbol: | ||||||
647 | return "NVPTXISD::CallSymbol"; | ||||||
648 | case NVPTXISD::Prototype: | ||||||
649 | return "NVPTXISD::Prototype"; | ||||||
650 | case NVPTXISD::MoveParam: | ||||||
651 | return "NVPTXISD::MoveParam"; | ||||||
652 | case NVPTXISD::StoreRetval: | ||||||
653 | return "NVPTXISD::StoreRetval"; | ||||||
654 | case NVPTXISD::StoreRetvalV2: | ||||||
655 | return "NVPTXISD::StoreRetvalV2"; | ||||||
656 | case NVPTXISD::StoreRetvalV4: | ||||||
657 | return "NVPTXISD::StoreRetvalV4"; | ||||||
658 | case NVPTXISD::PseudoUseParam: | ||||||
659 | return "NVPTXISD::PseudoUseParam"; | ||||||
660 | case NVPTXISD::RETURN: | ||||||
661 | return "NVPTXISD::RETURN"; | ||||||
662 | case NVPTXISD::CallSeqBegin: | ||||||
663 | return "NVPTXISD::CallSeqBegin"; | ||||||
664 | case NVPTXISD::CallSeqEnd: | ||||||
665 | return "NVPTXISD::CallSeqEnd"; | ||||||
666 | case NVPTXISD::CallPrototype: | ||||||
667 | return "NVPTXISD::CallPrototype"; | ||||||
668 | case NVPTXISD::ProxyReg: | ||||||
669 | return "NVPTXISD::ProxyReg"; | ||||||
670 | case NVPTXISD::LoadV2: | ||||||
671 | return "NVPTXISD::LoadV2"; | ||||||
672 | case NVPTXISD::LoadV4: | ||||||
673 | return "NVPTXISD::LoadV4"; | ||||||
674 | case NVPTXISD::LDGV2: | ||||||
675 | return "NVPTXISD::LDGV2"; | ||||||
676 | case NVPTXISD::LDGV4: | ||||||
677 | return "NVPTXISD::LDGV4"; | ||||||
678 | case NVPTXISD::LDUV2: | ||||||
679 | return "NVPTXISD::LDUV2"; | ||||||
680 | case NVPTXISD::LDUV4: | ||||||
681 | return "NVPTXISD::LDUV4"; | ||||||
682 | case NVPTXISD::StoreV2: | ||||||
683 | return "NVPTXISD::StoreV2"; | ||||||
684 | case NVPTXISD::StoreV4: | ||||||
685 | return "NVPTXISD::StoreV4"; | ||||||
686 | case NVPTXISD::FUN_SHFL_CLAMP: | ||||||
687 | return "NVPTXISD::FUN_SHFL_CLAMP"; | ||||||
688 | case NVPTXISD::FUN_SHFR_CLAMP: | ||||||
689 | return "NVPTXISD::FUN_SHFR_CLAMP"; | ||||||
690 | case NVPTXISD::IMAD: | ||||||
691 | return "NVPTXISD::IMAD"; | ||||||
692 | case NVPTXISD::SETP_F16X2: | ||||||
693 | return "NVPTXISD::SETP_F16X2"; | ||||||
694 | case NVPTXISD::Dummy: | ||||||
695 | return "NVPTXISD::Dummy"; | ||||||
696 | case NVPTXISD::MUL_WIDE_SIGNED: | ||||||
697 | return "NVPTXISD::MUL_WIDE_SIGNED"; | ||||||
698 | case NVPTXISD::MUL_WIDE_UNSIGNED: | ||||||
699 | return "NVPTXISD::MUL_WIDE_UNSIGNED"; | ||||||
700 | case NVPTXISD::Tex1DFloatS32: return "NVPTXISD::Tex1DFloatS32"; | ||||||
701 | case NVPTXISD::Tex1DFloatFloat: return "NVPTXISD::Tex1DFloatFloat"; | ||||||
702 | case NVPTXISD::Tex1DFloatFloatLevel: | ||||||
703 | return "NVPTXISD::Tex1DFloatFloatLevel"; | ||||||
704 | case NVPTXISD::Tex1DFloatFloatGrad: | ||||||
705 | return "NVPTXISD::Tex1DFloatFloatGrad"; | ||||||
706 | case NVPTXISD::Tex1DS32S32: return "NVPTXISD::Tex1DS32S32"; | ||||||
707 | case NVPTXISD::Tex1DS32Float: return "NVPTXISD::Tex1DS32Float"; | ||||||
708 | case NVPTXISD::Tex1DS32FloatLevel: | ||||||
709 | return "NVPTXISD::Tex1DS32FloatLevel"; | ||||||
710 | case NVPTXISD::Tex1DS32FloatGrad: | ||||||
711 | return "NVPTXISD::Tex1DS32FloatGrad"; | ||||||
712 | case NVPTXISD::Tex1DU32S32: return "NVPTXISD::Tex1DU32S32"; | ||||||
713 | case NVPTXISD::Tex1DU32Float: return "NVPTXISD::Tex1DU32Float"; | ||||||
714 | case NVPTXISD::Tex1DU32FloatLevel: | ||||||
715 | return "NVPTXISD::Tex1DU32FloatLevel"; | ||||||
716 | case NVPTXISD::Tex1DU32FloatGrad: | ||||||
717 | return "NVPTXISD::Tex1DU32FloatGrad"; | ||||||
718 | case NVPTXISD::Tex1DArrayFloatS32: return "NVPTXISD::Tex1DArrayFloatS32"; | ||||||
719 | case NVPTXISD::Tex1DArrayFloatFloat: return "NVPTXISD::Tex1DArrayFloatFloat"; | ||||||
720 | case NVPTXISD::Tex1DArrayFloatFloatLevel: | ||||||
721 | return "NVPTXISD::Tex1DArrayFloatFloatLevel"; | ||||||
722 | case NVPTXISD::Tex1DArrayFloatFloatGrad: | ||||||
723 | return "NVPTXISD::Tex1DArrayFloatFloatGrad"; | ||||||
724 | case NVPTXISD::Tex1DArrayS32S32: return "NVPTXISD::Tex1DArrayS32S32"; | ||||||
725 | case NVPTXISD::Tex1DArrayS32Float: return "NVPTXISD::Tex1DArrayS32Float"; | ||||||
726 | case NVPTXISD::Tex1DArrayS32FloatLevel: | ||||||
727 | return "NVPTXISD::Tex1DArrayS32FloatLevel"; | ||||||
728 | case NVPTXISD::Tex1DArrayS32FloatGrad: | ||||||
729 | return "NVPTXISD::Tex1DArrayS32FloatGrad"; | ||||||
730 | case NVPTXISD::Tex1DArrayU32S32: return "NVPTXISD::Tex1DArrayU32S32"; | ||||||
731 | case NVPTXISD::Tex1DArrayU32Float: return "NVPTXISD::Tex1DArrayU32Float"; | ||||||
732 | case NVPTXISD::Tex1DArrayU32FloatLevel: | ||||||
733 | return "NVPTXISD::Tex1DArrayU32FloatLevel"; | ||||||
734 | case NVPTXISD::Tex1DArrayU32FloatGrad: | ||||||
735 | return "NVPTXISD::Tex1DArrayU32FloatGrad"; | ||||||
736 | case NVPTXISD::Tex2DFloatS32: return "NVPTXISD::Tex2DFloatS32"; | ||||||
737 | case NVPTXISD::Tex2DFloatFloat: return "NVPTXISD::Tex2DFloatFloat"; | ||||||
738 | case NVPTXISD::Tex2DFloatFloatLevel: | ||||||
739 | return "NVPTXISD::Tex2DFloatFloatLevel"; | ||||||
740 | case NVPTXISD::Tex2DFloatFloatGrad: | ||||||
741 | return "NVPTXISD::Tex2DFloatFloatGrad"; | ||||||
742 | case NVPTXISD::Tex2DS32S32: return "NVPTXISD::Tex2DS32S32"; | ||||||
743 | case NVPTXISD::Tex2DS32Float: return "NVPTXISD::Tex2DS32Float"; | ||||||
744 | case NVPTXISD::Tex2DS32FloatLevel: | ||||||
745 | return "NVPTXISD::Tex2DS32FloatLevel"; | ||||||
746 | case NVPTXISD::Tex2DS32FloatGrad: | ||||||
747 | return "NVPTXISD::Tex2DS32FloatGrad"; | ||||||
748 | case NVPTXISD::Tex2DU32S32: return "NVPTXISD::Tex2DU32S32"; | ||||||
749 | case NVPTXISD::Tex2DU32Float: return "NVPTXISD::Tex2DU32Float"; | ||||||
750 | case NVPTXISD::Tex2DU32FloatLevel: | ||||||
751 | return "NVPTXISD::Tex2DU32FloatLevel"; | ||||||
752 | case NVPTXISD::Tex2DU32FloatGrad: | ||||||
753 | return "NVPTXISD::Tex2DU32FloatGrad"; | ||||||
754 | case NVPTXISD::Tex2DArrayFloatS32: return "NVPTXISD::Tex2DArrayFloatS32"; | ||||||
755 | case NVPTXISD::Tex2DArrayFloatFloat: return "NVPTXISD::Tex2DArrayFloatFloat"; | ||||||
756 | case NVPTXISD::Tex2DArrayFloatFloatLevel: | ||||||
757 | return "NVPTXISD::Tex2DArrayFloatFloatLevel"; | ||||||
758 | case NVPTXISD::Tex2DArrayFloatFloatGrad: | ||||||
759 | return "NVPTXISD::Tex2DArrayFloatFloatGrad"; | ||||||
760 | case NVPTXISD::Tex2DArrayS32S32: return "NVPTXISD::Tex2DArrayS32S32"; | ||||||
761 | case NVPTXISD::Tex2DArrayS32Float: return "NVPTXISD::Tex2DArrayS32Float"; | ||||||
762 | case NVPTXISD::Tex2DArrayS32FloatLevel: | ||||||
763 | return "NVPTXISD::Tex2DArrayS32FloatLevel"; | ||||||
764 | case NVPTXISD::Tex2DArrayS32FloatGrad: | ||||||
765 | return "NVPTXISD::Tex2DArrayS32FloatGrad"; | ||||||
766 | case NVPTXISD::Tex2DArrayU32S32: return "NVPTXISD::Tex2DArrayU32S32"; | ||||||
767 | case NVPTXISD::Tex2DArrayU32Float: return "NVPTXISD::Tex2DArrayU32Float"; | ||||||
768 | case NVPTXISD::Tex2DArrayU32FloatLevel: | ||||||
769 | return "NVPTXISD::Tex2DArrayU32FloatLevel"; | ||||||
770 | case NVPTXISD::Tex2DArrayU32FloatGrad: | ||||||
771 | return "NVPTXISD::Tex2DArrayU32FloatGrad"; | ||||||
772 | case NVPTXISD::Tex3DFloatS32: return "NVPTXISD::Tex3DFloatS32"; | ||||||
773 | case NVPTXISD::Tex3DFloatFloat: return "NVPTXISD::Tex3DFloatFloat"; | ||||||
774 | case NVPTXISD::Tex3DFloatFloatLevel: | ||||||
775 | return "NVPTXISD::Tex3DFloatFloatLevel"; | ||||||
776 | case NVPTXISD::Tex3DFloatFloatGrad: | ||||||
777 | return "NVPTXISD::Tex3DFloatFloatGrad"; | ||||||
778 | case NVPTXISD::Tex3DS32S32: return "NVPTXISD::Tex3DS32S32"; | ||||||
779 | case NVPTXISD::Tex3DS32Float: return "NVPTXISD::Tex3DS32Float"; | ||||||
780 | case NVPTXISD::Tex3DS32FloatLevel: | ||||||
781 | return "NVPTXISD::Tex3DS32FloatLevel"; | ||||||
782 | case NVPTXISD::Tex3DS32FloatGrad: | ||||||
783 | return "NVPTXISD::Tex3DS32FloatGrad"; | ||||||
784 | case NVPTXISD::Tex3DU32S32: return "NVPTXISD::Tex3DU32S32"; | ||||||
785 | case NVPTXISD::Tex3DU32Float: return "NVPTXISD::Tex3DU32Float"; | ||||||
786 | case NVPTXISD::Tex3DU32FloatLevel: | ||||||
787 | return "NVPTXISD::Tex3DU32FloatLevel"; | ||||||
788 | case NVPTXISD::Tex3DU32FloatGrad: | ||||||
789 | return "NVPTXISD::Tex3DU32FloatGrad"; | ||||||
790 | case NVPTXISD::TexCubeFloatFloat: return "NVPTXISD::TexCubeFloatFloat"; | ||||||
791 | case NVPTXISD::TexCubeFloatFloatLevel: | ||||||
792 | return "NVPTXISD::TexCubeFloatFloatLevel"; | ||||||
793 | case NVPTXISD::TexCubeS32Float: return "NVPTXISD::TexCubeS32Float"; | ||||||
794 | case NVPTXISD::TexCubeS32FloatLevel: | ||||||
795 | return "NVPTXISD::TexCubeS32FloatLevel"; | ||||||
796 | case NVPTXISD::TexCubeU32Float: return "NVPTXISD::TexCubeU32Float"; | ||||||
797 | case NVPTXISD::TexCubeU32FloatLevel: | ||||||
798 | return "NVPTXISD::TexCubeU32FloatLevel"; | ||||||
799 | case NVPTXISD::TexCubeArrayFloatFloat: | ||||||
800 | return "NVPTXISD::TexCubeArrayFloatFloat"; | ||||||
801 | case NVPTXISD::TexCubeArrayFloatFloatLevel: | ||||||
802 | return "NVPTXISD::TexCubeArrayFloatFloatLevel"; | ||||||
803 | case NVPTXISD::TexCubeArrayS32Float: | ||||||
804 | return "NVPTXISD::TexCubeArrayS32Float"; | ||||||
805 | case NVPTXISD::TexCubeArrayS32FloatLevel: | ||||||
806 | return "NVPTXISD::TexCubeArrayS32FloatLevel"; | ||||||
807 | case NVPTXISD::TexCubeArrayU32Float: | ||||||
808 | return "NVPTXISD::TexCubeArrayU32Float"; | ||||||
809 | case NVPTXISD::TexCubeArrayU32FloatLevel: | ||||||
810 | return "NVPTXISD::TexCubeArrayU32FloatLevel"; | ||||||
811 | case NVPTXISD::Tld4R2DFloatFloat: | ||||||
812 | return "NVPTXISD::Tld4R2DFloatFloat"; | ||||||
813 | case NVPTXISD::Tld4G2DFloatFloat: | ||||||
814 | return "NVPTXISD::Tld4G2DFloatFloat"; | ||||||
815 | case NVPTXISD::Tld4B2DFloatFloat: | ||||||
816 | return "NVPTXISD::Tld4B2DFloatFloat"; | ||||||
817 | case NVPTXISD::Tld4A2DFloatFloat: | ||||||
818 | return "NVPTXISD::Tld4A2DFloatFloat"; | ||||||
819 | case NVPTXISD::Tld4R2DS64Float: | ||||||
820 | return "NVPTXISD::Tld4R2DS64Float"; | ||||||
821 | case NVPTXISD::Tld4G2DS64Float: | ||||||
822 | return "NVPTXISD::Tld4G2DS64Float"; | ||||||
823 | case NVPTXISD::Tld4B2DS64Float: | ||||||
824 | return "NVPTXISD::Tld4B2DS64Float"; | ||||||
825 | case NVPTXISD::Tld4A2DS64Float: | ||||||
826 | return "NVPTXISD::Tld4A2DS64Float"; | ||||||
827 | case NVPTXISD::Tld4R2DU64Float: | ||||||
828 | return "NVPTXISD::Tld4R2DU64Float"; | ||||||
829 | case NVPTXISD::Tld4G2DU64Float: | ||||||
830 | return "NVPTXISD::Tld4G2DU64Float"; | ||||||
831 | case NVPTXISD::Tld4B2DU64Float: | ||||||
832 | return "NVPTXISD::Tld4B2DU64Float"; | ||||||
833 | case NVPTXISD::Tld4A2DU64Float: | ||||||
834 | return "NVPTXISD::Tld4A2DU64Float"; | ||||||
835 | |||||||
836 | case NVPTXISD::TexUnified1DFloatS32: | ||||||
837 | return "NVPTXISD::TexUnified1DFloatS32"; | ||||||
838 | case NVPTXISD::TexUnified1DFloatFloat: | ||||||
839 | return "NVPTXISD::TexUnified1DFloatFloat"; | ||||||
840 | case NVPTXISD::TexUnified1DFloatFloatLevel: | ||||||
841 | return "NVPTXISD::TexUnified1DFloatFloatLevel"; | ||||||
842 | case NVPTXISD::TexUnified1DFloatFloatGrad: | ||||||
843 | return "NVPTXISD::TexUnified1DFloatFloatGrad"; | ||||||
844 | case NVPTXISD::TexUnified1DS32S32: | ||||||
845 | return "NVPTXISD::TexUnified1DS32S32"; | ||||||
846 | case NVPTXISD::TexUnified1DS32Float: | ||||||
847 | return "NVPTXISD::TexUnified1DS32Float"; | ||||||
848 | case NVPTXISD::TexUnified1DS32FloatLevel: | ||||||
849 | return "NVPTXISD::TexUnified1DS32FloatLevel"; | ||||||
850 | case NVPTXISD::TexUnified1DS32FloatGrad: | ||||||
851 | return "NVPTXISD::TexUnified1DS32FloatGrad"; | ||||||
852 | case NVPTXISD::TexUnified1DU32S32: | ||||||
853 | return "NVPTXISD::TexUnified1DU32S32"; | ||||||
854 | case NVPTXISD::TexUnified1DU32Float: | ||||||
855 | return "NVPTXISD::TexUnified1DU32Float"; | ||||||
856 | case NVPTXISD::TexUnified1DU32FloatLevel: | ||||||
857 | return "NVPTXISD::TexUnified1DU32FloatLevel"; | ||||||
858 | case NVPTXISD::TexUnified1DU32FloatGrad: | ||||||
859 | return "NVPTXISD::TexUnified1DU32FloatGrad"; | ||||||
860 | case NVPTXISD::TexUnified1DArrayFloatS32: | ||||||
861 | return "NVPTXISD::TexUnified1DArrayFloatS32"; | ||||||
862 | case NVPTXISD::TexUnified1DArrayFloatFloat: | ||||||
863 | return "NVPTXISD::TexUnified1DArrayFloatFloat"; | ||||||
864 | case NVPTXISD::TexUnified1DArrayFloatFloatLevel: | ||||||
865 | return "NVPTXISD::TexUnified1DArrayFloatFloatLevel"; | ||||||
866 | case NVPTXISD::TexUnified1DArrayFloatFloatGrad: | ||||||
867 | return "NVPTXISD::TexUnified1DArrayFloatFloatGrad"; | ||||||
868 | case NVPTXISD::TexUnified1DArrayS32S32: | ||||||
869 | return "NVPTXISD::TexUnified1DArrayS32S32"; | ||||||
870 | case NVPTXISD::TexUnified1DArrayS32Float: | ||||||
871 | return "NVPTXISD::TexUnified1DArrayS32Float"; | ||||||
872 | case NVPTXISD::TexUnified1DArrayS32FloatLevel: | ||||||
873 | return "NVPTXISD::TexUnified1DArrayS32FloatLevel"; | ||||||
874 | case NVPTXISD::TexUnified1DArrayS32FloatGrad: | ||||||
875 | return "NVPTXISD::TexUnified1DArrayS32FloatGrad"; | ||||||
876 | case NVPTXISD::TexUnified1DArrayU32S32: | ||||||
877 | return "NVPTXISD::TexUnified1DArrayU32S32"; | ||||||
878 | case NVPTXISD::TexUnified1DArrayU32Float: | ||||||
879 | return "NVPTXISD::TexUnified1DArrayU32Float"; | ||||||
880 | case NVPTXISD::TexUnified1DArrayU32FloatLevel: | ||||||
881 | return "NVPTXISD::TexUnified1DArrayU32FloatLevel"; | ||||||
882 | case NVPTXISD::TexUnified1DArrayU32FloatGrad: | ||||||
883 | return "NVPTXISD::TexUnified1DArrayU32FloatGrad"; | ||||||
884 | case NVPTXISD::TexUnified2DFloatS32: | ||||||
885 | return "NVPTXISD::TexUnified2DFloatS32"; | ||||||
886 | case NVPTXISD::TexUnified2DFloatFloat: | ||||||
887 | return "NVPTXISD::TexUnified2DFloatFloat"; | ||||||
888 | case NVPTXISD::TexUnified2DFloatFloatLevel: | ||||||
889 | return "NVPTXISD::TexUnified2DFloatFloatLevel"; | ||||||
890 | case NVPTXISD::TexUnified2DFloatFloatGrad: | ||||||
891 | return "NVPTXISD::TexUnified2DFloatFloatGrad"; | ||||||
892 | case NVPTXISD::TexUnified2DS32S32: | ||||||
893 | return "NVPTXISD::TexUnified2DS32S32"; | ||||||
894 | case NVPTXISD::TexUnified2DS32Float: | ||||||
895 | return "NVPTXISD::TexUnified2DS32Float"; | ||||||
896 | case NVPTXISD::TexUnified2DS32FloatLevel: | ||||||
897 | return "NVPTXISD::TexUnified2DS32FloatLevel"; | ||||||
898 | case NVPTXISD::TexUnified2DS32FloatGrad: | ||||||
899 | return "NVPTXISD::TexUnified2DS32FloatGrad"; | ||||||
900 | case NVPTXISD::TexUnified2DU32S32: | ||||||
901 | return "NVPTXISD::TexUnified2DU32S32"; | ||||||
902 | case NVPTXISD::TexUnified2DU32Float: | ||||||
903 | return "NVPTXISD::TexUnified2DU32Float"; | ||||||
904 | case NVPTXISD::TexUnified2DU32FloatLevel: | ||||||
905 | return "NVPTXISD::TexUnified2DU32FloatLevel"; | ||||||
906 | case NVPTXISD::TexUnified2DU32FloatGrad: | ||||||
907 | return "NVPTXISD::TexUnified2DU32FloatGrad"; | ||||||
908 | case NVPTXISD::TexUnified2DArrayFloatS32: | ||||||
909 | return "NVPTXISD::TexUnified2DArrayFloatS32"; | ||||||
910 | case NVPTXISD::TexUnified2DArrayFloatFloat: | ||||||
911 | return "NVPTXISD::TexUnified2DArrayFloatFloat"; | ||||||
912 | case NVPTXISD::TexUnified2DArrayFloatFloatLevel: | ||||||
913 | return "NVPTXISD::TexUnified2DArrayFloatFloatLevel"; | ||||||
914 | case NVPTXISD::TexUnified2DArrayFloatFloatGrad: | ||||||
915 | return "NVPTXISD::TexUnified2DArrayFloatFloatGrad"; | ||||||
916 | case NVPTXISD::TexUnified2DArrayS32S32: | ||||||
917 | return "NVPTXISD::TexUnified2DArrayS32S32"; | ||||||
918 | case NVPTXISD::TexUnified2DArrayS32Float: | ||||||
919 | return "NVPTXISD::TexUnified2DArrayS32Float"; | ||||||
920 | case NVPTXISD::TexUnified2DArrayS32FloatLevel: | ||||||
921 | return "NVPTXISD::TexUnified2DArrayS32FloatLevel"; | ||||||
922 | case NVPTXISD::TexUnified2DArrayS32FloatGrad: | ||||||
923 | return "NVPTXISD::TexUnified2DArrayS32FloatGrad"; | ||||||
924 | case NVPTXISD::TexUnified2DArrayU32S32: | ||||||
925 | return "NVPTXISD::TexUnified2DArrayU32S32"; | ||||||
926 | case NVPTXISD::TexUnified2DArrayU32Float: | ||||||
927 | return "NVPTXISD::TexUnified2DArrayU32Float"; | ||||||
928 | case NVPTXISD::TexUnified2DArrayU32FloatLevel: | ||||||
929 | return "NVPTXISD::TexUnified2DArrayU32FloatLevel"; | ||||||
930 | case NVPTXISD::TexUnified2DArrayU32FloatGrad: | ||||||
931 | return "NVPTXISD::TexUnified2DArrayU32FloatGrad"; | ||||||
932 | case NVPTXISD::TexUnified3DFloatS32: | ||||||
933 | return "NVPTXISD::TexUnified3DFloatS32"; | ||||||
934 | case NVPTXISD::TexUnified3DFloatFloat: | ||||||
935 | return "NVPTXISD::TexUnified3DFloatFloat"; | ||||||
936 | case NVPTXISD::TexUnified3DFloatFloatLevel: | ||||||
937 | return "NVPTXISD::TexUnified3DFloatFloatLevel"; | ||||||
938 | case NVPTXISD::TexUnified3DFloatFloatGrad: | ||||||
939 | return "NVPTXISD::TexUnified3DFloatFloatGrad"; | ||||||
940 | case NVPTXISD::TexUnified3DS32S32: | ||||||
941 | return "NVPTXISD::TexUnified3DS32S32"; | ||||||
942 | case NVPTXISD::TexUnified3DS32Float: | ||||||
943 | return "NVPTXISD::TexUnified3DS32Float"; | ||||||
944 | case NVPTXISD::TexUnified3DS32FloatLevel: | ||||||
945 | return "NVPTXISD::TexUnified3DS32FloatLevel"; | ||||||
946 | case NVPTXISD::TexUnified3DS32FloatGrad: | ||||||
947 | return "NVPTXISD::TexUnified3DS32FloatGrad"; | ||||||
948 | case NVPTXISD::TexUnified3DU32S32: | ||||||
949 | return "NVPTXISD::TexUnified3DU32S32"; | ||||||
950 | case NVPTXISD::TexUnified3DU32Float: | ||||||
951 | return "NVPTXISD::TexUnified3DU32Float"; | ||||||
952 | case NVPTXISD::TexUnified3DU32FloatLevel: | ||||||
953 | return "NVPTXISD::TexUnified3DU32FloatLevel"; | ||||||
954 | case NVPTXISD::TexUnified3DU32FloatGrad: | ||||||
955 | return "NVPTXISD::TexUnified3DU32FloatGrad"; | ||||||
956 | case NVPTXISD::TexUnifiedCubeFloatFloat: | ||||||
957 | return "NVPTXISD::TexUnifiedCubeFloatFloat"; | ||||||
958 | case NVPTXISD::TexUnifiedCubeFloatFloatLevel: | ||||||
959 | return "NVPTXISD::TexUnifiedCubeFloatFloatLevel"; | ||||||
960 | case NVPTXISD::TexUnifiedCubeS32Float: | ||||||
961 | return "NVPTXISD::TexUnifiedCubeS32Float"; | ||||||
962 | case NVPTXISD::TexUnifiedCubeS32FloatLevel: | ||||||
963 | return "NVPTXISD::TexUnifiedCubeS32FloatLevel"; | ||||||
964 | case NVPTXISD::TexUnifiedCubeU32Float: | ||||||
965 | return "NVPTXISD::TexUnifiedCubeU32Float"; | ||||||
966 | case NVPTXISD::TexUnifiedCubeU32FloatLevel: | ||||||
967 | return "NVPTXISD::TexUnifiedCubeU32FloatLevel"; | ||||||
968 | case NVPTXISD::TexUnifiedCubeArrayFloatFloat: | ||||||
969 | return "NVPTXISD::TexUnifiedCubeArrayFloatFloat"; | ||||||
970 | case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel: | ||||||
971 | return "NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel"; | ||||||
972 | case NVPTXISD::TexUnifiedCubeArrayS32Float: | ||||||
973 | return "NVPTXISD::TexUnifiedCubeArrayS32Float"; | ||||||
974 | case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel: | ||||||
975 | return "NVPTXISD::TexUnifiedCubeArrayS32FloatLevel"; | ||||||
976 | case NVPTXISD::TexUnifiedCubeArrayU32Float: | ||||||
977 | return "NVPTXISD::TexUnifiedCubeArrayU32Float"; | ||||||
978 | case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel: | ||||||
979 | return "NVPTXISD::TexUnifiedCubeArrayU32FloatLevel"; | ||||||
980 | case NVPTXISD::Tld4UnifiedR2DFloatFloat: | ||||||
981 | return "NVPTXISD::Tld4UnifiedR2DFloatFloat"; | ||||||
982 | case NVPTXISD::Tld4UnifiedG2DFloatFloat: | ||||||
983 | return "NVPTXISD::Tld4UnifiedG2DFloatFloat"; | ||||||
984 | case NVPTXISD::Tld4UnifiedB2DFloatFloat: | ||||||
985 | return "NVPTXISD::Tld4UnifiedB2DFloatFloat"; | ||||||
986 | case NVPTXISD::Tld4UnifiedA2DFloatFloat: | ||||||
987 | return "NVPTXISD::Tld4UnifiedA2DFloatFloat"; | ||||||
988 | case NVPTXISD::Tld4UnifiedR2DS64Float: | ||||||
989 | return "NVPTXISD::Tld4UnifiedR2DS64Float"; | ||||||
990 | case NVPTXISD::Tld4UnifiedG2DS64Float: | ||||||
991 | return "NVPTXISD::Tld4UnifiedG2DS64Float"; | ||||||
992 | case NVPTXISD::Tld4UnifiedB2DS64Float: | ||||||
993 | return "NVPTXISD::Tld4UnifiedB2DS64Float"; | ||||||
994 | case NVPTXISD::Tld4UnifiedA2DS64Float: | ||||||
995 | return "NVPTXISD::Tld4UnifiedA2DS64Float"; | ||||||
996 | case NVPTXISD::Tld4UnifiedR2DU64Float: | ||||||
997 | return "NVPTXISD::Tld4UnifiedR2DU64Float"; | ||||||
998 | case NVPTXISD::Tld4UnifiedG2DU64Float: | ||||||
999 | return "NVPTXISD::Tld4UnifiedG2DU64Float"; | ||||||
1000 | case NVPTXISD::Tld4UnifiedB2DU64Float: | ||||||
1001 | return "NVPTXISD::Tld4UnifiedB2DU64Float"; | ||||||
1002 | case NVPTXISD::Tld4UnifiedA2DU64Float: | ||||||
1003 | return "NVPTXISD::Tld4UnifiedA2DU64Float"; | ||||||
1004 | |||||||
1005 | case NVPTXISD::Suld1DI8Clamp: return "NVPTXISD::Suld1DI8Clamp"; | ||||||
1006 | case NVPTXISD::Suld1DI16Clamp: return "NVPTXISD::Suld1DI16Clamp"; | ||||||
1007 | case NVPTXISD::Suld1DI32Clamp: return "NVPTXISD::Suld1DI32Clamp"; | ||||||
1008 | case NVPTXISD::Suld1DI64Clamp: return "NVPTXISD::Suld1DI64Clamp"; | ||||||
1009 | case NVPTXISD::Suld1DV2I8Clamp: return "NVPTXISD::Suld1DV2I8Clamp"; | ||||||
1010 | case NVPTXISD::Suld1DV2I16Clamp: return "NVPTXISD::Suld1DV2I16Clamp"; | ||||||
1011 | case NVPTXISD::Suld1DV2I32Clamp: return "NVPTXISD::Suld1DV2I32Clamp"; | ||||||
1012 | case NVPTXISD::Suld1DV2I64Clamp: return "NVPTXISD::Suld1DV2I64Clamp"; | ||||||
1013 | case NVPTXISD::Suld1DV4I8Clamp: return "NVPTXISD::Suld1DV4I8Clamp"; | ||||||
1014 | case NVPTXISD::Suld1DV4I16Clamp: return "NVPTXISD::Suld1DV4I16Clamp"; | ||||||
1015 | case NVPTXISD::Suld1DV4I32Clamp: return "NVPTXISD::Suld1DV4I32Clamp"; | ||||||
1016 | |||||||
1017 | case NVPTXISD::Suld1DArrayI8Clamp: return "NVPTXISD::Suld1DArrayI8Clamp"; | ||||||
1018 | case NVPTXISD::Suld1DArrayI16Clamp: return "NVPTXISD::Suld1DArrayI16Clamp"; | ||||||
1019 | case NVPTXISD::Suld1DArrayI32Clamp: return "NVPTXISD::Suld1DArrayI32Clamp"; | ||||||
1020 | case NVPTXISD::Suld1DArrayI64Clamp: return "NVPTXISD::Suld1DArrayI64Clamp"; | ||||||
1021 | case NVPTXISD::Suld1DArrayV2I8Clamp: return "NVPTXISD::Suld1DArrayV2I8Clamp"; | ||||||
1022 | case NVPTXISD::Suld1DArrayV2I16Clamp:return "NVPTXISD::Suld1DArrayV2I16Clamp"; | ||||||
1023 | case NVPTXISD::Suld1DArrayV2I32Clamp:return "NVPTXISD::Suld1DArrayV2I32Clamp"; | ||||||
1024 | case NVPTXISD::Suld1DArrayV2I64Clamp:return "NVPTXISD::Suld1DArrayV2I64Clamp"; | ||||||
1025 | case NVPTXISD::Suld1DArrayV4I8Clamp: return "NVPTXISD::Suld1DArrayV4I8Clamp"; | ||||||
1026 | case NVPTXISD::Suld1DArrayV4I16Clamp:return "NVPTXISD::Suld1DArrayV4I16Clamp"; | ||||||
1027 | case NVPTXISD::Suld1DArrayV4I32Clamp:return "NVPTXISD::Suld1DArrayV4I32Clamp"; | ||||||
1028 | |||||||
1029 | case NVPTXISD::Suld2DI8Clamp: return "NVPTXISD::Suld2DI8Clamp"; | ||||||
1030 | case NVPTXISD::Suld2DI16Clamp: return "NVPTXISD::Suld2DI16Clamp"; | ||||||
1031 | case NVPTXISD::Suld2DI32Clamp: return "NVPTXISD::Suld2DI32Clamp"; | ||||||
1032 | case NVPTXISD::Suld2DI64Clamp: return "NVPTXISD::Suld2DI64Clamp"; | ||||||
1033 | case NVPTXISD::Suld2DV2I8Clamp: return "NVPTXISD::Suld2DV2I8Clamp"; | ||||||
1034 | case NVPTXISD::Suld2DV2I16Clamp: return "NVPTXISD::Suld2DV2I16Clamp"; | ||||||
1035 | case NVPTXISD::Suld2DV2I32Clamp: return "NVPTXISD::Suld2DV2I32Clamp"; | ||||||
1036 | case NVPTXISD::Suld2DV2I64Clamp: return "NVPTXISD::Suld2DV2I64Clamp"; | ||||||
1037 | case NVPTXISD::Suld2DV4I8Clamp: return "NVPTXISD::Suld2DV4I8Clamp"; | ||||||
1038 | case NVPTXISD::Suld2DV4I16Clamp: return "NVPTXISD::Suld2DV4I16Clamp"; | ||||||
1039 | case NVPTXISD::Suld2DV4I32Clamp: return "NVPTXISD::Suld2DV4I32Clamp"; | ||||||
1040 | |||||||
1041 | case NVPTXISD::Suld2DArrayI8Clamp: return "NVPTXISD::Suld2DArrayI8Clamp"; | ||||||
1042 | case NVPTXISD::Suld2DArrayI16Clamp: return "NVPTXISD::Suld2DArrayI16Clamp"; | ||||||
1043 | case NVPTXISD::Suld2DArrayI32Clamp: return "NVPTXISD::Suld2DArrayI32Clamp"; | ||||||
1044 | case NVPTXISD::Suld2DArrayI64Clamp: return "NVPTXISD::Suld2DArrayI64Clamp"; | ||||||
1045 | case NVPTXISD::Suld2DArrayV2I8Clamp: return "NVPTXISD::Suld2DArrayV2I8Clamp"; | ||||||
1046 | case NVPTXISD::Suld2DArrayV2I16Clamp:return "NVPTXISD::Suld2DArrayV2I16Clamp"; | ||||||
1047 | case NVPTXISD::Suld2DArrayV2I32Clamp:return "NVPTXISD::Suld2DArrayV2I32Clamp"; | ||||||
1048 | case NVPTXISD::Suld2DArrayV2I64Clamp:return "NVPTXISD::Suld2DArrayV2I64Clamp"; | ||||||
1049 | case NVPTXISD::Suld2DArrayV4I8Clamp: return "NVPTXISD::Suld2DArrayV4I8Clamp"; | ||||||
1050 | case NVPTXISD::Suld2DArrayV4I16Clamp:return "NVPTXISD::Suld2DArrayV4I16Clamp"; | ||||||
1051 | case NVPTXISD::Suld2DArrayV4I32Clamp:return "NVPTXISD::Suld2DArrayV4I32Clamp"; | ||||||
1052 | |||||||
1053 | case NVPTXISD::Suld3DI8Clamp: return "NVPTXISD::Suld3DI8Clamp"; | ||||||
1054 | case NVPTXISD::Suld3DI16Clamp: return "NVPTXISD::Suld3DI16Clamp"; | ||||||
1055 | case NVPTXISD::Suld3DI32Clamp: return "NVPTXISD::Suld3DI32Clamp"; | ||||||
1056 | case NVPTXISD::Suld3DI64Clamp: return "NVPTXISD::Suld3DI64Clamp"; | ||||||
1057 | case NVPTXISD::Suld3DV2I8Clamp: return "NVPTXISD::Suld3DV2I8Clamp"; | ||||||
1058 | case NVPTXISD::Suld3DV2I16Clamp: return "NVPTXISD::Suld3DV2I16Clamp"; | ||||||
1059 | case NVPTXISD::Suld3DV2I32Clamp: return "NVPTXISD::Suld3DV2I32Clamp"; | ||||||
1060 | case NVPTXISD::Suld3DV2I64Clamp: return "NVPTXISD::Suld3DV2I64Clamp"; | ||||||
1061 | case NVPTXISD::Suld3DV4I8Clamp: return "NVPTXISD::Suld3DV4I8Clamp"; | ||||||
1062 | case NVPTXISD::Suld3DV4I16Clamp: return "NVPTXISD::Suld3DV4I16Clamp"; | ||||||
1063 | case NVPTXISD::Suld3DV4I32Clamp: return "NVPTXISD::Suld3DV4I32Clamp"; | ||||||
1064 | |||||||
1065 | case NVPTXISD::Suld1DI8Trap: return "NVPTXISD::Suld1DI8Trap"; | ||||||
1066 | case NVPTXISD::Suld1DI16Trap: return "NVPTXISD::Suld1DI16Trap"; | ||||||
1067 | case NVPTXISD::Suld1DI32Trap: return "NVPTXISD::Suld1DI32Trap"; | ||||||
1068 | case NVPTXISD::Suld1DI64Trap: return "NVPTXISD::Suld1DI64Trap"; | ||||||
1069 | case NVPTXISD::Suld1DV2I8Trap: return "NVPTXISD::Suld1DV2I8Trap"; | ||||||
1070 | case NVPTXISD::Suld1DV2I16Trap: return "NVPTXISD::Suld1DV2I16Trap"; | ||||||
1071 | case NVPTXISD::Suld1DV2I32Trap: return "NVPTXISD::Suld1DV2I32Trap"; | ||||||
1072 | case NVPTXISD::Suld1DV2I64Trap: return "NVPTXISD::Suld1DV2I64Trap"; | ||||||
1073 | case NVPTXISD::Suld1DV4I8Trap: return "NVPTXISD::Suld1DV4I8Trap"; | ||||||
1074 | case NVPTXISD::Suld1DV4I16Trap: return "NVPTXISD::Suld1DV4I16Trap"; | ||||||
1075 | case NVPTXISD::Suld1DV4I32Trap: return "NVPTXISD::Suld1DV4I32Trap"; | ||||||
1076 | |||||||
1077 | case NVPTXISD::Suld1DArrayI8Trap: return "NVPTXISD::Suld1DArrayI8Trap"; | ||||||
1078 | case NVPTXISD::Suld1DArrayI16Trap: return "NVPTXISD::Suld1DArrayI16Trap"; | ||||||
1079 | case NVPTXISD::Suld1DArrayI32Trap: return "NVPTXISD::Suld1DArrayI32Trap"; | ||||||
1080 | case NVPTXISD::Suld1DArrayI64Trap: return "NVPTXISD::Suld1DArrayI64Trap"; | ||||||
1081 | case NVPTXISD::Suld1DArrayV2I8Trap: return "NVPTXISD::Suld1DArrayV2I8Trap"; | ||||||
1082 | case NVPTXISD::Suld1DArrayV2I16Trap: return "NVPTXISD::Suld1DArrayV2I16Trap"; | ||||||
1083 | case NVPTXISD::Suld1DArrayV2I32Trap: return "NVPTXISD::Suld1DArrayV2I32Trap"; | ||||||
1084 | case NVPTXISD::Suld1DArrayV2I64Trap: return "NVPTXISD::Suld1DArrayV2I64Trap"; | ||||||
1085 | case NVPTXISD::Suld1DArrayV4I8Trap: return "NVPTXISD::Suld1DArrayV4I8Trap"; | ||||||
1086 | case NVPTXISD::Suld1DArrayV4I16Trap: return "NVPTXISD::Suld1DArrayV4I16Trap"; | ||||||
1087 | case NVPTXISD::Suld1DArrayV4I32Trap: return "NVPTXISD::Suld1DArrayV4I32Trap"; | ||||||
1088 | |||||||
1089 | case NVPTXISD::Suld2DI8Trap: return "NVPTXISD::Suld2DI8Trap"; | ||||||
1090 | case NVPTXISD::Suld2DI16Trap: return "NVPTXISD::Suld2DI16Trap"; | ||||||
1091 | case NVPTXISD::Suld2DI32Trap: return "NVPTXISD::Suld2DI32Trap"; | ||||||
1092 | case NVPTXISD::Suld2DI64Trap: return "NVPTXISD::Suld2DI64Trap"; | ||||||
1093 | case NVPTXISD::Suld2DV2I8Trap: return "NVPTXISD::Suld2DV2I8Trap"; | ||||||
1094 | case NVPTXISD::Suld2DV2I16Trap: return "NVPTXISD::Suld2DV2I16Trap"; | ||||||
1095 | case NVPTXISD::Suld2DV2I32Trap: return "NVPTXISD::Suld2DV2I32Trap"; | ||||||
1096 | case NVPTXISD::Suld2DV2I64Trap: return "NVPTXISD::Suld2DV2I64Trap"; | ||||||
1097 | case NVPTXISD::Suld2DV4I8Trap: return "NVPTXISD::Suld2DV4I8Trap"; | ||||||
1098 | case NVPTXISD::Suld2DV4I16Trap: return "NVPTXISD::Suld2DV4I16Trap"; | ||||||
1099 | case NVPTXISD::Suld2DV4I32Trap: return "NVPTXISD::Suld2DV4I32Trap"; | ||||||
1100 | |||||||
1101 | case NVPTXISD::Suld2DArrayI8Trap: return "NVPTXISD::Suld2DArrayI8Trap"; | ||||||
1102 | case NVPTXISD::Suld2DArrayI16Trap: return "NVPTXISD::Suld2DArrayI16Trap"; | ||||||
1103 | case NVPTXISD::Suld2DArrayI32Trap: return "NVPTXISD::Suld2DArrayI32Trap"; | ||||||
1104 | case NVPTXISD::Suld2DArrayI64Trap: return "NVPTXISD::Suld2DArrayI64Trap"; | ||||||
1105 | case NVPTXISD::Suld2DArrayV2I8Trap: return "NVPTXISD::Suld2DArrayV2I8Trap"; | ||||||
1106 | case NVPTXISD::Suld2DArrayV2I16Trap: return "NVPTXISD::Suld2DArrayV2I16Trap"; | ||||||
1107 | case NVPTXISD::Suld2DArrayV2I32Trap: return "NVPTXISD::Suld2DArrayV2I32Trap"; | ||||||
1108 | case NVPTXISD::Suld2DArrayV2I64Trap: return "NVPTXISD::Suld2DArrayV2I64Trap"; | ||||||
1109 | case NVPTXISD::Suld2DArrayV4I8Trap: return "NVPTXISD::Suld2DArrayV4I8Trap"; | ||||||
1110 | case NVPTXISD::Suld2DArrayV4I16Trap: return "NVPTXISD::Suld2DArrayV4I16Trap"; | ||||||
1111 | case NVPTXISD::Suld2DArrayV4I32Trap: return "NVPTXISD::Suld2DArrayV4I32Trap"; | ||||||
1112 | |||||||
1113 | case NVPTXISD::Suld3DI8Trap: return "NVPTXISD::Suld3DI8Trap"; | ||||||
1114 | case NVPTXISD::Suld3DI16Trap: return "NVPTXISD::Suld3DI16Trap"; | ||||||
1115 | case NVPTXISD::Suld3DI32Trap: return "NVPTXISD::Suld3DI32Trap"; | ||||||
1116 | case NVPTXISD::Suld3DI64Trap: return "NVPTXISD::Suld3DI64Trap"; | ||||||
1117 | case NVPTXISD::Suld3DV2I8Trap: return "NVPTXISD::Suld3DV2I8Trap"; | ||||||
1118 | case NVPTXISD::Suld3DV2I16Trap: return "NVPTXISD::Suld3DV2I16Trap"; | ||||||
1119 | case NVPTXISD::Suld3DV2I32Trap: return "NVPTXISD::Suld3DV2I32Trap"; | ||||||
1120 | case NVPTXISD::Suld3DV2I64Trap: return "NVPTXISD::Suld3DV2I64Trap"; | ||||||
1121 | case NVPTXISD::Suld3DV4I8Trap: return "NVPTXISD::Suld3DV4I8Trap"; | ||||||
1122 | case NVPTXISD::Suld3DV4I16Trap: return "NVPTXISD::Suld3DV4I16Trap"; | ||||||
1123 | case NVPTXISD::Suld3DV4I32Trap: return "NVPTXISD::Suld3DV4I32Trap"; | ||||||
1124 | |||||||
1125 | case NVPTXISD::Suld1DI8Zero: return "NVPTXISD::Suld1DI8Zero"; | ||||||
1126 | case NVPTXISD::Suld1DI16Zero: return "NVPTXISD::Suld1DI16Zero"; | ||||||
1127 | case NVPTXISD::Suld1DI32Zero: return "NVPTXISD::Suld1DI32Zero"; | ||||||
1128 | case NVPTXISD::Suld1DI64Zero: return "NVPTXISD::Suld1DI64Zero"; | ||||||
1129 | case NVPTXISD::Suld1DV2I8Zero: return "NVPTXISD::Suld1DV2I8Zero"; | ||||||
1130 | case NVPTXISD::Suld1DV2I16Zero: return "NVPTXISD::Suld1DV2I16Zero"; | ||||||
1131 | case NVPTXISD::Suld1DV2I32Zero: return "NVPTXISD::Suld1DV2I32Zero"; | ||||||
1132 | case NVPTXISD::Suld1DV2I64Zero: return "NVPTXISD::Suld1DV2I64Zero"; | ||||||
1133 | case NVPTXISD::Suld1DV4I8Zero: return "NVPTXISD::Suld1DV4I8Zero"; | ||||||
1134 | case NVPTXISD::Suld1DV4I16Zero: return "NVPTXISD::Suld1DV4I16Zero"; | ||||||
1135 | case NVPTXISD::Suld1DV4I32Zero: return "NVPTXISD::Suld1DV4I32Zero"; | ||||||
1136 | |||||||
1137 | case NVPTXISD::Suld1DArrayI8Zero: return "NVPTXISD::Suld1DArrayI8Zero"; | ||||||
1138 | case NVPTXISD::Suld1DArrayI16Zero: return "NVPTXISD::Suld1DArrayI16Zero"; | ||||||
1139 | case NVPTXISD::Suld1DArrayI32Zero: return "NVPTXISD::Suld1DArrayI32Zero"; | ||||||
1140 | case NVPTXISD::Suld1DArrayI64Zero: return "NVPTXISD::Suld1DArrayI64Zero"; | ||||||
1141 | case NVPTXISD::Suld1DArrayV2I8Zero: return "NVPTXISD::Suld1DArrayV2I8Zero"; | ||||||
1142 | case NVPTXISD::Suld1DArrayV2I16Zero: return "NVPTXISD::Suld1DArrayV2I16Zero"; | ||||||
1143 | case NVPTXISD::Suld1DArrayV2I32Zero: return "NVPTXISD::Suld1DArrayV2I32Zero"; | ||||||
1144 | case NVPTXISD::Suld1DArrayV2I64Zero: return "NVPTXISD::Suld1DArrayV2I64Zero"; | ||||||
1145 | case NVPTXISD::Suld1DArrayV4I8Zero: return "NVPTXISD::Suld1DArrayV4I8Zero"; | ||||||
1146 | case NVPTXISD::Suld1DArrayV4I16Zero: return "NVPTXISD::Suld1DArrayV4I16Zero"; | ||||||
1147 | case NVPTXISD::Suld1DArrayV4I32Zero: return "NVPTXISD::Suld1DArrayV4I32Zero"; | ||||||
1148 | |||||||
1149 | case NVPTXISD::Suld2DI8Zero: return "NVPTXISD::Suld2DI8Zero"; | ||||||
1150 | case NVPTXISD::Suld2DI16Zero: return "NVPTXISD::Suld2DI16Zero"; | ||||||
1151 | case NVPTXISD::Suld2DI32Zero: return "NVPTXISD::Suld2DI32Zero"; | ||||||
1152 | case NVPTXISD::Suld2DI64Zero: return "NVPTXISD::Suld2DI64Zero"; | ||||||
1153 | case NVPTXISD::Suld2DV2I8Zero: return "NVPTXISD::Suld2DV2I8Zero"; | ||||||
1154 | case NVPTXISD::Suld2DV2I16Zero: return "NVPTXISD::Suld2DV2I16Zero"; | ||||||
1155 | case NVPTXISD::Suld2DV2I32Zero: return "NVPTXISD::Suld2DV2I32Zero"; | ||||||
1156 | case NVPTXISD::Suld2DV2I64Zero: return "NVPTXISD::Suld2DV2I64Zero"; | ||||||
1157 | case NVPTXISD::Suld2DV4I8Zero: return "NVPTXISD::Suld2DV4I8Zero"; | ||||||
1158 | case NVPTXISD::Suld2DV4I16Zero: return "NVPTXISD::Suld2DV4I16Zero"; | ||||||
1159 | case NVPTXISD::Suld2DV4I32Zero: return "NVPTXISD::Suld2DV4I32Zero"; | ||||||
1160 | |||||||
1161 | case NVPTXISD::Suld2DArrayI8Zero: return "NVPTXISD::Suld2DArrayI8Zero"; | ||||||
1162 | case NVPTXISD::Suld2DArrayI16Zero: return "NVPTXISD::Suld2DArrayI16Zero"; | ||||||
1163 | case NVPTXISD::Suld2DArrayI32Zero: return "NVPTXISD::Suld2DArrayI32Zero"; | ||||||
1164 | case NVPTXISD::Suld2DArrayI64Zero: return "NVPTXISD::Suld2DArrayI64Zero"; | ||||||
1165 | case NVPTXISD::Suld2DArrayV2I8Zero: return "NVPTXISD::Suld2DArrayV2I8Zero"; | ||||||
1166 | case NVPTXISD::Suld2DArrayV2I16Zero: return "NVPTXISD::Suld2DArrayV2I16Zero"; | ||||||
1167 | case NVPTXISD::Suld2DArrayV2I32Zero: return "NVPTXISD::Suld2DArrayV2I32Zero"; | ||||||
1168 | case NVPTXISD::Suld2DArrayV2I64Zero: return "NVPTXISD::Suld2DArrayV2I64Zero"; | ||||||
1169 | case NVPTXISD::Suld2DArrayV4I8Zero: return "NVPTXISD::Suld2DArrayV4I8Zero"; | ||||||
1170 | case NVPTXISD::Suld2DArrayV4I16Zero: return "NVPTXISD::Suld2DArrayV4I16Zero"; | ||||||
1171 | case NVPTXISD::Suld2DArrayV4I32Zero: return "NVPTXISD::Suld2DArrayV4I32Zero"; | ||||||
1172 | |||||||
1173 | case NVPTXISD::Suld3DI8Zero: return "NVPTXISD::Suld3DI8Zero"; | ||||||
1174 | case NVPTXISD::Suld3DI16Zero: return "NVPTXISD::Suld3DI16Zero"; | ||||||
1175 | case NVPTXISD::Suld3DI32Zero: return "NVPTXISD::Suld3DI32Zero"; | ||||||
1176 | case NVPTXISD::Suld3DI64Zero: return "NVPTXISD::Suld3DI64Zero"; | ||||||
1177 | case NVPTXISD::Suld3DV2I8Zero: return "NVPTXISD::Suld3DV2I8Zero"; | ||||||
1178 | case NVPTXISD::Suld3DV2I16Zero: return "NVPTXISD::Suld3DV2I16Zero"; | ||||||
1179 | case NVPTXISD::Suld3DV2I32Zero: return "NVPTXISD::Suld3DV2I32Zero"; | ||||||
1180 | case NVPTXISD::Suld3DV2I64Zero: return "NVPTXISD::Suld3DV2I64Zero"; | ||||||
1181 | case NVPTXISD::Suld3DV4I8Zero: return "NVPTXISD::Suld3DV4I8Zero"; | ||||||
1182 | case NVPTXISD::Suld3DV4I16Zero: return "NVPTXISD::Suld3DV4I16Zero"; | ||||||
1183 | case NVPTXISD::Suld3DV4I32Zero: return "NVPTXISD::Suld3DV4I32Zero"; | ||||||
1184 | } | ||||||
1185 | return nullptr; | ||||||
1186 | } | ||||||
1187 | |||||||
1188 | TargetLoweringBase::LegalizeTypeAction | ||||||
1189 | NVPTXTargetLowering::getPreferredVectorAction(MVT VT) const { | ||||||
1190 | if (VT.getVectorNumElements() != 1 && VT.getScalarType() == MVT::i1) | ||||||
1191 | return TypeSplitVector; | ||||||
1192 | if (VT == MVT::v2f16) | ||||||
1193 | return TypeLegal; | ||||||
1194 | return TargetLoweringBase::getPreferredVectorAction(VT); | ||||||
1195 | } | ||||||
1196 | |||||||
1197 | SDValue NVPTXTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, | ||||||
1198 | int Enabled, int &ExtraSteps, | ||||||
1199 | bool &UseOneConst, | ||||||
1200 | bool Reciprocal) const { | ||||||
1201 | if (!(Enabled == ReciprocalEstimate::Enabled || | ||||||
1202 | (Enabled == ReciprocalEstimate::Unspecified && !usePrecSqrtF32()))) | ||||||
1203 | return SDValue(); | ||||||
1204 | |||||||
1205 | if (ExtraSteps == ReciprocalEstimate::Unspecified) | ||||||
1206 | ExtraSteps = 0; | ||||||
1207 | |||||||
1208 | SDLoc DL(Operand); | ||||||
1209 | EVT VT = Operand.getValueType(); | ||||||
1210 | bool Ftz = useF32FTZ(DAG.getMachineFunction()); | ||||||
1211 | |||||||
1212 | auto MakeIntrinsicCall = [&](Intrinsic::ID IID) { | ||||||
1213 | return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT, | ||||||
1214 | DAG.getConstant(IID, DL, MVT::i32), Operand); | ||||||
1215 | }; | ||||||
1216 | |||||||
1217 | // The sqrt and rsqrt refinement processes assume we always start out with an | ||||||
1218 | // approximation of the rsqrt. Therefore, if we're going to do any refinement | ||||||
1219 | // (i.e. ExtraSteps > 0), we must return an rsqrt. But if we're *not* doing | ||||||
1220 | // any refinement, we must return a regular sqrt. | ||||||
1221 | if (Reciprocal || ExtraSteps > 0) { | ||||||
1222 | if (VT == MVT::f32) | ||||||
1223 | return MakeIntrinsicCall(Ftz ? Intrinsic::nvvm_rsqrt_approx_ftz_f | ||||||
1224 | : Intrinsic::nvvm_rsqrt_approx_f); | ||||||
1225 | else if (VT == MVT::f64) | ||||||
1226 | return MakeIntrinsicCall(Intrinsic::nvvm_rsqrt_approx_d); | ||||||
1227 | else | ||||||
1228 | return SDValue(); | ||||||
1229 | } else { | ||||||
1230 | if (VT == MVT::f32) | ||||||
1231 | return MakeIntrinsicCall(Ftz ? Intrinsic::nvvm_sqrt_approx_ftz_f | ||||||
1232 | : Intrinsic::nvvm_sqrt_approx_f); | ||||||
1233 | else { | ||||||
1234 | // There's no sqrt.approx.f64 instruction, so we emit | ||||||
1235 | // reciprocal(rsqrt(x)). This is faster than | ||||||
1236 | // select(x == 0, 0, x * rsqrt(x)). (In fact, it's faster than plain | ||||||
1237 | // x * rsqrt(x).) | ||||||
1238 | return DAG.getNode( | ||||||
1239 | ISD::INTRINSIC_WO_CHAIN, DL, VT, | ||||||
1240 | DAG.getConstant(Intrinsic::nvvm_rcp_approx_ftz_d, DL, MVT::i32), | ||||||
1241 | MakeIntrinsicCall(Intrinsic::nvvm_rsqrt_approx_d)); | ||||||
1242 | } | ||||||
1243 | } | ||||||
1244 | } | ||||||
1245 | |||||||
1246 | SDValue | ||||||
1247 | NVPTXTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { | ||||||
1248 | SDLoc dl(Op); | ||||||
1249 | const GlobalAddressSDNode *GAN = cast<GlobalAddressSDNode>(Op); | ||||||
1250 | auto PtrVT = getPointerTy(DAG.getDataLayout(), GAN->getAddressSpace()); | ||||||
1251 | Op = DAG.getTargetGlobalAddress(GAN->getGlobal(), dl, PtrVT); | ||||||
1252 | return DAG.getNode(NVPTXISD::Wrapper, dl, PtrVT, Op); | ||||||
1253 | } | ||||||
1254 | |||||||
1255 | std::string NVPTXTargetLowering::getPrototype( | ||||||
1256 | const DataLayout &DL, Type *retTy, const ArgListTy &Args, | ||||||
1257 | const SmallVectorImpl<ISD::OutputArg> &Outs, unsigned retAlignment, | ||||||
1258 | ImmutableCallSite CS) const { | ||||||
1259 | auto PtrVT = getPointerTy(DL); | ||||||
1260 | |||||||
1261 | bool isABI = (STI.getSmVersion() >= 20); | ||||||
1262 | assert(isABI && "Non-ABI compilation is not supported")((isABI && "Non-ABI compilation is not supported") ? static_cast <void> (0) : __assert_fail ("isABI && \"Non-ABI compilation is not supported\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 1262, __PRETTY_FUNCTION__)); | ||||||
1263 | if (!isABI) | ||||||
1264 | return ""; | ||||||
1265 | |||||||
1266 | std::stringstream O; | ||||||
1267 | O << "prototype_" << uniqueCallSite << " : .callprototype "; | ||||||
1268 | |||||||
1269 | if (retTy->getTypeID() == Type::VoidTyID) { | ||||||
1270 | O << "()"; | ||||||
1271 | } else { | ||||||
1272 | O << "("; | ||||||
1273 | if (retTy->isFloatingPointTy() || (retTy->isIntegerTy() && !retTy->isIntegerTy(128))) { | ||||||
1274 | unsigned size = 0; | ||||||
1275 | if (auto *ITy = dyn_cast<IntegerType>(retTy)) { | ||||||
1276 | size = ITy->getBitWidth(); | ||||||
1277 | } else { | ||||||
1278 | assert(retTy->isFloatingPointTy() &&((retTy->isFloatingPointTy() && "Floating point type expected here" ) ? static_cast<void> (0) : __assert_fail ("retTy->isFloatingPointTy() && \"Floating point type expected here\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 1279, __PRETTY_FUNCTION__)) | ||||||
1279 | "Floating point type expected here")((retTy->isFloatingPointTy() && "Floating point type expected here" ) ? static_cast<void> (0) : __assert_fail ("retTy->isFloatingPointTy() && \"Floating point type expected here\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 1279, __PRETTY_FUNCTION__)); | ||||||
1280 | size = retTy->getPrimitiveSizeInBits(); | ||||||
1281 | } | ||||||
1282 | // PTX ABI requires all scalar return values to be at least 32 | ||||||
1283 | // bits in size. fp16 normally uses .b16 as its storage type in | ||||||
1284 | // PTX, so its size must be adjusted here, too. | ||||||
1285 | if (size < 32) | ||||||
1286 | size = 32; | ||||||
1287 | |||||||
1288 | O << ".param .b" << size << " _"; | ||||||
1289 | } else if (isa<PointerType>(retTy)) { | ||||||
1290 | O << ".param .b" << PtrVT.getSizeInBits() << " _"; | ||||||
1291 | } else if (retTy->isAggregateType() || retTy->isVectorTy() || | ||||||
1292 | retTy->isIntegerTy(128)) { | ||||||
1293 | O << ".param .align " << retAlignment << " .b8 _[" | ||||||
1294 | << DL.getTypeAllocSize(retTy) << "]"; | ||||||
1295 | } else { | ||||||
1296 | llvm_unreachable("Unknown return type")::llvm::llvm_unreachable_internal("Unknown return type", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 1296); | ||||||
1297 | } | ||||||
1298 | O << ") "; | ||||||
1299 | } | ||||||
1300 | O << "_ ("; | ||||||
1301 | |||||||
1302 | bool first = true; | ||||||
1303 | |||||||
1304 | unsigned OIdx = 0; | ||||||
1305 | for (unsigned i = 0, e = Args.size(); i != e; ++i, ++OIdx) { | ||||||
1306 | Type *Ty = Args[i].Ty; | ||||||
1307 | if (!first) { | ||||||
1308 | O << ", "; | ||||||
1309 | } | ||||||
1310 | first = false; | ||||||
1311 | |||||||
1312 | if (!Outs[OIdx].Flags.isByVal()) { | ||||||
1313 | if (Ty->isAggregateType() || Ty->isVectorTy() || Ty->isIntegerTy(128)) { | ||||||
1314 | unsigned align = 0; | ||||||
1315 | const CallInst *CallI = cast<CallInst>(CS.getInstruction()); | ||||||
1316 | // +1 because index 0 is reserved for return type alignment | ||||||
1317 | if (!getAlign(*CallI, i + 1, align)) | ||||||
1318 | align = DL.getABITypeAlignment(Ty); | ||||||
1319 | unsigned sz = DL.getTypeAllocSize(Ty); | ||||||
1320 | O << ".param .align " << align << " .b8 "; | ||||||
1321 | O << "_"; | ||||||
1322 | O << "[" << sz << "]"; | ||||||
1323 | // update the index for Outs | ||||||
1324 | SmallVector<EVT, 16> vtparts; | ||||||
1325 | ComputeValueVTs(*this, DL, Ty, vtparts); | ||||||
1326 | if (unsigned len = vtparts.size()) | ||||||
1327 | OIdx += len - 1; | ||||||
1328 | continue; | ||||||
1329 | } | ||||||
1330 | // i8 types in IR will be i16 types in SDAG | ||||||
1331 | assert((getValueType(DL, Ty) == Outs[OIdx].VT ||(((getValueType(DL, Ty) == Outs[OIdx].VT || (getValueType(DL, Ty) == MVT::i8 && Outs[OIdx].VT == MVT::i16)) && "type mismatch between callee prototype and arguments") ? static_cast <void> (0) : __assert_fail ("(getValueType(DL, Ty) == Outs[OIdx].VT || (getValueType(DL, Ty) == MVT::i8 && Outs[OIdx].VT == MVT::i16)) && \"type mismatch between callee prototype and arguments\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 1333, __PRETTY_FUNCTION__)) | ||||||
1332 | (getValueType(DL, Ty) == MVT::i8 && Outs[OIdx].VT == MVT::i16)) &&(((getValueType(DL, Ty) == Outs[OIdx].VT || (getValueType(DL, Ty) == MVT::i8 && Outs[OIdx].VT == MVT::i16)) && "type mismatch between callee prototype and arguments") ? static_cast <void> (0) : __assert_fail ("(getValueType(DL, Ty) == Outs[OIdx].VT || (getValueType(DL, Ty) == MVT::i8 && Outs[OIdx].VT == MVT::i16)) && \"type mismatch between callee prototype and arguments\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 1333, __PRETTY_FUNCTION__)) | ||||||
1333 | "type mismatch between callee prototype and arguments")(((getValueType(DL, Ty) == Outs[OIdx].VT || (getValueType(DL, Ty) == MVT::i8 && Outs[OIdx].VT == MVT::i16)) && "type mismatch between callee prototype and arguments") ? static_cast <void> (0) : __assert_fail ("(getValueType(DL, Ty) == Outs[OIdx].VT || (getValueType(DL, Ty) == MVT::i8 && Outs[OIdx].VT == MVT::i16)) && \"type mismatch between callee prototype and arguments\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 1333, __PRETTY_FUNCTION__)); | ||||||
1334 | // scalar type | ||||||
1335 | unsigned sz = 0; | ||||||
1336 | if (isa<IntegerType>(Ty)) { | ||||||
1337 | sz = cast<IntegerType>(Ty)->getBitWidth(); | ||||||
1338 | if (sz < 32) | ||||||
1339 | sz = 32; | ||||||
1340 | } else if (isa<PointerType>(Ty)) { | ||||||
1341 | sz = PtrVT.getSizeInBits(); | ||||||
1342 | } else if (Ty->isHalfTy()) | ||||||
1343 | // PTX ABI requires all scalar parameters to be at least 32 | ||||||
1344 | // bits in size. fp16 normally uses .b16 as its storage type | ||||||
1345 | // in PTX, so its size must be adjusted here, too. | ||||||
1346 | sz = 32; | ||||||
1347 | else | ||||||
1348 | sz = Ty->getPrimitiveSizeInBits(); | ||||||
1349 | O << ".param .b" << sz << " "; | ||||||
1350 | O << "_"; | ||||||
1351 | continue; | ||||||
1352 | } | ||||||
1353 | auto *PTy = dyn_cast<PointerType>(Ty); | ||||||
1354 | assert(PTy && "Param with byval attribute should be a pointer type")((PTy && "Param with byval attribute should be a pointer type" ) ? static_cast<void> (0) : __assert_fail ("PTy && \"Param with byval attribute should be a pointer type\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 1354, __PRETTY_FUNCTION__)); | ||||||
1355 | Type *ETy = PTy->getElementType(); | ||||||
1356 | |||||||
1357 | Align align = Outs[OIdx].Flags.getNonZeroByValAlign(); | ||||||
1358 | unsigned sz = DL.getTypeAllocSize(ETy); | ||||||
1359 | O << ".param .align " << align.value() << " .b8 "; | ||||||
1360 | O << "_"; | ||||||
1361 | O << "[" << sz << "]"; | ||||||
1362 | } | ||||||
1363 | O << ");"; | ||||||
1364 | return O.str(); | ||||||
1365 | } | ||||||
1366 | |||||||
1367 | unsigned NVPTXTargetLowering::getArgumentAlignment(SDValue Callee, | ||||||
1368 | ImmutableCallSite CS, | ||||||
1369 | Type *Ty, unsigned Idx, | ||||||
1370 | const DataLayout &DL) const { | ||||||
1371 | if (!CS) { | ||||||
1372 | // CallSite is zero, fallback to ABI type alignment | ||||||
1373 | return DL.getABITypeAlignment(Ty); | ||||||
1374 | } | ||||||
1375 | |||||||
1376 | unsigned Align = 0; | ||||||
1377 | const Value *DirectCallee = CS.getCalledFunction(); | ||||||
1378 | |||||||
1379 | if (!DirectCallee) { | ||||||
1380 | // We don't have a direct function symbol, but that may be because of | ||||||
1381 | // constant cast instructions in the call. | ||||||
1382 | const Instruction *CalleeI = CS.getInstruction(); | ||||||
1383 | assert(CalleeI && "Call target is not a function or derived value?")((CalleeI && "Call target is not a function or derived value?" ) ? static_cast<void> (0) : __assert_fail ("CalleeI && \"Call target is not a function or derived value?\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 1383, __PRETTY_FUNCTION__)); | ||||||
1384 | |||||||
1385 | // With bitcast'd call targets, the instruction will be the call | ||||||
1386 | if (isa<CallInst>(CalleeI)) { | ||||||
1387 | // Check if we have call alignment metadata | ||||||
1388 | if (getAlign(*cast<CallInst>(CalleeI), Idx, Align)) | ||||||
1389 | return Align; | ||||||
1390 | |||||||
1391 | const Value *CalleeV = cast<CallInst>(CalleeI)->getCalledValue(); | ||||||
1392 | // Ignore any bitcast instructions | ||||||
1393 | while (isa<ConstantExpr>(CalleeV)) { | ||||||
1394 | const ConstantExpr *CE = cast<ConstantExpr>(CalleeV); | ||||||
1395 | if (!CE->isCast()) | ||||||
1396 | break; | ||||||
1397 | // Look through the bitcast | ||||||
1398 | CalleeV = cast<ConstantExpr>(CalleeV)->getOperand(0); | ||||||
1399 | } | ||||||
1400 | |||||||
1401 | // We have now looked past all of the bitcasts. Do we finally have a | ||||||
1402 | // Function? | ||||||
1403 | if (isa<Function>(CalleeV)) | ||||||
1404 | DirectCallee = CalleeV; | ||||||
1405 | } | ||||||
1406 | } | ||||||
1407 | |||||||
1408 | // Check for function alignment information if we found that the | ||||||
1409 | // ultimate target is a Function | ||||||
1410 | if (DirectCallee) | ||||||
1411 | if (getAlign(*cast<Function>(DirectCallee), Idx, Align)) | ||||||
1412 | return Align; | ||||||
1413 | |||||||
1414 | // Call is indirect or alignment information is not available, fall back to | ||||||
1415 | // the ABI type alignment | ||||||
1416 | return DL.getABITypeAlignment(Ty); | ||||||
1417 | } | ||||||
1418 | |||||||
1419 | SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, | ||||||
1420 | SmallVectorImpl<SDValue> &InVals) const { | ||||||
1421 | SelectionDAG &DAG = CLI.DAG; | ||||||
1422 | SDLoc dl = CLI.DL; | ||||||
1423 | SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; | ||||||
1424 | SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; | ||||||
1425 | SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; | ||||||
1426 | SDValue Chain = CLI.Chain; | ||||||
1427 | SDValue Callee = CLI.Callee; | ||||||
1428 | bool &isTailCall = CLI.IsTailCall; | ||||||
1429 | ArgListTy &Args = CLI.getArgs(); | ||||||
1430 | Type *RetTy = CLI.RetTy; | ||||||
1431 | ImmutableCallSite CS = CLI.CS; | ||||||
1432 | const DataLayout &DL = DAG.getDataLayout(); | ||||||
1433 | |||||||
1434 | bool isABI = (STI.getSmVersion() >= 20); | ||||||
1435 | assert(isABI && "Non-ABI compilation is not supported")((isABI && "Non-ABI compilation is not supported") ? static_cast <void> (0) : __assert_fail ("isABI && \"Non-ABI compilation is not supported\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 1435, __PRETTY_FUNCTION__)); | ||||||
1436 | if (!isABI) | ||||||
1437 | return Chain; | ||||||
1438 | |||||||
1439 | SDValue tempChain = Chain; | ||||||
1440 | Chain = DAG.getCALLSEQ_START(Chain, uniqueCallSite, 0, dl); | ||||||
1441 | SDValue InFlag = Chain.getValue(1); | ||||||
1442 | |||||||
1443 | unsigned paramCount = 0; | ||||||
1444 | // Args.size() and Outs.size() need not match. | ||||||
1445 | // Outs.size() will be larger | ||||||
1446 | // * if there is an aggregate argument with multiple fields (each field | ||||||
1447 | // showing up separately in Outs) | ||||||
1448 | // * if there is a vector argument with more than typical vector-length | ||||||
1449 | // elements (generally if more than 4) where each vector element is | ||||||
1450 | // individually present in Outs. | ||||||
1451 | // So a different index should be used for indexing into Outs/OutVals. | ||||||
1452 | // See similar issue in LowerFormalArguments. | ||||||
1453 | unsigned OIdx = 0; | ||||||
1454 | // Declare the .params or .reg need to pass values | ||||||
1455 | // to the function | ||||||
1456 | for (unsigned i = 0, e = Args.size(); i != e; ++i, ++OIdx) { | ||||||
1457 | EVT VT = Outs[OIdx].VT; | ||||||
1458 | Type *Ty = Args[i].Ty; | ||||||
1459 | |||||||
1460 | if (!Outs[OIdx].Flags.isByVal()) { | ||||||
1461 | SmallVector<EVT, 16> VTs; | ||||||
1462 | SmallVector<uint64_t, 16> Offsets; | ||||||
1463 | ComputePTXValueVTs(*this, DL, Ty, VTs, &Offsets); | ||||||
1464 | unsigned ArgAlign = | ||||||
1465 | getArgumentAlignment(Callee, CS, Ty, paramCount + 1, DL); | ||||||
1466 | unsigned AllocSize = DL.getTypeAllocSize(Ty); | ||||||
1467 | SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); | ||||||
1468 | bool NeedAlign; // Does argument declaration specify alignment? | ||||||
1469 | if (Ty->isAggregateType() || Ty->isVectorTy() || Ty->isIntegerTy(128)) { | ||||||
1470 | // declare .param .align <align> .b8 .param<n>[<size>]; | ||||||
1471 | SDValue DeclareParamOps[] = { | ||||||
1472 | Chain, DAG.getConstant(ArgAlign, dl, MVT::i32), | ||||||
1473 | DAG.getConstant(paramCount, dl, MVT::i32), | ||||||
1474 | DAG.getConstant(AllocSize, dl, MVT::i32), InFlag}; | ||||||
1475 | Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs, | ||||||
1476 | DeclareParamOps); | ||||||
1477 | NeedAlign = true; | ||||||
1478 | } else { | ||||||
1479 | // declare .param .b<size> .param<n>; | ||||||
1480 | if ((VT.isInteger() || VT.isFloatingPoint()) && AllocSize < 4) { | ||||||
1481 | // PTX ABI requires integral types to be at least 32 bits in | ||||||
1482 | // size. FP16 is loaded/stored using i16, so it's handled | ||||||
1483 | // here as well. | ||||||
1484 | AllocSize = 4; | ||||||
1485 | } | ||||||
1486 | SDValue DeclareScalarParamOps[] = { | ||||||
1487 | Chain, DAG.getConstant(paramCount, dl, MVT::i32), | ||||||
1488 | DAG.getConstant(AllocSize * 8, dl, MVT::i32), | ||||||
1489 | DAG.getConstant(0, dl, MVT::i32), InFlag}; | ||||||
1490 | Chain = DAG.getNode(NVPTXISD::DeclareScalarParam, dl, DeclareParamVTs, | ||||||
1491 | DeclareScalarParamOps); | ||||||
1492 | NeedAlign = false; | ||||||
1493 | } | ||||||
1494 | InFlag = Chain.getValue(1); | ||||||
1495 | |||||||
1496 | // PTX Interoperability Guide 3.3(A): [Integer] Values shorter | ||||||
1497 | // than 32-bits are sign extended or zero extended, depending on | ||||||
1498 | // whether they are signed or unsigned types. This case applies | ||||||
1499 | // only to scalar parameters and not to aggregate values. | ||||||
1500 | bool ExtendIntegerParam = | ||||||
1501 | Ty->isIntegerTy() && DL.getTypeAllocSizeInBits(Ty) < 32; | ||||||
1502 | |||||||
1503 | auto VectorInfo = VectorizePTXValueVTs(VTs, Offsets, ArgAlign); | ||||||
1504 | SmallVector<SDValue, 6> StoreOperands; | ||||||
1505 | for (unsigned j = 0, je = VTs.size(); j != je; ++j) { | ||||||
1506 | // New store. | ||||||
1507 | if (VectorInfo[j] & PVF_FIRST) { | ||||||
1508 | assert(StoreOperands.empty() && "Unfinished preceding store.")((StoreOperands.empty() && "Unfinished preceding store." ) ? static_cast<void> (0) : __assert_fail ("StoreOperands.empty() && \"Unfinished preceding store.\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 1508, __PRETTY_FUNCTION__)); | ||||||
1509 | StoreOperands.push_back(Chain); | ||||||
1510 | StoreOperands.push_back(DAG.getConstant(paramCount, dl, MVT::i32)); | ||||||
1511 | StoreOperands.push_back(DAG.getConstant(Offsets[j], dl, MVT::i32)); | ||||||
1512 | } | ||||||
1513 | |||||||
1514 | EVT EltVT = VTs[j]; | ||||||
1515 | SDValue StVal = OutVals[OIdx]; | ||||||
1516 | if (ExtendIntegerParam) { | ||||||
1517 | assert(VTs.size() == 1 && "Scalar can't have multiple parts.")((VTs.size() == 1 && "Scalar can't have multiple parts." ) ? static_cast<void> (0) : __assert_fail ("VTs.size() == 1 && \"Scalar can't have multiple parts.\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 1517, __PRETTY_FUNCTION__)); | ||||||
1518 | // zext/sext to i32 | ||||||
1519 | StVal = DAG.getNode(Outs[OIdx].Flags.isSExt() ? ISD::SIGN_EXTEND | ||||||
1520 | : ISD::ZERO_EXTEND, | ||||||
1521 | dl, MVT::i32, StVal); | ||||||
1522 | } else if (EltVT.getSizeInBits() < 16) { | ||||||
1523 | // Use 16-bit registers for small stores as it's the | ||||||
1524 | // smallest general purpose register size supported by NVPTX. | ||||||
1525 | StVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, StVal); | ||||||
1526 | } | ||||||
1527 | |||||||
1528 | // Record the value to store. | ||||||
1529 | StoreOperands.push_back(StVal); | ||||||
1530 | |||||||
1531 | if (VectorInfo[j] & PVF_LAST) { | ||||||
1532 | unsigned NumElts = StoreOperands.size() - 3; | ||||||
1533 | NVPTXISD::NodeType Op; | ||||||
1534 | switch (NumElts) { | ||||||
1535 | case 1: | ||||||
1536 | Op = NVPTXISD::StoreParam; | ||||||
1537 | break; | ||||||
1538 | case 2: | ||||||
1539 | Op = NVPTXISD::StoreParamV2; | ||||||
1540 | break; | ||||||
1541 | case 4: | ||||||
1542 | Op = NVPTXISD::StoreParamV4; | ||||||
1543 | break; | ||||||
1544 | default: | ||||||
1545 | llvm_unreachable("Invalid vector info.")::llvm::llvm_unreachable_internal("Invalid vector info.", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 1545); | ||||||
1546 | } | ||||||
1547 | |||||||
1548 | StoreOperands.push_back(InFlag); | ||||||
1549 | |||||||
1550 | // Adjust type of the store op if we've extended the scalar | ||||||
1551 | // return value. | ||||||
1552 | EVT TheStoreType = ExtendIntegerParam ? MVT::i32 : VTs[j]; | ||||||
1553 | unsigned EltAlign = | ||||||
1554 | NeedAlign ? GreatestCommonDivisor64(ArgAlign, Offsets[j]) : 0; | ||||||
1555 | |||||||
1556 | Chain = DAG.getMemIntrinsicNode( | ||||||
1557 | Op, dl, DAG.getVTList(MVT::Other, MVT::Glue), StoreOperands, | ||||||
1558 | TheStoreType, MachinePointerInfo(), EltAlign, | ||||||
1559 | MachineMemOperand::MOStore); | ||||||
1560 | InFlag = Chain.getValue(1); | ||||||
1561 | |||||||
1562 | // Cleanup. | ||||||
1563 | StoreOperands.clear(); | ||||||
1564 | } | ||||||
1565 | ++OIdx; | ||||||
1566 | } | ||||||
1567 | assert(StoreOperands.empty() && "Unfinished parameter store.")((StoreOperands.empty() && "Unfinished parameter store." ) ? static_cast<void> (0) : __assert_fail ("StoreOperands.empty() && \"Unfinished parameter store.\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 1567, __PRETTY_FUNCTION__)); | ||||||
1568 | if (VTs.size() > 0) | ||||||
1569 | --OIdx; | ||||||
1570 | ++paramCount; | ||||||
1571 | continue; | ||||||
1572 | } | ||||||
1573 | |||||||
1574 | // ByVal arguments | ||||||
1575 | SmallVector<EVT, 16> VTs; | ||||||
1576 | SmallVector<uint64_t, 16> Offsets; | ||||||
1577 | auto *PTy = dyn_cast<PointerType>(Args[i].Ty); | ||||||
1578 | assert(PTy && "Type of a byval parameter should be pointer")((PTy && "Type of a byval parameter should be pointer" ) ? static_cast<void> (0) : __assert_fail ("PTy && \"Type of a byval parameter should be pointer\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 1578, __PRETTY_FUNCTION__)); | ||||||
1579 | ComputePTXValueVTs(*this, DL, PTy->getElementType(), VTs, &Offsets, 0); | ||||||
1580 | |||||||
1581 | // declare .param .align <align> .b8 .param<n>[<size>]; | ||||||
1582 | unsigned sz = Outs[OIdx].Flags.getByValSize(); | ||||||
1583 | SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); | ||||||
1584 | Align ArgAlign = Outs[OIdx].Flags.getNonZeroByValAlign(); | ||||||
1585 | // The ByValAlign in the Outs[OIdx].Flags is always set at this point, | ||||||
1586 | // so we don't need to worry about natural alignment or not. | ||||||
1587 | // See TargetLowering::LowerCallTo(). | ||||||
1588 | |||||||
1589 | // Enforce minimum alignment of 4 to work around ptxas miscompile | ||||||
1590 | // for sm_50+. See corresponding alignment adjustment in | ||||||
1591 | // emitFunctionParamList() for details. | ||||||
1592 | if (ArgAlign < Align(4)) | ||||||
1593 | ArgAlign = Align(4); | ||||||
1594 | SDValue DeclareParamOps[] = { | ||||||
1595 | Chain, DAG.getConstant(ArgAlign.value(), dl, MVT::i32), | ||||||
1596 | DAG.getConstant(paramCount, dl, MVT::i32), | ||||||
1597 | DAG.getConstant(sz, dl, MVT::i32), InFlag}; | ||||||
1598 | Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs, | ||||||
1599 | DeclareParamOps); | ||||||
1600 | InFlag = Chain.getValue(1); | ||||||
1601 | for (unsigned j = 0, je = VTs.size(); j != je; ++j) { | ||||||
1602 | EVT elemtype = VTs[j]; | ||||||
1603 | int curOffset = Offsets[j]; | ||||||
1604 | unsigned PartAlign = GreatestCommonDivisor64(ArgAlign.value(), curOffset); | ||||||
1605 | auto PtrVT = getPointerTy(DL); | ||||||
1606 | SDValue srcAddr = DAG.getNode(ISD::ADD, dl, PtrVT, OutVals[OIdx], | ||||||
1607 | DAG.getConstant(curOffset, dl, PtrVT)); | ||||||
1608 | SDValue theVal = DAG.getLoad(elemtype, dl, tempChain, srcAddr, | ||||||
1609 | MachinePointerInfo(), PartAlign); | ||||||
1610 | if (elemtype.getSizeInBits() < 16) { | ||||||
1611 | theVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, theVal); | ||||||
1612 | } | ||||||
1613 | SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); | ||||||
1614 | SDValue CopyParamOps[] = { Chain, | ||||||
1615 | DAG.getConstant(paramCount, dl, MVT::i32), | ||||||
1616 | DAG.getConstant(curOffset, dl, MVT::i32), | ||||||
1617 | theVal, InFlag }; | ||||||
1618 | Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl, CopyParamVTs, | ||||||
1619 | CopyParamOps, elemtype, | ||||||
1620 | MachinePointerInfo(), /* Align */ 0, | ||||||
1621 | MachineMemOperand::MOStore); | ||||||
1622 | |||||||
1623 | InFlag = Chain.getValue(1); | ||||||
1624 | } | ||||||
1625 | ++paramCount; | ||||||
1626 | } | ||||||
1627 | |||||||
1628 | GlobalAddressSDNode *Func = dyn_cast<GlobalAddressSDNode>(Callee.getNode()); | ||||||
1629 | unsigned retAlignment = 0; | ||||||
1630 | |||||||
1631 | // Handle Result | ||||||
1632 | if (Ins.size() > 0) { | ||||||
1633 | SmallVector<EVT, 16> resvtparts; | ||||||
1634 | ComputeValueVTs(*this, DL, RetTy, resvtparts); | ||||||
1635 | |||||||
1636 | // Declare | ||||||
1637 | // .param .align 16 .b8 retval0[<size-in-bytes>], or | ||||||
1638 | // .param .b<size-in-bits> retval0 | ||||||
1639 | unsigned resultsz = DL.getTypeAllocSizeInBits(RetTy); | ||||||
1640 | // Emit ".param .b<size-in-bits> retval0" instead of byte arrays only for | ||||||
1641 | // these three types to match the logic in | ||||||
1642 | // NVPTXAsmPrinter::printReturnValStr and NVPTXTargetLowering::getPrototype. | ||||||
1643 | // Plus, this behavior is consistent with nvcc's. | ||||||
1644 | if (RetTy->isFloatingPointTy() || RetTy->isPointerTy() || | ||||||
1645 | (RetTy->isIntegerTy() && !RetTy->isIntegerTy(128))) { | ||||||
1646 | // Scalar needs to be at least 32bit wide | ||||||
1647 | if (resultsz < 32) | ||||||
1648 | resultsz = 32; | ||||||
1649 | SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue); | ||||||
1650 | SDValue DeclareRetOps[] = { Chain, DAG.getConstant(1, dl, MVT::i32), | ||||||
1651 | DAG.getConstant(resultsz, dl, MVT::i32), | ||||||
1652 | DAG.getConstant(0, dl, MVT::i32), InFlag }; | ||||||
1653 | Chain = DAG.getNode(NVPTXISD::DeclareRet, dl, DeclareRetVTs, | ||||||
1654 | DeclareRetOps); | ||||||
1655 | InFlag = Chain.getValue(1); | ||||||
1656 | } else { | ||||||
1657 | retAlignment = getArgumentAlignment(Callee, CS, RetTy, 0, DL); | ||||||
1658 | SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue); | ||||||
1659 | SDValue DeclareRetOps[] = { Chain, | ||||||
1660 | DAG.getConstant(retAlignment, dl, MVT::i32), | ||||||
1661 | DAG.getConstant(resultsz / 8, dl, MVT::i32), | ||||||
1662 | DAG.getConstant(0, dl, MVT::i32), InFlag }; | ||||||
1663 | Chain = DAG.getNode(NVPTXISD::DeclareRetParam, dl, DeclareRetVTs, | ||||||
1664 | DeclareRetOps); | ||||||
1665 | InFlag = Chain.getValue(1); | ||||||
1666 | } | ||||||
1667 | } | ||||||
1668 | |||||||
1669 | // Both indirect calls and libcalls have nullptr Func. In order to distinguish | ||||||
1670 | // between them we must rely on the call site value which is valid for | ||||||
1671 | // indirect calls but is always null for libcalls. | ||||||
1672 | bool isIndirectCall = !Func && CS; | ||||||
1673 | |||||||
1674 | if (isa<ExternalSymbolSDNode>(Callee)) { | ||||||
1675 | Function* CalleeFunc = nullptr; | ||||||
1676 | |||||||
1677 | // Try to find the callee in the current module. | ||||||
1678 | Callee = DAG.getSymbolFunctionGlobalAddress(Callee, &CalleeFunc); | ||||||
1679 | assert(CalleeFunc != nullptr && "Libcall callee must be set.")((CalleeFunc != nullptr && "Libcall callee must be set." ) ? static_cast<void> (0) : __assert_fail ("CalleeFunc != nullptr && \"Libcall callee must be set.\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 1679, __PRETTY_FUNCTION__)); | ||||||
1680 | |||||||
1681 | // Set the "libcall callee" attribute to indicate that the function | ||||||
1682 | // must always have a declaration. | ||||||
1683 | CalleeFunc->addFnAttr("nvptx-libcall-callee", "true"); | ||||||
1684 | } | ||||||
1685 | |||||||
1686 | if (isIndirectCall) { | ||||||
1687 | // This is indirect function call case : PTX requires a prototype of the | ||||||
1688 | // form | ||||||
1689 | // proto_0 : .callprototype(.param .b32 _) _ (.param .b32 _); | ||||||
1690 | // to be emitted, and the label has to be used as the last arg of call | ||||||
1691 | // instruction. | ||||||
1692 | // The prototype is embedded in a string and put as the operand for a | ||||||
1693 | // CallPrototype SDNode which will print out to the value of the string. | ||||||
1694 | SDVTList ProtoVTs = DAG.getVTList(MVT::Other, MVT::Glue); | ||||||
1695 | std::string Proto = getPrototype(DL, RetTy, Args, Outs, retAlignment, CS); | ||||||
1696 | const char *ProtoStr = | ||||||
1697 | nvTM->getManagedStrPool()->getManagedString(Proto.c_str())->c_str(); | ||||||
1698 | SDValue ProtoOps[] = { | ||||||
1699 | Chain, DAG.getTargetExternalSymbol(ProtoStr, MVT::i32), InFlag, | ||||||
1700 | }; | ||||||
1701 | Chain = DAG.getNode(NVPTXISD::CallPrototype, dl, ProtoVTs, ProtoOps); | ||||||
1702 | InFlag = Chain.getValue(1); | ||||||
1703 | } | ||||||
1704 | // Op to just print "call" | ||||||
1705 | SDVTList PrintCallVTs = DAG.getVTList(MVT::Other, MVT::Glue); | ||||||
1706 | SDValue PrintCallOps[] = { | ||||||
1707 | Chain, DAG.getConstant((Ins.size() == 0) ? 0 : 1, dl, MVT::i32), InFlag | ||||||
1708 | }; | ||||||
1709 | // We model convergent calls as separate opcodes. | ||||||
1710 | unsigned Opcode = isIndirectCall ? NVPTXISD::PrintCall : NVPTXISD::PrintCallUni; | ||||||
1711 | if (CLI.IsConvergent) | ||||||
1712 | Opcode = Opcode == NVPTXISD::PrintCallUni ? NVPTXISD::PrintConvergentCallUni | ||||||
1713 | : NVPTXISD::PrintConvergentCall; | ||||||
1714 | Chain = DAG.getNode(Opcode, dl, PrintCallVTs, PrintCallOps); | ||||||
1715 | InFlag = Chain.getValue(1); | ||||||
1716 | |||||||
1717 | // Ops to print out the function name | ||||||
1718 | SDVTList CallVoidVTs = DAG.getVTList(MVT::Other, MVT::Glue); | ||||||
1719 | SDValue CallVoidOps[] = { Chain, Callee, InFlag }; | ||||||
1720 | Chain = DAG.getNode(NVPTXISD::CallVoid, dl, CallVoidVTs, CallVoidOps); | ||||||
1721 | InFlag = Chain.getValue(1); | ||||||
1722 | |||||||
1723 | // Ops to print out the param list | ||||||
1724 | SDVTList CallArgBeginVTs = DAG.getVTList(MVT::Other, MVT::Glue); | ||||||
1725 | SDValue CallArgBeginOps[] = { Chain, InFlag }; | ||||||
1726 | Chain = DAG.getNode(NVPTXISD::CallArgBegin, dl, CallArgBeginVTs, | ||||||
1727 | CallArgBeginOps); | ||||||
1728 | InFlag = Chain.getValue(1); | ||||||
1729 | |||||||
1730 | for (unsigned i = 0, e = paramCount; i != e; ++i) { | ||||||
1731 | unsigned opcode; | ||||||
1732 | if (i == (e - 1)) | ||||||
1733 | opcode = NVPTXISD::LastCallArg; | ||||||
1734 | else | ||||||
1735 | opcode = NVPTXISD::CallArg; | ||||||
1736 | SDVTList CallArgVTs = DAG.getVTList(MVT::Other, MVT::Glue); | ||||||
1737 | SDValue CallArgOps[] = { Chain, DAG.getConstant(1, dl, MVT::i32), | ||||||
1738 | DAG.getConstant(i, dl, MVT::i32), InFlag }; | ||||||
1739 | Chain = DAG.getNode(opcode, dl, CallArgVTs, CallArgOps); | ||||||
1740 | InFlag = Chain.getValue(1); | ||||||
1741 | } | ||||||
1742 | SDVTList CallArgEndVTs = DAG.getVTList(MVT::Other, MVT::Glue); | ||||||
1743 | SDValue CallArgEndOps[] = { Chain, | ||||||
1744 | DAG.getConstant(isIndirectCall ? 0 : 1, dl, MVT::i32), | ||||||
1745 | InFlag }; | ||||||
1746 | Chain = DAG.getNode(NVPTXISD::CallArgEnd, dl, CallArgEndVTs, CallArgEndOps); | ||||||
1747 | InFlag = Chain.getValue(1); | ||||||
1748 | |||||||
1749 | if (isIndirectCall) { | ||||||
1750 | SDVTList PrototypeVTs = DAG.getVTList(MVT::Other, MVT::Glue); | ||||||
1751 | SDValue PrototypeOps[] = { Chain, | ||||||
1752 | DAG.getConstant(uniqueCallSite, dl, MVT::i32), | ||||||
1753 | InFlag }; | ||||||
1754 | Chain = DAG.getNode(NVPTXISD::Prototype, dl, PrototypeVTs, PrototypeOps); | ||||||
1755 | InFlag = Chain.getValue(1); | ||||||
1756 | } | ||||||
1757 | |||||||
1758 | SmallVector<SDValue, 16> ProxyRegOps; | ||||||
1759 | SmallVector<Optional<MVT>, 16> ProxyRegTruncates; | ||||||
1760 | |||||||
1761 | // Generate loads from param memory/moves from registers for result | ||||||
1762 | if (Ins.size() > 0) { | ||||||
1763 | SmallVector<EVT, 16> VTs; | ||||||
1764 | SmallVector<uint64_t, 16> Offsets; | ||||||
1765 | ComputePTXValueVTs(*this, DL, RetTy, VTs, &Offsets, 0); | ||||||
1766 | assert(VTs.size() == Ins.size() && "Bad value decomposition")((VTs.size() == Ins.size() && "Bad value decomposition" ) ? static_cast<void> (0) : __assert_fail ("VTs.size() == Ins.size() && \"Bad value decomposition\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 1766, __PRETTY_FUNCTION__)); | ||||||
1767 | |||||||
1768 | unsigned RetAlign = getArgumentAlignment(Callee, CS, RetTy, 0, DL); | ||||||
1769 | auto VectorInfo = VectorizePTXValueVTs(VTs, Offsets, RetAlign); | ||||||
1770 | |||||||
1771 | SmallVector<EVT, 6> LoadVTs; | ||||||
1772 | int VecIdx = -1; // Index of the first element of the vector. | ||||||
1773 | |||||||
1774 | // PTX Interoperability Guide 3.3(A): [Integer] Values shorter than | ||||||
1775 | // 32-bits are sign extended or zero extended, depending on whether | ||||||
1776 | // they are signed or unsigned types. | ||||||
1777 | bool ExtendIntegerRetVal = | ||||||
1778 | RetTy->isIntegerTy() && DL.getTypeAllocSizeInBits(RetTy) < 32; | ||||||
1779 | |||||||
1780 | for (unsigned i = 0, e = VTs.size(); i != e; ++i) { | ||||||
1781 | bool needTruncate = false; | ||||||
1782 | EVT TheLoadType = VTs[i]; | ||||||
1783 | EVT EltType = Ins[i].VT; | ||||||
1784 | unsigned EltAlign = GreatestCommonDivisor64(RetAlign, Offsets[i]); | ||||||
1785 | if (ExtendIntegerRetVal) { | ||||||
1786 | TheLoadType = MVT::i32; | ||||||
1787 | EltType = MVT::i32; | ||||||
1788 | needTruncate = true; | ||||||
1789 | } else if (TheLoadType.getSizeInBits() < 16) { | ||||||
1790 | if (VTs[i].isInteger()) | ||||||
1791 | needTruncate = true; | ||||||
1792 | EltType = MVT::i16; | ||||||
1793 | } | ||||||
1794 | |||||||
1795 | // Record index of the very first element of the vector. | ||||||
1796 | if (VectorInfo[i] & PVF_FIRST) { | ||||||
1797 | assert(VecIdx == -1 && LoadVTs.empty() && "Orphaned operand list.")((VecIdx == -1 && LoadVTs.empty() && "Orphaned operand list." ) ? static_cast<void> (0) : __assert_fail ("VecIdx == -1 && LoadVTs.empty() && \"Orphaned operand list.\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 1797, __PRETTY_FUNCTION__)); | ||||||
1798 | VecIdx = i; | ||||||
1799 | } | ||||||
1800 | |||||||
1801 | LoadVTs.push_back(EltType); | ||||||
1802 | |||||||
1803 | if (VectorInfo[i] & PVF_LAST) { | ||||||
1804 | unsigned NumElts = LoadVTs.size(); | ||||||
1805 | LoadVTs.push_back(MVT::Other); | ||||||
1806 | LoadVTs.push_back(MVT::Glue); | ||||||
1807 | NVPTXISD::NodeType Op; | ||||||
1808 | switch (NumElts) { | ||||||
1809 | case 1: | ||||||
1810 | Op = NVPTXISD::LoadParam; | ||||||
1811 | break; | ||||||
1812 | case 2: | ||||||
1813 | Op = NVPTXISD::LoadParamV2; | ||||||
1814 | break; | ||||||
1815 | case 4: | ||||||
1816 | Op = NVPTXISD::LoadParamV4; | ||||||
1817 | break; | ||||||
1818 | default: | ||||||
1819 | llvm_unreachable("Invalid vector info.")::llvm::llvm_unreachable_internal("Invalid vector info.", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 1819); | ||||||
1820 | } | ||||||
1821 | |||||||
1822 | SDValue LoadOperands[] = { | ||||||
1823 | Chain, DAG.getConstant(1, dl, MVT::i32), | ||||||
1824 | DAG.getConstant(Offsets[VecIdx], dl, MVT::i32), InFlag}; | ||||||
1825 | SDValue RetVal = DAG.getMemIntrinsicNode( | ||||||
1826 | Op, dl, DAG.getVTList(LoadVTs), LoadOperands, TheLoadType, | ||||||
1827 | MachinePointerInfo(), EltAlign, | ||||||
1828 | MachineMemOperand::MOLoad); | ||||||
1829 | |||||||
1830 | for (unsigned j = 0; j < NumElts; ++j) { | ||||||
1831 | ProxyRegOps.push_back(RetVal.getValue(j)); | ||||||
1832 | |||||||
1833 | if (needTruncate) | ||||||
1834 | ProxyRegTruncates.push_back(Optional<MVT>(Ins[VecIdx + j].VT)); | ||||||
1835 | else | ||||||
1836 | ProxyRegTruncates.push_back(Optional<MVT>()); | ||||||
1837 | } | ||||||
1838 | |||||||
1839 | Chain = RetVal.getValue(NumElts); | ||||||
1840 | InFlag = RetVal.getValue(NumElts + 1); | ||||||
1841 | |||||||
1842 | // Cleanup | ||||||
1843 | VecIdx = -1; | ||||||
1844 | LoadVTs.clear(); | ||||||
1845 | } | ||||||
1846 | } | ||||||
1847 | } | ||||||
1848 | |||||||
1849 | Chain = DAG.getCALLSEQ_END(Chain, | ||||||
1850 | DAG.getIntPtrConstant(uniqueCallSite, dl, true), | ||||||
1851 | DAG.getIntPtrConstant(uniqueCallSite + 1, dl, | ||||||
1852 | true), | ||||||
1853 | InFlag, dl); | ||||||
1854 | InFlag = Chain.getValue(1); | ||||||
1855 | uniqueCallSite++; | ||||||
1856 | |||||||
1857 | // Append ProxyReg instructions to the chain to make sure that `callseq_end` | ||||||
1858 | // will not get lost. Otherwise, during libcalls expansion, the nodes can become | ||||||
1859 | // dangling. | ||||||
1860 | for (unsigned i = 0; i < ProxyRegOps.size(); ++i) { | ||||||
1861 | SDValue Ret = DAG.getNode( | ||||||
1862 | NVPTXISD::ProxyReg, dl, | ||||||
1863 | DAG.getVTList(ProxyRegOps[i].getSimpleValueType(), MVT::Other, MVT::Glue), | ||||||
1864 | { Chain, ProxyRegOps[i], InFlag } | ||||||
1865 | ); | ||||||
1866 | |||||||
1867 | Chain = Ret.getValue(1); | ||||||
1868 | InFlag = Ret.getValue(2); | ||||||
1869 | |||||||
1870 | if (ProxyRegTruncates[i].hasValue()) { | ||||||
1871 | Ret = DAG.getNode(ISD::TRUNCATE, dl, ProxyRegTruncates[i].getValue(), Ret); | ||||||
1872 | } | ||||||
1873 | |||||||
1874 | InVals.push_back(Ret); | ||||||
1875 | } | ||||||
1876 | |||||||
1877 | // set isTailCall to false for now, until we figure out how to express | ||||||
1878 | // tail call optimization in PTX | ||||||
1879 | isTailCall = false; | ||||||
1880 | return Chain; | ||||||
1881 | } | ||||||
1882 | |||||||
1883 | // By default CONCAT_VECTORS is lowered by ExpandVectorBuildThroughStack() | ||||||
1884 | // (see LegalizeDAG.cpp). This is slow and uses local memory. | ||||||
1885 | // We use extract/insert/build vector just as what LegalizeOp() does in llvm 2.5 | ||||||
1886 | SDValue | ||||||
1887 | NVPTXTargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const { | ||||||
1888 | SDNode *Node = Op.getNode(); | ||||||
1889 | SDLoc dl(Node); | ||||||
1890 | SmallVector<SDValue, 8> Ops; | ||||||
1891 | unsigned NumOperands = Node->getNumOperands(); | ||||||
1892 | for (unsigned i = 0; i < NumOperands; ++i) { | ||||||
1893 | SDValue SubOp = Node->getOperand(i); | ||||||
1894 | EVT VVT = SubOp.getNode()->getValueType(0); | ||||||
1895 | EVT EltVT = VVT.getVectorElementType(); | ||||||
1896 | unsigned NumSubElem = VVT.getVectorNumElements(); | ||||||
1897 | for (unsigned j = 0; j < NumSubElem; ++j) { | ||||||
1898 | Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, SubOp, | ||||||
1899 | DAG.getIntPtrConstant(j, dl))); | ||||||
1900 | } | ||||||
1901 | } | ||||||
1902 | return DAG.getBuildVector(Node->getValueType(0), dl, Ops); | ||||||
1903 | } | ||||||
1904 | |||||||
1905 | // We can init constant f16x2 with a single .b32 move. Normally it | ||||||
1906 | // would get lowered as two constant loads and vector-packing move. | ||||||
1907 | // mov.b16 %h1, 0x4000; | ||||||
1908 | // mov.b16 %h2, 0x3C00; | ||||||
1909 | // mov.b32 %hh2, {%h2, %h1}; | ||||||
1910 | // Instead we want just a constant move: | ||||||
1911 | // mov.b32 %hh2, 0x40003C00 | ||||||
1912 | // | ||||||
1913 | // This results in better SASS code with CUDA 7.x. Ptxas in CUDA 8.0 | ||||||
1914 | // generates good SASS in both cases. | ||||||
1915 | SDValue NVPTXTargetLowering::LowerBUILD_VECTOR(SDValue Op, | ||||||
1916 | SelectionDAG &DAG) const { | ||||||
1917 | //return Op; | ||||||
1918 | if (!(Op->getValueType(0) == MVT::v2f16 && | ||||||
1919 | isa<ConstantFPSDNode>(Op->getOperand(0)) && | ||||||
1920 | isa<ConstantFPSDNode>(Op->getOperand(1)))) | ||||||
1921 | return Op; | ||||||
1922 | |||||||
1923 | APInt E0 = | ||||||
1924 | cast<ConstantFPSDNode>(Op->getOperand(0))->getValueAPF().bitcastToAPInt(); | ||||||
1925 | APInt E1 = | ||||||
1926 | cast<ConstantFPSDNode>(Op->getOperand(1))->getValueAPF().bitcastToAPInt(); | ||||||
1927 | SDValue Const = | ||||||
1928 | DAG.getConstant(E1.zext(32).shl(16) | E0.zext(32), SDLoc(Op), MVT::i32); | ||||||
1929 | return DAG.getNode(ISD::BITCAST, SDLoc(Op), MVT::v2f16, Const); | ||||||
1930 | } | ||||||
1931 | |||||||
1932 | SDValue NVPTXTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, | ||||||
1933 | SelectionDAG &DAG) const { | ||||||
1934 | SDValue Index = Op->getOperand(1); | ||||||
1935 | // Constant index will be matched by tablegen. | ||||||
1936 | if (isa<ConstantSDNode>(Index.getNode())) | ||||||
1937 | return Op; | ||||||
1938 | |||||||
1939 | // Extract individual elements and select one of them. | ||||||
1940 | SDValue Vector = Op->getOperand(0); | ||||||
1941 | EVT VectorVT = Vector.getValueType(); | ||||||
1942 | assert(VectorVT == MVT::v2f16 && "Unexpected vector type.")((VectorVT == MVT::v2f16 && "Unexpected vector type." ) ? static_cast<void> (0) : __assert_fail ("VectorVT == MVT::v2f16 && \"Unexpected vector type.\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 1942, __PRETTY_FUNCTION__)); | ||||||
1943 | EVT EltVT = VectorVT.getVectorElementType(); | ||||||
1944 | |||||||
1945 | SDLoc dl(Op.getNode()); | ||||||
1946 | SDValue E0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Vector, | ||||||
1947 | DAG.getIntPtrConstant(0, dl)); | ||||||
1948 | SDValue E1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Vector, | ||||||
1949 | DAG.getIntPtrConstant(1, dl)); | ||||||
1950 | return DAG.getSelectCC(dl, Index, DAG.getIntPtrConstant(0, dl), E0, E1, | ||||||
1951 | ISD::CondCode::SETEQ); | ||||||
1952 | } | ||||||
1953 | |||||||
1954 | /// LowerShiftRightParts - Lower SRL_PARTS, SRA_PARTS, which | ||||||
1955 | /// 1) returns two i32 values and take a 2 x i32 value to shift plus a shift | ||||||
1956 | /// amount, or | ||||||
1957 | /// 2) returns two i64 values and take a 2 x i64 value to shift plus a shift | ||||||
1958 | /// amount. | ||||||
1959 | SDValue NVPTXTargetLowering::LowerShiftRightParts(SDValue Op, | ||||||
1960 | SelectionDAG &DAG) const { | ||||||
1961 | assert(Op.getNumOperands() == 3 && "Not a double-shift!")((Op.getNumOperands() == 3 && "Not a double-shift!") ? static_cast<void> (0) : __assert_fail ("Op.getNumOperands() == 3 && \"Not a double-shift!\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 1961, __PRETTY_FUNCTION__)); | ||||||
1962 | assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS)((Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD:: SRL_PARTS) ? static_cast<void> (0) : __assert_fail ("Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 1962, __PRETTY_FUNCTION__)); | ||||||
1963 | |||||||
1964 | EVT VT = Op.getValueType(); | ||||||
1965 | unsigned VTBits = VT.getSizeInBits(); | ||||||
1966 | SDLoc dl(Op); | ||||||
1967 | SDValue ShOpLo = Op.getOperand(0); | ||||||
1968 | SDValue ShOpHi = Op.getOperand(1); | ||||||
1969 | SDValue ShAmt = Op.getOperand(2); | ||||||
1970 | unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL; | ||||||
1971 | |||||||
1972 | if (VTBits == 32 && STI.getSmVersion() >= 35) { | ||||||
1973 | // For 32bit and sm35, we can use the funnel shift 'shf' instruction. | ||||||
1974 | // {dHi, dLo} = {aHi, aLo} >> Amt | ||||||
1975 | // dHi = aHi >> Amt | ||||||
1976 | // dLo = shf.r.clamp aLo, aHi, Amt | ||||||
1977 | |||||||
1978 | SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt); | ||||||
1979 | SDValue Lo = DAG.getNode(NVPTXISD::FUN_SHFR_CLAMP, dl, VT, ShOpLo, ShOpHi, | ||||||
1980 | ShAmt); | ||||||
1981 | |||||||
1982 | SDValue Ops[2] = { Lo, Hi }; | ||||||
1983 | return DAG.getMergeValues(Ops, dl); | ||||||
1984 | } | ||||||
1985 | else { | ||||||
1986 | // {dHi, dLo} = {aHi, aLo} >> Amt | ||||||
1987 | // - if (Amt>=size) then | ||||||
1988 | // dLo = aHi >> (Amt-size) | ||||||
1989 | // dHi = aHi >> Amt (this is either all 0 or all 1) | ||||||
1990 | // else | ||||||
1991 | // dLo = (aLo >>logic Amt) | (aHi << (size-Amt)) | ||||||
1992 | // dHi = aHi >> Amt | ||||||
1993 | |||||||
1994 | SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, | ||||||
1995 | DAG.getConstant(VTBits, dl, MVT::i32), | ||||||
1996 | ShAmt); | ||||||
1997 | SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt); | ||||||
1998 | SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt, | ||||||
1999 | DAG.getConstant(VTBits, dl, MVT::i32)); | ||||||
2000 | SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt); | ||||||
2001 | SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); | ||||||
2002 | SDValue TrueVal = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt); | ||||||
2003 | |||||||
2004 | SDValue Cmp = DAG.getSetCC(dl, MVT::i1, ShAmt, | ||||||
2005 | DAG.getConstant(VTBits, dl, MVT::i32), | ||||||
2006 | ISD::SETGE); | ||||||
2007 | SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt); | ||||||
2008 | SDValue Lo = DAG.getNode(ISD::SELECT, dl, VT, Cmp, TrueVal, FalseVal); | ||||||
2009 | |||||||
2010 | SDValue Ops[2] = { Lo, Hi }; | ||||||
2011 | return DAG.getMergeValues(Ops, dl); | ||||||
2012 | } | ||||||
2013 | } | ||||||
2014 | |||||||
2015 | /// LowerShiftLeftParts - Lower SHL_PARTS, which | ||||||
2016 | /// 1) returns two i32 values and take a 2 x i32 value to shift plus a shift | ||||||
2017 | /// amount, or | ||||||
2018 | /// 2) returns two i64 values and take a 2 x i64 value to shift plus a shift | ||||||
2019 | /// amount. | ||||||
2020 | SDValue NVPTXTargetLowering::LowerShiftLeftParts(SDValue Op, | ||||||
2021 | SelectionDAG &DAG) const { | ||||||
2022 | assert(Op.getNumOperands() == 3 && "Not a double-shift!")((Op.getNumOperands() == 3 && "Not a double-shift!") ? static_cast<void> (0) : __assert_fail ("Op.getNumOperands() == 3 && \"Not a double-shift!\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 2022, __PRETTY_FUNCTION__)); | ||||||
2023 | assert(Op.getOpcode() == ISD::SHL_PARTS)((Op.getOpcode() == ISD::SHL_PARTS) ? static_cast<void> (0) : __assert_fail ("Op.getOpcode() == ISD::SHL_PARTS", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 2023, __PRETTY_FUNCTION__)); | ||||||
2024 | |||||||
2025 | EVT VT = Op.getValueType(); | ||||||
2026 | unsigned VTBits = VT.getSizeInBits(); | ||||||
2027 | SDLoc dl(Op); | ||||||
2028 | SDValue ShOpLo = Op.getOperand(0); | ||||||
2029 | SDValue ShOpHi = Op.getOperand(1); | ||||||
2030 | SDValue ShAmt = Op.getOperand(2); | ||||||
2031 | |||||||
2032 | if (VTBits == 32 && STI.getSmVersion() >= 35) { | ||||||
2033 | // For 32bit and sm35, we can use the funnel shift 'shf' instruction. | ||||||
2034 | // {dHi, dLo} = {aHi, aLo} << Amt | ||||||
2035 | // dHi = shf.l.clamp aLo, aHi, Amt | ||||||
2036 | // dLo = aLo << Amt | ||||||
2037 | |||||||
2038 | SDValue Hi = DAG.getNode(NVPTXISD::FUN_SHFL_CLAMP, dl, VT, ShOpLo, ShOpHi, | ||||||
2039 | ShAmt); | ||||||
2040 | SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt); | ||||||
2041 | |||||||
2042 | SDValue Ops[2] = { Lo, Hi }; | ||||||
2043 | return DAG.getMergeValues(Ops, dl); | ||||||
2044 | } | ||||||
2045 | else { | ||||||
2046 | // {dHi, dLo} = {aHi, aLo} << Amt | ||||||
2047 | // - if (Amt>=size) then | ||||||
2048 | // dLo = aLo << Amt (all 0) | ||||||
2049 | // dLo = aLo << (Amt-size) | ||||||
2050 | // else | ||||||
2051 | // dLo = aLo << Amt | ||||||
2052 | // dHi = (aHi << Amt) | (aLo >> (size-Amt)) | ||||||
2053 | |||||||
2054 | SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, | ||||||
2055 | DAG.getConstant(VTBits, dl, MVT::i32), | ||||||
2056 | ShAmt); | ||||||
2057 | SDValue Tmp1 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt); | ||||||
2058 | SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt, | ||||||
2059 | DAG.getConstant(VTBits, dl, MVT::i32)); | ||||||
2060 | SDValue Tmp2 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt); | ||||||
2061 | SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); | ||||||
2062 | SDValue TrueVal = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt); | ||||||
2063 | |||||||
2064 | SDValue Cmp = DAG.getSetCC(dl, MVT::i1, ShAmt, | ||||||
2065 | DAG.getConstant(VTBits, dl, MVT::i32), | ||||||
2066 | ISD::SETGE); | ||||||
2067 | SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt); | ||||||
2068 | SDValue Hi = DAG.getNode(ISD::SELECT, dl, VT, Cmp, TrueVal, FalseVal); | ||||||
2069 | |||||||
2070 | SDValue Ops[2] = { Lo, Hi }; | ||||||
2071 | return DAG.getMergeValues(Ops, dl); | ||||||
2072 | } | ||||||
2073 | } | ||||||
2074 | |||||||
2075 | SDValue NVPTXTargetLowering::LowerFROUND(SDValue Op, SelectionDAG &DAG) const { | ||||||
2076 | EVT VT = Op.getValueType(); | ||||||
2077 | |||||||
2078 | if (VT == MVT::f32) | ||||||
2079 | return LowerFROUND32(Op, DAG); | ||||||
2080 | |||||||
2081 | if (VT == MVT::f64) | ||||||
2082 | return LowerFROUND64(Op, DAG); | ||||||
2083 | |||||||
2084 | llvm_unreachable("unhandled type")::llvm::llvm_unreachable_internal("unhandled type", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 2084); | ||||||
2085 | } | ||||||
2086 | |||||||
2087 | // This is the the rounding method used in CUDA libdevice in C like code: | ||||||
2088 | // float roundf(float A) | ||||||
2089 | // { | ||||||
2090 | // float RoundedA = (float) (int) ( A > 0 ? (A + 0.5f) : (A - 0.5f)); | ||||||
2091 | // RoundedA = abs(A) > 0x1.0p23 ? A : RoundedA; | ||||||
2092 | // return abs(A) < 0.5 ? (float)(int)A : RoundedA; | ||||||
2093 | // } | ||||||
2094 | SDValue NVPTXTargetLowering::LowerFROUND32(SDValue Op, | ||||||
2095 | SelectionDAG &DAG) const { | ||||||
2096 | SDLoc SL(Op); | ||||||
2097 | SDValue A = Op.getOperand(0); | ||||||
2098 | EVT VT = Op.getValueType(); | ||||||
2099 | |||||||
2100 | SDValue AbsA = DAG.getNode(ISD::FABS, SL, VT, A); | ||||||
2101 | |||||||
2102 | // RoundedA = (float) (int) ( A > 0 ? (A + 0.5f) : (A - 0.5f)) | ||||||
2103 | SDValue Bitcast = DAG.getNode(ISD::BITCAST, SL, MVT::i32, A); | ||||||
2104 | const int SignBitMask = 0x80000000; | ||||||
2105 | SDValue Sign = DAG.getNode(ISD::AND, SL, MVT::i32, Bitcast, | ||||||
2106 | DAG.getConstant(SignBitMask, SL, MVT::i32)); | ||||||
2107 | const int PointFiveInBits = 0x3F000000; | ||||||
2108 | SDValue PointFiveWithSignRaw = | ||||||
2109 | DAG.getNode(ISD::OR, SL, MVT::i32, Sign, | ||||||
2110 | DAG.getConstant(PointFiveInBits, SL, MVT::i32)); | ||||||
2111 | SDValue PointFiveWithSign = | ||||||
2112 | DAG.getNode(ISD::BITCAST, SL, VT, PointFiveWithSignRaw); | ||||||
2113 | SDValue AdjustedA = DAG.getNode(ISD::FADD, SL, VT, A, PointFiveWithSign); | ||||||
2114 | SDValue RoundedA = DAG.getNode(ISD::FTRUNC, SL, VT, AdjustedA); | ||||||
2115 | |||||||
2116 | // RoundedA = abs(A) > 0x1.0p23 ? A : RoundedA; | ||||||
2117 | EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); | ||||||
2118 | SDValue IsLarge = | ||||||
2119 | DAG.getSetCC(SL, SetCCVT, AbsA, DAG.getConstantFP(pow(2.0, 23.0), SL, VT), | ||||||
2120 | ISD::SETOGT); | ||||||
2121 | RoundedA = DAG.getNode(ISD::SELECT, SL, VT, IsLarge, A, RoundedA); | ||||||
2122 | |||||||
2123 | // return abs(A) < 0.5 ? (float)(int)A : RoundedA; | ||||||
2124 | SDValue IsSmall =DAG.getSetCC(SL, SetCCVT, AbsA, | ||||||
2125 | DAG.getConstantFP(0.5, SL, VT), ISD::SETOLT); | ||||||
2126 | SDValue RoundedAForSmallA = DAG.getNode(ISD::FTRUNC, SL, VT, A); | ||||||
2127 | return DAG.getNode(ISD::SELECT, SL, VT, IsSmall, RoundedAForSmallA, RoundedA); | ||||||
2128 | } | ||||||
2129 | |||||||
2130 | // The implementation of round(double) is similar to that of round(float) in | ||||||
2131 | // that they both separate the value range into three regions and use a method | ||||||
2132 | // specific to the region to round the values. However, round(double) first | ||||||
2133 | // calculates the round of the absolute value and then adds the sign back while | ||||||
2134 | // round(float) directly rounds the value with sign. | ||||||
2135 | SDValue NVPTXTargetLowering::LowerFROUND64(SDValue Op, | ||||||
2136 | SelectionDAG &DAG) const { | ||||||
2137 | SDLoc SL(Op); | ||||||
2138 | SDValue A = Op.getOperand(0); | ||||||
2139 | EVT VT = Op.getValueType(); | ||||||
2140 | |||||||
2141 | SDValue AbsA = DAG.getNode(ISD::FABS, SL, VT, A); | ||||||
2142 | |||||||
2143 | // double RoundedA = (double) (int) (abs(A) + 0.5f); | ||||||
2144 | SDValue AdjustedA = DAG.getNode(ISD::FADD, SL, VT, AbsA, | ||||||
2145 | DAG.getConstantFP(0.5, SL, VT)); | ||||||
2146 | SDValue RoundedA = DAG.getNode(ISD::FTRUNC, SL, VT, AdjustedA); | ||||||
2147 | |||||||
2148 | // RoundedA = abs(A) < 0.5 ? (double)0 : RoundedA; | ||||||
2149 | EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); | ||||||
2150 | SDValue IsSmall =DAG.getSetCC(SL, SetCCVT, AbsA, | ||||||
2151 | DAG.getConstantFP(0.5, SL, VT), ISD::SETOLT); | ||||||
2152 | RoundedA = DAG.getNode(ISD::SELECT, SL, VT, IsSmall, | ||||||
2153 | DAG.getConstantFP(0, SL, VT), | ||||||
2154 | RoundedA); | ||||||
2155 | |||||||
2156 | // Add sign to rounded_A | ||||||
2157 | RoundedA = DAG.getNode(ISD::FCOPYSIGN, SL, VT, RoundedA, A); | ||||||
2158 | DAG.getNode(ISD::FTRUNC, SL, VT, A); | ||||||
2159 | |||||||
2160 | // RoundedA = abs(A) > 0x1.0p52 ? A : RoundedA; | ||||||
2161 | SDValue IsLarge = | ||||||
2162 | DAG.getSetCC(SL, SetCCVT, AbsA, DAG.getConstantFP(pow(2.0, 52.0), SL, VT), | ||||||
2163 | ISD::SETOGT); | ||||||
2164 | return DAG.getNode(ISD::SELECT, SL, VT, IsLarge, A, RoundedA); | ||||||
2165 | } | ||||||
2166 | |||||||
2167 | |||||||
2168 | |||||||
2169 | SDValue | ||||||
2170 | NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { | ||||||
2171 | switch (Op.getOpcode()) { | ||||||
2172 | case ISD::RETURNADDR: | ||||||
2173 | return SDValue(); | ||||||
2174 | case ISD::FRAMEADDR: | ||||||
2175 | return SDValue(); | ||||||
2176 | case ISD::GlobalAddress: | ||||||
2177 | return LowerGlobalAddress(Op, DAG); | ||||||
2178 | case ISD::INTRINSIC_W_CHAIN: | ||||||
2179 | return Op; | ||||||
2180 | case ISD::BUILD_VECTOR: | ||||||
2181 | return LowerBUILD_VECTOR(Op, DAG); | ||||||
2182 | case ISD::EXTRACT_SUBVECTOR: | ||||||
2183 | return Op; | ||||||
2184 | case ISD::EXTRACT_VECTOR_ELT: | ||||||
2185 | return LowerEXTRACT_VECTOR_ELT(Op, DAG); | ||||||
2186 | case ISD::CONCAT_VECTORS: | ||||||
2187 | return LowerCONCAT_VECTORS(Op, DAG); | ||||||
2188 | case ISD::STORE: | ||||||
2189 | return LowerSTORE(Op, DAG); | ||||||
2190 | case ISD::LOAD: | ||||||
2191 | return LowerLOAD(Op, DAG); | ||||||
2192 | case ISD::SHL_PARTS: | ||||||
2193 | return LowerShiftLeftParts(Op, DAG); | ||||||
2194 | case ISD::SRA_PARTS: | ||||||
2195 | case ISD::SRL_PARTS: | ||||||
2196 | return LowerShiftRightParts(Op, DAG); | ||||||
2197 | case ISD::SELECT: | ||||||
2198 | return LowerSelect(Op, DAG); | ||||||
2199 | case ISD::FROUND: | ||||||
2200 | return LowerFROUND(Op, DAG); | ||||||
2201 | default: | ||||||
2202 | llvm_unreachable("Custom lowering not defined for operation")::llvm::llvm_unreachable_internal("Custom lowering not defined for operation" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 2202); | ||||||
2203 | } | ||||||
2204 | } | ||||||
2205 | |||||||
2206 | SDValue NVPTXTargetLowering::LowerSelect(SDValue Op, SelectionDAG &DAG) const { | ||||||
2207 | SDValue Op0 = Op->getOperand(0); | ||||||
2208 | SDValue Op1 = Op->getOperand(1); | ||||||
2209 | SDValue Op2 = Op->getOperand(2); | ||||||
2210 | SDLoc DL(Op.getNode()); | ||||||
2211 | |||||||
2212 | assert(Op.getValueType() == MVT::i1 && "Custom lowering enabled only for i1")((Op.getValueType() == MVT::i1 && "Custom lowering enabled only for i1" ) ? static_cast<void> (0) : __assert_fail ("Op.getValueType() == MVT::i1 && \"Custom lowering enabled only for i1\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 2212, __PRETTY_FUNCTION__)); | ||||||
2213 | |||||||
2214 | Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op1); | ||||||
2215 | Op2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op2); | ||||||
2216 | SDValue Select = DAG.getNode(ISD::SELECT, DL, MVT::i32, Op0, Op1, Op2); | ||||||
2217 | SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Select); | ||||||
2218 | |||||||
2219 | return Trunc; | ||||||
2220 | } | ||||||
2221 | |||||||
2222 | SDValue NVPTXTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { | ||||||
2223 | if (Op.getValueType() == MVT::i1) | ||||||
2224 | return LowerLOADi1(Op, DAG); | ||||||
2225 | |||||||
2226 | // v2f16 is legal, so we can't rely on legalizer to handle unaligned | ||||||
2227 | // loads and have to handle it here. | ||||||
2228 | if (Op.getValueType() == MVT::v2f16) { | ||||||
2229 | LoadSDNode *Load = cast<LoadSDNode>(Op); | ||||||
2230 | EVT MemVT = Load->getMemoryVT(); | ||||||
2231 | if (!allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(), | ||||||
2232 | MemVT, *Load->getMemOperand())) { | ||||||
2233 | SDValue Ops[2]; | ||||||
2234 | std::tie(Ops[0], Ops[1]) = expandUnalignedLoad(Load, DAG); | ||||||
2235 | return DAG.getMergeValues(Ops, SDLoc(Op)); | ||||||
2236 | } | ||||||
2237 | } | ||||||
2238 | |||||||
2239 | return SDValue(); | ||||||
2240 | } | ||||||
2241 | |||||||
2242 | // v = ld i1* addr | ||||||
2243 | // => | ||||||
2244 | // v1 = ld i8* addr (-> i16) | ||||||
2245 | // v = trunc i16 to i1 | ||||||
2246 | SDValue NVPTXTargetLowering::LowerLOADi1(SDValue Op, SelectionDAG &DAG) const { | ||||||
2247 | SDNode *Node = Op.getNode(); | ||||||
2248 | LoadSDNode *LD = cast<LoadSDNode>(Node); | ||||||
2249 | SDLoc dl(Node); | ||||||
2250 | assert(LD->getExtensionType() == ISD::NON_EXTLOAD)((LD->getExtensionType() == ISD::NON_EXTLOAD) ? static_cast <void> (0) : __assert_fail ("LD->getExtensionType() == ISD::NON_EXTLOAD" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 2250, __PRETTY_FUNCTION__)); | ||||||
2251 | assert(Node->getValueType(0) == MVT::i1 &&((Node->getValueType(0) == MVT::i1 && "Custom lowering for i1 load only" ) ? static_cast<void> (0) : __assert_fail ("Node->getValueType(0) == MVT::i1 && \"Custom lowering for i1 load only\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 2252, __PRETTY_FUNCTION__)) | ||||||
2252 | "Custom lowering for i1 load only")((Node->getValueType(0) == MVT::i1 && "Custom lowering for i1 load only" ) ? static_cast<void> (0) : __assert_fail ("Node->getValueType(0) == MVT::i1 && \"Custom lowering for i1 load only\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 2252, __PRETTY_FUNCTION__)); | ||||||
2253 | SDValue newLD = DAG.getLoad(MVT::i16, dl, LD->getChain(), LD->getBasePtr(), | ||||||
2254 | LD->getPointerInfo(), LD->getAlignment(), | ||||||
2255 | LD->getMemOperand()->getFlags()); | ||||||
2256 | SDValue result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, newLD); | ||||||
2257 | // The legalizer (the caller) is expecting two values from the legalized | ||||||
2258 | // load, so we build a MergeValues node for it. See ExpandUnalignedLoad() | ||||||
2259 | // in LegalizeDAG.cpp which also uses MergeValues. | ||||||
2260 | SDValue Ops[] = { result, LD->getChain() }; | ||||||
2261 | return DAG.getMergeValues(Ops, dl); | ||||||
2262 | } | ||||||
2263 | |||||||
2264 | SDValue NVPTXTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { | ||||||
2265 | StoreSDNode *Store = cast<StoreSDNode>(Op); | ||||||
2266 | EVT VT = Store->getMemoryVT(); | ||||||
2267 | |||||||
2268 | if (VT == MVT::i1) | ||||||
2269 | return LowerSTOREi1(Op, DAG); | ||||||
2270 | |||||||
2271 | // v2f16 is legal, so we can't rely on legalizer to handle unaligned | ||||||
2272 | // stores and have to handle it here. | ||||||
2273 | if (VT == MVT::v2f16 && | ||||||
2274 | !allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(), | ||||||
2275 | VT, *Store->getMemOperand())) | ||||||
2276 | return expandUnalignedStore(Store, DAG); | ||||||
2277 | |||||||
2278 | if (VT.isVector()) | ||||||
2279 | return LowerSTOREVector(Op, DAG); | ||||||
2280 | |||||||
2281 | return SDValue(); | ||||||
2282 | } | ||||||
2283 | |||||||
2284 | SDValue | ||||||
2285 | NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const { | ||||||
2286 | SDNode *N = Op.getNode(); | ||||||
2287 | SDValue Val = N->getOperand(1); | ||||||
2288 | SDLoc DL(N); | ||||||
2289 | EVT ValVT = Val.getValueType(); | ||||||
2290 | |||||||
2291 | if (ValVT.isVector()) { | ||||||
2292 | // We only handle "native" vector sizes for now, e.g. <4 x double> is not | ||||||
2293 | // legal. We can (and should) split that into 2 stores of <2 x double> here | ||||||
2294 | // but I'm leaving that as a TODO for now. | ||||||
2295 | if (!ValVT.isSimple()) | ||||||
2296 | return SDValue(); | ||||||
2297 | switch (ValVT.getSimpleVT().SimpleTy) { | ||||||
2298 | default: | ||||||
2299 | return SDValue(); | ||||||
2300 | case MVT::v2i8: | ||||||
2301 | case MVT::v2i16: | ||||||
2302 | case MVT::v2i32: | ||||||
2303 | case MVT::v2i64: | ||||||
2304 | case MVT::v2f16: | ||||||
2305 | case MVT::v2f32: | ||||||
2306 | case MVT::v2f64: | ||||||
2307 | case MVT::v4i8: | ||||||
2308 | case MVT::v4i16: | ||||||
2309 | case MVT::v4i32: | ||||||
2310 | case MVT::v4f16: | ||||||
2311 | case MVT::v4f32: | ||||||
2312 | case MVT::v8f16: // <4 x f16x2> | ||||||
2313 | // This is a "native" vector type | ||||||
2314 | break; | ||||||
2315 | } | ||||||
2316 | |||||||
2317 | MemSDNode *MemSD = cast<MemSDNode>(N); | ||||||
2318 | const DataLayout &TD = DAG.getDataLayout(); | ||||||
2319 | |||||||
2320 | unsigned Align = MemSD->getAlignment(); | ||||||
2321 | unsigned PrefAlign = | ||||||
2322 | TD.getPrefTypeAlignment(ValVT.getTypeForEVT(*DAG.getContext())); | ||||||
2323 | if (Align < PrefAlign) { | ||||||
2324 | // This store is not sufficiently aligned, so bail out and let this vector | ||||||
2325 | // store be scalarized. Note that we may still be able to emit smaller | ||||||
2326 | // vector stores. For example, if we are storing a <4 x float> with an | ||||||
2327 | // alignment of 8, this check will fail but the legalizer will try again | ||||||
2328 | // with 2 x <2 x float>, which will succeed with an alignment of 8. | ||||||
2329 | return SDValue(); | ||||||
2330 | } | ||||||
2331 | |||||||
2332 | unsigned Opcode = 0; | ||||||
2333 | EVT EltVT = ValVT.getVectorElementType(); | ||||||
2334 | unsigned NumElts = ValVT.getVectorNumElements(); | ||||||
2335 | |||||||
2336 | // Since StoreV2 is a target node, we cannot rely on DAG type legalization. | ||||||
2337 | // Therefore, we must ensure the type is legal. For i1 and i8, we set the | ||||||
2338 | // stored type to i16 and propagate the "real" type as the memory type. | ||||||
2339 | bool NeedExt = false; | ||||||
2340 | if (EltVT.getSizeInBits() < 16) | ||||||
2341 | NeedExt = true; | ||||||
2342 | |||||||
2343 | bool StoreF16x2 = false; | ||||||
2344 | switch (NumElts) { | ||||||
2345 | default: | ||||||
2346 | return SDValue(); | ||||||
2347 | case 2: | ||||||
2348 | Opcode = NVPTXISD::StoreV2; | ||||||
2349 | break; | ||||||
2350 | case 4: | ||||||
2351 | Opcode = NVPTXISD::StoreV4; | ||||||
2352 | break; | ||||||
2353 | case 8: | ||||||
2354 | // v8f16 is a special case. PTX doesn't have st.v8.f16 | ||||||
2355 | // instruction. Instead, we split the vector into v2f16 chunks and | ||||||
2356 | // store them with st.v4.b32. | ||||||
2357 | assert(EltVT == MVT::f16 && "Wrong type for the vector.")((EltVT == MVT::f16 && "Wrong type for the vector.") ? static_cast<void> (0) : __assert_fail ("EltVT == MVT::f16 && \"Wrong type for the vector.\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 2357, __PRETTY_FUNCTION__)); | ||||||
2358 | Opcode = NVPTXISD::StoreV4; | ||||||
2359 | StoreF16x2 = true; | ||||||
2360 | break; | ||||||
2361 | } | ||||||
2362 | |||||||
2363 | SmallVector<SDValue, 8> Ops; | ||||||
2364 | |||||||
2365 | // First is the chain | ||||||
2366 | Ops.push_back(N->getOperand(0)); | ||||||
2367 | |||||||
2368 | if (StoreF16x2) { | ||||||
2369 | // Combine f16,f16 -> v2f16 | ||||||
2370 | NumElts /= 2; | ||||||
2371 | for (unsigned i = 0; i < NumElts; ++i) { | ||||||
2372 | SDValue E0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f16, Val, | ||||||
2373 | DAG.getIntPtrConstant(i * 2, DL)); | ||||||
2374 | SDValue E1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f16, Val, | ||||||
2375 | DAG.getIntPtrConstant(i * 2 + 1, DL)); | ||||||
2376 | SDValue V2 = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v2f16, E0, E1); | ||||||
2377 | Ops.push_back(V2); | ||||||
2378 | } | ||||||
2379 | } else { | ||||||
2380 | // Then the split values | ||||||
2381 | for (unsigned i = 0; i < NumElts; ++i) { | ||||||
2382 | SDValue ExtVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Val, | ||||||
2383 | DAG.getIntPtrConstant(i, DL)); | ||||||
2384 | if (NeedExt) | ||||||
2385 | ExtVal = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i16, ExtVal); | ||||||
2386 | Ops.push_back(ExtVal); | ||||||
2387 | } | ||||||
2388 | } | ||||||
2389 | |||||||
2390 | // Then any remaining arguments | ||||||
2391 | Ops.append(N->op_begin() + 2, N->op_end()); | ||||||
2392 | |||||||
2393 | SDValue NewSt = | ||||||
2394 | DAG.getMemIntrinsicNode(Opcode, DL, DAG.getVTList(MVT::Other), Ops, | ||||||
2395 | MemSD->getMemoryVT(), MemSD->getMemOperand()); | ||||||
2396 | |||||||
2397 | // return DCI.CombineTo(N, NewSt, true); | ||||||
2398 | return NewSt; | ||||||
2399 | } | ||||||
2400 | |||||||
2401 | return SDValue(); | ||||||
2402 | } | ||||||
2403 | |||||||
2404 | // st i1 v, addr | ||||||
2405 | // => | ||||||
2406 | // v1 = zxt v to i16 | ||||||
2407 | // st.u8 i16, addr | ||||||
2408 | SDValue NVPTXTargetLowering::LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const { | ||||||
2409 | SDNode *Node = Op.getNode(); | ||||||
2410 | SDLoc dl(Node); | ||||||
2411 | StoreSDNode *ST = cast<StoreSDNode>(Node); | ||||||
2412 | SDValue Tmp1 = ST->getChain(); | ||||||
2413 | SDValue Tmp2 = ST->getBasePtr(); | ||||||
2414 | SDValue Tmp3 = ST->getValue(); | ||||||
2415 | assert(Tmp3.getValueType() == MVT::i1 && "Custom lowering for i1 store only")((Tmp3.getValueType() == MVT::i1 && "Custom lowering for i1 store only" ) ? static_cast<void> (0) : __assert_fail ("Tmp3.getValueType() == MVT::i1 && \"Custom lowering for i1 store only\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 2415, __PRETTY_FUNCTION__)); | ||||||
2416 | Tmp3 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Tmp3); | ||||||
2417 | SDValue Result = | ||||||
2418 | DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), MVT::i8, | ||||||
2419 | ST->getAlignment(), ST->getMemOperand()->getFlags()); | ||||||
2420 | return Result; | ||||||
2421 | } | ||||||
2422 | |||||||
2423 | SDValue | ||||||
2424 | NVPTXTargetLowering::getParamSymbol(SelectionDAG &DAG, int idx, EVT v) const { | ||||||
2425 | std::string ParamSym; | ||||||
2426 | raw_string_ostream ParamStr(ParamSym); | ||||||
2427 | |||||||
2428 | ParamStr << DAG.getMachineFunction().getName() << "_param_" << idx; | ||||||
2429 | ParamStr.flush(); | ||||||
2430 | |||||||
2431 | std::string *SavedStr = | ||||||
2432 | nvTM->getManagedStrPool()->getManagedString(ParamSym.c_str()); | ||||||
2433 | return DAG.getTargetExternalSymbol(SavedStr->c_str(), v); | ||||||
2434 | } | ||||||
2435 | |||||||
2436 | // Check to see if the kernel argument is image*_t or sampler_t | ||||||
2437 | |||||||
2438 | static bool isImageOrSamplerVal(const Value *arg, const Module *context) { | ||||||
2439 | static const char *const specialTypes[] = { "struct._image2d_t", | ||||||
2440 | "struct._image3d_t", | ||||||
2441 | "struct._sampler_t" }; | ||||||
2442 | |||||||
2443 | Type *Ty = arg->getType(); | ||||||
2444 | auto *PTy = dyn_cast<PointerType>(Ty); | ||||||
2445 | |||||||
2446 | if (!PTy) | ||||||
2447 | return false; | ||||||
2448 | |||||||
2449 | if (!context) | ||||||
2450 | return false; | ||||||
2451 | |||||||
2452 | auto *STy = dyn_cast<StructType>(PTy->getElementType()); | ||||||
2453 | if (!STy || STy->isLiteral()) | ||||||
2454 | return false; | ||||||
2455 | |||||||
2456 | return std::find(std::begin(specialTypes), std::end(specialTypes), | ||||||
2457 | STy->getName()) != std::end(specialTypes); | ||||||
2458 | } | ||||||
2459 | |||||||
2460 | SDValue NVPTXTargetLowering::LowerFormalArguments( | ||||||
2461 | SDValue Chain, CallingConv::ID CallConv, bool isVarArg, | ||||||
2462 | const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, | ||||||
2463 | SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { | ||||||
2464 | MachineFunction &MF = DAG.getMachineFunction(); | ||||||
2465 | const DataLayout &DL = DAG.getDataLayout(); | ||||||
2466 | auto PtrVT = getPointerTy(DAG.getDataLayout()); | ||||||
2467 | |||||||
2468 | const Function *F = &MF.getFunction(); | ||||||
2469 | const AttributeList &PAL = F->getAttributes(); | ||||||
2470 | const TargetLowering *TLI = STI.getTargetLowering(); | ||||||
2471 | |||||||
2472 | SDValue Root = DAG.getRoot(); | ||||||
2473 | std::vector<SDValue> OutChains; | ||||||
2474 | |||||||
2475 | bool isABI = (STI.getSmVersion() >= 20); | ||||||
2476 | assert(isABI && "Non-ABI compilation is not supported")((isABI && "Non-ABI compilation is not supported") ? static_cast <void> (0) : __assert_fail ("isABI && \"Non-ABI compilation is not supported\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 2476, __PRETTY_FUNCTION__)); | ||||||
2477 | if (!isABI) | ||||||
2478 | return Chain; | ||||||
2479 | |||||||
2480 | std::vector<Type *> argTypes; | ||||||
2481 | std::vector<const Argument *> theArgs; | ||||||
2482 | for (const Argument &I : F->args()) { | ||||||
2483 | theArgs.push_back(&I); | ||||||
2484 | argTypes.push_back(I.getType()); | ||||||
2485 | } | ||||||
2486 | // argTypes.size() (or theArgs.size()) and Ins.size() need not match. | ||||||
2487 | // Ins.size() will be larger | ||||||
2488 | // * if there is an aggregate argument with multiple fields (each field | ||||||
2489 | // showing up separately in Ins) | ||||||
2490 | // * if there is a vector argument with more than typical vector-length | ||||||
2491 | // elements (generally if more than 4) where each vector element is | ||||||
2492 | // individually present in Ins. | ||||||
2493 | // So a different index should be used for indexing into Ins. | ||||||
2494 | // See similar issue in LowerCall. | ||||||
2495 | unsigned InsIdx = 0; | ||||||
2496 | |||||||
2497 | int idx = 0; | ||||||
2498 | for (unsigned i = 0, e = theArgs.size(); i != e; ++i, ++idx, ++InsIdx) { | ||||||
2499 | Type *Ty = argTypes[i]; | ||||||
2500 | |||||||
2501 | // If the kernel argument is image*_t or sampler_t, convert it to | ||||||
2502 | // a i32 constant holding the parameter position. This can later | ||||||
2503 | // matched in the AsmPrinter to output the correct mangled name. | ||||||
2504 | if (isImageOrSamplerVal( | ||||||
2505 | theArgs[i], | ||||||
2506 | (theArgs[i]->getParent() ? theArgs[i]->getParent()->getParent() | ||||||
2507 | : nullptr))) { | ||||||
2508 | assert(isKernelFunction(*F) &&((isKernelFunction(*F) && "Only kernels can have image/sampler params" ) ? static_cast<void> (0) : __assert_fail ("isKernelFunction(*F) && \"Only kernels can have image/sampler params\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 2509, __PRETTY_FUNCTION__)) | ||||||
2509 | "Only kernels can have image/sampler params")((isKernelFunction(*F) && "Only kernels can have image/sampler params" ) ? static_cast<void> (0) : __assert_fail ("isKernelFunction(*F) && \"Only kernels can have image/sampler params\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 2509, __PRETTY_FUNCTION__)); | ||||||
2510 | InVals.push_back(DAG.getConstant(i + 1, dl, MVT::i32)); | ||||||
2511 | continue; | ||||||
2512 | } | ||||||
2513 | |||||||
2514 | if (theArgs[i]->use_empty()) { | ||||||
2515 | // argument is dead | ||||||
2516 | if (Ty->isAggregateType() || Ty->isIntegerTy(128)) { | ||||||
2517 | SmallVector<EVT, 16> vtparts; | ||||||
2518 | |||||||
2519 | ComputePTXValueVTs(*this, DAG.getDataLayout(), Ty, vtparts); | ||||||
2520 | assert(vtparts.size() > 0 && "empty aggregate type not expected")((vtparts.size() > 0 && "empty aggregate type not expected" ) ? static_cast<void> (0) : __assert_fail ("vtparts.size() > 0 && \"empty aggregate type not expected\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 2520, __PRETTY_FUNCTION__)); | ||||||
2521 | for (unsigned parti = 0, parte = vtparts.size(); parti != parte; | ||||||
2522 | ++parti) { | ||||||
2523 | InVals.push_back(DAG.getNode(ISD::UNDEF, dl, Ins[InsIdx].VT)); | ||||||
2524 | ++InsIdx; | ||||||
2525 | } | ||||||
2526 | if (vtparts.size() > 0) | ||||||
2527 | --InsIdx; | ||||||
2528 | continue; | ||||||
2529 | } | ||||||
2530 | if (Ty->isVectorTy()) { | ||||||
2531 | EVT ObjectVT = getValueType(DL, Ty); | ||||||
2532 | unsigned NumRegs = TLI->getNumRegisters(F->getContext(), ObjectVT); | ||||||
2533 | for (unsigned parti = 0; parti < NumRegs; ++parti) { | ||||||
2534 | InVals.push_back(DAG.getNode(ISD::UNDEF, dl, Ins[InsIdx].VT)); | ||||||
2535 | ++InsIdx; | ||||||
2536 | } | ||||||
2537 | if (NumRegs > 0) | ||||||
2538 | --InsIdx; | ||||||
2539 | continue; | ||||||
2540 | } | ||||||
2541 | InVals.push_back(DAG.getNode(ISD::UNDEF, dl, Ins[InsIdx].VT)); | ||||||
2542 | continue; | ||||||
2543 | } | ||||||
2544 | |||||||
2545 | // In the following cases, assign a node order of "idx+1" | ||||||
2546 | // to newly created nodes. The SDNodes for params have to | ||||||
2547 | // appear in the same order as their order of appearance | ||||||
2548 | // in the original function. "idx+1" holds that order. | ||||||
2549 | if (!PAL.hasParamAttribute(i, Attribute::ByVal)) { | ||||||
2550 | bool aggregateIsPacked = false; | ||||||
2551 | if (StructType *STy = dyn_cast<StructType>(Ty)) | ||||||
2552 | aggregateIsPacked = STy->isPacked(); | ||||||
2553 | |||||||
2554 | SmallVector<EVT, 16> VTs; | ||||||
2555 | SmallVector<uint64_t, 16> Offsets; | ||||||
2556 | ComputePTXValueVTs(*this, DL, Ty, VTs, &Offsets, 0); | ||||||
2557 | assert(VTs.size() > 0 && "Unexpected empty type.")((VTs.size() > 0 && "Unexpected empty type.") ? static_cast <void> (0) : __assert_fail ("VTs.size() > 0 && \"Unexpected empty type.\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 2557, __PRETTY_FUNCTION__)); | ||||||
2558 | auto VectorInfo = | ||||||
2559 | VectorizePTXValueVTs(VTs, Offsets, DL.getABITypeAlignment(Ty)); | ||||||
2560 | |||||||
2561 | SDValue Arg = getParamSymbol(DAG, idx, PtrVT); | ||||||
2562 | int VecIdx = -1; // Index of the first element of the current vector. | ||||||
2563 | for (unsigned parti = 0, parte = VTs.size(); parti != parte; ++parti) { | ||||||
2564 | if (VectorInfo[parti] & PVF_FIRST) { | ||||||
2565 | assert(VecIdx == -1 && "Orphaned vector.")((VecIdx == -1 && "Orphaned vector.") ? static_cast< void> (0) : __assert_fail ("VecIdx == -1 && \"Orphaned vector.\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 2565, __PRETTY_FUNCTION__)); | ||||||
2566 | VecIdx = parti; | ||||||
2567 | } | ||||||
2568 | |||||||
2569 | // That's the last element of this store op. | ||||||
2570 | if (VectorInfo[parti] & PVF_LAST) { | ||||||
2571 | unsigned NumElts = parti - VecIdx + 1; | ||||||
2572 | EVT EltVT = VTs[parti]; | ||||||
2573 | // i1 is loaded/stored as i8. | ||||||
2574 | EVT LoadVT = EltVT; | ||||||
2575 | if (EltVT == MVT::i1) | ||||||
2576 | LoadVT = MVT::i8; | ||||||
2577 | else if (EltVT == MVT::v2f16) | ||||||
2578 | // getLoad needs a vector type, but it can't handle | ||||||
2579 | // vectors which contain v2f16 elements. So we must load | ||||||
2580 | // using i32 here and then bitcast back. | ||||||
2581 | LoadVT = MVT::i32; | ||||||
2582 | |||||||
2583 | EVT VecVT = EVT::getVectorVT(F->getContext(), LoadVT, NumElts); | ||||||
2584 | SDValue VecAddr = | ||||||
2585 | DAG.getNode(ISD::ADD, dl, PtrVT, Arg, | ||||||
2586 | DAG.getConstant(Offsets[VecIdx], dl, PtrVT)); | ||||||
2587 | Value *srcValue = Constant::getNullValue(PointerType::get( | ||||||
2588 | EltVT.getTypeForEVT(F->getContext()), ADDRESS_SPACE_PARAM)); | ||||||
2589 | SDValue P = | ||||||
2590 | DAG.getLoad(VecVT, dl, Root, VecAddr, | ||||||
2591 | MachinePointerInfo(srcValue), aggregateIsPacked, | ||||||
2592 | MachineMemOperand::MODereferenceable | | ||||||
2593 | MachineMemOperand::MOInvariant); | ||||||
2594 | if (P.getNode()) | ||||||
2595 | P.getNode()->setIROrder(idx + 1); | ||||||
2596 | for (unsigned j = 0; j < NumElts; ++j) { | ||||||
2597 | SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, LoadVT, P, | ||||||
2598 | DAG.getIntPtrConstant(j, dl)); | ||||||
2599 | // We've loaded i1 as an i8 and now must truncate it back to i1 | ||||||
2600 | if (EltVT == MVT::i1) | ||||||
2601 | Elt = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Elt); | ||||||
2602 | // v2f16 was loaded as an i32. Now we must bitcast it back. | ||||||
2603 | else if (EltVT == MVT::v2f16) | ||||||
2604 | Elt = DAG.getNode(ISD::BITCAST, dl, MVT::v2f16, Elt); | ||||||
2605 | // Extend the element if necessary (e.g. an i8 is loaded | ||||||
2606 | // into an i16 register) | ||||||
2607 | if (Ins[InsIdx].VT.isInteger() && | ||||||
2608 | Ins[InsIdx].VT.getSizeInBits() > LoadVT.getSizeInBits()) { | ||||||
2609 | unsigned Extend = Ins[InsIdx].Flags.isSExt() ? ISD::SIGN_EXTEND | ||||||
2610 | : ISD::ZERO_EXTEND; | ||||||
2611 | Elt = DAG.getNode(Extend, dl, Ins[InsIdx].VT, Elt); | ||||||
2612 | } | ||||||
2613 | InVals.push_back(Elt); | ||||||
2614 | } | ||||||
2615 | |||||||
2616 | // Reset vector tracking state. | ||||||
2617 | VecIdx = -1; | ||||||
2618 | } | ||||||
2619 | ++InsIdx; | ||||||
2620 | } | ||||||
2621 | if (VTs.size() > 0) | ||||||
2622 | --InsIdx; | ||||||
2623 | continue; | ||||||
2624 | } | ||||||
2625 | |||||||
2626 | // Param has ByVal attribute | ||||||
2627 | // Return MoveParam(param symbol). | ||||||
2628 | // Ideally, the param symbol can be returned directly, | ||||||
2629 | // but when SDNode builder decides to use it in a CopyToReg(), | ||||||
2630 | // machine instruction fails because TargetExternalSymbol | ||||||
2631 | // (not lowered) is target dependent, and CopyToReg assumes | ||||||
2632 | // the source is lowered. | ||||||
2633 | EVT ObjectVT = getValueType(DL, Ty); | ||||||
2634 | assert(ObjectVT == Ins[InsIdx].VT &&((ObjectVT == Ins[InsIdx].VT && "Ins type did not match function type" ) ? static_cast<void> (0) : __assert_fail ("ObjectVT == Ins[InsIdx].VT && \"Ins type did not match function type\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 2635, __PRETTY_FUNCTION__)) | ||||||
2635 | "Ins type did not match function type")((ObjectVT == Ins[InsIdx].VT && "Ins type did not match function type" ) ? static_cast<void> (0) : __assert_fail ("ObjectVT == Ins[InsIdx].VT && \"Ins type did not match function type\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 2635, __PRETTY_FUNCTION__)); | ||||||
2636 | SDValue Arg = getParamSymbol(DAG, idx, PtrVT); | ||||||
2637 | SDValue p = DAG.getNode(NVPTXISD::MoveParam, dl, ObjectVT, Arg); | ||||||
2638 | if (p.getNode()) | ||||||
2639 | p.getNode()->setIROrder(idx + 1); | ||||||
2640 | InVals.push_back(p); | ||||||
2641 | } | ||||||
2642 | |||||||
2643 | // Clang will check explicit VarArg and issue error if any. However, Clang | ||||||
2644 | // will let code with | ||||||
2645 | // implicit var arg like f() pass. See bug 617733. | ||||||
2646 | // We treat this case as if the arg list is empty. | ||||||
2647 | // if (F.isVarArg()) { | ||||||
2648 | // assert(0 && "VarArg not supported yet!"); | ||||||
2649 | //} | ||||||
2650 | |||||||
2651 | if (!OutChains.empty()) | ||||||
2652 | DAG.setRoot(DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains)); | ||||||
2653 | |||||||
2654 | return Chain; | ||||||
2655 | } | ||||||
2656 | |||||||
2657 | SDValue | ||||||
2658 | NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, | ||||||
2659 | bool isVarArg, | ||||||
2660 | const SmallVectorImpl<ISD::OutputArg> &Outs, | ||||||
2661 | const SmallVectorImpl<SDValue> &OutVals, | ||||||
2662 | const SDLoc &dl, SelectionDAG &DAG) const { | ||||||
2663 | MachineFunction &MF = DAG.getMachineFunction(); | ||||||
2664 | Type *RetTy = MF.getFunction().getReturnType(); | ||||||
2665 | |||||||
2666 | bool isABI = (STI.getSmVersion() >= 20); | ||||||
| |||||||
2667 | assert(isABI && "Non-ABI compilation is not supported")((isABI && "Non-ABI compilation is not supported") ? static_cast <void> (0) : __assert_fail ("isABI && \"Non-ABI compilation is not supported\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 2667, __PRETTY_FUNCTION__)); | ||||||
2668 | if (!isABI
| ||||||
2669 | return Chain; | ||||||
2670 | |||||||
2671 | const DataLayout DL = DAG.getDataLayout(); | ||||||
2672 | SmallVector<EVT, 16> VTs; | ||||||
2673 | SmallVector<uint64_t, 16> Offsets; | ||||||
2674 | ComputePTXValueVTs(*this, DL, RetTy, VTs, &Offsets); | ||||||
2675 | assert(VTs.size() == OutVals.size() && "Bad return value decomposition")((VTs.size() == OutVals.size() && "Bad return value decomposition" ) ? static_cast<void> (0) : __assert_fail ("VTs.size() == OutVals.size() && \"Bad return value decomposition\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 2675, __PRETTY_FUNCTION__)); | ||||||
2676 | |||||||
2677 | auto VectorInfo = VectorizePTXValueVTs( | ||||||
2678 | VTs, Offsets, RetTy->isSized() ? DL.getABITypeAlignment(RetTy) : 1); | ||||||
2679 | |||||||
2680 | // PTX Interoperability Guide 3.3(A): [Integer] Values shorter than | ||||||
2681 | // 32-bits are sign extended or zero extended, depending on whether | ||||||
2682 | // they are signed or unsigned types. | ||||||
2683 | bool ExtendIntegerRetVal = | ||||||
2684 | RetTy->isIntegerTy() && DL.getTypeAllocSizeInBits(RetTy) < 32; | ||||||
2685 | |||||||
2686 | SmallVector<SDValue, 6> StoreOperands; | ||||||
2687 | for (unsigned i = 0, e = VTs.size(); i != e; ++i) { | ||||||
2688 | // New load/store. Record chain and offset operands. | ||||||
2689 | if (VectorInfo[i] & PVF_FIRST) { | ||||||
2690 | assert(StoreOperands.empty() && "Orphaned operand list.")((StoreOperands.empty() && "Orphaned operand list.") ? static_cast<void> (0) : __assert_fail ("StoreOperands.empty() && \"Orphaned operand list.\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 2690, __PRETTY_FUNCTION__)); | ||||||
2691 | StoreOperands.push_back(Chain); | ||||||
2692 | StoreOperands.push_back(DAG.getConstant(Offsets[i], dl, MVT::i32)); | ||||||
2693 | } | ||||||
2694 | |||||||
2695 | SDValue RetVal = OutVals[i]; | ||||||
2696 | if (ExtendIntegerRetVal) { | ||||||
2697 | RetVal = DAG.getNode(Outs[i].Flags.isSExt() ? ISD::SIGN_EXTEND | ||||||
2698 | : ISD::ZERO_EXTEND, | ||||||
2699 | dl, MVT::i32, RetVal); | ||||||
2700 | } else if (RetVal.getValueSizeInBits() < 16) { | ||||||
2701 | // Use 16-bit registers for small load-stores as it's the | ||||||
2702 | // smallest general purpose register size supported by NVPTX. | ||||||
2703 | RetVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, RetVal); | ||||||
2704 | } | ||||||
2705 | |||||||
2706 | // Record the value to return. | ||||||
2707 | StoreOperands.push_back(RetVal); | ||||||
2708 | |||||||
2709 | // That's the last element of this store op. | ||||||
2710 | if (VectorInfo[i] & PVF_LAST) { | ||||||
2711 | NVPTXISD::NodeType Op; | ||||||
2712 | unsigned NumElts = StoreOperands.size() - 2; | ||||||
2713 | switch (NumElts) { | ||||||
2714 | case 1: | ||||||
2715 | Op = NVPTXISD::StoreRetval; | ||||||
2716 | break; | ||||||
2717 | case 2: | ||||||
2718 | Op = NVPTXISD::StoreRetvalV2; | ||||||
2719 | break; | ||||||
2720 | case 4: | ||||||
2721 | Op = NVPTXISD::StoreRetvalV4; | ||||||
2722 | break; | ||||||
2723 | default: | ||||||
2724 | llvm_unreachable("Invalid vector info.")::llvm::llvm_unreachable_internal("Invalid vector info.", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 2724); | ||||||
2725 | } | ||||||
2726 | |||||||
2727 | // Adjust type of load/store op if we've extended the scalar | ||||||
2728 | // return value. | ||||||
2729 | EVT TheStoreType = ExtendIntegerRetVal ? MVT::i32 : VTs[i]; | ||||||
2730 | Chain = DAG.getMemIntrinsicNode(Op, dl, DAG.getVTList(MVT::Other), | ||||||
2731 | StoreOperands, TheStoreType, | ||||||
2732 | MachinePointerInfo(), /* Align */ 1, | ||||||
2733 | MachineMemOperand::MOStore); | ||||||
2734 | // Cleanup vector state. | ||||||
2735 | StoreOperands.clear(); | ||||||
2736 | } | ||||||
2737 | } | ||||||
2738 | |||||||
2739 | return DAG.getNode(NVPTXISD::RET_FLAG, dl, MVT::Other, Chain); | ||||||
2740 | } | ||||||
2741 | |||||||
2742 | void NVPTXTargetLowering::LowerAsmOperandForConstraint( | ||||||
2743 | SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops, | ||||||
2744 | SelectionDAG &DAG) const { | ||||||
2745 | if (Constraint.length() > 1) | ||||||
2746 | return; | ||||||
2747 | else | ||||||
2748 | TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); | ||||||
2749 | } | ||||||
2750 | |||||||
2751 | static unsigned getOpcForTextureInstr(unsigned Intrinsic) { | ||||||
2752 | switch (Intrinsic) { | ||||||
2753 | default: | ||||||
2754 | return 0; | ||||||
2755 | |||||||
2756 | case Intrinsic::nvvm_tex_1d_v4f32_s32: | ||||||
2757 | return NVPTXISD::Tex1DFloatS32; | ||||||
2758 | case Intrinsic::nvvm_tex_1d_v4f32_f32: | ||||||
2759 | return NVPTXISD::Tex1DFloatFloat; | ||||||
2760 | case Intrinsic::nvvm_tex_1d_level_v4f32_f32: | ||||||
2761 | return NVPTXISD::Tex1DFloatFloatLevel; | ||||||
2762 | case Intrinsic::nvvm_tex_1d_grad_v4f32_f32: | ||||||
2763 | return NVPTXISD::Tex1DFloatFloatGrad; | ||||||
2764 | case Intrinsic::nvvm_tex_1d_v4s32_s32: | ||||||
2765 | return NVPTXISD::Tex1DS32S32; | ||||||
2766 | case Intrinsic::nvvm_tex_1d_v4s32_f32: | ||||||
2767 | return NVPTXISD::Tex1DS32Float; | ||||||
2768 | case Intrinsic::nvvm_tex_1d_level_v4s32_f32: | ||||||
2769 | return NVPTXISD::Tex1DS32FloatLevel; | ||||||
2770 | case Intrinsic::nvvm_tex_1d_grad_v4s32_f32: | ||||||
2771 | return NVPTXISD::Tex1DS32FloatGrad; | ||||||
2772 | case Intrinsic::nvvm_tex_1d_v4u32_s32: | ||||||
2773 | return NVPTXISD::Tex1DU32S32; | ||||||
2774 | case Intrinsic::nvvm_tex_1d_v4u32_f32: | ||||||
2775 | return NVPTXISD::Tex1DU32Float; | ||||||
2776 | case Intrinsic::nvvm_tex_1d_level_v4u32_f32: | ||||||
2777 | return NVPTXISD::Tex1DU32FloatLevel; | ||||||
2778 | case Intrinsic::nvvm_tex_1d_grad_v4u32_f32: | ||||||
2779 | return NVPTXISD::Tex1DU32FloatGrad; | ||||||
2780 | |||||||
2781 | case Intrinsic::nvvm_tex_1d_array_v4f32_s32: | ||||||
2782 | return NVPTXISD::Tex1DArrayFloatS32; | ||||||
2783 | case Intrinsic::nvvm_tex_1d_array_v4f32_f32: | ||||||
2784 | return NVPTXISD::Tex1DArrayFloatFloat; | ||||||
2785 | case Intrinsic::nvvm_tex_1d_array_level_v4f32_f32: | ||||||
2786 | return NVPTXISD::Tex1DArrayFloatFloatLevel; | ||||||
2787 | case Intrinsic::nvvm_tex_1d_array_grad_v4f32_f32: | ||||||
2788 | return NVPTXISD::Tex1DArrayFloatFloatGrad; | ||||||
2789 | case Intrinsic::nvvm_tex_1d_array_v4s32_s32: | ||||||
2790 | return NVPTXISD::Tex1DArrayS32S32; | ||||||
2791 | case Intrinsic::nvvm_tex_1d_array_v4s32_f32: | ||||||
2792 | return NVPTXISD::Tex1DArrayS32Float; | ||||||
2793 | case Intrinsic::nvvm_tex_1d_array_level_v4s32_f32: | ||||||
2794 | return NVPTXISD::Tex1DArrayS32FloatLevel; | ||||||
2795 | case Intrinsic::nvvm_tex_1d_array_grad_v4s32_f32: | ||||||
2796 | return NVPTXISD::Tex1DArrayS32FloatGrad; | ||||||
2797 | case Intrinsic::nvvm_tex_1d_array_v4u32_s32: | ||||||
2798 | return NVPTXISD::Tex1DArrayU32S32; | ||||||
2799 | case Intrinsic::nvvm_tex_1d_array_v4u32_f32: | ||||||
2800 | return NVPTXISD::Tex1DArrayU32Float; | ||||||
2801 | case Intrinsic::nvvm_tex_1d_array_level_v4u32_f32: | ||||||
2802 | return NVPTXISD::Tex1DArrayU32FloatLevel; | ||||||
2803 | case Intrinsic::nvvm_tex_1d_array_grad_v4u32_f32: | ||||||
2804 | return NVPTXISD::Tex1DArrayU32FloatGrad; | ||||||
2805 | |||||||
2806 | case Intrinsic::nvvm_tex_2d_v4f32_s32: | ||||||
2807 | return NVPTXISD::Tex2DFloatS32; | ||||||
2808 | case Intrinsic::nvvm_tex_2d_v4f32_f32: | ||||||
2809 | return NVPTXISD::Tex2DFloatFloat; | ||||||
2810 | case Intrinsic::nvvm_tex_2d_level_v4f32_f32: | ||||||
2811 | return NVPTXISD::Tex2DFloatFloatLevel; | ||||||
2812 | case Intrinsic::nvvm_tex_2d_grad_v4f32_f32: | ||||||
2813 | return NVPTXISD::Tex2DFloatFloatGrad; | ||||||
2814 | case Intrinsic::nvvm_tex_2d_v4s32_s32: | ||||||
2815 | return NVPTXISD::Tex2DS32S32; | ||||||
2816 | case Intrinsic::nvvm_tex_2d_v4s32_f32: | ||||||
2817 | return NVPTXISD::Tex2DS32Float; | ||||||
2818 | case Intrinsic::nvvm_tex_2d_level_v4s32_f32: | ||||||
2819 | return NVPTXISD::Tex2DS32FloatLevel; | ||||||
2820 | case Intrinsic::nvvm_tex_2d_grad_v4s32_f32: | ||||||
2821 | return NVPTXISD::Tex2DS32FloatGrad; | ||||||
2822 | case Intrinsic::nvvm_tex_2d_v4u32_s32: | ||||||
2823 | return NVPTXISD::Tex2DU32S32; | ||||||
2824 | case Intrinsic::nvvm_tex_2d_v4u32_f32: | ||||||
2825 | return NVPTXISD::Tex2DU32Float; | ||||||
2826 | case Intrinsic::nvvm_tex_2d_level_v4u32_f32: | ||||||
2827 | return NVPTXISD::Tex2DU32FloatLevel; | ||||||
2828 | case Intrinsic::nvvm_tex_2d_grad_v4u32_f32: | ||||||
2829 | return NVPTXISD::Tex2DU32FloatGrad; | ||||||
2830 | |||||||
2831 | case Intrinsic::nvvm_tex_2d_array_v4f32_s32: | ||||||
2832 | return NVPTXISD::Tex2DArrayFloatS32; | ||||||
2833 | case Intrinsic::nvvm_tex_2d_array_v4f32_f32: | ||||||
2834 | return NVPTXISD::Tex2DArrayFloatFloat; | ||||||
2835 | case Intrinsic::nvvm_tex_2d_array_level_v4f32_f32: | ||||||
2836 | return NVPTXISD::Tex2DArrayFloatFloatLevel; | ||||||
2837 | case Intrinsic::nvvm_tex_2d_array_grad_v4f32_f32: | ||||||
2838 | return NVPTXISD::Tex2DArrayFloatFloatGrad; | ||||||
2839 | case Intrinsic::nvvm_tex_2d_array_v4s32_s32: | ||||||
2840 | return NVPTXISD::Tex2DArrayS32S32; | ||||||
2841 | case Intrinsic::nvvm_tex_2d_array_v4s32_f32: | ||||||
2842 | return NVPTXISD::Tex2DArrayS32Float; | ||||||
2843 | case Intrinsic::nvvm_tex_2d_array_level_v4s32_f32: | ||||||
2844 | return NVPTXISD::Tex2DArrayS32FloatLevel; | ||||||
2845 | case Intrinsic::nvvm_tex_2d_array_grad_v4s32_f32: | ||||||
2846 | return NVPTXISD::Tex2DArrayS32FloatGrad; | ||||||
2847 | case Intrinsic::nvvm_tex_2d_array_v4u32_s32: | ||||||
2848 | return NVPTXISD::Tex2DArrayU32S32; | ||||||
2849 | case Intrinsic::nvvm_tex_2d_array_v4u32_f32: | ||||||
2850 | return NVPTXISD::Tex2DArrayU32Float; | ||||||
2851 | case Intrinsic::nvvm_tex_2d_array_level_v4u32_f32: | ||||||
2852 | return NVPTXISD::Tex2DArrayU32FloatLevel; | ||||||
2853 | case Intrinsic::nvvm_tex_2d_array_grad_v4u32_f32: | ||||||
2854 | return NVPTXISD::Tex2DArrayU32FloatGrad; | ||||||
2855 | |||||||
2856 | case Intrinsic::nvvm_tex_3d_v4f32_s32: | ||||||
2857 | return NVPTXISD::Tex3DFloatS32; | ||||||
2858 | case Intrinsic::nvvm_tex_3d_v4f32_f32: | ||||||
2859 | return NVPTXISD::Tex3DFloatFloat; | ||||||
2860 | case Intrinsic::nvvm_tex_3d_level_v4f32_f32: | ||||||
2861 | return NVPTXISD::Tex3DFloatFloatLevel; | ||||||
2862 | case Intrinsic::nvvm_tex_3d_grad_v4f32_f32: | ||||||
2863 | return NVPTXISD::Tex3DFloatFloatGrad; | ||||||
2864 | case Intrinsic::nvvm_tex_3d_v4s32_s32: | ||||||
2865 | return NVPTXISD::Tex3DS32S32; | ||||||
2866 | case Intrinsic::nvvm_tex_3d_v4s32_f32: | ||||||
2867 | return NVPTXISD::Tex3DS32Float; | ||||||
2868 | case Intrinsic::nvvm_tex_3d_level_v4s32_f32: | ||||||
2869 | return NVPTXISD::Tex3DS32FloatLevel; | ||||||
2870 | case Intrinsic::nvvm_tex_3d_grad_v4s32_f32: | ||||||
2871 | return NVPTXISD::Tex3DS32FloatGrad; | ||||||
2872 | case Intrinsic::nvvm_tex_3d_v4u32_s32: | ||||||
2873 | return NVPTXISD::Tex3DU32S32; | ||||||
2874 | case Intrinsic::nvvm_tex_3d_v4u32_f32: | ||||||
2875 | return NVPTXISD::Tex3DU32Float; | ||||||
2876 | case Intrinsic::nvvm_tex_3d_level_v4u32_f32: | ||||||
2877 | return NVPTXISD::Tex3DU32FloatLevel; | ||||||
2878 | case Intrinsic::nvvm_tex_3d_grad_v4u32_f32: | ||||||
2879 | return NVPTXISD::Tex3DU32FloatGrad; | ||||||
2880 | |||||||
2881 | case Intrinsic::nvvm_tex_cube_v4f32_f32: | ||||||
2882 | return NVPTXISD::TexCubeFloatFloat; | ||||||
2883 | case Intrinsic::nvvm_tex_cube_level_v4f32_f32: | ||||||
2884 | return NVPTXISD::TexCubeFloatFloatLevel; | ||||||
2885 | case Intrinsic::nvvm_tex_cube_v4s32_f32: | ||||||
2886 | return NVPTXISD::TexCubeS32Float; | ||||||
2887 | case Intrinsic::nvvm_tex_cube_level_v4s32_f32: | ||||||
2888 | return NVPTXISD::TexCubeS32FloatLevel; | ||||||
2889 | case Intrinsic::nvvm_tex_cube_v4u32_f32: | ||||||
2890 | return NVPTXISD::TexCubeU32Float; | ||||||
2891 | case Intrinsic::nvvm_tex_cube_level_v4u32_f32: | ||||||
2892 | return NVPTXISD::TexCubeU32FloatLevel; | ||||||
2893 | |||||||
2894 | case Intrinsic::nvvm_tex_cube_array_v4f32_f32: | ||||||
2895 | return NVPTXISD::TexCubeArrayFloatFloat; | ||||||
2896 | case Intrinsic::nvvm_tex_cube_array_level_v4f32_f32: | ||||||
2897 | return NVPTXISD::TexCubeArrayFloatFloatLevel; | ||||||
2898 | case Intrinsic::nvvm_tex_cube_array_v4s32_f32: | ||||||
2899 | return NVPTXISD::TexCubeArrayS32Float; | ||||||
2900 | case Intrinsic::nvvm_tex_cube_array_level_v4s32_f32: | ||||||
2901 | return NVPTXISD::TexCubeArrayS32FloatLevel; | ||||||
2902 | case Intrinsic::nvvm_tex_cube_array_v4u32_f32: | ||||||
2903 | return NVPTXISD::TexCubeArrayU32Float; | ||||||
2904 | case Intrinsic::nvvm_tex_cube_array_level_v4u32_f32: | ||||||
2905 | return NVPTXISD::TexCubeArrayU32FloatLevel; | ||||||
2906 | |||||||
2907 | case Intrinsic::nvvm_tld4_r_2d_v4f32_f32: | ||||||
2908 | return NVPTXISD::Tld4R2DFloatFloat; | ||||||
2909 | case Intrinsic::nvvm_tld4_g_2d_v4f32_f32: | ||||||
2910 | return NVPTXISD::Tld4G2DFloatFloat; | ||||||
2911 | case Intrinsic::nvvm_tld4_b_2d_v4f32_f32: | ||||||
2912 | return NVPTXISD::Tld4B2DFloatFloat; | ||||||
2913 | case Intrinsic::nvvm_tld4_a_2d_v4f32_f32: | ||||||
2914 | return NVPTXISD::Tld4A2DFloatFloat; | ||||||
2915 | case Intrinsic::nvvm_tld4_r_2d_v4s32_f32: | ||||||
2916 | return NVPTXISD::Tld4R2DS64Float; | ||||||
2917 | case Intrinsic::nvvm_tld4_g_2d_v4s32_f32: | ||||||
2918 | return NVPTXISD::Tld4G2DS64Float; | ||||||
2919 | case Intrinsic::nvvm_tld4_b_2d_v4s32_f32: | ||||||
2920 | return NVPTXISD::Tld4B2DS64Float; | ||||||
2921 | case Intrinsic::nvvm_tld4_a_2d_v4s32_f32: | ||||||
2922 | return NVPTXISD::Tld4A2DS64Float; | ||||||
2923 | case Intrinsic::nvvm_tld4_r_2d_v4u32_f32: | ||||||
2924 | return NVPTXISD::Tld4R2DU64Float; | ||||||
2925 | case Intrinsic::nvvm_tld4_g_2d_v4u32_f32: | ||||||
2926 | return NVPTXISD::Tld4G2DU64Float; | ||||||
2927 | case Intrinsic::nvvm_tld4_b_2d_v4u32_f32: | ||||||
2928 | return NVPTXISD::Tld4B2DU64Float; | ||||||
2929 | case Intrinsic::nvvm_tld4_a_2d_v4u32_f32: | ||||||
2930 | return NVPTXISD::Tld4A2DU64Float; | ||||||
2931 | |||||||
2932 | case Intrinsic::nvvm_tex_unified_1d_v4f32_s32: | ||||||
2933 | return NVPTXISD::TexUnified1DFloatS32; | ||||||
2934 | case Intrinsic::nvvm_tex_unified_1d_v4f32_f32: | ||||||
2935 | return NVPTXISD::TexUnified1DFloatFloat; | ||||||
2936 | case Intrinsic::nvvm_tex_unified_1d_level_v4f32_f32: | ||||||
2937 | return NVPTXISD::TexUnified1DFloatFloatLevel; | ||||||
2938 | case Intrinsic::nvvm_tex_unified_1d_grad_v4f32_f32: | ||||||
2939 | return NVPTXISD::TexUnified1DFloatFloatGrad; | ||||||
2940 | case Intrinsic::nvvm_tex_unified_1d_v4s32_s32: | ||||||
2941 | return NVPTXISD::TexUnified1DS32S32; | ||||||
2942 | case Intrinsic::nvvm_tex_unified_1d_v4s32_f32: | ||||||
2943 | return NVPTXISD::TexUnified1DS32Float; | ||||||
2944 | case Intrinsic::nvvm_tex_unified_1d_level_v4s32_f32: | ||||||
2945 | return NVPTXISD::TexUnified1DS32FloatLevel; | ||||||
2946 | case Intrinsic::nvvm_tex_unified_1d_grad_v4s32_f32: | ||||||
2947 | return NVPTXISD::TexUnified1DS32FloatGrad; | ||||||
2948 | case Intrinsic::nvvm_tex_unified_1d_v4u32_s32: | ||||||
2949 | return NVPTXISD::TexUnified1DU32S32; | ||||||
2950 | case Intrinsic::nvvm_tex_unified_1d_v4u32_f32: | ||||||
2951 | return NVPTXISD::TexUnified1DU32Float; | ||||||
2952 | case Intrinsic::nvvm_tex_unified_1d_level_v4u32_f32: | ||||||
2953 | return NVPTXISD::TexUnified1DU32FloatLevel; | ||||||
2954 | case Intrinsic::nvvm_tex_unified_1d_grad_v4u32_f32: | ||||||
2955 | return NVPTXISD::TexUnified1DU32FloatGrad; | ||||||
2956 | |||||||
2957 | case Intrinsic::nvvm_tex_unified_1d_array_v4f32_s32: | ||||||
2958 | return NVPTXISD::TexUnified1DArrayFloatS32; | ||||||
2959 | case Intrinsic::nvvm_tex_unified_1d_array_v4f32_f32: | ||||||
2960 | return NVPTXISD::TexUnified1DArrayFloatFloat; | ||||||
2961 | case Intrinsic::nvvm_tex_unified_1d_array_level_v4f32_f32: | ||||||
2962 | return NVPTXISD::TexUnified1DArrayFloatFloatLevel; | ||||||
2963 | case Intrinsic::nvvm_tex_unified_1d_array_grad_v4f32_f32: | ||||||
2964 | return NVPTXISD::TexUnified1DArrayFloatFloatGrad; | ||||||
2965 | case Intrinsic::nvvm_tex_unified_1d_array_v4s32_s32: | ||||||
2966 | return NVPTXISD::TexUnified1DArrayS32S32; | ||||||
2967 | case Intrinsic::nvvm_tex_unified_1d_array_v4s32_f32: | ||||||
2968 | return NVPTXISD::TexUnified1DArrayS32Float; | ||||||
2969 | case Intrinsic::nvvm_tex_unified_1d_array_level_v4s32_f32: | ||||||
2970 | return NVPTXISD::TexUnified1DArrayS32FloatLevel; | ||||||
2971 | case Intrinsic::nvvm_tex_unified_1d_array_grad_v4s32_f32: | ||||||
2972 | return NVPTXISD::TexUnified1DArrayS32FloatGrad; | ||||||
2973 | case Intrinsic::nvvm_tex_unified_1d_array_v4u32_s32: | ||||||
2974 | return NVPTXISD::TexUnified1DArrayU32S32; | ||||||
2975 | case Intrinsic::nvvm_tex_unified_1d_array_v4u32_f32: | ||||||
2976 | return NVPTXISD::TexUnified1DArrayU32Float; | ||||||
2977 | case Intrinsic::nvvm_tex_unified_1d_array_level_v4u32_f32: | ||||||
2978 | return NVPTXISD::TexUnified1DArrayU32FloatLevel; | ||||||
2979 | case Intrinsic::nvvm_tex_unified_1d_array_grad_v4u32_f32: | ||||||
2980 | return NVPTXISD::TexUnified1DArrayU32FloatGrad; | ||||||
2981 | |||||||
2982 | case Intrinsic::nvvm_tex_unified_2d_v4f32_s32: | ||||||
2983 | return NVPTXISD::TexUnified2DFloatS32; | ||||||
2984 | case Intrinsic::nvvm_tex_unified_2d_v4f32_f32: | ||||||
2985 | return NVPTXISD::TexUnified2DFloatFloat; | ||||||
2986 | case Intrinsic::nvvm_tex_unified_2d_level_v4f32_f32: | ||||||
2987 | return NVPTXISD::TexUnified2DFloatFloatLevel; | ||||||
2988 | case Intrinsic::nvvm_tex_unified_2d_grad_v4f32_f32: | ||||||
2989 | return NVPTXISD::TexUnified2DFloatFloatGrad; | ||||||
2990 | case Intrinsic::nvvm_tex_unified_2d_v4s32_s32: | ||||||
2991 | return NVPTXISD::TexUnified2DS32S32; | ||||||
2992 | case Intrinsic::nvvm_tex_unified_2d_v4s32_f32: | ||||||
2993 | return NVPTXISD::TexUnified2DS32Float; | ||||||
2994 | case Intrinsic::nvvm_tex_unified_2d_level_v4s32_f32: | ||||||
2995 | return NVPTXISD::TexUnified2DS32FloatLevel; | ||||||
2996 | case Intrinsic::nvvm_tex_unified_2d_grad_v4s32_f32: | ||||||
2997 | return NVPTXISD::TexUnified2DS32FloatGrad; | ||||||
2998 | case Intrinsic::nvvm_tex_unified_2d_v4u32_s32: | ||||||
2999 | return NVPTXISD::TexUnified2DU32S32; | ||||||
3000 | case Intrinsic::nvvm_tex_unified_2d_v4u32_f32: | ||||||
3001 | return NVPTXISD::TexUnified2DU32Float; | ||||||
3002 | case Intrinsic::nvvm_tex_unified_2d_level_v4u32_f32: | ||||||
3003 | return NVPTXISD::TexUnified2DU32FloatLevel; | ||||||
3004 | case Intrinsic::nvvm_tex_unified_2d_grad_v4u32_f32: | ||||||
3005 | return NVPTXISD::TexUnified2DU32FloatGrad; | ||||||
3006 | |||||||
3007 | case Intrinsic::nvvm_tex_unified_2d_array_v4f32_s32: | ||||||
3008 | return NVPTXISD::TexUnified2DArrayFloatS32; | ||||||
3009 | case Intrinsic::nvvm_tex_unified_2d_array_v4f32_f32: | ||||||
3010 | return NVPTXISD::TexUnified2DArrayFloatFloat; | ||||||
3011 | case Intrinsic::nvvm_tex_unified_2d_array_level_v4f32_f32: | ||||||
3012 | return NVPTXISD::TexUnified2DArrayFloatFloatLevel; | ||||||
3013 | case Intrinsic::nvvm_tex_unified_2d_array_grad_v4f32_f32: | ||||||
3014 | return NVPTXISD::TexUnified2DArrayFloatFloatGrad; | ||||||
3015 | case Intrinsic::nvvm_tex_unified_2d_array_v4s32_s32: | ||||||
3016 | return NVPTXISD::TexUnified2DArrayS32S32; | ||||||
3017 | case Intrinsic::nvvm_tex_unified_2d_array_v4s32_f32: | ||||||
3018 | return NVPTXISD::TexUnified2DArrayS32Float; | ||||||
3019 | case Intrinsic::nvvm_tex_unified_2d_array_level_v4s32_f32: | ||||||
3020 | return NVPTXISD::TexUnified2DArrayS32FloatLevel; | ||||||
3021 | case Intrinsic::nvvm_tex_unified_2d_array_grad_v4s32_f32: | ||||||
3022 | return NVPTXISD::TexUnified2DArrayS32FloatGrad; | ||||||
3023 | case Intrinsic::nvvm_tex_unified_2d_array_v4u32_s32: | ||||||
3024 | return NVPTXISD::TexUnified2DArrayU32S32; | ||||||
3025 | case Intrinsic::nvvm_tex_unified_2d_array_v4u32_f32: | ||||||
3026 | return NVPTXISD::TexUnified2DArrayU32Float; | ||||||
3027 | case Intrinsic::nvvm_tex_unified_2d_array_level_v4u32_f32: | ||||||
3028 | return NVPTXISD::TexUnified2DArrayU32FloatLevel; | ||||||
3029 | case Intrinsic::nvvm_tex_unified_2d_array_grad_v4u32_f32: | ||||||
3030 | return NVPTXISD::TexUnified2DArrayU32FloatGrad; | ||||||
3031 | |||||||
3032 | case Intrinsic::nvvm_tex_unified_3d_v4f32_s32: | ||||||
3033 | return NVPTXISD::TexUnified3DFloatS32; | ||||||
3034 | case Intrinsic::nvvm_tex_unified_3d_v4f32_f32: | ||||||
3035 | return NVPTXISD::TexUnified3DFloatFloat; | ||||||
3036 | case Intrinsic::nvvm_tex_unified_3d_level_v4f32_f32: | ||||||
3037 | return NVPTXISD::TexUnified3DFloatFloatLevel; | ||||||
3038 | case Intrinsic::nvvm_tex_unified_3d_grad_v4f32_f32: | ||||||
3039 | return NVPTXISD::TexUnified3DFloatFloatGrad; | ||||||
3040 | case Intrinsic::nvvm_tex_unified_3d_v4s32_s32: | ||||||
3041 | return NVPTXISD::TexUnified3DS32S32; | ||||||
3042 | case Intrinsic::nvvm_tex_unified_3d_v4s32_f32: | ||||||
3043 | return NVPTXISD::TexUnified3DS32Float; | ||||||
3044 | case Intrinsic::nvvm_tex_unified_3d_level_v4s32_f32: | ||||||
3045 | return NVPTXISD::TexUnified3DS32FloatLevel; | ||||||
3046 | case Intrinsic::nvvm_tex_unified_3d_grad_v4s32_f32: | ||||||
3047 | return NVPTXISD::TexUnified3DS32FloatGrad; | ||||||
3048 | case Intrinsic::nvvm_tex_unified_3d_v4u32_s32: | ||||||
3049 | return NVPTXISD::TexUnified3DU32S32; | ||||||
3050 | case Intrinsic::nvvm_tex_unified_3d_v4u32_f32: | ||||||
3051 | return NVPTXISD::TexUnified3DU32Float; | ||||||
3052 | case Intrinsic::nvvm_tex_unified_3d_level_v4u32_f32: | ||||||
3053 | return NVPTXISD::TexUnified3DU32FloatLevel; | ||||||
3054 | case Intrinsic::nvvm_tex_unified_3d_grad_v4u32_f32: | ||||||
3055 | return NVPTXISD::TexUnified3DU32FloatGrad; | ||||||
3056 | |||||||
3057 | case Intrinsic::nvvm_tex_unified_cube_v4f32_f32: | ||||||
3058 | return NVPTXISD::TexUnifiedCubeFloatFloat; | ||||||
3059 | case Intrinsic::nvvm_tex_unified_cube_level_v4f32_f32: | ||||||
3060 | return NVPTXISD::TexUnifiedCubeFloatFloatLevel; | ||||||
3061 | case Intrinsic::nvvm_tex_unified_cube_v4s32_f32: | ||||||
3062 | return NVPTXISD::TexUnifiedCubeS32Float; | ||||||
3063 | case Intrinsic::nvvm_tex_unified_cube_level_v4s32_f32: | ||||||
3064 | return NVPTXISD::TexUnifiedCubeS32FloatLevel; | ||||||
3065 | case Intrinsic::nvvm_tex_unified_cube_v4u32_f32: | ||||||
3066 | return NVPTXISD::TexUnifiedCubeU32Float; | ||||||
3067 | case Intrinsic::nvvm_tex_unified_cube_level_v4u32_f32: | ||||||
3068 | return NVPTXISD::TexUnifiedCubeU32FloatLevel; | ||||||
3069 | |||||||
3070 | case Intrinsic::nvvm_tex_unified_cube_array_v4f32_f32: | ||||||
3071 | return NVPTXISD::TexUnifiedCubeArrayFloatFloat; | ||||||
3072 | case Intrinsic::nvvm_tex_unified_cube_array_level_v4f32_f32: | ||||||
3073 | return NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel; | ||||||
3074 | case Intrinsic::nvvm_tex_unified_cube_array_v4s32_f32: | ||||||
3075 | return NVPTXISD::TexUnifiedCubeArrayS32Float; | ||||||
3076 | case Intrinsic::nvvm_tex_unified_cube_array_level_v4s32_f32: | ||||||
3077 | return NVPTXISD::TexUnifiedCubeArrayS32FloatLevel; | ||||||
3078 | case Intrinsic::nvvm_tex_unified_cube_array_v4u32_f32: | ||||||
3079 | return NVPTXISD::TexUnifiedCubeArrayU32Float; | ||||||
3080 | case Intrinsic::nvvm_tex_unified_cube_array_level_v4u32_f32: | ||||||
3081 | return NVPTXISD::TexUnifiedCubeArrayU32FloatLevel; | ||||||
3082 | |||||||
3083 | case Intrinsic::nvvm_tld4_unified_r_2d_v4f32_f32: | ||||||
3084 | return NVPTXISD::Tld4UnifiedR2DFloatFloat; | ||||||
3085 | case Intrinsic::nvvm_tld4_unified_g_2d_v4f32_f32: | ||||||
3086 | return NVPTXISD::Tld4UnifiedG2DFloatFloat; | ||||||
3087 | case Intrinsic::nvvm_tld4_unified_b_2d_v4f32_f32: | ||||||
3088 | return NVPTXISD::Tld4UnifiedB2DFloatFloat; | ||||||
3089 | case Intrinsic::nvvm_tld4_unified_a_2d_v4f32_f32: | ||||||
3090 | return NVPTXISD::Tld4UnifiedA2DFloatFloat; | ||||||
3091 | case Intrinsic::nvvm_tld4_unified_r_2d_v4s32_f32: | ||||||
3092 | return NVPTXISD::Tld4UnifiedR2DS64Float; | ||||||
3093 | case Intrinsic::nvvm_tld4_unified_g_2d_v4s32_f32: | ||||||
3094 | return NVPTXISD::Tld4UnifiedG2DS64Float; | ||||||
3095 | case Intrinsic::nvvm_tld4_unified_b_2d_v4s32_f32: | ||||||
3096 | return NVPTXISD::Tld4UnifiedB2DS64Float; | ||||||
3097 | case Intrinsic::nvvm_tld4_unified_a_2d_v4s32_f32: | ||||||
3098 | return NVPTXISD::Tld4UnifiedA2DS64Float; | ||||||
3099 | case Intrinsic::nvvm_tld4_unified_r_2d_v4u32_f32: | ||||||
3100 | return NVPTXISD::Tld4UnifiedR2DU64Float; | ||||||
3101 | case Intrinsic::nvvm_tld4_unified_g_2d_v4u32_f32: | ||||||
3102 | return NVPTXISD::Tld4UnifiedG2DU64Float; | ||||||
3103 | case Intrinsic::nvvm_tld4_unified_b_2d_v4u32_f32: | ||||||
3104 | return NVPTXISD::Tld4UnifiedB2DU64Float; | ||||||
3105 | case Intrinsic::nvvm_tld4_unified_a_2d_v4u32_f32: | ||||||
3106 | return NVPTXISD::Tld4UnifiedA2DU64Float; | ||||||
3107 | } | ||||||
3108 | } | ||||||
3109 | |||||||
3110 | static unsigned getOpcForSurfaceInstr(unsigned Intrinsic) { | ||||||
3111 | switch (Intrinsic) { | ||||||
3112 | default: | ||||||
3113 | return 0; | ||||||
3114 | case Intrinsic::nvvm_suld_1d_i8_clamp: | ||||||
3115 | return NVPTXISD::Suld1DI8Clamp; | ||||||
3116 | case Intrinsic::nvvm_suld_1d_i16_clamp: | ||||||
3117 | return NVPTXISD::Suld1DI16Clamp; | ||||||
3118 | case Intrinsic::nvvm_suld_1d_i32_clamp: | ||||||
3119 | return NVPTXISD::Suld1DI32Clamp; | ||||||
3120 | case Intrinsic::nvvm_suld_1d_i64_clamp: | ||||||
3121 | return NVPTXISD::Suld1DI64Clamp; | ||||||
3122 | case Intrinsic::nvvm_suld_1d_v2i8_clamp: | ||||||
3123 | return NVPTXISD::Suld1DV2I8Clamp; | ||||||
3124 | case Intrinsic::nvvm_suld_1d_v2i16_clamp: | ||||||
3125 | return NVPTXISD::Suld1DV2I16Clamp; | ||||||
3126 | case Intrinsic::nvvm_suld_1d_v2i32_clamp: | ||||||
3127 | return NVPTXISD::Suld1DV2I32Clamp; | ||||||
3128 | case Intrinsic::nvvm_suld_1d_v2i64_clamp: | ||||||
3129 | return NVPTXISD::Suld1DV2I64Clamp; | ||||||
3130 | case Intrinsic::nvvm_suld_1d_v4i8_clamp: | ||||||
3131 | return NVPTXISD::Suld1DV4I8Clamp; | ||||||
3132 | case Intrinsic::nvvm_suld_1d_v4i16_clamp: | ||||||
3133 | return NVPTXISD::Suld1DV4I16Clamp; | ||||||
3134 | case Intrinsic::nvvm_suld_1d_v4i32_clamp: | ||||||
3135 | return NVPTXISD::Suld1DV4I32Clamp; | ||||||
3136 | case Intrinsic::nvvm_suld_1d_array_i8_clamp: | ||||||
3137 | return NVPTXISD::Suld1DArrayI8Clamp; | ||||||
3138 | case Intrinsic::nvvm_suld_1d_array_i16_clamp: | ||||||
3139 | return NVPTXISD::Suld1DArrayI16Clamp; | ||||||
3140 | case Intrinsic::nvvm_suld_1d_array_i32_clamp: | ||||||
3141 | return NVPTXISD::Suld1DArrayI32Clamp; | ||||||
3142 | case Intrinsic::nvvm_suld_1d_array_i64_clamp: | ||||||
3143 | return NVPTXISD::Suld1DArrayI64Clamp; | ||||||
3144 | case Intrinsic::nvvm_suld_1d_array_v2i8_clamp: | ||||||
3145 | return NVPTXISD::Suld1DArrayV2I8Clamp; | ||||||
3146 | case Intrinsic::nvvm_suld_1d_array_v2i16_clamp: | ||||||
3147 | return NVPTXISD::Suld1DArrayV2I16Clamp; | ||||||
3148 | case Intrinsic::nvvm_suld_1d_array_v2i32_clamp: | ||||||
3149 | return NVPTXISD::Suld1DArrayV2I32Clamp; | ||||||
3150 | case Intrinsic::nvvm_suld_1d_array_v2i64_clamp: | ||||||
3151 | return NVPTXISD::Suld1DArrayV2I64Clamp; | ||||||
3152 | case Intrinsic::nvvm_suld_1d_array_v4i8_clamp: | ||||||
3153 | return NVPTXISD::Suld1DArrayV4I8Clamp; | ||||||
3154 | case Intrinsic::nvvm_suld_1d_array_v4i16_clamp: | ||||||
3155 | return NVPTXISD::Suld1DArrayV4I16Clamp; | ||||||
3156 | case Intrinsic::nvvm_suld_1d_array_v4i32_clamp: | ||||||
3157 | return NVPTXISD::Suld1DArrayV4I32Clamp; | ||||||
3158 | case Intrinsic::nvvm_suld_2d_i8_clamp: | ||||||
3159 | return NVPTXISD::Suld2DI8Clamp; | ||||||
3160 | case Intrinsic::nvvm_suld_2d_i16_clamp: | ||||||
3161 | return NVPTXISD::Suld2DI16Clamp; | ||||||
3162 | case Intrinsic::nvvm_suld_2d_i32_clamp: | ||||||
3163 | return NVPTXISD::Suld2DI32Clamp; | ||||||
3164 | case Intrinsic::nvvm_suld_2d_i64_clamp: | ||||||
3165 | return NVPTXISD::Suld2DI64Clamp; | ||||||
3166 | case Intrinsic::nvvm_suld_2d_v2i8_clamp: | ||||||
3167 | return NVPTXISD::Suld2DV2I8Clamp; | ||||||
3168 | case Intrinsic::nvvm_suld_2d_v2i16_clamp: | ||||||
3169 | return NVPTXISD::Suld2DV2I16Clamp; | ||||||
3170 | case Intrinsic::nvvm_suld_2d_v2i32_clamp: | ||||||
3171 | return NVPTXISD::Suld2DV2I32Clamp; | ||||||
3172 | case Intrinsic::nvvm_suld_2d_v2i64_clamp: | ||||||
3173 | return NVPTXISD::Suld2DV2I64Clamp; | ||||||
3174 | case Intrinsic::nvvm_suld_2d_v4i8_clamp: | ||||||
3175 | return NVPTXISD::Suld2DV4I8Clamp; | ||||||
3176 | case Intrinsic::nvvm_suld_2d_v4i16_clamp: | ||||||
3177 | return NVPTXISD::Suld2DV4I16Clamp; | ||||||
3178 | case Intrinsic::nvvm_suld_2d_v4i32_clamp: | ||||||
3179 | return NVPTXISD::Suld2DV4I32Clamp; | ||||||
3180 | case Intrinsic::nvvm_suld_2d_array_i8_clamp: | ||||||
3181 | return NVPTXISD::Suld2DArrayI8Clamp; | ||||||
3182 | case Intrinsic::nvvm_suld_2d_array_i16_clamp: | ||||||
3183 | return NVPTXISD::Suld2DArrayI16Clamp; | ||||||
3184 | case Intrinsic::nvvm_suld_2d_array_i32_clamp: | ||||||
3185 | return NVPTXISD::Suld2DArrayI32Clamp; | ||||||
3186 | case Intrinsic::nvvm_suld_2d_array_i64_clamp: | ||||||
3187 | return NVPTXISD::Suld2DArrayI64Clamp; | ||||||
3188 | case Intrinsic::nvvm_suld_2d_array_v2i8_clamp: | ||||||
3189 | return NVPTXISD::Suld2DArrayV2I8Clamp; | ||||||
3190 | case Intrinsic::nvvm_suld_2d_array_v2i16_clamp: | ||||||
3191 | return NVPTXISD::Suld2DArrayV2I16Clamp; | ||||||
3192 | case Intrinsic::nvvm_suld_2d_array_v2i32_clamp: | ||||||
3193 | return NVPTXISD::Suld2DArrayV2I32Clamp; | ||||||
3194 | case Intrinsic::nvvm_suld_2d_array_v2i64_clamp: | ||||||
3195 | return NVPTXISD::Suld2DArrayV2I64Clamp; | ||||||
3196 | case Intrinsic::nvvm_suld_2d_array_v4i8_clamp: | ||||||
3197 | return NVPTXISD::Suld2DArrayV4I8Clamp; | ||||||
3198 | case Intrinsic::nvvm_suld_2d_array_v4i16_clamp: | ||||||
3199 | return NVPTXISD::Suld2DArrayV4I16Clamp; | ||||||
3200 | case Intrinsic::nvvm_suld_2d_array_v4i32_clamp: | ||||||
3201 | return NVPTXISD::Suld2DArrayV4I32Clamp; | ||||||
3202 | case Intrinsic::nvvm_suld_3d_i8_clamp: | ||||||
3203 | return NVPTXISD::Suld3DI8Clamp; | ||||||
3204 | case Intrinsic::nvvm_suld_3d_i16_clamp: | ||||||
3205 | return NVPTXISD::Suld3DI16Clamp; | ||||||
3206 | case Intrinsic::nvvm_suld_3d_i32_clamp: | ||||||
3207 | return NVPTXISD::Suld3DI32Clamp; | ||||||
3208 | case Intrinsic::nvvm_suld_3d_i64_clamp: | ||||||
3209 | return NVPTXISD::Suld3DI64Clamp; | ||||||
3210 | case Intrinsic::nvvm_suld_3d_v2i8_clamp: | ||||||
3211 | return NVPTXISD::Suld3DV2I8Clamp; | ||||||
3212 | case Intrinsic::nvvm_suld_3d_v2i16_clamp: | ||||||
3213 | return NVPTXISD::Suld3DV2I16Clamp; | ||||||
3214 | case Intrinsic::nvvm_suld_3d_v2i32_clamp: | ||||||
3215 | return NVPTXISD::Suld3DV2I32Clamp; | ||||||
3216 | case Intrinsic::nvvm_suld_3d_v2i64_clamp: | ||||||
3217 | return NVPTXISD::Suld3DV2I64Clamp; | ||||||
3218 | case Intrinsic::nvvm_suld_3d_v4i8_clamp: | ||||||
3219 | return NVPTXISD::Suld3DV4I8Clamp; | ||||||
3220 | case Intrinsic::nvvm_suld_3d_v4i16_clamp: | ||||||
3221 | return NVPTXISD::Suld3DV4I16Clamp; | ||||||
3222 | case Intrinsic::nvvm_suld_3d_v4i32_clamp: | ||||||
3223 | return NVPTXISD::Suld3DV4I32Clamp; | ||||||
3224 | case Intrinsic::nvvm_suld_1d_i8_trap: | ||||||
3225 | return NVPTXISD::Suld1DI8Trap; | ||||||
3226 | case Intrinsic::nvvm_suld_1d_i16_trap: | ||||||
3227 | return NVPTXISD::Suld1DI16Trap; | ||||||
3228 | case Intrinsic::nvvm_suld_1d_i32_trap: | ||||||
3229 | return NVPTXISD::Suld1DI32Trap; | ||||||
3230 | case Intrinsic::nvvm_suld_1d_i64_trap: | ||||||
3231 | return NVPTXISD::Suld1DI64Trap; | ||||||
3232 | case Intrinsic::nvvm_suld_1d_v2i8_trap: | ||||||
3233 | return NVPTXISD::Suld1DV2I8Trap; | ||||||
3234 | case Intrinsic::nvvm_suld_1d_v2i16_trap: | ||||||
3235 | return NVPTXISD::Suld1DV2I16Trap; | ||||||
3236 | case Intrinsic::nvvm_suld_1d_v2i32_trap: | ||||||
3237 | return NVPTXISD::Suld1DV2I32Trap; | ||||||
3238 | case Intrinsic::nvvm_suld_1d_v2i64_trap: | ||||||
3239 | return NVPTXISD::Suld1DV2I64Trap; | ||||||
3240 | case Intrinsic::nvvm_suld_1d_v4i8_trap: | ||||||
3241 | return NVPTXISD::Suld1DV4I8Trap; | ||||||
3242 | case Intrinsic::nvvm_suld_1d_v4i16_trap: | ||||||
3243 | return NVPTXISD::Suld1DV4I16Trap; | ||||||
3244 | case Intrinsic::nvvm_suld_1d_v4i32_trap: | ||||||
3245 | return NVPTXISD::Suld1DV4I32Trap; | ||||||
3246 | case Intrinsic::nvvm_suld_1d_array_i8_trap: | ||||||
3247 | return NVPTXISD::Suld1DArrayI8Trap; | ||||||
3248 | case Intrinsic::nvvm_suld_1d_array_i16_trap: | ||||||
3249 | return NVPTXISD::Suld1DArrayI16Trap; | ||||||
3250 | case Intrinsic::nvvm_suld_1d_array_i32_trap: | ||||||
3251 | return NVPTXISD::Suld1DArrayI32Trap; | ||||||
3252 | case Intrinsic::nvvm_suld_1d_array_i64_trap: | ||||||
3253 | return NVPTXISD::Suld1DArrayI64Trap; | ||||||
3254 | case Intrinsic::nvvm_suld_1d_array_v2i8_trap: | ||||||
3255 | return NVPTXISD::Suld1DArrayV2I8Trap; | ||||||
3256 | case Intrinsic::nvvm_suld_1d_array_v2i16_trap: | ||||||
3257 | return NVPTXISD::Suld1DArrayV2I16Trap; | ||||||
3258 | case Intrinsic::nvvm_suld_1d_array_v2i32_trap: | ||||||
3259 | return NVPTXISD::Suld1DArrayV2I32Trap; | ||||||
3260 | case Intrinsic::nvvm_suld_1d_array_v2i64_trap: | ||||||
3261 | return NVPTXISD::Suld1DArrayV2I64Trap; | ||||||
3262 | case Intrinsic::nvvm_suld_1d_array_v4i8_trap: | ||||||
3263 | return NVPTXISD::Suld1DArrayV4I8Trap; | ||||||
3264 | case Intrinsic::nvvm_suld_1d_array_v4i16_trap: | ||||||
3265 | return NVPTXISD::Suld1DArrayV4I16Trap; | ||||||
3266 | case Intrinsic::nvvm_suld_1d_array_v4i32_trap: | ||||||
3267 | return NVPTXISD::Suld1DArrayV4I32Trap; | ||||||
3268 | case Intrinsic::nvvm_suld_2d_i8_trap: | ||||||
3269 | return NVPTXISD::Suld2DI8Trap; | ||||||
3270 | case Intrinsic::nvvm_suld_2d_i16_trap: | ||||||
3271 | return NVPTXISD::Suld2DI16Trap; | ||||||
3272 | case Intrinsic::nvvm_suld_2d_i32_trap: | ||||||
3273 | return NVPTXISD::Suld2DI32Trap; | ||||||
3274 | case Intrinsic::nvvm_suld_2d_i64_trap: | ||||||
3275 | return NVPTXISD::Suld2DI64Trap; | ||||||
3276 | case Intrinsic::nvvm_suld_2d_v2i8_trap: | ||||||
3277 | return NVPTXISD::Suld2DV2I8Trap; | ||||||
3278 | case Intrinsic::nvvm_suld_2d_v2i16_trap: | ||||||
3279 | return NVPTXISD::Suld2DV2I16Trap; | ||||||
3280 | case Intrinsic::nvvm_suld_2d_v2i32_trap: | ||||||
3281 | return NVPTXISD::Suld2DV2I32Trap; | ||||||
3282 | case Intrinsic::nvvm_suld_2d_v2i64_trap: | ||||||
3283 | return NVPTXISD::Suld2DV2I64Trap; | ||||||
3284 | case Intrinsic::nvvm_suld_2d_v4i8_trap: | ||||||
3285 | return NVPTXISD::Suld2DV4I8Trap; | ||||||
3286 | case Intrinsic::nvvm_suld_2d_v4i16_trap: | ||||||
3287 | return NVPTXISD::Suld2DV4I16Trap; | ||||||
3288 | case Intrinsic::nvvm_suld_2d_v4i32_trap: | ||||||
3289 | return NVPTXISD::Suld2DV4I32Trap; | ||||||
3290 | case Intrinsic::nvvm_suld_2d_array_i8_trap: | ||||||
3291 | return NVPTXISD::Suld2DArrayI8Trap; | ||||||
3292 | case Intrinsic::nvvm_suld_2d_array_i16_trap: | ||||||
3293 | return NVPTXISD::Suld2DArrayI16Trap; | ||||||
3294 | case Intrinsic::nvvm_suld_2d_array_i32_trap: | ||||||
3295 | return NVPTXISD::Suld2DArrayI32Trap; | ||||||
3296 | case Intrinsic::nvvm_suld_2d_array_i64_trap: | ||||||
3297 | return NVPTXISD::Suld2DArrayI64Trap; | ||||||
3298 | case Intrinsic::nvvm_suld_2d_array_v2i8_trap: | ||||||
3299 | return NVPTXISD::Suld2DArrayV2I8Trap; | ||||||
3300 | case Intrinsic::nvvm_suld_2d_array_v2i16_trap: | ||||||
3301 | return NVPTXISD::Suld2DArrayV2I16Trap; | ||||||
3302 | case Intrinsic::nvvm_suld_2d_array_v2i32_trap: | ||||||
3303 | return NVPTXISD::Suld2DArrayV2I32Trap; | ||||||
3304 | case Intrinsic::nvvm_suld_2d_array_v2i64_trap: | ||||||
3305 | return NVPTXISD::Suld2DArrayV2I64Trap; | ||||||
3306 | case Intrinsic::nvvm_suld_2d_array_v4i8_trap: | ||||||
3307 | return NVPTXISD::Suld2DArrayV4I8Trap; | ||||||
3308 | case Intrinsic::nvvm_suld_2d_array_v4i16_trap: | ||||||
3309 | return NVPTXISD::Suld2DArrayV4I16Trap; | ||||||
3310 | case Intrinsic::nvvm_suld_2d_array_v4i32_trap: | ||||||
3311 | return NVPTXISD::Suld2DArrayV4I32Trap; | ||||||
3312 | case Intrinsic::nvvm_suld_3d_i8_trap: | ||||||
3313 | return NVPTXISD::Suld3DI8Trap; | ||||||
3314 | case Intrinsic::nvvm_suld_3d_i16_trap: | ||||||
3315 | return NVPTXISD::Suld3DI16Trap; | ||||||
3316 | case Intrinsic::nvvm_suld_3d_i32_trap: | ||||||
3317 | return NVPTXISD::Suld3DI32Trap; | ||||||
3318 | case Intrinsic::nvvm_suld_3d_i64_trap: | ||||||
3319 | return NVPTXISD::Suld3DI64Trap; | ||||||
3320 | case Intrinsic::nvvm_suld_3d_v2i8_trap: | ||||||
3321 | return NVPTXISD::Suld3DV2I8Trap; | ||||||
3322 | case Intrinsic::nvvm_suld_3d_v2i16_trap: | ||||||
3323 | return NVPTXISD::Suld3DV2I16Trap; | ||||||
3324 | case Intrinsic::nvvm_suld_3d_v2i32_trap: | ||||||
3325 | return NVPTXISD::Suld3DV2I32Trap; | ||||||
3326 | case Intrinsic::nvvm_suld_3d_v2i64_trap: | ||||||
3327 | return NVPTXISD::Suld3DV2I64Trap; | ||||||
3328 | case Intrinsic::nvvm_suld_3d_v4i8_trap: | ||||||
3329 | return NVPTXISD::Suld3DV4I8Trap; | ||||||
3330 | case Intrinsic::nvvm_suld_3d_v4i16_trap: | ||||||
3331 | return NVPTXISD::Suld3DV4I16Trap; | ||||||
3332 | case Intrinsic::nvvm_suld_3d_v4i32_trap: | ||||||
3333 | return NVPTXISD::Suld3DV4I32Trap; | ||||||
3334 | case Intrinsic::nvvm_suld_1d_i8_zero: | ||||||
3335 | return NVPTXISD::Suld1DI8Zero; | ||||||
3336 | case Intrinsic::nvvm_suld_1d_i16_zero: | ||||||
3337 | return NVPTXISD::Suld1DI16Zero; | ||||||
3338 | case Intrinsic::nvvm_suld_1d_i32_zero: | ||||||
3339 | return NVPTXISD::Suld1DI32Zero; | ||||||
3340 | case Intrinsic::nvvm_suld_1d_i64_zero: | ||||||
3341 | return NVPTXISD::Suld1DI64Zero; | ||||||
3342 | case Intrinsic::nvvm_suld_1d_v2i8_zero: | ||||||
3343 | return NVPTXISD::Suld1DV2I8Zero; | ||||||
3344 | case Intrinsic::nvvm_suld_1d_v2i16_zero: | ||||||
3345 | return NVPTXISD::Suld1DV2I16Zero; | ||||||
3346 | case Intrinsic::nvvm_suld_1d_v2i32_zero: | ||||||
3347 | return NVPTXISD::Suld1DV2I32Zero; | ||||||
3348 | case Intrinsic::nvvm_suld_1d_v2i64_zero: | ||||||
3349 | return NVPTXISD::Suld1DV2I64Zero; | ||||||
3350 | case Intrinsic::nvvm_suld_1d_v4i8_zero: | ||||||
3351 | return NVPTXISD::Suld1DV4I8Zero; | ||||||
3352 | case Intrinsic::nvvm_suld_1d_v4i16_zero: | ||||||
3353 | return NVPTXISD::Suld1DV4I16Zero; | ||||||
3354 | case Intrinsic::nvvm_suld_1d_v4i32_zero: | ||||||
3355 | return NVPTXISD::Suld1DV4I32Zero; | ||||||
3356 | case Intrinsic::nvvm_suld_1d_array_i8_zero: | ||||||
3357 | return NVPTXISD::Suld1DArrayI8Zero; | ||||||
3358 | case Intrinsic::nvvm_suld_1d_array_i16_zero: | ||||||
3359 | return NVPTXISD::Suld1DArrayI16Zero; | ||||||
3360 | case Intrinsic::nvvm_suld_1d_array_i32_zero: | ||||||
3361 | return NVPTXISD::Suld1DArrayI32Zero; | ||||||
3362 | case Intrinsic::nvvm_suld_1d_array_i64_zero: | ||||||
3363 | return NVPTXISD::Suld1DArrayI64Zero; | ||||||
3364 | case Intrinsic::nvvm_suld_1d_array_v2i8_zero: | ||||||
3365 | return NVPTXISD::Suld1DArrayV2I8Zero; | ||||||
3366 | case Intrinsic::nvvm_suld_1d_array_v2i16_zero: | ||||||
3367 | return NVPTXISD::Suld1DArrayV2I16Zero; | ||||||
3368 | case Intrinsic::nvvm_suld_1d_array_v2i32_zero: | ||||||
3369 | return NVPTXISD::Suld1DArrayV2I32Zero; | ||||||
3370 | case Intrinsic::nvvm_suld_1d_array_v2i64_zero: | ||||||
3371 | return NVPTXISD::Suld1DArrayV2I64Zero; | ||||||
3372 | case Intrinsic::nvvm_suld_1d_array_v4i8_zero: | ||||||
3373 | return NVPTXISD::Suld1DArrayV4I8Zero; | ||||||
3374 | case Intrinsic::nvvm_suld_1d_array_v4i16_zero: | ||||||
3375 | return NVPTXISD::Suld1DArrayV4I16Zero; | ||||||
3376 | case Intrinsic::nvvm_suld_1d_array_v4i32_zero: | ||||||
3377 | return NVPTXISD::Suld1DArrayV4I32Zero; | ||||||
3378 | case Intrinsic::nvvm_suld_2d_i8_zero: | ||||||
3379 | return NVPTXISD::Suld2DI8Zero; | ||||||
3380 | case Intrinsic::nvvm_suld_2d_i16_zero: | ||||||
3381 | return NVPTXISD::Suld2DI16Zero; | ||||||
3382 | case Intrinsic::nvvm_suld_2d_i32_zero: | ||||||
3383 | return NVPTXISD::Suld2DI32Zero; | ||||||
3384 | case Intrinsic::nvvm_suld_2d_i64_zero: | ||||||
3385 | return NVPTXISD::Suld2DI64Zero; | ||||||
3386 | case Intrinsic::nvvm_suld_2d_v2i8_zero: | ||||||
3387 | return NVPTXISD::Suld2DV2I8Zero; | ||||||
3388 | case Intrinsic::nvvm_suld_2d_v2i16_zero: | ||||||
3389 | return NVPTXISD::Suld2DV2I16Zero; | ||||||
3390 | case Intrinsic::nvvm_suld_2d_v2i32_zero: | ||||||
3391 | return NVPTXISD::Suld2DV2I32Zero; | ||||||
3392 | case Intrinsic::nvvm_suld_2d_v2i64_zero: | ||||||
3393 | return NVPTXISD::Suld2DV2I64Zero; | ||||||
3394 | case Intrinsic::nvvm_suld_2d_v4i8_zero: | ||||||
3395 | return NVPTXISD::Suld2DV4I8Zero; | ||||||
3396 | case Intrinsic::nvvm_suld_2d_v4i16_zero: | ||||||
3397 | return NVPTXISD::Suld2DV4I16Zero; | ||||||
3398 | case Intrinsic::nvvm_suld_2d_v4i32_zero: | ||||||
3399 | return NVPTXISD::Suld2DV4I32Zero; | ||||||
3400 | case Intrinsic::nvvm_suld_2d_array_i8_zero: | ||||||
3401 | return NVPTXISD::Suld2DArrayI8Zero; | ||||||
3402 | case Intrinsic::nvvm_suld_2d_array_i16_zero: | ||||||
3403 | return NVPTXISD::Suld2DArrayI16Zero; | ||||||
3404 | case Intrinsic::nvvm_suld_2d_array_i32_zero: | ||||||
3405 | return NVPTXISD::Suld2DArrayI32Zero; | ||||||
3406 | case Intrinsic::nvvm_suld_2d_array_i64_zero: | ||||||
3407 | return NVPTXISD::Suld2DArrayI64Zero; | ||||||
3408 | case Intrinsic::nvvm_suld_2d_array_v2i8_zero: | ||||||
3409 | return NVPTXISD::Suld2DArrayV2I8Zero; | ||||||
3410 | case Intrinsic::nvvm_suld_2d_array_v2i16_zero: | ||||||
3411 | return NVPTXISD::Suld2DArrayV2I16Zero; | ||||||
3412 | case Intrinsic::nvvm_suld_2d_array_v2i32_zero: | ||||||
3413 | return NVPTXISD::Suld2DArrayV2I32Zero; | ||||||
3414 | case Intrinsic::nvvm_suld_2d_array_v2i64_zero: | ||||||
3415 | return NVPTXISD::Suld2DArrayV2I64Zero; | ||||||
3416 | case Intrinsic::nvvm_suld_2d_array_v4i8_zero: | ||||||
3417 | return NVPTXISD::Suld2DArrayV4I8Zero; | ||||||
3418 | case Intrinsic::nvvm_suld_2d_array_v4i16_zero: | ||||||
3419 | return NVPTXISD::Suld2DArrayV4I16Zero; | ||||||
3420 | case Intrinsic::nvvm_suld_2d_array_v4i32_zero: | ||||||
3421 | return NVPTXISD::Suld2DArrayV4I32Zero; | ||||||
3422 | case Intrinsic::nvvm_suld_3d_i8_zero: | ||||||
3423 | return NVPTXISD::Suld3DI8Zero; | ||||||
3424 | case Intrinsic::nvvm_suld_3d_i16_zero: | ||||||
3425 | return NVPTXISD::Suld3DI16Zero; | ||||||
3426 | case Intrinsic::nvvm_suld_3d_i32_zero: | ||||||
3427 | return NVPTXISD::Suld3DI32Zero; | ||||||
3428 | case Intrinsic::nvvm_suld_3d_i64_zero: | ||||||
3429 | return NVPTXISD::Suld3DI64Zero; | ||||||
3430 | case Intrinsic::nvvm_suld_3d_v2i8_zero: | ||||||
3431 | return NVPTXISD::Suld3DV2I8Zero; | ||||||
3432 | case Intrinsic::nvvm_suld_3d_v2i16_zero: | ||||||
3433 | return NVPTXISD::Suld3DV2I16Zero; | ||||||
3434 | case Intrinsic::nvvm_suld_3d_v2i32_zero: | ||||||
3435 | return NVPTXISD::Suld3DV2I32Zero; | ||||||
3436 | case Intrinsic::nvvm_suld_3d_v2i64_zero: | ||||||
3437 | return NVPTXISD::Suld3DV2I64Zero; | ||||||
3438 | case Intrinsic::nvvm_suld_3d_v4i8_zero: | ||||||
3439 | return NVPTXISD::Suld3DV4I8Zero; | ||||||
3440 | case Intrinsic::nvvm_suld_3d_v4i16_zero: | ||||||
3441 | return NVPTXISD::Suld3DV4I16Zero; | ||||||
3442 | case Intrinsic::nvvm_suld_3d_v4i32_zero: | ||||||
3443 | return NVPTXISD::Suld3DV4I32Zero; | ||||||
3444 | } | ||||||
3445 | } | ||||||
3446 | |||||||
3447 | // llvm.ptx.memcpy.const and llvm.ptx.memmove.const need to be modeled as | ||||||
3448 | // TgtMemIntrinsic | ||||||
3449 | // because we need information that is only available in the "Value" type | ||||||
3450 | // of the destination | ||||||
3451 | // pointer, in particular its address space information. | ||||||
3452 | bool NVPTXTargetLowering::getTgtMemIntrinsic( | ||||||
3453 | IntrinsicInfo &Info, const CallInst &I, | ||||||
3454 | MachineFunction &MF, unsigned Intrinsic) const { | ||||||
3455 | switch (Intrinsic) { | ||||||
3456 | default: | ||||||
3457 | return false; | ||||||
3458 | case Intrinsic::nvvm_match_all_sync_i32p: | ||||||
3459 | case Intrinsic::nvvm_match_all_sync_i64p: | ||||||
3460 | Info.opc = ISD::INTRINSIC_W_CHAIN; | ||||||
3461 | // memVT is bogus. These intrinsics have IntrInaccessibleMemOnly attribute | ||||||
3462 | // in order to model data exchange with other threads, but perform no real | ||||||
3463 | // memory accesses. | ||||||
3464 | Info.memVT = MVT::i1; | ||||||
3465 | |||||||
3466 | // Our result depends on both our and other thread's arguments. | ||||||
3467 | Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore; | ||||||
3468 | return true; | ||||||
3469 | case Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_col: | ||||||
3470 | case Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_row: | ||||||
3471 | case Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_col_stride: | ||||||
3472 | case Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_row_stride: | ||||||
3473 | case Intrinsic::nvvm_wmma_m16n16k16_load_b_f16_col: | ||||||
3474 | case Intrinsic::nvvm_wmma_m16n16k16_load_b_f16_row: | ||||||
3475 | case Intrinsic::nvvm_wmma_m16n16k16_load_b_f16_col_stride: | ||||||
3476 | case Intrinsic::nvvm_wmma_m16n16k16_load_b_f16_row_stride: | ||||||
3477 | case Intrinsic::nvvm_wmma_m32n8k16_load_a_f16_col: | ||||||
3478 | case Intrinsic::nvvm_wmma_m32n8k16_load_a_f16_row: | ||||||
3479 | case Intrinsic::nvvm_wmma_m32n8k16_load_a_f16_col_stride: | ||||||
3480 | case Intrinsic::nvvm_wmma_m32n8k16_load_a_f16_row_stride: | ||||||
3481 | case Intrinsic::nvvm_wmma_m32n8k16_load_b_f16_col: | ||||||
3482 | case Intrinsic::nvvm_wmma_m32n8k16_load_b_f16_row: | ||||||
3483 | case Intrinsic::nvvm_wmma_m32n8k16_load_b_f16_col_stride: | ||||||
3484 | case Intrinsic::nvvm_wmma_m32n8k16_load_b_f16_row_stride: | ||||||
3485 | case Intrinsic::nvvm_wmma_m8n32k16_load_a_f16_col: | ||||||
3486 | case Intrinsic::nvvm_wmma_m8n32k16_load_a_f16_row: | ||||||
3487 | case Intrinsic::nvvm_wmma_m8n32k16_load_a_f16_col_stride: | ||||||
3488 | case Intrinsic::nvvm_wmma_m8n32k16_load_a_f16_row_stride: | ||||||
3489 | case Intrinsic::nvvm_wmma_m8n32k16_load_b_f16_col: | ||||||
3490 | case Intrinsic::nvvm_wmma_m8n32k16_load_b_f16_row: | ||||||
3491 | case Intrinsic::nvvm_wmma_m8n32k16_load_b_f16_col_stride: | ||||||
3492 | case Intrinsic::nvvm_wmma_m8n32k16_load_b_f16_row_stride: { | ||||||
3493 | Info.opc = ISD::INTRINSIC_W_CHAIN; | ||||||
3494 | Info.memVT = MVT::v8f16; | ||||||
3495 | Info.ptrVal = I.getArgOperand(0); | ||||||
3496 | Info.offset = 0; | ||||||
3497 | Info.flags = MachineMemOperand::MOLoad; | ||||||
3498 | Info.align = Align(16); | ||||||
3499 | return true; | ||||||
3500 | } | ||||||
3501 | case Intrinsic::nvvm_wmma_m16n16k16_load_a_s8_col: | ||||||
3502 | case Intrinsic::nvvm_wmma_m16n16k16_load_a_s8_col_stride: | ||||||
3503 | case Intrinsic::nvvm_wmma_m16n16k16_load_a_u8_col_stride: | ||||||
3504 | case Intrinsic::nvvm_wmma_m16n16k16_load_a_u8_col: | ||||||
3505 | case Intrinsic::nvvm_wmma_m16n16k16_load_a_s8_row: | ||||||
3506 | case Intrinsic::nvvm_wmma_m16n16k16_load_a_s8_row_stride: | ||||||
3507 | case Intrinsic::nvvm_wmma_m16n16k16_load_a_u8_row_stride: | ||||||
3508 | case Intrinsic::nvvm_wmma_m16n16k16_load_a_u8_row: | ||||||
3509 | case Intrinsic::nvvm_wmma_m16n16k16_load_b_s8_col: | ||||||
3510 | case Intrinsic::nvvm_wmma_m16n16k16_load_b_s8_col_stride: | ||||||
3511 | case Intrinsic::nvvm_wmma_m16n16k16_load_b_u8_col_stride: | ||||||
3512 | case Intrinsic::nvvm_wmma_m16n16k16_load_b_u8_col: | ||||||
3513 | case Intrinsic::nvvm_wmma_m16n16k16_load_b_s8_row: | ||||||
3514 | case Intrinsic::nvvm_wmma_m16n16k16_load_b_s8_row_stride: | ||||||
3515 | case Intrinsic::nvvm_wmma_m16n16k16_load_b_u8_row_stride: | ||||||
3516 | case Intrinsic::nvvm_wmma_m16n16k16_load_b_u8_row: { | ||||||
3517 | Info.opc = ISD::INTRINSIC_W_CHAIN; | ||||||
3518 | Info.memVT = MVT::v2i32; | ||||||
3519 | Info.ptrVal = I.getArgOperand(0); | ||||||
3520 | Info.offset = 0; | ||||||
3521 | Info.flags = MachineMemOperand::MOLoad; | ||||||
3522 | Info.align = Align(8); | ||||||
3523 | return true; | ||||||
3524 | } | ||||||
3525 | |||||||
3526 | case Intrinsic::nvvm_wmma_m32n8k16_load_a_s8_col: | ||||||
3527 | case Intrinsic::nvvm_wmma_m32n8k16_load_a_s8_col_stride: | ||||||
3528 | case Intrinsic::nvvm_wmma_m32n8k16_load_a_u8_col_stride: | ||||||
3529 | case Intrinsic::nvvm_wmma_m32n8k16_load_a_u8_col: | ||||||
3530 | case Intrinsic::nvvm_wmma_m32n8k16_load_a_s8_row: | ||||||
3531 | case Intrinsic::nvvm_wmma_m32n8k16_load_a_s8_row_stride: | ||||||
3532 | case Intrinsic::nvvm_wmma_m32n8k16_load_a_u8_row_stride: | ||||||
3533 | case Intrinsic::nvvm_wmma_m32n8k16_load_a_u8_row: | ||||||
3534 | |||||||
3535 | case Intrinsic::nvvm_wmma_m8n32k16_load_b_s8_col: | ||||||
3536 | case Intrinsic::nvvm_wmma_m8n32k16_load_b_s8_col_stride: | ||||||
3537 | case Intrinsic::nvvm_wmma_m8n32k16_load_b_u8_col_stride: | ||||||
3538 | case Intrinsic::nvvm_wmma_m8n32k16_load_b_u8_col: | ||||||
3539 | case Intrinsic::nvvm_wmma_m8n32k16_load_b_s8_row: | ||||||
3540 | case Intrinsic::nvvm_wmma_m8n32k16_load_b_s8_row_stride: | ||||||
3541 | case Intrinsic::nvvm_wmma_m8n32k16_load_b_u8_row_stride: | ||||||
3542 | case Intrinsic::nvvm_wmma_m8n32k16_load_b_u8_row: { | ||||||
3543 | Info.opc = ISD::INTRINSIC_W_CHAIN; | ||||||
3544 | Info.memVT = MVT::v4i32; | ||||||
3545 | Info.ptrVal = I.getArgOperand(0); | ||||||
3546 | Info.offset = 0; | ||||||
3547 | Info.flags = MachineMemOperand::MOLoad; | ||||||
3548 | Info.align = Align(16); | ||||||
3549 | return true; | ||||||
3550 | } | ||||||
3551 | |||||||
3552 | case Intrinsic::nvvm_wmma_m32n8k16_load_b_s8_col: | ||||||
3553 | case Intrinsic::nvvm_wmma_m32n8k16_load_b_s8_col_stride: | ||||||
3554 | case Intrinsic::nvvm_wmma_m32n8k16_load_b_u8_col_stride: | ||||||
3555 | case Intrinsic::nvvm_wmma_m32n8k16_load_b_u8_col: | ||||||
3556 | case Intrinsic::nvvm_wmma_m32n8k16_load_b_s8_row: | ||||||
3557 | case Intrinsic::nvvm_wmma_m32n8k16_load_b_s8_row_stride: | ||||||
3558 | case Intrinsic::nvvm_wmma_m32n8k16_load_b_u8_row_stride: | ||||||
3559 | case Intrinsic::nvvm_wmma_m32n8k16_load_b_u8_row: | ||||||
3560 | |||||||
3561 | case Intrinsic::nvvm_wmma_m8n32k16_load_a_s8_col: | ||||||
3562 | case Intrinsic::nvvm_wmma_m8n32k16_load_a_s8_col_stride: | ||||||
3563 | case Intrinsic::nvvm_wmma_m8n32k16_load_a_u8_col_stride: | ||||||
3564 | case Intrinsic::nvvm_wmma_m8n32k16_load_a_u8_col: | ||||||
3565 | case Intrinsic::nvvm_wmma_m8n32k16_load_a_s8_row: | ||||||
3566 | case Intrinsic::nvvm_wmma_m8n32k16_load_a_s8_row_stride: | ||||||
3567 | case Intrinsic::nvvm_wmma_m8n32k16_load_a_u8_row_stride: | ||||||
3568 | case Intrinsic::nvvm_wmma_m8n32k16_load_a_u8_row: | ||||||
3569 | case Intrinsic::nvvm_wmma_m8n8k128_load_a_b1_row: | ||||||
3570 | case Intrinsic::nvvm_wmma_m8n8k128_load_a_b1_row_stride: | ||||||
3571 | case Intrinsic::nvvm_wmma_m8n8k128_load_b_b1_col: | ||||||
3572 | case Intrinsic::nvvm_wmma_m8n8k128_load_b_b1_col_stride: | ||||||
3573 | case Intrinsic::nvvm_wmma_m8n8k32_load_a_s4_row: | ||||||
3574 | case Intrinsic::nvvm_wmma_m8n8k32_load_a_s4_row_stride: | ||||||
3575 | case Intrinsic::nvvm_wmma_m8n8k32_load_a_u4_row_stride: | ||||||
3576 | case Intrinsic::nvvm_wmma_m8n8k32_load_a_u4_row: | ||||||
3577 | case Intrinsic::nvvm_wmma_m8n8k32_load_b_s4_col: | ||||||
3578 | case Intrinsic::nvvm_wmma_m8n8k32_load_b_s4_col_stride: | ||||||
3579 | case Intrinsic::nvvm_wmma_m8n8k32_load_b_u4_col_stride: | ||||||
3580 | case Intrinsic::nvvm_wmma_m8n8k32_load_b_u4_col: { | ||||||
3581 | Info.opc = ISD::INTRINSIC_W_CHAIN; | ||||||
3582 | Info.memVT = MVT::i32; | ||||||
3583 | Info.ptrVal = I.getArgOperand(0); | ||||||
3584 | Info.offset = 0; | ||||||
3585 | Info.flags = MachineMemOperand::MOLoad; | ||||||
3586 | Info.align = Align(4); | ||||||
3587 | return true; | ||||||
3588 | } | ||||||
3589 | |||||||
3590 | case Intrinsic::nvvm_wmma_m16n16k16_load_c_f16_col: | ||||||
3591 | case Intrinsic::nvvm_wmma_m16n16k16_load_c_f16_row: | ||||||
3592 | case Intrinsic::nvvm_wmma_m16n16k16_load_c_f16_col_stride: | ||||||
3593 | case Intrinsic::nvvm_wmma_m16n16k16_load_c_f16_row_stride: | ||||||
3594 | case Intrinsic::nvvm_wmma_m32n8k16_load_c_f16_col: | ||||||
3595 | case Intrinsic::nvvm_wmma_m32n8k16_load_c_f16_row: | ||||||
3596 | case Intrinsic::nvvm_wmma_m32n8k16_load_c_f16_col_stride: | ||||||
3597 | case Intrinsic::nvvm_wmma_m32n8k16_load_c_f16_row_stride: | ||||||
3598 | case Intrinsic::nvvm_wmma_m8n32k16_load_c_f16_col: | ||||||
3599 | case Intrinsic::nvvm_wmma_m8n32k16_load_c_f16_row: | ||||||
3600 | case Intrinsic::nvvm_wmma_m8n32k16_load_c_f16_col_stride: | ||||||
3601 | case Intrinsic::nvvm_wmma_m8n32k16_load_c_f16_row_stride: { | ||||||
3602 | Info.opc = ISD::INTRINSIC_W_CHAIN; | ||||||
3603 | Info.memVT = MVT::v4f16; | ||||||
3604 | Info.ptrVal = I.getArgOperand(0); | ||||||
3605 | Info.offset = 0; | ||||||
3606 | Info.flags = MachineMemOperand::MOLoad; | ||||||
3607 | Info.align = Align(16); | ||||||
3608 | return true; | ||||||
3609 | } | ||||||
3610 | |||||||
3611 | case Intrinsic::nvvm_wmma_m16n16k16_load_c_f32_col: | ||||||
3612 | case Intrinsic::nvvm_wmma_m16n16k16_load_c_f32_row: | ||||||
3613 | case Intrinsic::nvvm_wmma_m16n16k16_load_c_f32_col_stride: | ||||||
3614 | case Intrinsic::nvvm_wmma_m16n16k16_load_c_f32_row_stride: | ||||||
3615 | case Intrinsic::nvvm_wmma_m32n8k16_load_c_f32_col: | ||||||
3616 | case Intrinsic::nvvm_wmma_m32n8k16_load_c_f32_row: | ||||||
3617 | case Intrinsic::nvvm_wmma_m32n8k16_load_c_f32_col_stride: | ||||||
3618 | case Intrinsic::nvvm_wmma_m32n8k16_load_c_f32_row_stride: | ||||||
3619 | case Intrinsic::nvvm_wmma_m8n32k16_load_c_f32_col: | ||||||
3620 | case Intrinsic::nvvm_wmma_m8n32k16_load_c_f32_row: | ||||||
3621 | case Intrinsic::nvvm_wmma_m8n32k16_load_c_f32_col_stride: | ||||||
3622 | case Intrinsic::nvvm_wmma_m8n32k16_load_c_f32_row_stride: { | ||||||
3623 | Info.opc = ISD::INTRINSIC_W_CHAIN; | ||||||
3624 | Info.memVT = MVT::v8f32; | ||||||
3625 | Info.ptrVal = I.getArgOperand(0); | ||||||
3626 | Info.offset = 0; | ||||||
3627 | Info.flags = MachineMemOperand::MOLoad; | ||||||
3628 | Info.align = Align(16); | ||||||
3629 | return true; | ||||||
3630 | } | ||||||
3631 | |||||||
3632 | case Intrinsic::nvvm_wmma_m16n16k16_load_c_s32_col: | ||||||
3633 | case Intrinsic::nvvm_wmma_m16n16k16_load_c_s32_col_stride: | ||||||
3634 | case Intrinsic::nvvm_wmma_m16n16k16_load_c_s32_row: | ||||||
3635 | case Intrinsic::nvvm_wmma_m16n16k16_load_c_s32_row_stride: | ||||||
3636 | case Intrinsic::nvvm_wmma_m32n8k16_load_c_s32_col: | ||||||
3637 | case Intrinsic::nvvm_wmma_m32n8k16_load_c_s32_col_stride: | ||||||
3638 | case Intrinsic::nvvm_wmma_m32n8k16_load_c_s32_row: | ||||||
3639 | case Intrinsic::nvvm_wmma_m32n8k16_load_c_s32_row_stride: | ||||||
3640 | case Intrinsic::nvvm_wmma_m8n32k16_load_c_s32_col: | ||||||
3641 | case Intrinsic::nvvm_wmma_m8n32k16_load_c_s32_col_stride: | ||||||
3642 | case Intrinsic::nvvm_wmma_m8n32k16_load_c_s32_row: | ||||||
3643 | case Intrinsic::nvvm_wmma_m8n32k16_load_c_s32_row_stride: { | ||||||
3644 | Info.opc = ISD::INTRINSIC_W_CHAIN; | ||||||
3645 | Info.memVT = MVT::v8i32; | ||||||
3646 | Info.ptrVal = I.getArgOperand(0); | ||||||
3647 | Info.offset = 0; | ||||||
3648 | Info.flags = MachineMemOperand::MOLoad; | ||||||
3649 | Info.align = Align(16); | ||||||
3650 | return true; | ||||||
3651 | } | ||||||
3652 | |||||||
3653 | case Intrinsic::nvvm_wmma_m8n8k128_load_c_s32_col: | ||||||
3654 | case Intrinsic::nvvm_wmma_m8n8k128_load_c_s32_col_stride: | ||||||
3655 | case Intrinsic::nvvm_wmma_m8n8k128_load_c_s32_row: | ||||||
3656 | case Intrinsic::nvvm_wmma_m8n8k128_load_c_s32_row_stride: | ||||||
3657 | case Intrinsic::nvvm_wmma_m8n8k32_load_c_s32_col: | ||||||
3658 | case Intrinsic::nvvm_wmma_m8n8k32_load_c_s32_col_stride: | ||||||
3659 | case Intrinsic::nvvm_wmma_m8n8k32_load_c_s32_row: | ||||||
3660 | case Intrinsic::nvvm_wmma_m8n8k32_load_c_s32_row_stride: { | ||||||
3661 | Info.opc = ISD::INTRINSIC_W_CHAIN; | ||||||
3662 | Info.memVT = MVT::v2i32; | ||||||
3663 | Info.ptrVal = I.getArgOperand(0); | ||||||
3664 | Info.offset = 0; | ||||||
3665 | Info.flags = MachineMemOperand::MOLoad; | ||||||
3666 | Info.align = Align(8); | ||||||
3667 | return true; | ||||||
3668 | } | ||||||
3669 | |||||||
3670 | case Intrinsic::nvvm_wmma_m16n16k16_store_d_f16_col: | ||||||
3671 | case Intrinsic::nvvm_wmma_m16n16k16_store_d_f16_row: | ||||||
3672 | case Intrinsic::nvvm_wmma_m16n16k16_store_d_f16_col_stride: | ||||||
3673 | case Intrinsic::nvvm_wmma_m16n16k16_store_d_f16_row_stride: | ||||||
3674 | case Intrinsic::nvvm_wmma_m32n8k16_store_d_f16_col: | ||||||
3675 | case Intrinsic::nvvm_wmma_m32n8k16_store_d_f16_row: | ||||||
3676 | case Intrinsic::nvvm_wmma_m32n8k16_store_d_f16_col_stride: | ||||||
3677 | case Intrinsic::nvvm_wmma_m32n8k16_store_d_f16_row_stride: | ||||||
3678 | case Intrinsic::nvvm_wmma_m8n32k16_store_d_f16_col: | ||||||
3679 | case Intrinsic::nvvm_wmma_m8n32k16_store_d_f16_row: | ||||||
3680 | case Intrinsic::nvvm_wmma_m8n32k16_store_d_f16_col_stride: | ||||||
3681 | case Intrinsic::nvvm_wmma_m8n32k16_store_d_f16_row_stride: { | ||||||
3682 | Info.opc = ISD::INTRINSIC_VOID; | ||||||
3683 | Info.memVT = MVT::v4f16; | ||||||
3684 | Info.ptrVal = I.getArgOperand(0); | ||||||
3685 | Info.offset = 0; | ||||||
3686 | Info.flags = MachineMemOperand::MOStore; | ||||||
3687 | Info.align = Align(16); | ||||||
3688 | return true; | ||||||
3689 | } | ||||||
3690 | |||||||
3691 | case Intrinsic::nvvm_wmma_m16n16k16_store_d_f32_col: | ||||||
3692 | case Intrinsic::nvvm_wmma_m16n16k16_store_d_f32_row: | ||||||
3693 | case Intrinsic::nvvm_wmma_m16n16k16_store_d_f32_col_stride: | ||||||
3694 | case Intrinsic::nvvm_wmma_m16n16k16_store_d_f32_row_stride: | ||||||
3695 | case Intrinsic::nvvm_wmma_m32n8k16_store_d_f32_col: | ||||||
3696 | case Intrinsic::nvvm_wmma_m32n8k16_store_d_f32_row: | ||||||
3697 | case Intrinsic::nvvm_wmma_m32n8k16_store_d_f32_col_stride: | ||||||
3698 | case Intrinsic::nvvm_wmma_m32n8k16_store_d_f32_row_stride: | ||||||
3699 | case Intrinsic::nvvm_wmma_m8n32k16_store_d_f32_col: | ||||||
3700 | case Intrinsic::nvvm_wmma_m8n32k16_store_d_f32_row: | ||||||
3701 | case Intrinsic::nvvm_wmma_m8n32k16_store_d_f32_col_stride: | ||||||
3702 | case Intrinsic::nvvm_wmma_m8n32k16_store_d_f32_row_stride: { | ||||||
3703 | Info.opc = ISD::INTRINSIC_VOID; | ||||||
3704 | Info.memVT = MVT::v8f32; | ||||||
3705 | Info.ptrVal = I.getArgOperand(0); | ||||||
3706 | Info.offset = 0; | ||||||
3707 | Info.flags = MachineMemOperand::MOStore; | ||||||
3708 | Info.align = Align(16); | ||||||
3709 | return true; | ||||||
3710 | } | ||||||
3711 | |||||||
3712 | case Intrinsic::nvvm_wmma_m16n16k16_store_d_s32_col: | ||||||
3713 | case Intrinsic::nvvm_wmma_m16n16k16_store_d_s32_col_stride: | ||||||
3714 | case Intrinsic::nvvm_wmma_m16n16k16_store_d_s32_row: | ||||||
3715 | case Intrinsic::nvvm_wmma_m16n16k16_store_d_s32_row_stride: | ||||||
3716 | case Intrinsic::nvvm_wmma_m32n8k16_store_d_s32_col: | ||||||
3717 | case Intrinsic::nvvm_wmma_m32n8k16_store_d_s32_col_stride: | ||||||
3718 | case Intrinsic::nvvm_wmma_m32n8k16_store_d_s32_row: | ||||||
3719 | case Intrinsic::nvvm_wmma_m32n8k16_store_d_s32_row_stride: | ||||||
3720 | case Intrinsic::nvvm_wmma_m8n32k16_store_d_s32_col: | ||||||
3721 | case Intrinsic::nvvm_wmma_m8n32k16_store_d_s32_col_stride: | ||||||
3722 | case Intrinsic::nvvm_wmma_m8n32k16_store_d_s32_row: | ||||||
3723 | case Intrinsic::nvvm_wmma_m8n32k16_store_d_s32_row_stride: { | ||||||
3724 | Info.opc = ISD::INTRINSIC_VOID; | ||||||
3725 | Info.memVT = MVT::v8i32; | ||||||
3726 | Info.ptrVal = I.getArgOperand(0); | ||||||
3727 | Info.offset = 0; | ||||||
3728 | Info.flags = MachineMemOperand::MOStore; | ||||||
3729 | Info.align = Align(16); | ||||||
3730 | return true; | ||||||
3731 | } | ||||||
3732 | |||||||
3733 | case Intrinsic::nvvm_wmma_m8n8k128_store_d_s32_col: | ||||||
3734 | case Intrinsic::nvvm_wmma_m8n8k128_store_d_s32_col_stride: | ||||||
3735 | case Intrinsic::nvvm_wmma_m8n8k128_store_d_s32_row: | ||||||
3736 | case Intrinsic::nvvm_wmma_m8n8k128_store_d_s32_row_stride: | ||||||
3737 | case Intrinsic::nvvm_wmma_m8n8k32_store_d_s32_col: | ||||||
3738 | case Intrinsic::nvvm_wmma_m8n8k32_store_d_s32_col_stride: | ||||||
3739 | case Intrinsic::nvvm_wmma_m8n8k32_store_d_s32_row: | ||||||
3740 | case Intrinsic::nvvm_wmma_m8n8k32_store_d_s32_row_stride: { | ||||||
3741 | Info.opc = ISD::INTRINSIC_VOID; | ||||||
3742 | Info.memVT = MVT::v2i32; | ||||||
3743 | Info.ptrVal = I.getArgOperand(0); | ||||||
3744 | Info.offset = 0; | ||||||
3745 | Info.flags = MachineMemOperand::MOStore; | ||||||
3746 | Info.align = Align(8); | ||||||
3747 | return true; | ||||||
3748 | } | ||||||
3749 | |||||||
3750 | case Intrinsic::nvvm_atomic_load_inc_32: | ||||||
3751 | case Intrinsic::nvvm_atomic_load_dec_32: | ||||||
3752 | |||||||
3753 | case Intrinsic::nvvm_atomic_add_gen_f_cta: | ||||||
3754 | case Intrinsic::nvvm_atomic_add_gen_f_sys: | ||||||
3755 | case Intrinsic::nvvm_atomic_add_gen_i_cta: | ||||||
3756 | case Intrinsic::nvvm_atomic_add_gen_i_sys: | ||||||
3757 | case Intrinsic::nvvm_atomic_and_gen_i_cta: | ||||||
3758 | case Intrinsic::nvvm_atomic_and_gen_i_sys: | ||||||
3759 | case Intrinsic::nvvm_atomic_cas_gen_i_cta: | ||||||
3760 | case Intrinsic::nvvm_atomic_cas_gen_i_sys: | ||||||
3761 | case Intrinsic::nvvm_atomic_dec_gen_i_cta: | ||||||
3762 | case Intrinsic::nvvm_atomic_dec_gen_i_sys: | ||||||
3763 | case Intrinsic::nvvm_atomic_inc_gen_i_cta: | ||||||
3764 | case Intrinsic::nvvm_atomic_inc_gen_i_sys: | ||||||
3765 | case Intrinsic::nvvm_atomic_max_gen_i_cta: | ||||||
3766 | case Intrinsic::nvvm_atomic_max_gen_i_sys: | ||||||
3767 | case Intrinsic::nvvm_atomic_min_gen_i_cta: | ||||||
3768 | case Intrinsic::nvvm_atomic_min_gen_i_sys: | ||||||
3769 | case Intrinsic::nvvm_atomic_or_gen_i_cta: | ||||||
3770 | case Intrinsic::nvvm_atomic_or_gen_i_sys: | ||||||
3771 | case Intrinsic::nvvm_atomic_exch_gen_i_cta: | ||||||
3772 | case Intrinsic::nvvm_atomic_exch_gen_i_sys: | ||||||
3773 | case Intrinsic::nvvm_atomic_xor_gen_i_cta: | ||||||
3774 | case Intrinsic::nvvm_atomic_xor_gen_i_sys: { | ||||||
3775 | auto &DL = I.getModule()->getDataLayout(); | ||||||
3776 | Info.opc = ISD::INTRINSIC_W_CHAIN; | ||||||
3777 | Info.memVT = getValueType(DL, I.getType()); | ||||||
3778 | Info.ptrVal = I.getArgOperand(0); | ||||||
3779 | Info.offset = 0; | ||||||
3780 | Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore; | ||||||
3781 | Info.align.reset(); | ||||||
3782 | return true; | ||||||
3783 | } | ||||||
3784 | |||||||
3785 | case Intrinsic::nvvm_ldu_global_i: | ||||||
3786 | case Intrinsic::nvvm_ldu_global_f: | ||||||
3787 | case Intrinsic::nvvm_ldu_global_p: { | ||||||
3788 | auto &DL = I.getModule()->getDataLayout(); | ||||||
3789 | Info.opc = ISD::INTRINSIC_W_CHAIN; | ||||||
3790 | if (Intrinsic == Intrinsic::nvvm_ldu_global_i) | ||||||
3791 | Info.memVT = getValueType(DL, I.getType()); | ||||||
3792 | else if(Intrinsic == Intrinsic::nvvm_ldu_global_p) | ||||||
3793 | Info.memVT = getPointerTy(DL); | ||||||
3794 | else | ||||||
3795 | Info.memVT = getValueType(DL, I.getType()); | ||||||
3796 | Info.ptrVal = I.getArgOperand(0); | ||||||
3797 | Info.offset = 0; | ||||||
3798 | Info.flags = MachineMemOperand::MOLoad; | ||||||
3799 | Info.align = | ||||||
3800 | MaybeAlign(cast<ConstantInt>(I.getArgOperand(1))->getZExtValue()); | ||||||
3801 | |||||||
3802 | return true; | ||||||
3803 | } | ||||||
3804 | case Intrinsic::nvvm_ldg_global_i: | ||||||
3805 | case Intrinsic::nvvm_ldg_global_f: | ||||||
3806 | case Intrinsic::nvvm_ldg_global_p: { | ||||||
3807 | auto &DL = I.getModule()->getDataLayout(); | ||||||
3808 | |||||||
3809 | Info.opc = ISD::INTRINSIC_W_CHAIN; | ||||||
3810 | if (Intrinsic == Intrinsic::nvvm_ldg_global_i) | ||||||
3811 | Info.memVT = getValueType(DL, I.getType()); | ||||||
3812 | else if(Intrinsic == Intrinsic::nvvm_ldg_global_p) | ||||||
3813 | Info.memVT = getPointerTy(DL); | ||||||
3814 | else | ||||||
3815 | Info.memVT = getValueType(DL, I.getType()); | ||||||
3816 | Info.ptrVal = I.getArgOperand(0); | ||||||
3817 | Info.offset = 0; | ||||||
3818 | Info.flags = MachineMemOperand::MOLoad; | ||||||
3819 | Info.align = | ||||||
3820 | MaybeAlign(cast<ConstantInt>(I.getArgOperand(1))->getZExtValue()); | ||||||
3821 | |||||||
3822 | return true; | ||||||
3823 | } | ||||||
3824 | |||||||
3825 | case Intrinsic::nvvm_tex_1d_v4f32_s32: | ||||||
3826 | case Intrinsic::nvvm_tex_1d_v4f32_f32: | ||||||
3827 | case Intrinsic::nvvm_tex_1d_level_v4f32_f32: | ||||||
3828 | case Intrinsic::nvvm_tex_1d_grad_v4f32_f32: | ||||||
3829 | case Intrinsic::nvvm_tex_1d_array_v4f32_s32: | ||||||
3830 | case Intrinsic::nvvm_tex_1d_array_v4f32_f32: | ||||||
3831 | case Intrinsic::nvvm_tex_1d_array_level_v4f32_f32: | ||||||
3832 | case Intrinsic::nvvm_tex_1d_array_grad_v4f32_f32: | ||||||
3833 | case Intrinsic::nvvm_tex_2d_v4f32_s32: | ||||||
3834 | case Intrinsic::nvvm_tex_2d_v4f32_f32: | ||||||
3835 | case Intrinsic::nvvm_tex_2d_level_v4f32_f32: | ||||||
3836 | case Intrinsic::nvvm_tex_2d_grad_v4f32_f32: | ||||||
3837 | case Intrinsic::nvvm_tex_2d_array_v4f32_s32: | ||||||
3838 | case Intrinsic::nvvm_tex_2d_array_v4f32_f32: | ||||||
3839 | case Intrinsic::nvvm_tex_2d_array_level_v4f32_f32: | ||||||
3840 | case Intrinsic::nvvm_tex_2d_array_grad_v4f32_f32: | ||||||
3841 | case Intrinsic::nvvm_tex_3d_v4f32_s32: | ||||||
3842 | case Intrinsic::nvvm_tex_3d_v4f32_f32: | ||||||
3843 | case Intrinsic::nvvm_tex_3d_level_v4f32_f32: | ||||||
3844 | case Intrinsic::nvvm_tex_3d_grad_v4f32_f32: | ||||||
3845 | case Intrinsic::nvvm_tex_cube_v4f32_f32: | ||||||
3846 | case Intrinsic::nvvm_tex_cube_level_v4f32_f32: | ||||||
3847 | case Intrinsic::nvvm_tex_cube_array_v4f32_f32: | ||||||
3848 | case Intrinsic::nvvm_tex_cube_array_level_v4f32_f32: | ||||||
3849 | case Intrinsic::nvvm_tld4_r_2d_v4f32_f32: | ||||||
3850 | case Intrinsic::nvvm_tld4_g_2d_v4f32_f32: | ||||||
3851 | case Intrinsic::nvvm_tld4_b_2d_v4f32_f32: | ||||||
3852 | case Intrinsic::nvvm_tld4_a_2d_v4f32_f32: | ||||||
3853 | case Intrinsic::nvvm_tex_unified_1d_v4f32_s32: | ||||||
3854 | case Intrinsic::nvvm_tex_unified_1d_v4f32_f32: | ||||||
3855 | case Intrinsic::nvvm_tex_unified_1d_level_v4f32_f32: | ||||||
3856 | case Intrinsic::nvvm_tex_unified_1d_grad_v4f32_f32: | ||||||
3857 | case Intrinsic::nvvm_tex_unified_1d_array_v4f32_s32: | ||||||
3858 | case Intrinsic::nvvm_tex_unified_1d_array_v4f32_f32: | ||||||
3859 | case Intrinsic::nvvm_tex_unified_1d_array_level_v4f32_f32: | ||||||
3860 | case Intrinsic::nvvm_tex_unified_1d_array_grad_v4f32_f32: | ||||||
3861 | case Intrinsic::nvvm_tex_unified_2d_v4f32_s32: | ||||||
3862 | case Intrinsic::nvvm_tex_unified_2d_v4f32_f32: | ||||||
3863 | case Intrinsic::nvvm_tex_unified_2d_level_v4f32_f32: | ||||||
3864 | case Intrinsic::nvvm_tex_unified_2d_grad_v4f32_f32: | ||||||
3865 | case Intrinsic::nvvm_tex_unified_2d_array_v4f32_s32: | ||||||
3866 | case Intrinsic::nvvm_tex_unified_2d_array_v4f32_f32: | ||||||
3867 | case Intrinsic::nvvm_tex_unified_2d_array_level_v4f32_f32: | ||||||
3868 | case Intrinsic::nvvm_tex_unified_2d_array_grad_v4f32_f32: | ||||||
3869 | case Intrinsic::nvvm_tex_unified_3d_v4f32_s32: | ||||||
3870 | case Intrinsic::nvvm_tex_unified_3d_v4f32_f32: | ||||||
3871 | case Intrinsic::nvvm_tex_unified_3d_level_v4f32_f32: | ||||||
3872 | case Intrinsic::nvvm_tex_unified_3d_grad_v4f32_f32: | ||||||
3873 | case Intrinsic::nvvm_tex_unified_cube_v4f32_f32: | ||||||
3874 | case Intrinsic::nvvm_tex_unified_cube_level_v4f32_f32: | ||||||
3875 | case Intrinsic::nvvm_tex_unified_cube_array_v4f32_f32: | ||||||
3876 | case Intrinsic::nvvm_tex_unified_cube_array_level_v4f32_f32: | ||||||
3877 | case Intrinsic::nvvm_tld4_unified_r_2d_v4f32_f32: | ||||||
3878 | case Intrinsic::nvvm_tld4_unified_g_2d_v4f32_f32: | ||||||
3879 | case Intrinsic::nvvm_tld4_unified_b_2d_v4f32_f32: | ||||||
3880 | case Intrinsic::nvvm_tld4_unified_a_2d_v4f32_f32: | ||||||
3881 | Info.opc = getOpcForTextureInstr(Intrinsic); | ||||||
3882 | Info.memVT = MVT::v4f32; | ||||||
3883 | Info.ptrVal = nullptr; | ||||||
3884 | Info.offset = 0; | ||||||
3885 | Info.flags = MachineMemOperand::MOLoad; | ||||||
3886 | Info.align = Align(16); | ||||||
3887 | return true; | ||||||
3888 | |||||||
3889 | case Intrinsic::nvvm_tex_1d_v4s32_s32: | ||||||
3890 | case Intrinsic::nvvm_tex_1d_v4s32_f32: | ||||||
3891 | case Intrinsic::nvvm_tex_1d_level_v4s32_f32: | ||||||
3892 | case Intrinsic::nvvm_tex_1d_grad_v4s32_f32: | ||||||
3893 | case Intrinsic::nvvm_tex_1d_array_v4s32_s32: | ||||||
3894 | case Intrinsic::nvvm_tex_1d_array_v4s32_f32: | ||||||
3895 | case Intrinsic::nvvm_tex_1d_array_level_v4s32_f32: | ||||||
3896 | case Intrinsic::nvvm_tex_1d_array_grad_v4s32_f32: | ||||||
3897 | case Intrinsic::nvvm_tex_2d_v4s32_s32: | ||||||
3898 | case Intrinsic::nvvm_tex_2d_v4s32_f32: | ||||||
3899 | case Intrinsic::nvvm_tex_2d_level_v4s32_f32: | ||||||
3900 | case Intrinsic::nvvm_tex_2d_grad_v4s32_f32: | ||||||
3901 | case Intrinsic::nvvm_tex_2d_array_v4s32_s32: | ||||||
3902 | case Intrinsic::nvvm_tex_2d_array_v4s32_f32: | ||||||
3903 | case Intrinsic::nvvm_tex_2d_array_level_v4s32_f32: | ||||||
3904 | case Intrinsic::nvvm_tex_2d_array_grad_v4s32_f32: | ||||||
3905 | case Intrinsic::nvvm_tex_3d_v4s32_s32: | ||||||
3906 | case Intrinsic::nvvm_tex_3d_v4s32_f32: | ||||||
3907 | case Intrinsic::nvvm_tex_3d_level_v4s32_f32: | ||||||
3908 | case Intrinsic::nvvm_tex_3d_grad_v4s32_f32: | ||||||
3909 | case Intrinsic::nvvm_tex_cube_v4s32_f32: | ||||||
3910 | case Intrinsic::nvvm_tex_cube_level_v4s32_f32: | ||||||
3911 | case Intrinsic::nvvm_tex_cube_array_v4s32_f32: | ||||||
3912 | case Intrinsic::nvvm_tex_cube_array_level_v4s32_f32: | ||||||
3913 | case Intrinsic::nvvm_tex_cube_v4u32_f32: | ||||||
3914 | case Intrinsic::nvvm_tex_cube_level_v4u32_f32: | ||||||
3915 | case Intrinsic::nvvm_tex_cube_array_v4u32_f32: | ||||||
3916 | case Intrinsic::nvvm_tex_cube_array_level_v4u32_f32: | ||||||
3917 | case Intrinsic::nvvm_tex_1d_v4u32_s32: | ||||||
3918 | case Intrinsic::nvvm_tex_1d_v4u32_f32: | ||||||
3919 | case Intrinsic::nvvm_tex_1d_level_v4u32_f32: | ||||||
3920 | case Intrinsic::nvvm_tex_1d_grad_v4u32_f32: | ||||||
3921 | case Intrinsic::nvvm_tex_1d_array_v4u32_s32: | ||||||
3922 | case Intrinsic::nvvm_tex_1d_array_v4u32_f32: | ||||||
3923 | case Intrinsic::nvvm_tex_1d_array_level_v4u32_f32: | ||||||
3924 | case Intrinsic::nvvm_tex_1d_array_grad_v4u32_f32: | ||||||
3925 | case Intrinsic::nvvm_tex_2d_v4u32_s32: | ||||||
3926 | case Intrinsic::nvvm_tex_2d_v4u32_f32: | ||||||
3927 | case Intrinsic::nvvm_tex_2d_level_v4u32_f32: | ||||||
3928 | case Intrinsic::nvvm_tex_2d_grad_v4u32_f32: | ||||||
3929 | case Intrinsic::nvvm_tex_2d_array_v4u32_s32: | ||||||
3930 | case Intrinsic::nvvm_tex_2d_array_v4u32_f32: | ||||||
3931 | case Intrinsic::nvvm_tex_2d_array_level_v4u32_f32: | ||||||
3932 | case Intrinsic::nvvm_tex_2d_array_grad_v4u32_f32: | ||||||
3933 | case Intrinsic::nvvm_tex_3d_v4u32_s32: | ||||||
3934 | case Intrinsic::nvvm_tex_3d_v4u32_f32: | ||||||
3935 | case Intrinsic::nvvm_tex_3d_level_v4u32_f32: | ||||||
3936 | case Intrinsic::nvvm_tex_3d_grad_v4u32_f32: | ||||||
3937 | case Intrinsic::nvvm_tld4_r_2d_v4s32_f32: | ||||||
3938 | case Intrinsic::nvvm_tld4_g_2d_v4s32_f32: | ||||||
3939 | case Intrinsic::nvvm_tld4_b_2d_v4s32_f32: | ||||||
3940 | case Intrinsic::nvvm_tld4_a_2d_v4s32_f32: | ||||||
3941 | case Intrinsic::nvvm_tld4_r_2d_v4u32_f32: | ||||||
3942 | case Intrinsic::nvvm_tld4_g_2d_v4u32_f32: | ||||||
3943 | case Intrinsic::nvvm_tld4_b_2d_v4u32_f32: | ||||||
3944 | case Intrinsic::nvvm_tld4_a_2d_v4u32_f32: | ||||||
3945 | case Intrinsic::nvvm_tex_unified_1d_v4s32_s32: | ||||||
3946 | case Intrinsic::nvvm_tex_unified_1d_v4s32_f32: | ||||||
3947 | case Intrinsic::nvvm_tex_unified_1d_level_v4s32_f32: | ||||||
3948 | case Intrinsic::nvvm_tex_unified_1d_grad_v4s32_f32: | ||||||
3949 | case Intrinsic::nvvm_tex_unified_1d_array_v4s32_s32: | ||||||
3950 | case Intrinsic::nvvm_tex_unified_1d_array_v4s32_f32: | ||||||
3951 | case Intrinsic::nvvm_tex_unified_1d_array_level_v4s32_f32: | ||||||
3952 | case Intrinsic::nvvm_tex_unified_1d_array_grad_v4s32_f32: | ||||||
3953 | case Intrinsic::nvvm_tex_unified_2d_v4s32_s32: | ||||||
3954 | case Intrinsic::nvvm_tex_unified_2d_v4s32_f32: | ||||||
3955 | case Intrinsic::nvvm_tex_unified_2d_level_v4s32_f32: | ||||||
3956 | case Intrinsic::nvvm_tex_unified_2d_grad_v4s32_f32: | ||||||
3957 | case Intrinsic::nvvm_tex_unified_2d_array_v4s32_s32: | ||||||
3958 | case Intrinsic::nvvm_tex_unified_2d_array_v4s32_f32: | ||||||
3959 | case Intrinsic::nvvm_tex_unified_2d_array_level_v4s32_f32: | ||||||
3960 | case Intrinsic::nvvm_tex_unified_2d_array_grad_v4s32_f32: | ||||||
3961 | case Intrinsic::nvvm_tex_unified_3d_v4s32_s32: | ||||||
3962 | case Intrinsic::nvvm_tex_unified_3d_v4s32_f32: | ||||||
3963 | case Intrinsic::nvvm_tex_unified_3d_level_v4s32_f32: | ||||||
3964 | case Intrinsic::nvvm_tex_unified_3d_grad_v4s32_f32: | ||||||
3965 | case Intrinsic::nvvm_tex_unified_1d_v4u32_s32: | ||||||
3966 | case Intrinsic::nvvm_tex_unified_1d_v4u32_f32: | ||||||
3967 | case Intrinsic::nvvm_tex_unified_1d_level_v4u32_f32: | ||||||
3968 | case Intrinsic::nvvm_tex_unified_1d_grad_v4u32_f32: | ||||||
3969 | case Intrinsic::nvvm_tex_unified_1d_array_v4u32_s32: | ||||||
3970 | case Intrinsic::nvvm_tex_unified_1d_array_v4u32_f32: | ||||||
3971 | case Intrinsic::nvvm_tex_unified_1d_array_level_v4u32_f32: | ||||||
3972 | case Intrinsic::nvvm_tex_unified_1d_array_grad_v4u32_f32: | ||||||
3973 | case Intrinsic::nvvm_tex_unified_2d_v4u32_s32: | ||||||
3974 | case Intrinsic::nvvm_tex_unified_2d_v4u32_f32: | ||||||
3975 | case Intrinsic::nvvm_tex_unified_2d_level_v4u32_f32: | ||||||
3976 | case Intrinsic::nvvm_tex_unified_2d_grad_v4u32_f32: | ||||||
3977 | case Intrinsic::nvvm_tex_unified_2d_array_v4u32_s32: | ||||||
3978 | case Intrinsic::nvvm_tex_unified_2d_array_v4u32_f32: | ||||||
3979 | case Intrinsic::nvvm_tex_unified_2d_array_level_v4u32_f32: | ||||||
3980 | case Intrinsic::nvvm_tex_unified_2d_array_grad_v4u32_f32: | ||||||
3981 | case Intrinsic::nvvm_tex_unified_3d_v4u32_s32: | ||||||
3982 | case Intrinsic::nvvm_tex_unified_3d_v4u32_f32: | ||||||
3983 | case Intrinsic::nvvm_tex_unified_3d_level_v4u32_f32: | ||||||
3984 | case Intrinsic::nvvm_tex_unified_3d_grad_v4u32_f32: | ||||||
3985 | case Intrinsic::nvvm_tex_unified_cube_v4s32_f32: | ||||||
3986 | case Intrinsic::nvvm_tex_unified_cube_level_v4s32_f32: | ||||||
3987 | case Intrinsic::nvvm_tex_unified_cube_array_v4s32_f32: | ||||||
3988 | case Intrinsic::nvvm_tex_unified_cube_array_level_v4s32_f32: | ||||||
3989 | case Intrinsic::nvvm_tex_unified_cube_v4u32_f32: | ||||||
3990 | case Intrinsic::nvvm_tex_unified_cube_level_v4u32_f32: | ||||||
3991 | case Intrinsic::nvvm_tex_unified_cube_array_v4u32_f32: | ||||||
3992 | case Intrinsic::nvvm_tex_unified_cube_array_level_v4u32_f32: | ||||||
3993 | case Intrinsic::nvvm_tld4_unified_r_2d_v4s32_f32: | ||||||
3994 | case Intrinsic::nvvm_tld4_unified_g_2d_v4s32_f32: | ||||||
3995 | case Intrinsic::nvvm_tld4_unified_b_2d_v4s32_f32: | ||||||
3996 | case Intrinsic::nvvm_tld4_unified_a_2d_v4s32_f32: | ||||||
3997 | case Intrinsic::nvvm_tld4_unified_r_2d_v4u32_f32: | ||||||
3998 | case Intrinsic::nvvm_tld4_unified_g_2d_v4u32_f32: | ||||||
3999 | case Intrinsic::nvvm_tld4_unified_b_2d_v4u32_f32: | ||||||
4000 | case Intrinsic::nvvm_tld4_unified_a_2d_v4u32_f32: | ||||||
4001 | Info.opc = getOpcForTextureInstr(Intrinsic); | ||||||
4002 | Info.memVT = MVT::v4i32; | ||||||
4003 | Info.ptrVal = nullptr; | ||||||
4004 | Info.offset = 0; | ||||||
4005 | Info.flags = MachineMemOperand::MOLoad; | ||||||
4006 | Info.align = Align(16); | ||||||
4007 | return true; | ||||||
4008 | |||||||
4009 | case Intrinsic::nvvm_suld_1d_i8_clamp: | ||||||
4010 | case Intrinsic::nvvm_suld_1d_v2i8_clamp: | ||||||
4011 | case Intrinsic::nvvm_suld_1d_v4i8_clamp: | ||||||
4012 | case Intrinsic::nvvm_suld_1d_array_i8_clamp: | ||||||
4013 | case Intrinsic::nvvm_suld_1d_array_v2i8_clamp: | ||||||
4014 | case Intrinsic::nvvm_suld_1d_array_v4i8_clamp: | ||||||
4015 | case Intrinsic::nvvm_suld_2d_i8_clamp: | ||||||
4016 | case Intrinsic::nvvm_suld_2d_v2i8_clamp: | ||||||
4017 | case Intrinsic::nvvm_suld_2d_v4i8_clamp: | ||||||
4018 | case Intrinsic::nvvm_suld_2d_array_i8_clamp: | ||||||
4019 | case Intrinsic::nvvm_suld_2d_array_v2i8_clamp: | ||||||
4020 | case Intrinsic::nvvm_suld_2d_array_v4i8_clamp: | ||||||
4021 | case Intrinsic::nvvm_suld_3d_i8_clamp: | ||||||
4022 | case Intrinsic::nvvm_suld_3d_v2i8_clamp: | ||||||
4023 | case Intrinsic::nvvm_suld_3d_v4i8_clamp: | ||||||
4024 | case Intrinsic::nvvm_suld_1d_i8_trap: | ||||||
4025 | case Intrinsic::nvvm_suld_1d_v2i8_trap: | ||||||
4026 | case Intrinsic::nvvm_suld_1d_v4i8_trap: | ||||||
4027 | case Intrinsic::nvvm_suld_1d_array_i8_trap: | ||||||
4028 | case Intrinsic::nvvm_suld_1d_array_v2i8_trap: | ||||||
4029 | case Intrinsic::nvvm_suld_1d_array_v4i8_trap: | ||||||
4030 | case Intrinsic::nvvm_suld_2d_i8_trap: | ||||||
4031 | case Intrinsic::nvvm_suld_2d_v2i8_trap: | ||||||
4032 | case Intrinsic::nvvm_suld_2d_v4i8_trap: | ||||||
4033 | case Intrinsic::nvvm_suld_2d_array_i8_trap: | ||||||
4034 | case Intrinsic::nvvm_suld_2d_array_v2i8_trap: | ||||||
4035 | case Intrinsic::nvvm_suld_2d_array_v4i8_trap: | ||||||
4036 | case Intrinsic::nvvm_suld_3d_i8_trap: | ||||||
4037 | case Intrinsic::nvvm_suld_3d_v2i8_trap: | ||||||
4038 | case Intrinsic::nvvm_suld_3d_v4i8_trap: | ||||||
4039 | case Intrinsic::nvvm_suld_1d_i8_zero: | ||||||
4040 | case Intrinsic::nvvm_suld_1d_v2i8_zero: | ||||||
4041 | case Intrinsic::nvvm_suld_1d_v4i8_zero: | ||||||
4042 | case Intrinsic::nvvm_suld_1d_array_i8_zero: | ||||||
4043 | case Intrinsic::nvvm_suld_1d_array_v2i8_zero: | ||||||
4044 | case Intrinsic::nvvm_suld_1d_array_v4i8_zero: | ||||||
4045 | case Intrinsic::nvvm_suld_2d_i8_zero: | ||||||
4046 | case Intrinsic::nvvm_suld_2d_v2i8_zero: | ||||||
4047 | case Intrinsic::nvvm_suld_2d_v4i8_zero: | ||||||
4048 | case Intrinsic::nvvm_suld_2d_array_i8_zero: | ||||||
4049 | case Intrinsic::nvvm_suld_2d_array_v2i8_zero: | ||||||
4050 | case Intrinsic::nvvm_suld_2d_array_v4i8_zero: | ||||||
4051 | case Intrinsic::nvvm_suld_3d_i8_zero: | ||||||
4052 | case Intrinsic::nvvm_suld_3d_v2i8_zero: | ||||||
4053 | case Intrinsic::nvvm_suld_3d_v4i8_zero: | ||||||
4054 | Info.opc = getOpcForSurfaceInstr(Intrinsic); | ||||||
4055 | Info.memVT = MVT::i8; | ||||||
4056 | Info.ptrVal = nullptr; | ||||||
4057 | Info.offset = 0; | ||||||
4058 | Info.flags = MachineMemOperand::MOLoad; | ||||||
4059 | Info.align = Align(16); | ||||||
4060 | return true; | ||||||
4061 | |||||||
4062 | case Intrinsic::nvvm_suld_1d_i16_clamp: | ||||||
4063 | case Intrinsic::nvvm_suld_1d_v2i16_clamp: | ||||||
4064 | case Intrinsic::nvvm_suld_1d_v4i16_clamp: | ||||||
4065 | case Intrinsic::nvvm_suld_1d_array_i16_clamp: | ||||||
4066 | case Intrinsic::nvvm_suld_1d_array_v2i16_clamp: | ||||||
4067 | case Intrinsic::nvvm_suld_1d_array_v4i16_clamp: | ||||||
4068 | case Intrinsic::nvvm_suld_2d_i16_clamp: | ||||||
4069 | case Intrinsic::nvvm_suld_2d_v2i16_clamp: | ||||||
4070 | case Intrinsic::nvvm_suld_2d_v4i16_clamp: | ||||||
4071 | case Intrinsic::nvvm_suld_2d_array_i16_clamp: | ||||||
4072 | case Intrinsic::nvvm_suld_2d_array_v2i16_clamp: | ||||||
4073 | case Intrinsic::nvvm_suld_2d_array_v4i16_clamp: | ||||||
4074 | case Intrinsic::nvvm_suld_3d_i16_clamp: | ||||||
4075 | case Intrinsic::nvvm_suld_3d_v2i16_clamp: | ||||||
4076 | case Intrinsic::nvvm_suld_3d_v4i16_clamp: | ||||||
4077 | case Intrinsic::nvvm_suld_1d_i16_trap: | ||||||
4078 | case Intrinsic::nvvm_suld_1d_v2i16_trap: | ||||||
4079 | case Intrinsic::nvvm_suld_1d_v4i16_trap: | ||||||
4080 | case Intrinsic::nvvm_suld_1d_array_i16_trap: | ||||||
4081 | case Intrinsic::nvvm_suld_1d_array_v2i16_trap: | ||||||
4082 | case Intrinsic::nvvm_suld_1d_array_v4i16_trap: | ||||||
4083 | case Intrinsic::nvvm_suld_2d_i16_trap: | ||||||
4084 | case Intrinsic::nvvm_suld_2d_v2i16_trap: | ||||||
4085 | case Intrinsic::nvvm_suld_2d_v4i16_trap: | ||||||
4086 | case Intrinsic::nvvm_suld_2d_array_i16_trap: | ||||||
4087 | case Intrinsic::nvvm_suld_2d_array_v2i16_trap: | ||||||
4088 | case Intrinsic::nvvm_suld_2d_array_v4i16_trap: | ||||||
4089 | case Intrinsic::nvvm_suld_3d_i16_trap: | ||||||
4090 | case Intrinsic::nvvm_suld_3d_v2i16_trap: | ||||||
4091 | case Intrinsic::nvvm_suld_3d_v4i16_trap: | ||||||
4092 | case Intrinsic::nvvm_suld_1d_i16_zero: | ||||||
4093 | case Intrinsic::nvvm_suld_1d_v2i16_zero: | ||||||
4094 | case Intrinsic::nvvm_suld_1d_v4i16_zero: | ||||||
4095 | case Intrinsic::nvvm_suld_1d_array_i16_zero: | ||||||
4096 | case Intrinsic::nvvm_suld_1d_array_v2i16_zero: | ||||||
4097 | case Intrinsic::nvvm_suld_1d_array_v4i16_zero: | ||||||
4098 | case Intrinsic::nvvm_suld_2d_i16_zero: | ||||||
4099 | case Intrinsic::nvvm_suld_2d_v2i16_zero: | ||||||
4100 | case Intrinsic::nvvm_suld_2d_v4i16_zero: | ||||||
4101 | case Intrinsic::nvvm_suld_2d_array_i16_zero: | ||||||
4102 | case Intrinsic::nvvm_suld_2d_array_v2i16_zero: | ||||||
4103 | case Intrinsic::nvvm_suld_2d_array_v4i16_zero: | ||||||
4104 | case Intrinsic::nvvm_suld_3d_i16_zero: | ||||||
4105 | case Intrinsic::nvvm_suld_3d_v2i16_zero: | ||||||
4106 | case Intrinsic::nvvm_suld_3d_v4i16_zero: | ||||||
4107 | Info.opc = getOpcForSurfaceInstr(Intrinsic); | ||||||
4108 | Info.memVT = MVT::i16; | ||||||
4109 | Info.ptrVal = nullptr; | ||||||
4110 | Info.offset = 0; | ||||||
4111 | Info.flags = MachineMemOperand::MOLoad; | ||||||
4112 | Info.align = Align(16); | ||||||
4113 | return true; | ||||||
4114 | |||||||
4115 | case Intrinsic::nvvm_suld_1d_i32_clamp: | ||||||
4116 | case Intrinsic::nvvm_suld_1d_v2i32_clamp: | ||||||
4117 | case Intrinsic::nvvm_suld_1d_v4i32_clamp: | ||||||
4118 | case Intrinsic::nvvm_suld_1d_array_i32_clamp: | ||||||
4119 | case Intrinsic::nvvm_suld_1d_array_v2i32_clamp: | ||||||
4120 | case Intrinsic::nvvm_suld_1d_array_v4i32_clamp: | ||||||
4121 | case Intrinsic::nvvm_suld_2d_i32_clamp: | ||||||
4122 | case Intrinsic::nvvm_suld_2d_v2i32_clamp: | ||||||
4123 | case Intrinsic::nvvm_suld_2d_v4i32_clamp: | ||||||
4124 | case Intrinsic::nvvm_suld_2d_array_i32_clamp: | ||||||
4125 | case Intrinsic::nvvm_suld_2d_array_v2i32_clamp: | ||||||
4126 | case Intrinsic::nvvm_suld_2d_array_v4i32_clamp: | ||||||
4127 | case Intrinsic::nvvm_suld_3d_i32_clamp: | ||||||
4128 | case Intrinsic::nvvm_suld_3d_v2i32_clamp: | ||||||
4129 | case Intrinsic::nvvm_suld_3d_v4i32_clamp: | ||||||
4130 | case Intrinsic::nvvm_suld_1d_i32_trap: | ||||||
4131 | case Intrinsic::nvvm_suld_1d_v2i32_trap: | ||||||
4132 | case Intrinsic::nvvm_suld_1d_v4i32_trap: | ||||||
4133 | case Intrinsic::nvvm_suld_1d_array_i32_trap: | ||||||
4134 | case Intrinsic::nvvm_suld_1d_array_v2i32_trap: | ||||||
4135 | case Intrinsic::nvvm_suld_1d_array_v4i32_trap: | ||||||
4136 | case Intrinsic::nvvm_suld_2d_i32_trap: | ||||||
4137 | case Intrinsic::nvvm_suld_2d_v2i32_trap: | ||||||
4138 | case Intrinsic::nvvm_suld_2d_v4i32_trap: | ||||||
4139 | case Intrinsic::nvvm_suld_2d_array_i32_trap: | ||||||
4140 | case Intrinsic::nvvm_suld_2d_array_v2i32_trap: | ||||||
4141 | case Intrinsic::nvvm_suld_2d_array_v4i32_trap: | ||||||
4142 | case Intrinsic::nvvm_suld_3d_i32_trap: | ||||||
4143 | case Intrinsic::nvvm_suld_3d_v2i32_trap: | ||||||
4144 | case Intrinsic::nvvm_suld_3d_v4i32_trap: | ||||||
4145 | case Intrinsic::nvvm_suld_1d_i32_zero: | ||||||
4146 | case Intrinsic::nvvm_suld_1d_v2i32_zero: | ||||||
4147 | case Intrinsic::nvvm_suld_1d_v4i32_zero: | ||||||
4148 | case Intrinsic::nvvm_suld_1d_array_i32_zero: | ||||||
4149 | case Intrinsic::nvvm_suld_1d_array_v2i32_zero: | ||||||
4150 | case Intrinsic::nvvm_suld_1d_array_v4i32_zero: | ||||||
4151 | case Intrinsic::nvvm_suld_2d_i32_zero: | ||||||
4152 | case Intrinsic::nvvm_suld_2d_v2i32_zero: | ||||||
4153 | case Intrinsic::nvvm_suld_2d_v4i32_zero: | ||||||
4154 | case Intrinsic::nvvm_suld_2d_array_i32_zero: | ||||||
4155 | case Intrinsic::nvvm_suld_2d_array_v2i32_zero: | ||||||
4156 | case Intrinsic::nvvm_suld_2d_array_v4i32_zero: | ||||||
4157 | case Intrinsic::nvvm_suld_3d_i32_zero: | ||||||
4158 | case Intrinsic::nvvm_suld_3d_v2i32_zero: | ||||||
4159 | case Intrinsic::nvvm_suld_3d_v4i32_zero: | ||||||
4160 | Info.opc = getOpcForSurfaceInstr(Intrinsic); | ||||||
4161 | Info.memVT = MVT::i32; | ||||||
4162 | Info.ptrVal = nullptr; | ||||||
4163 | Info.offset = 0; | ||||||
4164 | Info.flags = MachineMemOperand::MOLoad; | ||||||
4165 | Info.align = Align(16); | ||||||
4166 | return true; | ||||||
4167 | |||||||
4168 | case Intrinsic::nvvm_suld_1d_i64_clamp: | ||||||
4169 | case Intrinsic::nvvm_suld_1d_v2i64_clamp: | ||||||
4170 | case Intrinsic::nvvm_suld_1d_array_i64_clamp: | ||||||
4171 | case Intrinsic::nvvm_suld_1d_array_v2i64_clamp: | ||||||
4172 | case Intrinsic::nvvm_suld_2d_i64_clamp: | ||||||
4173 | case Intrinsic::nvvm_suld_2d_v2i64_clamp: | ||||||
4174 | case Intrinsic::nvvm_suld_2d_array_i64_clamp: | ||||||
4175 | case Intrinsic::nvvm_suld_2d_array_v2i64_clamp: | ||||||
4176 | case Intrinsic::nvvm_suld_3d_i64_clamp: | ||||||
4177 | case Intrinsic::nvvm_suld_3d_v2i64_clamp: | ||||||
4178 | case Intrinsic::nvvm_suld_1d_i64_trap: | ||||||
4179 | case Intrinsic::nvvm_suld_1d_v2i64_trap: | ||||||
4180 | case Intrinsic::nvvm_suld_1d_array_i64_trap: | ||||||
4181 | case Intrinsic::nvvm_suld_1d_array_v2i64_trap: | ||||||
4182 | case Intrinsic::nvvm_suld_2d_i64_trap: | ||||||
4183 | case Intrinsic::nvvm_suld_2d_v2i64_trap: | ||||||
4184 | case Intrinsic::nvvm_suld_2d_array_i64_trap: | ||||||
4185 | case Intrinsic::nvvm_suld_2d_array_v2i64_trap: | ||||||
4186 | case Intrinsic::nvvm_suld_3d_i64_trap: | ||||||
4187 | case Intrinsic::nvvm_suld_3d_v2i64_trap: | ||||||
4188 | case Intrinsic::nvvm_suld_1d_i64_zero: | ||||||
4189 | case Intrinsic::nvvm_suld_1d_v2i64_zero: | ||||||
4190 | case Intrinsic::nvvm_suld_1d_array_i64_zero: | ||||||
4191 | case Intrinsic::nvvm_suld_1d_array_v2i64_zero: | ||||||
4192 | case Intrinsic::nvvm_suld_2d_i64_zero: | ||||||
4193 | case Intrinsic::nvvm_suld_2d_v2i64_zero: | ||||||
4194 | case Intrinsic::nvvm_suld_2d_array_i64_zero: | ||||||
4195 | case Intrinsic::nvvm_suld_2d_array_v2i64_zero: | ||||||
4196 | case Intrinsic::nvvm_suld_3d_i64_zero: | ||||||
4197 | case Intrinsic::nvvm_suld_3d_v2i64_zero: | ||||||
4198 | Info.opc = getOpcForSurfaceInstr(Intrinsic); | ||||||
4199 | Info.memVT = MVT::i64; | ||||||
4200 | Info.ptrVal = nullptr; | ||||||
4201 | Info.offset = 0; | ||||||
4202 | Info.flags = MachineMemOperand::MOLoad; | ||||||
4203 | Info.align = Align(16); | ||||||
4204 | return true; | ||||||
4205 | } | ||||||
4206 | return false; | ||||||
4207 | } | ||||||
4208 | |||||||
4209 | /// isLegalAddressingMode - Return true if the addressing mode represented | ||||||
4210 | /// by AM is legal for this target, for a load/store of the specified type. | ||||||
4211 | /// Used to guide target specific optimizations, like loop strength reduction | ||||||
4212 | /// (LoopStrengthReduce.cpp) and memory optimization for address mode | ||||||
4213 | /// (CodeGenPrepare.cpp) | ||||||
4214 | bool NVPTXTargetLowering::isLegalAddressingMode(const DataLayout &DL, | ||||||
4215 | const AddrMode &AM, Type *Ty, | ||||||
4216 | unsigned AS, Instruction *I) const { | ||||||
4217 | // AddrMode - This represents an addressing mode of: | ||||||
4218 | // BaseGV + BaseOffs + BaseReg + Scale*ScaleReg | ||||||
4219 | // | ||||||
4220 | // The legal address modes are | ||||||
4221 | // - [avar] | ||||||
4222 | // - [areg] | ||||||
4223 | // - [areg+immoff] | ||||||
4224 | // - [immAddr] | ||||||
4225 | |||||||
4226 | if (AM.BaseGV) { | ||||||
4227 | return !AM.BaseOffs && !AM.HasBaseReg && !AM.Scale; | ||||||
4228 | } | ||||||
4229 | |||||||
4230 | switch (AM.Scale) { | ||||||
4231 | case 0: // "r", "r+i" or "i" is allowed | ||||||
4232 | break; | ||||||
4233 | case 1: | ||||||
4234 | if (AM.HasBaseReg) // "r+r+i" or "r+r" is not allowed. | ||||||
4235 | return false; | ||||||
4236 | // Otherwise we have r+i. | ||||||
4237 | break; | ||||||
4238 | default: | ||||||
4239 | // No scale > 1 is allowed | ||||||
4240 | return false; | ||||||
4241 | } | ||||||
4242 | return true; | ||||||
4243 | } | ||||||
4244 | |||||||
4245 | //===----------------------------------------------------------------------===// | ||||||
4246 | // NVPTX Inline Assembly Support | ||||||
4247 | //===----------------------------------------------------------------------===// | ||||||
4248 | |||||||
4249 | /// getConstraintType - Given a constraint letter, return the type of | ||||||
4250 | /// constraint it is for this target. | ||||||
4251 | NVPTXTargetLowering::ConstraintType | ||||||
4252 | NVPTXTargetLowering::getConstraintType(StringRef Constraint) const { | ||||||
4253 | if (Constraint.size() == 1) { | ||||||
4254 | switch (Constraint[0]) { | ||||||
4255 | default: | ||||||
4256 | break; | ||||||
4257 | case 'b': | ||||||
4258 | case 'r': | ||||||
4259 | case 'h': | ||||||
4260 | case 'c': | ||||||
4261 | case 'l': | ||||||
4262 | case 'f': | ||||||
4263 | case 'd': | ||||||
4264 | case '0': | ||||||
4265 | case 'N': | ||||||
4266 | return C_RegisterClass; | ||||||
4267 | } | ||||||
4268 | } | ||||||
4269 | return TargetLowering::getConstraintType(Constraint); | ||||||
4270 | } | ||||||
4271 | |||||||
4272 | std::pair<unsigned, const TargetRegisterClass *> | ||||||
4273 | NVPTXTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, | ||||||
4274 | StringRef Constraint, | ||||||
4275 | MVT VT) const { | ||||||
4276 | if (Constraint.size() == 1) { | ||||||
4277 | switch (Constraint[0]) { | ||||||
4278 | case 'b': | ||||||
4279 | return std::make_pair(0U, &NVPTX::Int1RegsRegClass); | ||||||
4280 | case 'c': | ||||||
4281 | return std::make_pair(0U, &NVPTX::Int16RegsRegClass); | ||||||
4282 | case 'h': | ||||||
4283 | return std::make_pair(0U, &NVPTX::Int16RegsRegClass); | ||||||
4284 | case 'r': | ||||||
4285 | return std::make_pair(0U, &NVPTX::Int32RegsRegClass); | ||||||
4286 | case 'l': | ||||||
4287 | case 'N': | ||||||
4288 | return std::make_pair(0U, &NVPTX::Int64RegsRegClass); | ||||||
4289 | case 'f': | ||||||
4290 | return std::make_pair(0U, &NVPTX::Float32RegsRegClass); | ||||||
4291 | case 'd': | ||||||
4292 | return std::make_pair(0U, &NVPTX::Float64RegsRegClass); | ||||||
4293 | } | ||||||
4294 | } | ||||||
4295 | return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); | ||||||
4296 | } | ||||||
4297 | |||||||
4298 | //===----------------------------------------------------------------------===// | ||||||
4299 | // NVPTX DAG Combining | ||||||
4300 | //===----------------------------------------------------------------------===// | ||||||
4301 | |||||||
4302 | bool NVPTXTargetLowering::allowFMA(MachineFunction &MF, | ||||||
4303 | CodeGenOpt::Level OptLevel) const { | ||||||
4304 | // Always honor command-line argument | ||||||
4305 | if (FMAContractLevelOpt.getNumOccurrences() > 0) | ||||||
4306 | return FMAContractLevelOpt > 0; | ||||||
4307 | |||||||
4308 | // Do not contract if we're not optimizing the code. | ||||||
4309 | if (OptLevel == 0) | ||||||
4310 | return false; | ||||||
4311 | |||||||
4312 | // Honor TargetOptions flags that explicitly say fusion is okay. | ||||||
4313 | if (MF.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast) | ||||||
4314 | return true; | ||||||
4315 | |||||||
4316 | return allowUnsafeFPMath(MF); | ||||||
4317 | } | ||||||
4318 | |||||||
4319 | bool NVPTXTargetLowering::allowUnsafeFPMath(MachineFunction &MF) const { | ||||||
4320 | // Honor TargetOptions flags that explicitly say unsafe math is okay. | ||||||
4321 | if (MF.getTarget().Options.UnsafeFPMath) | ||||||
4322 | return true; | ||||||
4323 | |||||||
4324 | // Allow unsafe math if unsafe-fp-math attribute explicitly says so. | ||||||
4325 | const Function &F = MF.getFunction(); | ||||||
4326 | if (F.hasFnAttribute("unsafe-fp-math")) { | ||||||
4327 | Attribute Attr = F.getFnAttribute("unsafe-fp-math"); | ||||||
4328 | StringRef Val = Attr.getValueAsString(); | ||||||
4329 | if (Val == "true") | ||||||
4330 | return true; | ||||||
4331 | } | ||||||
4332 | |||||||
4333 | return false; | ||||||
4334 | } | ||||||
4335 | |||||||
4336 | /// PerformADDCombineWithOperands - Try DAG combinations for an ADD with | ||||||
4337 | /// operands N0 and N1. This is a helper for PerformADDCombine that is | ||||||
4338 | /// called with the default operands, and if that fails, with commuted | ||||||
4339 | /// operands. | ||||||
4340 | static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1, | ||||||
4341 | TargetLowering::DAGCombinerInfo &DCI, | ||||||
4342 | const NVPTXSubtarget &Subtarget, | ||||||
4343 | CodeGenOpt::Level OptLevel) { | ||||||
4344 | SelectionDAG &DAG = DCI.DAG; | ||||||
4345 | // Skip non-integer, non-scalar case | ||||||
4346 | EVT VT=N0.getValueType(); | ||||||
4347 | if (VT.isVector()) | ||||||
4348 | return SDValue(); | ||||||
4349 | |||||||
4350 | // fold (add (mul a, b), c) -> (mad a, b, c) | ||||||
4351 | // | ||||||
4352 | if (N0.getOpcode() == ISD::MUL) { | ||||||
4353 | assert (VT.isInteger())((VT.isInteger()) ? static_cast<void> (0) : __assert_fail ("VT.isInteger()", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 4353, __PRETTY_FUNCTION__)); | ||||||
4354 | // For integer: | ||||||
4355 | // Since integer multiply-add costs the same as integer multiply | ||||||
4356 | // but is more costly than integer add, do the fusion only when | ||||||
4357 | // the mul is only used in the add. | ||||||
4358 | if (OptLevel==CodeGenOpt::None || VT != MVT::i32 || | ||||||
4359 | !N0.getNode()->hasOneUse()) | ||||||
4360 | return SDValue(); | ||||||
4361 | |||||||
4362 | // Do the folding | ||||||
4363 | return DAG.getNode(NVPTXISD::IMAD, SDLoc(N), VT, | ||||||
4364 | N0.getOperand(0), N0.getOperand(1), N1); | ||||||
4365 | } | ||||||
4366 | else if (N0.getOpcode() == ISD::FMUL) { | ||||||
4367 | if (VT == MVT::f32 || VT == MVT::f64) { | ||||||
4368 | const auto *TLI = static_cast<const NVPTXTargetLowering *>( | ||||||
4369 | &DAG.getTargetLoweringInfo()); | ||||||
4370 | if (!TLI->allowFMA(DAG.getMachineFunction(), OptLevel)) | ||||||
4371 | return SDValue(); | ||||||
4372 | |||||||
4373 | // For floating point: | ||||||
4374 | // Do the fusion only when the mul has less than 5 uses and all | ||||||
4375 | // are add. | ||||||
4376 | // The heuristic is that if a use is not an add, then that use | ||||||
4377 | // cannot be fused into fma, therefore mul is still needed anyway. | ||||||
4378 | // If there are more than 4 uses, even if they are all add, fusing | ||||||
4379 | // them will increase register pressue. | ||||||
4380 | // | ||||||
4381 | int numUses = 0; | ||||||
4382 | int nonAddCount = 0; | ||||||
4383 | for (SDNode::use_iterator UI = N0.getNode()->use_begin(), | ||||||
4384 | UE = N0.getNode()->use_end(); | ||||||
4385 | UI != UE; ++UI) { | ||||||
4386 | numUses++; | ||||||
4387 | SDNode *User = *UI; | ||||||
4388 | if (User->getOpcode() != ISD::FADD) | ||||||
4389 | ++nonAddCount; | ||||||
4390 | } | ||||||
4391 | if (numUses >= 5) | ||||||
4392 | return SDValue(); | ||||||
4393 | if (nonAddCount) { | ||||||
4394 | int orderNo = N->getIROrder(); | ||||||
4395 | int orderNo2 = N0.getNode()->getIROrder(); | ||||||
4396 | // simple heuristics here for considering potential register | ||||||
4397 | // pressure, the logics here is that the differnce are used | ||||||
4398 | // to measure the distance between def and use, the longer distance | ||||||
4399 | // more likely cause register pressure. | ||||||
4400 | if (orderNo - orderNo2 < 500) | ||||||
4401 | return SDValue(); | ||||||
4402 | |||||||
4403 | // Now, check if at least one of the FMUL's operands is live beyond the node N, | ||||||
4404 | // which guarantees that the FMA will not increase register pressure at node N. | ||||||
4405 | bool opIsLive = false; | ||||||
4406 | const SDNode *left = N0.getOperand(0).getNode(); | ||||||
4407 | const SDNode *right = N0.getOperand(1).getNode(); | ||||||
4408 | |||||||
4409 | if (isa<ConstantSDNode>(left) || isa<ConstantSDNode>(right)) | ||||||
4410 | opIsLive = true; | ||||||
4411 | |||||||
4412 | if (!opIsLive) | ||||||
4413 | for (SDNode::use_iterator UI = left->use_begin(), UE = left->use_end(); UI != UE; ++UI) { | ||||||
4414 | SDNode *User = *UI; | ||||||
4415 | int orderNo3 = User->getIROrder(); | ||||||
4416 | if (orderNo3 > orderNo) { | ||||||
4417 | opIsLive = true; | ||||||
4418 | break; | ||||||
4419 | } | ||||||
4420 | } | ||||||
4421 | |||||||
4422 | if (!opIsLive) | ||||||
4423 | for (SDNode::use_iterator UI = right->use_begin(), UE = right->use_end(); UI != UE; ++UI) { | ||||||
4424 | SDNode *User = *UI; | ||||||
4425 | int orderNo3 = User->getIROrder(); | ||||||
4426 | if (orderNo3 > orderNo) { | ||||||
4427 | opIsLive = true; | ||||||
4428 | break; | ||||||
4429 | } | ||||||
4430 | } | ||||||
4431 | |||||||
4432 | if (!opIsLive) | ||||||
4433 | return SDValue(); | ||||||
4434 | } | ||||||
4435 | |||||||
4436 | return DAG.getNode(ISD::FMA, SDLoc(N), VT, | ||||||
4437 | N0.getOperand(0), N0.getOperand(1), N1); | ||||||
4438 | } | ||||||
4439 | } | ||||||
4440 | |||||||
4441 | return SDValue(); | ||||||
4442 | } | ||||||
4443 | |||||||
4444 | /// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD. | ||||||
4445 | /// | ||||||
4446 | static SDValue PerformADDCombine(SDNode *N, | ||||||
4447 | TargetLowering::DAGCombinerInfo &DCI, | ||||||
4448 | const NVPTXSubtarget &Subtarget, | ||||||
4449 | CodeGenOpt::Level OptLevel) { | ||||||
4450 | SDValue N0 = N->getOperand(0); | ||||||
4451 | SDValue N1 = N->getOperand(1); | ||||||
4452 | |||||||
4453 | // First try with the default operand order. | ||||||
4454 | if (SDValue Result = | ||||||
4455 | PerformADDCombineWithOperands(N, N0, N1, DCI, Subtarget, OptLevel)) | ||||||
4456 | return Result; | ||||||
4457 | |||||||
4458 | // If that didn't work, try again with the operands commuted. | ||||||
4459 | return PerformADDCombineWithOperands(N, N1, N0, DCI, Subtarget, OptLevel); | ||||||
4460 | } | ||||||
4461 | |||||||
4462 | static SDValue PerformANDCombine(SDNode *N, | ||||||
4463 | TargetLowering::DAGCombinerInfo &DCI) { | ||||||
4464 | // The type legalizer turns a vector load of i8 values into a zextload to i16 | ||||||
4465 | // registers, optionally ANY_EXTENDs it (if target type is integer), | ||||||
4466 | // and ANDs off the high 8 bits. Since we turn this load into a | ||||||
4467 | // target-specific DAG node, the DAG combiner fails to eliminate these AND | ||||||
4468 | // nodes. Do that here. | ||||||
4469 | SDValue Val = N->getOperand(0); | ||||||
4470 | SDValue Mask = N->getOperand(1); | ||||||
4471 | |||||||
4472 | if (isa<ConstantSDNode>(Val)) { | ||||||
4473 | std::swap(Val, Mask); | ||||||
4474 | } | ||||||
4475 | |||||||
4476 | SDValue AExt; | ||||||
4477 | // Generally, we will see zextload -> IMOV16rr -> ANY_EXTEND -> and | ||||||
4478 | if (Val.getOpcode() == ISD::ANY_EXTEND) { | ||||||
4479 | AExt = Val; | ||||||
4480 | Val = Val->getOperand(0); | ||||||
4481 | } | ||||||
4482 | |||||||
4483 | if (Val->isMachineOpcode() && Val->getMachineOpcode() == NVPTX::IMOV16rr) { | ||||||
4484 | Val = Val->getOperand(0); | ||||||
4485 | } | ||||||
4486 | |||||||
4487 | if (Val->getOpcode() == NVPTXISD::LoadV2 || | ||||||
4488 | Val->getOpcode() == NVPTXISD::LoadV4) { | ||||||
4489 | ConstantSDNode *MaskCnst = dyn_cast<ConstantSDNode>(Mask); | ||||||
4490 | if (!MaskCnst) { | ||||||
4491 | // Not an AND with a constant | ||||||
4492 | return SDValue(); | ||||||
4493 | } | ||||||
4494 | |||||||
4495 | uint64_t MaskVal = MaskCnst->getZExtValue(); | ||||||
4496 | if (MaskVal != 0xff) { | ||||||
4497 | // Not an AND that chops off top 8 bits | ||||||
4498 | return SDValue(); | ||||||
4499 | } | ||||||
4500 | |||||||
4501 | MemSDNode *Mem = dyn_cast<MemSDNode>(Val); | ||||||
4502 | if (!Mem) { | ||||||
4503 | // Not a MemSDNode?!? | ||||||
4504 | return SDValue(); | ||||||
4505 | } | ||||||
4506 | |||||||
4507 | EVT MemVT = Mem->getMemoryVT(); | ||||||
4508 | if (MemVT != MVT::v2i8 && MemVT != MVT::v4i8) { | ||||||
4509 | // We only handle the i8 case | ||||||
4510 | return SDValue(); | ||||||
4511 | } | ||||||
4512 | |||||||
4513 | unsigned ExtType = | ||||||
4514 | cast<ConstantSDNode>(Val->getOperand(Val->getNumOperands()-1))-> | ||||||
4515 | getZExtValue(); | ||||||
4516 | if (ExtType == ISD::SEXTLOAD) { | ||||||
4517 | // If for some reason the load is a sextload, the and is needed to zero | ||||||
4518 | // out the high 8 bits | ||||||
4519 | return SDValue(); | ||||||
4520 | } | ||||||
4521 | |||||||
4522 | bool AddTo = false; | ||||||
4523 | if (AExt.getNode() != nullptr) { | ||||||
4524 | // Re-insert the ext as a zext. | ||||||
4525 | Val = DCI.DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), | ||||||
4526 | AExt.getValueType(), Val); | ||||||
4527 | AddTo = true; | ||||||
4528 | } | ||||||
4529 | |||||||
4530 | // If we get here, the AND is unnecessary. Just replace it with the load | ||||||
4531 | DCI.CombineTo(N, Val, AddTo); | ||||||
4532 | } | ||||||
4533 | |||||||
4534 | return SDValue(); | ||||||
4535 | } | ||||||
4536 | |||||||
4537 | static SDValue PerformREMCombine(SDNode *N, | ||||||
4538 | TargetLowering::DAGCombinerInfo &DCI, | ||||||
4539 | CodeGenOpt::Level OptLevel) { | ||||||
4540 | assert(N->getOpcode() == ISD::SREM || N->getOpcode() == ISD::UREM)((N->getOpcode() == ISD::SREM || N->getOpcode() == ISD:: UREM) ? static_cast<void> (0) : __assert_fail ("N->getOpcode() == ISD::SREM || N->getOpcode() == ISD::UREM" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 4540, __PRETTY_FUNCTION__)); | ||||||
4541 | |||||||
4542 | // Don't do anything at less than -O2. | ||||||
4543 | if (OptLevel < CodeGenOpt::Default) | ||||||
4544 | return SDValue(); | ||||||
4545 | |||||||
4546 | SelectionDAG &DAG = DCI.DAG; | ||||||
4547 | SDLoc DL(N); | ||||||
4548 | EVT VT = N->getValueType(0); | ||||||
4549 | bool IsSigned = N->getOpcode() == ISD::SREM; | ||||||
4550 | unsigned DivOpc = IsSigned ? ISD::SDIV : ISD::UDIV; | ||||||
4551 | |||||||
4552 | const SDValue &Num = N->getOperand(0); | ||||||
4553 | const SDValue &Den = N->getOperand(1); | ||||||
4554 | |||||||
4555 | for (const SDNode *U : Num->uses()) { | ||||||
4556 | if (U->getOpcode() == DivOpc && U->getOperand(0) == Num && | ||||||
4557 | U->getOperand(1) == Den) { | ||||||
4558 | // Num % Den -> Num - (Num / Den) * Den | ||||||
4559 | return DAG.getNode(ISD::SUB, DL, VT, Num, | ||||||
4560 | DAG.getNode(ISD::MUL, DL, VT, | ||||||
4561 | DAG.getNode(DivOpc, DL, VT, Num, Den), | ||||||
4562 | Den)); | ||||||
4563 | } | ||||||
4564 | } | ||||||
4565 | return SDValue(); | ||||||
4566 | } | ||||||
4567 | |||||||
/// Signedness of a multiply operand, as reported by
/// IsMulWideOperandDemotable.
enum OperandSignedness {
  /// The operand is a sign extension.
  Signed = 0,
  /// The operand is a zero extension.
  Unsigned = 1,
  /// The signedness could not be determined.
  Unknown = 2
};
4573 | |||||||
4574 | /// IsMulWideOperandDemotable - Checks if the provided DAG node is an operand | ||||||
4575 | /// that can be demoted to \p OptSize bits without loss of information. The | ||||||
4576 | /// signedness of the operand, if determinable, is placed in \p S. | ||||||
4577 | static bool IsMulWideOperandDemotable(SDValue Op, | ||||||
4578 | unsigned OptSize, | ||||||
4579 | OperandSignedness &S) { | ||||||
4580 | S = Unknown; | ||||||
4581 | |||||||
4582 | if (Op.getOpcode() == ISD::SIGN_EXTEND || | ||||||
4583 | Op.getOpcode() == ISD::SIGN_EXTEND_INREG) { | ||||||
4584 | EVT OrigVT = Op.getOperand(0).getValueType(); | ||||||
4585 | if (OrigVT.getSizeInBits() <= OptSize) { | ||||||
4586 | S = Signed; | ||||||
4587 | return true; | ||||||
4588 | } | ||||||
4589 | } else if (Op.getOpcode() == ISD::ZERO_EXTEND) { | ||||||
4590 | EVT OrigVT = Op.getOperand(0).getValueType(); | ||||||
4591 | if (OrigVT.getSizeInBits() <= OptSize) { | ||||||
4592 | S = Unsigned; | ||||||
4593 | return true; | ||||||
4594 | } | ||||||
4595 | } | ||||||
4596 | |||||||
4597 | return false; | ||||||
4598 | } | ||||||
4599 | |||||||
4600 | /// AreMulWideOperandsDemotable - Checks if the given LHS and RHS operands can | ||||||
4601 | /// be demoted to \p OptSize bits without loss of information. If the operands | ||||||
4602 | /// contain a constant, it should appear as the RHS operand. The signedness of | ||||||
4603 | /// the operands is placed in \p IsSigned. | ||||||
4604 | static bool AreMulWideOperandsDemotable(SDValue LHS, SDValue RHS, | ||||||
4605 | unsigned OptSize, | ||||||
4606 | bool &IsSigned) { | ||||||
4607 | OperandSignedness LHSSign; | ||||||
4608 | |||||||
4609 | // The LHS operand must be a demotable op | ||||||
4610 | if (!IsMulWideOperandDemotable(LHS, OptSize, LHSSign)) | ||||||
4611 | return false; | ||||||
4612 | |||||||
4613 | // We should have been able to determine the signedness from the LHS | ||||||
4614 | if (LHSSign == Unknown) | ||||||
4615 | return false; | ||||||
4616 | |||||||
4617 | IsSigned = (LHSSign == Signed); | ||||||
4618 | |||||||
4619 | // The RHS can be a demotable op or a constant | ||||||
4620 | if (ConstantSDNode *CI = dyn_cast<ConstantSDNode>(RHS)) { | ||||||
4621 | const APInt &Val = CI->getAPIntValue(); | ||||||
4622 | if (LHSSign == Unsigned) { | ||||||
4623 | return Val.isIntN(OptSize); | ||||||
4624 | } else { | ||||||
4625 | return Val.isSignedIntN(OptSize); | ||||||
4626 | } | ||||||
4627 | } else { | ||||||
4628 | OperandSignedness RHSSign; | ||||||
4629 | if (!IsMulWideOperandDemotable(RHS, OptSize, RHSSign)) | ||||||
4630 | return false; | ||||||
4631 | |||||||
4632 | return LHSSign == RHSSign; | ||||||
4633 | } | ||||||
4634 | } | ||||||
4635 | |||||||
4636 | /// TryMULWIDECombine - Attempt to replace a multiply of M bits with a multiply | ||||||
4637 | /// of M/2 bits that produces an M-bit result (i.e. mul.wide). This transform | ||||||
4638 | /// works on both multiply DAG nodes and SHL DAG nodes with a constant shift | ||||||
4639 | /// amount. | ||||||
4640 | static SDValue TryMULWIDECombine(SDNode *N, | ||||||
4641 | TargetLowering::DAGCombinerInfo &DCI) { | ||||||
4642 | EVT MulType = N->getValueType(0); | ||||||
4643 | if (MulType != MVT::i32 && MulType != MVT::i64) { | ||||||
4644 | return SDValue(); | ||||||
4645 | } | ||||||
4646 | |||||||
4647 | SDLoc DL(N); | ||||||
4648 | unsigned OptSize = MulType.getSizeInBits() >> 1; | ||||||
4649 | SDValue LHS = N->getOperand(0); | ||||||
4650 | SDValue RHS = N->getOperand(1); | ||||||
4651 | |||||||
4652 | // Canonicalize the multiply so the constant (if any) is on the right | ||||||
4653 | if (N->getOpcode() == ISD::MUL) { | ||||||
4654 | if (isa<ConstantSDNode>(LHS)) { | ||||||
4655 | std::swap(LHS, RHS); | ||||||
4656 | } | ||||||
4657 | } | ||||||
4658 | |||||||
4659 | // If we have a SHL, determine the actual multiply amount | ||||||
4660 | if (N->getOpcode() == ISD::SHL) { | ||||||
4661 | ConstantSDNode *ShlRHS = dyn_cast<ConstantSDNode>(RHS); | ||||||
4662 | if (!ShlRHS) { | ||||||
4663 | return SDValue(); | ||||||
4664 | } | ||||||
4665 | |||||||
4666 | APInt ShiftAmt = ShlRHS->getAPIntValue(); | ||||||
4667 | unsigned BitWidth = MulType.getSizeInBits(); | ||||||
4668 | if (ShiftAmt.sge(0) && ShiftAmt.slt(BitWidth)) { | ||||||
4669 | APInt MulVal = APInt(BitWidth, 1) << ShiftAmt; | ||||||
4670 | RHS = DCI.DAG.getConstant(MulVal, DL, MulType); | ||||||
4671 | } else { | ||||||
4672 | return SDValue(); | ||||||
4673 | } | ||||||
4674 | } | ||||||
4675 | |||||||
4676 | bool Signed; | ||||||
4677 | // Verify that our operands are demotable | ||||||
4678 | if (!AreMulWideOperandsDemotable(LHS, RHS, OptSize, Signed)) { | ||||||
4679 | return SDValue(); | ||||||
4680 | } | ||||||
4681 | |||||||
4682 | EVT DemotedVT; | ||||||
4683 | if (MulType == MVT::i32) { | ||||||
4684 | DemotedVT = MVT::i16; | ||||||
4685 | } else { | ||||||
4686 | DemotedVT = MVT::i32; | ||||||
4687 | } | ||||||
4688 | |||||||
4689 | // Truncate the operands to the correct size. Note that these are just for | ||||||
4690 | // type consistency and will (likely) be eliminated in later phases. | ||||||
4691 | SDValue TruncLHS = | ||||||
4692 | DCI.DAG.getNode(ISD::TRUNCATE, DL, DemotedVT, LHS); | ||||||
4693 | SDValue TruncRHS = | ||||||
4694 | DCI.DAG.getNode(ISD::TRUNCATE, DL, DemotedVT, RHS); | ||||||
4695 | |||||||
4696 | unsigned Opc; | ||||||
4697 | if (Signed) { | ||||||
4698 | Opc = NVPTXISD::MUL_WIDE_SIGNED; | ||||||
4699 | } else { | ||||||
4700 | Opc = NVPTXISD::MUL_WIDE_UNSIGNED; | ||||||
4701 | } | ||||||
4702 | |||||||
4703 | return DCI.DAG.getNode(Opc, DL, MulType, TruncLHS, TruncRHS); | ||||||
4704 | } | ||||||
4705 | |||||||
4706 | /// PerformMULCombine - Runs PTX-specific DAG combine patterns on MUL nodes. | ||||||
4707 | static SDValue PerformMULCombine(SDNode *N, | ||||||
4708 | TargetLowering::DAGCombinerInfo &DCI, | ||||||
4709 | CodeGenOpt::Level OptLevel) { | ||||||
4710 | if (OptLevel > 0) { | ||||||
4711 | // Try mul.wide combining at OptLevel > 0 | ||||||
4712 | if (SDValue Ret = TryMULWIDECombine(N, DCI)) | ||||||
4713 | return Ret; | ||||||
4714 | } | ||||||
4715 | |||||||
4716 | return SDValue(); | ||||||
4717 | } | ||||||
4718 | |||||||
4719 | /// PerformSHLCombine - Runs PTX-specific DAG combine patterns on SHL nodes. | ||||||
4720 | static SDValue PerformSHLCombine(SDNode *N, | ||||||
4721 | TargetLowering::DAGCombinerInfo &DCI, | ||||||
4722 | CodeGenOpt::Level OptLevel) { | ||||||
4723 | if (OptLevel > 0) { | ||||||
4724 | // Try mul.wide combining at OptLevel > 0 | ||||||
4725 | if (SDValue Ret = TryMULWIDECombine(N, DCI)) | ||||||
4726 | return Ret; | ||||||
4727 | } | ||||||
4728 | |||||||
4729 | return SDValue(); | ||||||
4730 | } | ||||||
4731 | |||||||
4732 | static SDValue PerformSETCCCombine(SDNode *N, | ||||||
4733 | TargetLowering::DAGCombinerInfo &DCI) { | ||||||
4734 | EVT CCType = N->getValueType(0); | ||||||
4735 | SDValue A = N->getOperand(0); | ||||||
4736 | SDValue B = N->getOperand(1); | ||||||
4737 | |||||||
4738 | if (CCType != MVT::v2i1 || A.getValueType() != MVT::v2f16) | ||||||
4739 | return SDValue(); | ||||||
4740 | |||||||
4741 | SDLoc DL(N); | ||||||
4742 | // setp.f16x2 returns two scalar predicates, which we need to | ||||||
4743 | // convert back to v2i1. The returned result will be scalarized by | ||||||
4744 | // the legalizer, but the comparison will remain a single vector | ||||||
4745 | // instruction. | ||||||
4746 | SDValue CCNode = DCI.DAG.getNode(NVPTXISD::SETP_F16X2, DL, | ||||||
4747 | DCI.DAG.getVTList(MVT::i1, MVT::i1), | ||||||
4748 | {A, B, N->getOperand(2)}); | ||||||
4749 | return DCI.DAG.getNode(ISD::BUILD_VECTOR, DL, CCType, CCNode.getValue(0), | ||||||
4750 | CCNode.getValue(1)); | ||||||
4751 | } | ||||||
4752 | |||||||
4753 | SDValue NVPTXTargetLowering::PerformDAGCombine(SDNode *N, | ||||||
4754 | DAGCombinerInfo &DCI) const { | ||||||
4755 | CodeGenOpt::Level OptLevel = getTargetMachine().getOptLevel(); | ||||||
4756 | switch (N->getOpcode()) { | ||||||
4757 | default: break; | ||||||
4758 | case ISD::ADD: | ||||||
4759 | case ISD::FADD: | ||||||
4760 | return PerformADDCombine(N, DCI, STI, OptLevel); | ||||||
4761 | case ISD::MUL: | ||||||
4762 | return PerformMULCombine(N, DCI, OptLevel); | ||||||
4763 | case ISD::SHL: | ||||||
4764 | return PerformSHLCombine(N, DCI, OptLevel); | ||||||
4765 | case ISD::AND: | ||||||
4766 | return PerformANDCombine(N, DCI); | ||||||
4767 | case ISD::UREM: | ||||||
4768 | case ISD::SREM: | ||||||
4769 | return PerformREMCombine(N, DCI, OptLevel); | ||||||
4770 | case ISD::SETCC: | ||||||
4771 | return PerformSETCCCombine(N, DCI); | ||||||
4772 | } | ||||||
4773 | return SDValue(); | ||||||
4774 | } | ||||||
4775 | |||||||
4776 | /// ReplaceVectorLoad - Convert vector loads into multi-output scalar loads. | ||||||
4777 | static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG, | ||||||
4778 | SmallVectorImpl<SDValue> &Results) { | ||||||
4779 | EVT ResVT = N->getValueType(0); | ||||||
4780 | SDLoc DL(N); | ||||||
4781 | |||||||
4782 | assert(ResVT.isVector() && "Vector load must have vector type")((ResVT.isVector() && "Vector load must have vector type" ) ? static_cast<void> (0) : __assert_fail ("ResVT.isVector() && \"Vector load must have vector type\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 4782, __PRETTY_FUNCTION__)); | ||||||
4783 | |||||||
4784 | // We only handle "native" vector sizes for now, e.g. <4 x double> is not | ||||||
4785 | // legal. We can (and should) split that into 2 loads of <2 x double> here | ||||||
4786 | // but I'm leaving that as a TODO for now. | ||||||
4787 | assert(ResVT.isSimple() && "Can only handle simple types")((ResVT.isSimple() && "Can only handle simple types") ? static_cast<void> (0) : __assert_fail ("ResVT.isSimple() && \"Can only handle simple types\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 4787, __PRETTY_FUNCTION__)); | ||||||
4788 | switch (ResVT.getSimpleVT().SimpleTy) { | ||||||
4789 | default: | ||||||
4790 | return; | ||||||
4791 | case MVT::v2i8: | ||||||
4792 | case MVT::v2i16: | ||||||
4793 | case MVT::v2i32: | ||||||
4794 | case MVT::v2i64: | ||||||
4795 | case MVT::v2f16: | ||||||
4796 | case MVT::v2f32: | ||||||
4797 | case MVT::v2f64: | ||||||
4798 | case MVT::v4i8: | ||||||
4799 | case MVT::v4i16: | ||||||
4800 | case MVT::v4i32: | ||||||
4801 | case MVT::v4f16: | ||||||
4802 | case MVT::v4f32: | ||||||
4803 | case MVT::v8f16: // <4 x f16x2> | ||||||
4804 | // This is a "native" vector type | ||||||
4805 | break; | ||||||
4806 | } | ||||||
4807 | |||||||
4808 | LoadSDNode *LD = cast<LoadSDNode>(N); | ||||||
4809 | |||||||
4810 | unsigned Align = LD->getAlignment(); | ||||||
4811 | auto &TD = DAG.getDataLayout(); | ||||||
4812 | unsigned PrefAlign = | ||||||
4813 | TD.getPrefTypeAlignment(ResVT.getTypeForEVT(*DAG.getContext())); | ||||||
4814 | if (Align < PrefAlign) { | ||||||
4815 | // This load is not sufficiently aligned, so bail out and let this vector | ||||||
4816 | // load be scalarized. Note that we may still be able to emit smaller | ||||||
4817 | // vector loads. For example, if we are loading a <4 x float> with an | ||||||
4818 | // alignment of 8, this check will fail but the legalizer will try again | ||||||
4819 | // with 2 x <2 x float>, which will succeed with an alignment of 8. | ||||||
4820 | return; | ||||||
4821 | } | ||||||
4822 | |||||||
4823 | EVT EltVT = ResVT.getVectorElementType(); | ||||||
4824 | unsigned NumElts = ResVT.getVectorNumElements(); | ||||||
4825 | |||||||
4826 | // Since LoadV2 is a target node, we cannot rely on DAG type legalization. | ||||||
4827 | // Therefore, we must ensure the type is legal. For i1 and i8, we set the | ||||||
4828 | // loaded type to i16 and propagate the "real" type as the memory type. | ||||||
4829 | bool NeedTrunc = false; | ||||||
4830 | if (EltVT.getSizeInBits() < 16) { | ||||||
4831 | EltVT = MVT::i16; | ||||||
4832 | NeedTrunc = true; | ||||||
4833 | } | ||||||
4834 | |||||||
4835 | unsigned Opcode = 0; | ||||||
4836 | SDVTList LdResVTs; | ||||||
4837 | bool LoadF16x2 = false; | ||||||
4838 | |||||||
4839 | switch (NumElts) { | ||||||
4840 | default: | ||||||
4841 | return; | ||||||
4842 | case 2: | ||||||
4843 | Opcode = NVPTXISD::LoadV2; | ||||||
4844 | LdResVTs = DAG.getVTList(EltVT, EltVT, MVT::Other); | ||||||
4845 | break; | ||||||
4846 | case 4: { | ||||||
4847 | Opcode = NVPTXISD::LoadV4; | ||||||
4848 | EVT ListVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other }; | ||||||
4849 | LdResVTs = DAG.getVTList(ListVTs); | ||||||
4850 | break; | ||||||
4851 | } | ||||||
4852 | case 8: { | ||||||
4853 | // v8f16 is a special case. PTX doesn't have ld.v8.f16 | ||||||
4854 | // instruction. Instead, we split the vector into v2f16 chunks and | ||||||
4855 | // load them with ld.v4.b32. | ||||||
4856 | assert(EltVT == MVT::f16 && "Unsupported v8 vector type.")((EltVT == MVT::f16 && "Unsupported v8 vector type.") ? static_cast<void> (0) : __assert_fail ("EltVT == MVT::f16 && \"Unsupported v8 vector type.\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 4856, __PRETTY_FUNCTION__)); | ||||||
4857 | LoadF16x2 = true; | ||||||
4858 | Opcode = NVPTXISD::LoadV4; | ||||||
4859 | EVT ListVTs[] = {MVT::v2f16, MVT::v2f16, MVT::v2f16, MVT::v2f16, | ||||||
4860 | MVT::Other}; | ||||||
4861 | LdResVTs = DAG.getVTList(ListVTs); | ||||||
4862 | break; | ||||||
4863 | } | ||||||
4864 | } | ||||||
4865 | |||||||
4866 | // Copy regular operands | ||||||
4867 | SmallVector<SDValue, 8> OtherOps(N->op_begin(), N->op_end()); | ||||||
4868 | |||||||
4869 | // The select routine does not have access to the LoadSDNode instance, so | ||||||
4870 | // pass along the extension information | ||||||
4871 | OtherOps.push_back(DAG.getIntPtrConstant(LD->getExtensionType(), DL)); | ||||||
4872 | |||||||
4873 | SDValue NewLD = DAG.getMemIntrinsicNode(Opcode, DL, LdResVTs, OtherOps, | ||||||
4874 | LD->getMemoryVT(), | ||||||
4875 | LD->getMemOperand()); | ||||||
4876 | |||||||
4877 | SmallVector<SDValue, 8> ScalarRes; | ||||||
4878 | if (LoadF16x2) { | ||||||
4879 | // Split v2f16 subvectors back into individual elements. | ||||||
4880 | NumElts /= 2; | ||||||
4881 | for (unsigned i = 0; i < NumElts; ++i) { | ||||||
4882 | SDValue SubVector = NewLD.getValue(i); | ||||||
4883 | SDValue E0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, SubVector, | ||||||
4884 | DAG.getIntPtrConstant(0, DL)); | ||||||
4885 | SDValue E1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, SubVector, | ||||||
4886 | DAG.getIntPtrConstant(1, DL)); | ||||||
4887 | ScalarRes.push_back(E0); | ||||||
4888 | ScalarRes.push_back(E1); | ||||||
4889 | } | ||||||
4890 | } else { | ||||||
4891 | for (unsigned i = 0; i < NumElts; ++i) { | ||||||
4892 | SDValue Res = NewLD.getValue(i); | ||||||
4893 | if (NeedTrunc) | ||||||
4894 | Res = DAG.getNode(ISD::TRUNCATE, DL, ResVT.getVectorElementType(), Res); | ||||||
4895 | ScalarRes.push_back(Res); | ||||||
4896 | } | ||||||
4897 | } | ||||||
4898 | |||||||
4899 | SDValue LoadChain = NewLD.getValue(NumElts); | ||||||
4900 | |||||||
4901 | SDValue BuildVec = DAG.getBuildVector(ResVT, DL, ScalarRes); | ||||||
4902 | |||||||
4903 | Results.push_back(BuildVec); | ||||||
4904 | Results.push_back(LoadChain); | ||||||
4905 | } | ||||||
4906 | |||||||
4907 | static void ReplaceINTRINSIC_W_CHAIN(SDNode *N, SelectionDAG &DAG, | ||||||
4908 | SmallVectorImpl<SDValue> &Results) { | ||||||
4909 | SDValue Chain = N->getOperand(0); | ||||||
4910 | SDValue Intrin = N->getOperand(1); | ||||||
4911 | SDLoc DL(N); | ||||||
4912 | |||||||
4913 | // Get the intrinsic ID | ||||||
4914 | unsigned IntrinNo = cast<ConstantSDNode>(Intrin.getNode())->getZExtValue(); | ||||||
4915 | switch (IntrinNo) { | ||||||
4916 | default: | ||||||
4917 | return; | ||||||
4918 | case Intrinsic::nvvm_ldg_global_i: | ||||||
4919 | case Intrinsic::nvvm_ldg_global_f: | ||||||
4920 | case Intrinsic::nvvm_ldg_global_p: | ||||||
4921 | case Intrinsic::nvvm_ldu_global_i: | ||||||
4922 | case Intrinsic::nvvm_ldu_global_f: | ||||||
4923 | case Intrinsic::nvvm_ldu_global_p: { | ||||||
4924 | EVT ResVT = N->getValueType(0); | ||||||
4925 | |||||||
4926 | if (ResVT.isVector()) { | ||||||
4927 | // Vector LDG/LDU | ||||||
4928 | |||||||
4929 | unsigned NumElts = ResVT.getVectorNumElements(); | ||||||
4930 | EVT EltVT = ResVT.getVectorElementType(); | ||||||
4931 | |||||||
4932 | // Since LDU/LDG are target nodes, we cannot rely on DAG type | ||||||
4933 | // legalization. | ||||||
4934 | // Therefore, we must ensure the type is legal. For i1 and i8, we set the | ||||||
4935 | // loaded type to i16 and propagate the "real" type as the memory type. | ||||||
4936 | bool NeedTrunc = false; | ||||||
4937 | if (EltVT.getSizeInBits() < 16) { | ||||||
4938 | EltVT = MVT::i16; | ||||||
4939 | NeedTrunc = true; | ||||||
4940 | } | ||||||
4941 | |||||||
4942 | unsigned Opcode = 0; | ||||||
4943 | SDVTList LdResVTs; | ||||||
4944 | |||||||
4945 | switch (NumElts) { | ||||||
4946 | default: | ||||||
4947 | return; | ||||||
4948 | case 2: | ||||||
4949 | switch (IntrinNo) { | ||||||
4950 | default: | ||||||
4951 | return; | ||||||
4952 | case Intrinsic::nvvm_ldg_global_i: | ||||||
4953 | case Intrinsic::nvvm_ldg_global_f: | ||||||
4954 | case Intrinsic::nvvm_ldg_global_p: | ||||||
4955 | Opcode = NVPTXISD::LDGV2; | ||||||
4956 | break; | ||||||
4957 | case Intrinsic::nvvm_ldu_global_i: | ||||||
4958 | case Intrinsic::nvvm_ldu_global_f: | ||||||
4959 | case Intrinsic::nvvm_ldu_global_p: | ||||||
4960 | Opcode = NVPTXISD::LDUV2; | ||||||
4961 | break; | ||||||
4962 | } | ||||||
4963 | LdResVTs = DAG.getVTList(EltVT, EltVT, MVT::Other); | ||||||
4964 | break; | ||||||
4965 | case 4: { | ||||||
4966 | switch (IntrinNo) { | ||||||
4967 | default: | ||||||
4968 | return; | ||||||
4969 | case Intrinsic::nvvm_ldg_global_i: | ||||||
4970 | case Intrinsic::nvvm_ldg_global_f: | ||||||
4971 | case Intrinsic::nvvm_ldg_global_p: | ||||||
4972 | Opcode = NVPTXISD::LDGV4; | ||||||
4973 | break; | ||||||
4974 | case Intrinsic::nvvm_ldu_global_i: | ||||||
4975 | case Intrinsic::nvvm_ldu_global_f: | ||||||
4976 | case Intrinsic::nvvm_ldu_global_p: | ||||||
4977 | Opcode = NVPTXISD::LDUV4; | ||||||
4978 | break; | ||||||
4979 | } | ||||||
4980 | EVT ListVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other }; | ||||||
4981 | LdResVTs = DAG.getVTList(ListVTs); | ||||||
4982 | break; | ||||||
4983 | } | ||||||
4984 | } | ||||||
4985 | |||||||
4986 | SmallVector<SDValue, 8> OtherOps; | ||||||
4987 | |||||||
4988 | // Copy regular operands | ||||||
4989 | |||||||
4990 | OtherOps.push_back(Chain); // Chain | ||||||
4991 | // Skip operand 1 (intrinsic ID) | ||||||
4992 | // Others | ||||||
4993 | OtherOps.append(N->op_begin() + 2, N->op_end()); | ||||||
4994 | |||||||
4995 | MemIntrinsicSDNode *MemSD = cast<MemIntrinsicSDNode>(N); | ||||||
4996 | |||||||
4997 | SDValue NewLD = DAG.getMemIntrinsicNode(Opcode, DL, LdResVTs, OtherOps, | ||||||
4998 | MemSD->getMemoryVT(), | ||||||
4999 | MemSD->getMemOperand()); | ||||||
5000 | |||||||
5001 | SmallVector<SDValue, 4> ScalarRes; | ||||||
5002 | |||||||
5003 | for (unsigned i = 0; i < NumElts; ++i) { | ||||||
5004 | SDValue Res = NewLD.getValue(i); | ||||||
5005 | if (NeedTrunc) | ||||||
5006 | Res = | ||||||
5007 | DAG.getNode(ISD::TRUNCATE, DL, ResVT.getVectorElementType(), Res); | ||||||
5008 | ScalarRes.push_back(Res); | ||||||
5009 | } | ||||||
5010 | |||||||
5011 | SDValue LoadChain = NewLD.getValue(NumElts); | ||||||
5012 | |||||||
5013 | SDValue BuildVec = | ||||||
5014 | DAG.getBuildVector(ResVT, DL, ScalarRes); | ||||||
5015 | |||||||
5016 | Results.push_back(BuildVec); | ||||||
5017 | Results.push_back(LoadChain); | ||||||
5018 | } else { | ||||||
5019 | // i8 LDG/LDU | ||||||
5020 | assert(ResVT.isSimple() && ResVT.getSimpleVT().SimpleTy == MVT::i8 &&((ResVT.isSimple() && ResVT.getSimpleVT().SimpleTy == MVT::i8 && "Custom handling of non-i8 ldu/ldg?") ? static_cast <void> (0) : __assert_fail ("ResVT.isSimple() && ResVT.getSimpleVT().SimpleTy == MVT::i8 && \"Custom handling of non-i8 ldu/ldg?\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 5021, __PRETTY_FUNCTION__)) | ||||||
5021 | "Custom handling of non-i8 ldu/ldg?")((ResVT.isSimple() && ResVT.getSimpleVT().SimpleTy == MVT::i8 && "Custom handling of non-i8 ldu/ldg?") ? static_cast <void> (0) : __assert_fail ("ResVT.isSimple() && ResVT.getSimpleVT().SimpleTy == MVT::i8 && \"Custom handling of non-i8 ldu/ldg?\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp" , 5021, __PRETTY_FUNCTION__)); | ||||||
5022 | |||||||
5023 | // Just copy all operands as-is | ||||||
5024 | SmallVector<SDValue, 4> Ops(N->op_begin(), N->op_end()); | ||||||
5025 | |||||||
5026 | // Force output to i16 | ||||||
5027 | SDVTList LdResVTs = DAG.getVTList(MVT::i16, MVT::Other); | ||||||
5028 | |||||||
5029 | MemIntrinsicSDNode *MemSD = cast<MemIntrinsicSDNode>(N); | ||||||
5030 | |||||||
5031 | // We make sure the memory type is i8, which will be used during isel | ||||||
5032 | // to select the proper instruction. | ||||||
5033 | SDValue NewLD = | ||||||
5034 | DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, LdResVTs, Ops, | ||||||
5035 | MVT::i8, MemSD->getMemOperand()); | ||||||
5036 | |||||||
5037 | Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, | ||||||
5038 | NewLD.getValue(0))); | ||||||
5039 | Results.push_back(NewLD.getValue(1)); | ||||||
5040 | } | ||||||
5041 | } | ||||||
5042 | } | ||||||
5043 | } | ||||||
5044 | |||||||
5045 | void NVPTXTargetLowering::ReplaceNodeResults( | ||||||
5046 | SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const { | ||||||
5047 | switch (N->getOpcode()) { | ||||||
5048 | default: | ||||||
5049 | report_fatal_error("Unhandled custom legalization"); | ||||||
5050 | case ISD::LOAD: | ||||||
5051 | ReplaceLoadVector(N, DAG, Results); | ||||||
5052 | return; | ||||||
5053 | case ISD::INTRINSIC_W_CHAIN: | ||||||
5054 | ReplaceINTRINSIC_W_CHAIN(N, DAG, Results); | ||||||
5055 | return; | ||||||
5056 | } | ||||||
5057 | } | ||||||
5058 | |||||||
5059 | // Pin NVPTXTargetObjectFile's vtables to this file. | ||||||
5060 | NVPTXTargetObjectFile::~NVPTXTargetObjectFile() {} | ||||||
5061 | |||||||
5062 | MCSection *NVPTXTargetObjectFile::SelectSectionForGlobal( | ||||||
5063 | const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const { | ||||||
5064 | return getDataSection(); | ||||||
5065 | } |
1 | //===- CodeGen/ValueTypes.h - Low-Level Target independ. types --*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file defines the set of low-level target independent types which various |
10 | // values in the code generator are. This allows the target specific behavior |
11 | // of instructions to be described to target independent passes. |
12 | // |
13 | //===----------------------------------------------------------------------===// |
14 | |
15 | #ifndef LLVM_CODEGEN_VALUETYPES_H |
16 | #define LLVM_CODEGEN_VALUETYPES_H |
17 | |
18 | #include "llvm/Support/Compiler.h" |
19 | #include "llvm/Support/MachineValueType.h" |
20 | #include "llvm/Support/MathExtras.h" |
21 | #include "llvm/Support/TypeSize.h" |
22 | #include <cassert> |
23 | #include <cstdint> |
24 | #include <string> |
25 | |
26 | namespace llvm { |
27 | |
28 | class LLVMContext; |
29 | class Type; |
30 | |
31 | /// Extended Value Type. Capable of holding value types which are not native |
32 | /// for any processor (such as the i12345 type), as well as the types an MVT |
33 | /// can represent. |
34 | struct EVT { |
35 | private: |
36 | MVT V = MVT::INVALID_SIMPLE_VALUE_TYPE; |
37 | Type *LLVMTy = nullptr; |
38 | |
39 | public: |
40 | constexpr EVT() = default; |
41 | constexpr EVT(MVT::SimpleValueType SVT) : V(SVT) {} |
42 | constexpr EVT(MVT S) : V(S) {} |
43 | |
44 | bool operator==(EVT VT) const { |
45 | return !(*this != VT); |
46 | } |
47 | bool operator!=(EVT VT) const { |
48 | if (V.SimpleTy != VT.V.SimpleTy) |
49 | return true; |
50 | if (V.SimpleTy == MVT::INVALID_SIMPLE_VALUE_TYPE) |
51 | return LLVMTy != VT.LLVMTy; |
52 | return false; |
53 | } |
54 | |
55 | /// Returns the EVT that represents a floating-point type with the given |
56 | /// number of bits. There are two floating-point types with 128 bits - this |
57 | /// returns f128 rather than ppcf128. |
58 | static EVT getFloatingPointVT(unsigned BitWidth) { |
59 | return MVT::getFloatingPointVT(BitWidth); |
60 | } |
61 | |
62 | /// Returns the EVT that represents an integer with the given number of |
63 | /// bits. |
64 | static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth) { |
65 | MVT M = MVT::getIntegerVT(BitWidth); |
66 | if (M.SimpleTy != MVT::INVALID_SIMPLE_VALUE_TYPE) |
67 | return M; |
68 | return getExtendedIntegerVT(Context, BitWidth); |
69 | } |
70 | |
71 | /// Returns the EVT that represents a vector NumElements in length, where |
72 | /// each element is of type VT. |
73 | static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, |
74 | bool IsScalable = false) { |
75 | MVT M = MVT::getVectorVT(VT.V, NumElements, IsScalable); |
76 | if (M.SimpleTy != MVT::INVALID_SIMPLE_VALUE_TYPE) |
77 | return M; |
78 | |
79 | assert(!IsScalable && "We don't support extended scalable types yet")((!IsScalable && "We don't support extended scalable types yet" ) ? static_cast<void> (0) : __assert_fail ("!IsScalable && \"We don't support extended scalable types yet\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/CodeGen/ValueTypes.h" , 79, __PRETTY_FUNCTION__)); |
80 | return getExtendedVectorVT(Context, VT, NumElements); |
81 | } |
82 | |
83 | /// Returns the EVT that represents a vector EC.Min elements in length, |
84 | /// where each element is of type VT. |
85 | static EVT getVectorVT(LLVMContext &Context, EVT VT, ElementCount EC) { |
86 | MVT M = MVT::getVectorVT(VT.V, EC); |
87 | if (M.SimpleTy != MVT::INVALID_SIMPLE_VALUE_TYPE) |
88 | return M; |
89 | assert (!EC.Scalable && "We don't support extended scalable types yet")((!EC.Scalable && "We don't support extended scalable types yet" ) ? static_cast<void> (0) : __assert_fail ("!EC.Scalable && \"We don't support extended scalable types yet\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/CodeGen/ValueTypes.h" , 89, __PRETTY_FUNCTION__)); |
90 | return getExtendedVectorVT(Context, VT, EC.Min); |
91 | } |
92 | |
93 | /// Return a vector with the same number of elements as this vector, but |
94 | /// with the element type converted to an integer type with the same |
95 | /// bitwidth. |
96 | EVT changeVectorElementTypeToInteger() const { |
97 | if (!isSimple()) { |
98 | assert (!isScalableVector() &&((!isScalableVector() && "We don't support extended scalable types yet" ) ? static_cast<void> (0) : __assert_fail ("!isScalableVector() && \"We don't support extended scalable types yet\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/CodeGen/ValueTypes.h" , 99, __PRETTY_FUNCTION__)) |
99 | "We don't support extended scalable types yet")((!isScalableVector() && "We don't support extended scalable types yet" ) ? static_cast<void> (0) : __assert_fail ("!isScalableVector() && \"We don't support extended scalable types yet\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/CodeGen/ValueTypes.h" , 99, __PRETTY_FUNCTION__)); |
100 | return changeExtendedVectorElementTypeToInteger(); |
101 | } |
102 | MVT EltTy = getSimpleVT().getVectorElementType(); |
103 | unsigned BitWidth = EltTy.getSizeInBits(); |
104 | MVT IntTy = MVT::getIntegerVT(BitWidth); |
105 | MVT VecTy = MVT::getVectorVT(IntTy, getVectorNumElements(), |
106 | isScalableVector()); |
107 | assert(VecTy.SimpleTy != MVT::INVALID_SIMPLE_VALUE_TYPE &&((VecTy.SimpleTy != MVT::INVALID_SIMPLE_VALUE_TYPE && "Simple vector VT not representable by simple integer vector VT!" ) ? static_cast<void> (0) : __assert_fail ("VecTy.SimpleTy != MVT::INVALID_SIMPLE_VALUE_TYPE && \"Simple vector VT not representable by simple integer vector VT!\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/CodeGen/ValueTypes.h" , 108, __PRETTY_FUNCTION__)) |
108 | "Simple vector VT not representable by simple integer vector VT!")((VecTy.SimpleTy != MVT::INVALID_SIMPLE_VALUE_TYPE && "Simple vector VT not representable by simple integer vector VT!" ) ? static_cast<void> (0) : __assert_fail ("VecTy.SimpleTy != MVT::INVALID_SIMPLE_VALUE_TYPE && \"Simple vector VT not representable by simple integer vector VT!\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/CodeGen/ValueTypes.h" , 108, __PRETTY_FUNCTION__)); |
109 | return VecTy; |
110 | } |
111 | |
112 | /// Return the type converted to an equivalently sized integer or vector |
113 | /// with integer element type. Similar to changeVectorElementTypeToInteger, |
114 | /// but also handles scalars. |
115 | EVT changeTypeToInteger() { |
116 | if (isVector()) |
117 | return changeVectorElementTypeToInteger(); |
118 | |
119 | if (isSimple()) |
120 | return MVT::getIntegerVT(getSizeInBits()); |
121 | |
122 | return changeExtendedTypeToInteger(); |
123 | } |
124 | |
125 | /// Test if the given EVT is simple (as opposed to being extended). |
126 | bool isSimple() const { |
127 | return V.SimpleTy != MVT::INVALID_SIMPLE_VALUE_TYPE; |
128 | } |
129 | |
130 | /// Test if the given EVT is extended (as opposed to being simple). |
131 | bool isExtended() const { |
132 | return !isSimple(); |
133 | } |
134 | |
135 | /// Return true if this is a FP or a vector FP type. |
136 | bool isFloatingPoint() const { |
137 | return isSimple() ? V.isFloatingPoint() : isExtendedFloatingPoint(); |
138 | } |
139 | |
140 | /// Return true if this is an integer or a vector integer type. |
141 | bool isInteger() const { |
142 | return isSimple() ? V.isInteger() : isExtendedInteger(); |
143 | } |
144 | |
145 | /// Return true if this is an integer, but not a vector. |
146 | bool isScalarInteger() const { |
147 | return isSimple() ? V.isScalarInteger() : isExtendedScalarInteger(); |
148 | } |
149 | |
150 | /// Return true if this is a vector value type. |
151 | bool isVector() const { |
152 | return isSimple() ? V.isVector() : isExtendedVector(); |
153 | } |
154 | |
155 | /// Return true if this is a vector type where the runtime |
156 | /// length is machine dependent |
157 | bool isScalableVector() const { |
158 | // FIXME: We don't support extended scalable types yet, because the |
159 | // matching IR type doesn't exist. Once it has been added, this can |
160 | // be changed to call isExtendedScalableVector. |
161 | if (!isSimple()) |
162 | return false; |
163 | return V.isScalableVector(); |
164 | } |
165 | |
166 | /// Return true if this is a 16-bit vector type. |
167 | bool is16BitVector() const { |
168 | return isSimple() ? V.is16BitVector() : isExtended16BitVector(); |
169 | } |
170 | |
171 | /// Return true if this is a 32-bit vector type. |
172 | bool is32BitVector() const { |
173 | return isSimple() ? V.is32BitVector() : isExtended32BitVector(); |
174 | } |
175 | |
176 | /// Return true if this is a 64-bit vector type. |
177 | bool is64BitVector() const { |
178 | return isSimple() ? V.is64BitVector() : isExtended64BitVector(); |
179 | } |
180 | |
181 | /// Return true if this is a 128-bit vector type. |
182 | bool is128BitVector() const { |
183 | return isSimple() ? V.is128BitVector() : isExtended128BitVector(); |
184 | } |
185 | |
186 | /// Return true if this is a 256-bit vector type. |
187 | bool is256BitVector() const { |
188 | return isSimple() ? V.is256BitVector() : isExtended256BitVector(); |
189 | } |
190 | |
191 | /// Return true if this is a 512-bit vector type. |
192 | bool is512BitVector() const { |
193 | return isSimple() ? V.is512BitVector() : isExtended512BitVector(); |
194 | } |
195 | |
196 | /// Return true if this is a 1024-bit vector type. |
197 | bool is1024BitVector() const { |
198 | return isSimple() ? V.is1024BitVector() : isExtended1024BitVector(); |
199 | } |
200 | |
201 | /// Return true if this is a 2048-bit vector type. |
202 | bool is2048BitVector() const { |
203 | return isSimple() ? V.is2048BitVector() : isExtended2048BitVector(); |
204 | } |
205 | |
206 | /// Return true if this is an overloaded type for TableGen. |
207 | bool isOverloaded() const { |
208 | return (V==MVT::iAny || V==MVT::fAny || V==MVT::vAny || V==MVT::iPTRAny); |
209 | } |
210 | |
211 | /// Return true if the bit size is a multiple of 8. |
212 | bool isByteSized() const { |
213 | return getSizeInBits().isByteSized(); |
214 | } |
215 | |
216 | /// Return true if the size is a power-of-two number of bytes. |
217 | bool isRound() const { |
218 | if (isScalableVector()) |
219 | return false; |
220 | unsigned BitSize = getSizeInBits(); |
221 | return BitSize >= 8 && !(BitSize & (BitSize - 1)); |
222 | } |
223 | |
224 | /// Return true if this has the same number of bits as VT. |
225 | bool bitsEq(EVT VT) const { |
226 | if (EVT::operator==(VT)) return true; |
227 | return getSizeInBits() == VT.getSizeInBits(); |
228 | } |
229 | |
230 | /// Return true if this has more bits than VT. |
231 | bool bitsGT(EVT VT) const { |
232 | if (EVT::operator==(VT)) return false; |
233 | return getSizeInBits() > VT.getSizeInBits(); |
234 | } |
235 | |
236 | /// Return true if this has no less bits than VT. |
237 | bool bitsGE(EVT VT) const { |
238 | if (EVT::operator==(VT)) return true; |
239 | return getSizeInBits() >= VT.getSizeInBits(); |
240 | } |
241 | |
242 | /// Return true if this has less bits than VT. |
243 | bool bitsLT(EVT VT) const { |
244 | if (EVT::operator==(VT)) return false; |
245 | return getSizeInBits() < VT.getSizeInBits(); |
246 | } |
247 | |
248 | /// Return true if this has no more bits than VT. |
249 | bool bitsLE(EVT VT) const { |
250 | if (EVT::operator==(VT)) return true; |
251 | return getSizeInBits() <= VT.getSizeInBits(); |
252 | } |
253 | |
254 | /// Return the SimpleValueType held in the specified simple EVT. |
255 | MVT getSimpleVT() const { |
256 | assert(isSimple() && "Expected a SimpleValueType!")((isSimple() && "Expected a SimpleValueType!") ? static_cast <void> (0) : __assert_fail ("isSimple() && \"Expected a SimpleValueType!\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/CodeGen/ValueTypes.h" , 256, __PRETTY_FUNCTION__)); |
257 | return V; |
258 | } |
259 | |
260 | /// If this is a vector type, return the element type, otherwise return |
261 | /// this. |
262 | EVT getScalarType() const { |
263 | return isVector() ? getVectorElementType() : *this; |
264 | } |
265 | |
266 | /// Given a vector type, return the type of each element. |
267 | EVT getVectorElementType() const { |
268 | assert(isVector() && "Invalid vector type!")((isVector() && "Invalid vector type!") ? static_cast <void> (0) : __assert_fail ("isVector() && \"Invalid vector type!\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/CodeGen/ValueTypes.h" , 268, __PRETTY_FUNCTION__)); |
269 | if (isSimple()) |
270 | return V.getVectorElementType(); |
271 | return getExtendedVectorElementType(); |
272 | } |
273 | |
274 | /// Given a vector type, return the number of elements it contains. |
275 | unsigned getVectorNumElements() const { |
276 | assert(isVector() && "Invalid vector type!")((isVector() && "Invalid vector type!") ? static_cast <void> (0) : __assert_fail ("isVector() && \"Invalid vector type!\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/CodeGen/ValueTypes.h" , 276, __PRETTY_FUNCTION__)); |
277 | if (isSimple()) |
278 | return V.getVectorNumElements(); |
279 | return getExtendedVectorNumElements(); |
280 | } |
281 | |
282 | // Given a (possibly scalable) vector type, return the ElementCount |
283 | ElementCount getVectorElementCount() const { |
284 | assert((isVector()) && "Invalid vector type!")(((isVector()) && "Invalid vector type!") ? static_cast <void> (0) : __assert_fail ("(isVector()) && \"Invalid vector type!\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/CodeGen/ValueTypes.h" , 284, __PRETTY_FUNCTION__)); |
285 | if (isSimple()) |
286 | return V.getVectorElementCount(); |
287 | |
288 | assert(!isScalableVector() &&((!isScalableVector() && "We don't support extended scalable types yet" ) ? static_cast<void> (0) : __assert_fail ("!isScalableVector() && \"We don't support extended scalable types yet\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/CodeGen/ValueTypes.h" , 289, __PRETTY_FUNCTION__)) |
289 | "We don't support extended scalable types yet")((!isScalableVector() && "We don't support extended scalable types yet" ) ? static_cast<void> (0) : __assert_fail ("!isScalableVector() && \"We don't support extended scalable types yet\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/CodeGen/ValueTypes.h" , 289, __PRETTY_FUNCTION__)); |
290 | return {getExtendedVectorNumElements(), false}; |
291 | } |
292 | |
293 | /// Return the size of the specified value type in bits. |
294 | /// |
295 | /// If the value type is a scalable vector type, the scalable property will |
296 | /// be set and the runtime size will be a positive integer multiple of the |
297 | /// base size. |
298 | TypeSize getSizeInBits() const { |
299 | if (isSimple()) |
300 | return V.getSizeInBits(); |
301 | return getExtendedSizeInBits(); |
302 | } |
303 | |
304 | TypeSize getScalarSizeInBits() const { |
305 | return getScalarType().getSizeInBits(); |
306 | } |
307 | |
308 | /// Return the number of bytes overwritten by a store of the specified value |
309 | /// type. |
310 | /// |
311 | /// If the value type is a scalable vector type, the scalable property will |
312 | /// be set and the runtime size will be a positive integer multiple of the |
313 | /// base size. |
314 | TypeSize getStoreSize() const { |
315 | TypeSize BaseSize = getSizeInBits(); |
316 | return {(BaseSize.getKnownMinSize() + 7) / 8, BaseSize.isScalable()}; |
317 | } |
318 | |
319 | /// Return the number of bits overwritten by a store of the specified value |
320 | /// type. |
321 | /// |
322 | /// If the value type is a scalable vector type, the scalable property will |
323 | /// be set and the runtime size will be a positive integer multiple of the |
324 | /// base size. |
325 | TypeSize getStoreSizeInBits() const { |
326 | return getStoreSize() * 8; |
327 | } |
328 | |
329 | /// Rounds the bit-width of the given integer EVT up to the nearest power of |
330 | /// two (and at least to eight), and returns the integer EVT with that |
331 | /// number of bits. |
332 | EVT getRoundIntegerType(LLVMContext &Context) const { |
333 | assert(isInteger() && !isVector() && "Invalid integer type!")((isInteger() && !isVector() && "Invalid integer type!" ) ? static_cast<void> (0) : __assert_fail ("isInteger() && !isVector() && \"Invalid integer type!\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/CodeGen/ValueTypes.h" , 333, __PRETTY_FUNCTION__)); |
334 | unsigned BitWidth = getSizeInBits(); |
335 | if (BitWidth <= 8) |
336 | return EVT(MVT::i8); |
337 | return getIntegerVT(Context, 1 << Log2_32_Ceil(BitWidth)); |
338 | } |
339 | |
340 | /// Finds the smallest simple value type that is greater than or equal to |
341 | /// half the width of this EVT. If no simple value type can be found, an |
342 | /// extended integer value type of half the size (rounded up) is returned. |
343 | EVT getHalfSizedIntegerVT(LLVMContext &Context) const { |
344 | assert(isInteger() && !isVector() && "Invalid integer type!")((isInteger() && !isVector() && "Invalid integer type!" ) ? static_cast<void> (0) : __assert_fail ("isInteger() && !isVector() && \"Invalid integer type!\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/CodeGen/ValueTypes.h" , 344, __PRETTY_FUNCTION__)); |
345 | unsigned EVTSize = getSizeInBits(); |
346 | for (unsigned IntVT = MVT::FIRST_INTEGER_VALUETYPE; |
347 | IntVT <= MVT::LAST_INTEGER_VALUETYPE; ++IntVT) { |
348 | EVT HalfVT = EVT((MVT::SimpleValueType)IntVT); |
349 | if (HalfVT.getSizeInBits() * 2 >= EVTSize) |
350 | return HalfVT; |
351 | } |
352 | return getIntegerVT(Context, (EVTSize + 1) / 2); |
353 | } |
354 | |
355 | /// Return a VT for an integer vector type with the size of the |
356 | /// elements doubled. The typed returned may be an extended type. |
357 | EVT widenIntegerVectorElementType(LLVMContext &Context) const { |
358 | EVT EltVT = getVectorElementType(); |
359 | EltVT = EVT::getIntegerVT(Context, 2 * EltVT.getSizeInBits()); |
360 | return EVT::getVectorVT(Context, EltVT, getVectorElementCount()); |
361 | } |
362 | |
363 | // Return a VT for a vector type with the same element type but |
364 | // half the number of elements. The type returned may be an |
365 | // extended type. |
366 | EVT getHalfNumVectorElementsVT(LLVMContext &Context) const { |
367 | EVT EltVT = getVectorElementType(); |
368 | auto EltCnt = getVectorElementCount(); |
369 | assert(!(EltCnt.Min & 1) && "Splitting vector, but not in half!")((!(EltCnt.Min & 1) && "Splitting vector, but not in half!" ) ? static_cast<void> (0) : __assert_fail ("!(EltCnt.Min & 1) && \"Splitting vector, but not in half!\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/CodeGen/ValueTypes.h" , 369, __PRETTY_FUNCTION__)); |
370 | return EVT::getVectorVT(Context, EltVT, EltCnt / 2); |
371 | } |
372 | |
373 | /// Returns true if the given vector is a power of 2. |
374 | bool isPow2VectorType() const { |
375 | unsigned NElts = getVectorNumElements(); |
376 | return !(NElts & (NElts - 1)); |
377 | } |
378 | |
379 | /// Widens the length of the given vector EVT up to the nearest power of 2 |
380 | /// and returns that type. |
381 | EVT getPow2VectorType(LLVMContext &Context) const { |
382 | if (!isPow2VectorType()) { |
383 | unsigned NElts = getVectorNumElements(); |
384 | unsigned Pow2NElts = 1 << Log2_32_Ceil(NElts); |
385 | return EVT::getVectorVT(Context, getVectorElementType(), Pow2NElts, |
386 | isScalableVector()); |
387 | } |
388 | else { |
389 | return *this; |
390 | } |
391 | } |
392 | |
393 | /// This function returns value type as a string, e.g. "i32". |
394 | std::string getEVTString() const; |
395 | |
396 | /// This method returns an LLVM type corresponding to the specified EVT. |
397 | /// For integer types, this returns an unsigned type. Note that this will |
398 | /// abort for types that cannot be represented. |
399 | Type *getTypeForEVT(LLVMContext &Context) const; |
400 | |
401 | /// Return the value type corresponding to the specified type. |
402 | /// This returns all pointers as iPTR. If HandleUnknown is true, unknown |
403 | /// types are returned as Other, otherwise they are invalid. |
404 | static EVT getEVT(Type *Ty, bool HandleUnknown = false); |
405 | |
406 | intptr_t getRawBits() const { |
407 | if (isSimple()) |
408 | return V.SimpleTy; |
409 | else |
410 | return (intptr_t)(LLVMTy); |
411 | } |
412 | |
413 | /// A meaningless but well-behaved order, useful for constructing |
414 | /// containers. |
415 | struct compareRawBits { |
416 | bool operator()(EVT L, EVT R) const { |
417 | if (L.V.SimpleTy == R.V.SimpleTy) |
418 | return L.LLVMTy < R.LLVMTy; |
419 | else |
420 | return L.V.SimpleTy < R.V.SimpleTy; |
421 | } |
422 | }; |
423 | |
424 | private: |
425 | // Methods for handling the Extended-type case in functions above. |
426 | // These are all out-of-line to prevent users of this header file |
427 | // from having a dependency on Type.h. |
428 | EVT changeExtendedTypeToInteger() const; |
429 | EVT changeExtendedVectorElementTypeToInteger() const; |
430 | static EVT getExtendedIntegerVT(LLVMContext &C, unsigned BitWidth); |
431 | static EVT getExtendedVectorVT(LLVMContext &C, EVT VT, |
432 | unsigned NumElements); |
433 | bool isExtendedFloatingPoint() const LLVM_READONLY__attribute__((__pure__)); |
434 | bool isExtendedInteger() const LLVM_READONLY__attribute__((__pure__)); |
435 | bool isExtendedScalarInteger() const LLVM_READONLY__attribute__((__pure__)); |
436 | bool isExtendedVector() const LLVM_READONLY__attribute__((__pure__)); |
437 | bool isExtended16BitVector() const LLVM_READONLY__attribute__((__pure__)); |
438 | bool isExtended32BitVector() const LLVM_READONLY__attribute__((__pure__)); |
439 | bool isExtended64BitVector() const LLVM_READONLY__attribute__((__pure__)); |
440 | bool isExtended128BitVector() const LLVM_READONLY__attribute__((__pure__)); |
441 | bool isExtended256BitVector() const LLVM_READONLY__attribute__((__pure__)); |
442 | bool isExtended512BitVector() const LLVM_READONLY__attribute__((__pure__)); |
443 | bool isExtended1024BitVector() const LLVM_READONLY__attribute__((__pure__)); |
444 | bool isExtended2048BitVector() const LLVM_READONLY__attribute__((__pure__)); |
445 | EVT getExtendedVectorElementType() const; |
446 | unsigned getExtendedVectorNumElements() const LLVM_READONLY__attribute__((__pure__)); |
447 | TypeSize getExtendedSizeInBits() const LLVM_READONLY__attribute__((__pure__)); |
448 | }; |
449 | |
450 | } // end namespace llvm |
451 | |
452 | #endif // LLVM_CODEGEN_VALUETYPES_H |
1 | //===- TypeSize.h - Wrapper around type sizes -------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file provides a struct that can be used to query the size of IR types |
10 | // which may be scalable vectors. It provides convenience operators so that |
11 | // it can be used in much the same way as a single scalar value. |
12 | // |
13 | //===----------------------------------------------------------------------===// |
14 | |
15 | #ifndef LLVM_SUPPORT_TYPESIZE_H |
16 | #define LLVM_SUPPORT_TYPESIZE_H |
17 | |
18 | #include <cstdint> |
19 | #include <cassert> |
20 | |
21 | namespace llvm { |
22 | |
23 | template <typename T> struct DenseMapInfo; |
24 | |
/// Number of elements in a vector type: a minimum element count plus a flag
/// telling whether the real count is a runtime multiple of that minimum.
class ElementCount {
public:
  unsigned Min;  // Minimum number of vector elements.
  bool Scalable; // If true, NumElements is a multiple of 'Min' determined
                 // at runtime rather than compile time.

  ElementCount(unsigned Min, bool Scalable)
  : Min(Min), Scalable(Scalable) {}

  /// Multiply the minimum element count by \p RHS, keeping the scalable
  /// flag. Const-qualified so it can be used on const objects and
  /// temporaries bound to const references.
  ElementCount operator*(unsigned RHS) const {
    return { Min * RHS, Scalable };
  }

  /// Divide the minimum element count by \p RHS (integer division), keeping
  /// the scalable flag. \p RHS must be non-zero.
  ElementCount operator/(unsigned RHS) const {
    assert(RHS != 0 && "Dividing an ElementCount by zero");
    return { Min / RHS, Scalable };
  }

  /// Two counts are equal only when both the minimum and the scalable flag
  /// match.
  bool operator==(const ElementCount& RHS) const {
    return Min == RHS.Min && Scalable == RHS.Scalable;
  }
  bool operator!=(const ElementCount& RHS) const {
    return !(*this == RHS);
  }
};
48 | |
// Represents the size of a type. For a fixed-size type this is the exact
// size; for a scalable vector it is the known minimum size.
class TypeSize {
  uint64_t MinSize; // The known minimum size.
  bool IsScalable;  // If true, then the runtime size is an integer multiple
                    // of MinSize.

public:
  constexpr TypeSize(uint64_t MinSize, bool Scalable)
      : MinSize(MinSize), IsScalable(Scalable) {}

  // Build a size that is exact (not scalable).
  static constexpr TypeSize Fixed(uint64_t Size) {
    return TypeSize(Size, /*IsScalable=*/false);
  }

  // Build a scalable size with the given known minimum.
  static constexpr TypeSize Scalable(uint64_t MinSize) {
    return TypeSize(MinSize, /*IsScalable=*/true);
  }

  // A scalable size never equals a fixed size, even with the same minimum:
  // the two are not guaranteed to be the same size at runtime.
  friend bool operator==(const TypeSize &LHS, const TypeSize &RHS) {
    if (LHS.IsScalable != RHS.IsScalable)
      return false;
    return LHS.MinSize == RHS.MinSize;
  }

  friend bool operator!=(const TypeSize &LHS, const TypeSize &RHS) {
    return !(LHS == RHS);
  }

  // Ordering between a scalable and a fixed size usually cannot be decided
  // at compile time, so such comparisons are rejected by the assertion.
  //
  // e.g. <vscale x 2 x i16> could be bigger than <4 x i32> with a runtime
  // vscale >= 5, equal sized with a vscale of 4, and smaller with
  // a vscale <= 3.
  //
  // When the scalable flags agree, the minimum sizes are compared.
  friend bool operator<(const TypeSize &LHS, const TypeSize &RHS) {
    assert(LHS.IsScalable == RHS.IsScalable &&
           "Ordering comparison of scalable and fixed types");

    return LHS.MinSize < RHS.MinSize;
  }

  // The remaining orderings are all expressed through operator<.
  friend bool operator>(const TypeSize &LHS, const TypeSize &RHS) {
    return RHS < LHS;
  }

  friend bool operator<=(const TypeSize &LHS, const TypeSize &RHS) {
    return !(RHS < LHS);
  }

  friend bool operator>=(const TypeSize &LHS, const TypeSize& RHS) {
    return !(LHS < RHS);
  }

  // Convenience operators to obtain relative sizes independently of
  // the scalable flag.
  TypeSize operator*(unsigned RHS) const {
    return TypeSize(MinSize * RHS, IsScalable);
  }

  friend TypeSize operator*(const unsigned LHS, const TypeSize &RHS) {
    return TypeSize(LHS * RHS.MinSize, RHS.IsScalable);
  }

  TypeSize operator/(unsigned RHS) const {
    return TypeSize(MinSize / RHS, IsScalable);
  }

  // Yield the minimum size under the assumption that the size is exact.
  // Use in places where a scalable size doesn't make sense (e.g. non-vector
  // types, or vectors in backends which don't support scalable vectors).
  uint64_t getFixedSize() const {
    assert(!IsScalable && "Request for a fixed size on a scalable object");
    return MinSize;
  }

  // Yield the known minimum size. Use in places where the scalable property
  // doesn't matter (e.g. determining alignment) or together with
  // isScalable().
  uint64_t getKnownMinSize() const {
    return MinSize;
  }

  // Tell whether the size is scalable.
  bool isScalable() const {
    return IsScalable;
  }

  // Tell whether the number of bits is a multiple of an 8-bit byte.
  bool isByteSized() const {
    return MinSize % 8 == 0;
  }

  // Casts to a uint64_t if this is a fixed-width size.
  //
  // NOTE: This interface is obsolete and will be removed in a future version
  // of LLVM in favour of calling getFixedSize() directly.
  operator uint64_t() const {
    return getFixedSize();
  }

  // Additional convenience operators needed to avoid ambiguous parses.
  // TODO: Make uint64_t the default operator?
  TypeSize operator*(uint64_t RHS) const {
    return TypeSize(MinSize * RHS, IsScalable);
  }

  TypeSize operator*(int RHS) const {
    return TypeSize(MinSize * RHS, IsScalable);
  }

  TypeSize operator*(int64_t RHS) const {
    return TypeSize(MinSize * RHS, IsScalable);
  }

  friend TypeSize operator*(const uint64_t LHS, const TypeSize &RHS) {
    return TypeSize(LHS * RHS.MinSize, RHS.IsScalable);
  }

  friend TypeSize operator*(const int LHS, const TypeSize &RHS) {
    return TypeSize(LHS * RHS.MinSize, RHS.IsScalable);
  }

  friend TypeSize operator*(const int64_t LHS, const TypeSize &RHS) {
    return TypeSize(LHS * RHS.MinSize, RHS.IsScalable);
  }

  TypeSize operator/(uint64_t RHS) const {
    return TypeSize(MinSize / RHS, IsScalable);
  }

  TypeSize operator/(int RHS) const {
    return TypeSize(MinSize / RHS, IsScalable);
  }

  TypeSize operator/(int64_t RHS) const {
    return TypeSize(MinSize / RHS, IsScalable);
  }
};
193 | |
194 | /// Returns a TypeSize with a known minimum size that is the next integer |
195 | /// (mod 2**64) that is greater than or equal to \p Value and is a multiple |
196 | /// of \p Align. \p Align must be non-zero. |
197 | /// |
198 | /// Similar to the alignTo functions in MathExtras.h |
199 | inline TypeSize alignTo(TypeSize Size, uint64_t Align) { |
200 | assert(Align != 0u && "Align must be non-zero")((Align != 0u && "Align must be non-zero") ? static_cast <void> (0) : __assert_fail ("Align != 0u && \"Align must be non-zero\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/Support/TypeSize.h" , 200, __PRETTY_FUNCTION__)); |
201 | return {(Size.getKnownMinSize() + Align - 1) / Align * Align, |
202 | Size.isScalable()}; |
203 | } |
204 | |
205 | template <> struct DenseMapInfo<ElementCount> { |
206 | static inline ElementCount getEmptyKey() { return {~0U, true}; } |
207 | static inline ElementCount getTombstoneKey() { return {~0U - 1, false}; } |
208 | static unsigned getHashValue(const ElementCount& EltCnt) { |
209 | if (EltCnt.Scalable) |
210 | return (EltCnt.Min * 37U) - 1U; |
211 | |
212 | return EltCnt.Min * 37U; |
213 | } |
214 | |
215 | static bool isEqual(const ElementCount& LHS, const ElementCount& RHS) { |
216 | return LHS == RHS; |
217 | } |
218 | }; |
219 | |
220 | } // end namespace llvm |
221 | |
222 | #endif // LLVM_SUPPORT_TYPESIZE_H |