File: | llvm/lib/Target/X86/X86ISelLowering.cpp |
Warning: | line 33158, column 45: The result of the right shift is undefined due to shifting by '64', which is greater or equal to the width of type 'size_t' |
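The diagnostic above concerns a shift amount that can reach 64, the full bit width of size_t on a 64-bit host; in C++, shifting by an amount greater than or equal to the type's width is undefined behavior rather than a well-defined zero. Below is a minimal, hypothetical sketch of the usual guard — it is not the code at line 33158 (which lies outside the excerpt shown here), and safeLshr is an assumed name used only for illustration:

    #include <cstddef>

    // Hypothetical helper: a logical right shift that treats an over-wide
    // shift amount as producing 0 instead of invoking undefined behavior.
    static inline size_t safeLshr(size_t Mask, unsigned Amt) {
      constexpr unsigned BitWidth = sizeof(size_t) * 8; // 64 on typical 64-bit hosts
      if (Amt >= BitWidth)
        return 0;         // Mask >> 64 would be undefined; return the intended result.
      return Mask >> Amt; // Well-defined: Amt is strictly less than BitWidth.
    }

A caller whose shift count can legitimately equal the bit width (for example, one derived from a popcount or element count) would route the shift through such a guard, or clamp the count before shifting.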
1 | //===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file defines the interfaces that X86 uses to lower LLVM code into a |
10 | // selection DAG. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "X86ISelLowering.h" |
15 | #include "Utils/X86ShuffleDecode.h" |
16 | #include "X86CallingConv.h" |
17 | #include "X86FrameLowering.h" |
18 | #include "X86InstrBuilder.h" |
19 | #include "X86IntrinsicsInfo.h" |
20 | #include "X86MachineFunctionInfo.h" |
21 | #include "X86TargetMachine.h" |
22 | #include "X86TargetObjectFile.h" |
23 | #include "llvm/ADT/SmallBitVector.h" |
24 | #include "llvm/ADT/SmallSet.h" |
25 | #include "llvm/ADT/Statistic.h" |
26 | #include "llvm/ADT/StringExtras.h" |
27 | #include "llvm/ADT/StringSwitch.h" |
28 | #include "llvm/Analysis/EHPersonalities.h" |
29 | #include "llvm/CodeGen/IntrinsicLowering.h" |
30 | #include "llvm/CodeGen/MachineFrameInfo.h" |
31 | #include "llvm/CodeGen/MachineFunction.h" |
32 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
33 | #include "llvm/CodeGen/MachineJumpTableInfo.h" |
34 | #include "llvm/CodeGen/MachineModuleInfo.h" |
35 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
36 | #include "llvm/CodeGen/TargetLowering.h" |
37 | #include "llvm/CodeGen/WinEHFuncInfo.h" |
38 | #include "llvm/IR/CallSite.h" |
39 | #include "llvm/IR/CallingConv.h" |
40 | #include "llvm/IR/Constants.h" |
41 | #include "llvm/IR/DerivedTypes.h" |
42 | #include "llvm/IR/DiagnosticInfo.h" |
43 | #include "llvm/IR/Function.h" |
44 | #include "llvm/IR/GlobalAlias.h" |
45 | #include "llvm/IR/GlobalVariable.h" |
46 | #include "llvm/IR/Instructions.h" |
47 | #include "llvm/IR/Intrinsics.h" |
48 | #include "llvm/MC/MCAsmInfo.h" |
49 | #include "llvm/MC/MCContext.h" |
50 | #include "llvm/MC/MCExpr.h" |
51 | #include "llvm/MC/MCSymbol.h" |
52 | #include "llvm/Support/CommandLine.h" |
53 | #include "llvm/Support/Debug.h" |
54 | #include "llvm/Support/ErrorHandling.h" |
55 | #include "llvm/Support/KnownBits.h" |
56 | #include "llvm/Support/MathExtras.h" |
57 | #include "llvm/Target/TargetOptions.h" |
58 | #include <algorithm> |
59 | #include <bitset> |
60 | #include <cctype> |
61 | #include <numeric> |
62 | using namespace llvm; |
63 | |
64 | #define DEBUG_TYPE "x86-isel" |
65 | |
66 | STATISTIC(NumTailCalls, "Number of tail calls"); |
67 | |
68 | static cl::opt<int> ExperimentalPrefLoopAlignment( |
69 | "x86-experimental-pref-loop-alignment", cl::init(4), |
70 | cl::desc( |
71 | "Sets the preferable loop alignment for experiments (as log2 bytes)" |
72 | "(the last x86-experimental-pref-loop-alignment bits" |
73 | " of the loop header PC will be 0)."), |
74 | cl::Hidden); |
75 | |
76 | // Added in 10.0. |
77 | static cl::opt<bool> EnableOldKNLABI( |
78 | "x86-enable-old-knl-abi", cl::init(false), |
79 | cl::desc("Enables passing v32i16 and v64i8 in 2 YMM registers instead of " |
80 | "one ZMM register on AVX512F, but not AVX512BW targets."), |
81 | cl::Hidden); |
82 | |
83 | static cl::opt<bool> MulConstantOptimization( |
84 | "mul-constant-optimization", cl::init(true), |
85 | cl::desc("Replace 'mul x, Const' with more effective instructions like " |
86 | "SHIFT, LEA, etc."), |
87 | cl::Hidden); |
88 | |
89 | static cl::opt<bool> ExperimentalUnorderedISEL( |
90 | "x86-experimental-unordered-atomic-isel", cl::init(false), |
91 | cl::desc("Use LoadSDNode and StoreSDNode instead of " |
92 | "AtomicSDNode for unordered atomic loads and " |
93 | "stores respectively."), |
94 | cl::Hidden); |
95 | |
96 | /// Call this when the user attempts to do something unsupported, like |
97 | /// returning a double without SSE2 enabled on x86_64. This is not fatal, unlike |
98 | /// report_fatal_error, so calling code should attempt to recover without |
99 | /// crashing. |
100 | static void errorUnsupported(SelectionDAG &DAG, const SDLoc &dl, |
101 | const char *Msg) { |
102 | MachineFunction &MF = DAG.getMachineFunction(); |
103 | DAG.getContext()->diagnose( |
104 | DiagnosticInfoUnsupported(MF.getFunction(), Msg, dl.getDebugLoc())); |
105 | } |
106 | |
107 | X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, |
108 | const X86Subtarget &STI) |
109 | : TargetLowering(TM), Subtarget(STI) { |
110 | bool UseX87 = !Subtarget.useSoftFloat() && Subtarget.hasX87(); |
111 | X86ScalarSSEf64 = Subtarget.hasSSE2(); |
112 | X86ScalarSSEf32 = Subtarget.hasSSE1(); |
113 | MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0)); |
114 | |
115 | // Set up the TargetLowering object. |
116 | |
117 | // X86 is weird. It always uses i8 for shift amounts and setcc results. |
118 | setBooleanContents(ZeroOrOneBooleanContent); |
119 | // X86-SSE is even stranger. It uses -1 or 0 for vector masks. |
120 | setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); |
121 | |
122 | // For 64-bit, since we have so many registers, use the ILP scheduler. |
123 | // For 32-bit, use the register pressure specific scheduling. |
124 | // For Atom, always use ILP scheduling. |
125 | if (Subtarget.isAtom()) |
126 | setSchedulingPreference(Sched::ILP); |
127 | else if (Subtarget.is64Bit()) |
128 | setSchedulingPreference(Sched::ILP); |
129 | else |
130 | setSchedulingPreference(Sched::RegPressure); |
131 | const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); |
132 | setStackPointerRegisterToSaveRestore(RegInfo->getStackRegister()); |
133 | |
134 | // Bypass expensive divides and use cheaper ones. |
135 | if (TM.getOptLevel() >= CodeGenOpt::Default) { |
136 | if (Subtarget.hasSlowDivide32()) |
137 | addBypassSlowDiv(32, 8); |
138 | if (Subtarget.hasSlowDivide64() && Subtarget.is64Bit()) |
139 | addBypassSlowDiv(64, 32); |
140 | } |
141 | |
142 | if (Subtarget.isTargetWindowsMSVC() || |
143 | Subtarget.isTargetWindowsItanium()) { |
144 | // Setup Windows compiler runtime calls. |
145 | setLibcallName(RTLIB::SDIV_I64, "_alldiv"); |
146 | setLibcallName(RTLIB::UDIV_I64, "_aulldiv"); |
147 | setLibcallName(RTLIB::SREM_I64, "_allrem"); |
148 | setLibcallName(RTLIB::UREM_I64, "_aullrem"); |
149 | setLibcallName(RTLIB::MUL_I64, "_allmul"); |
150 | setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::X86_StdCall); |
151 | setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::X86_StdCall); |
152 | setLibcallCallingConv(RTLIB::SREM_I64, CallingConv::X86_StdCall); |
153 | setLibcallCallingConv(RTLIB::UREM_I64, CallingConv::X86_StdCall); |
154 | setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::X86_StdCall); |
155 | } |
156 | |
157 | if (Subtarget.getTargetTriple().isOSMSVCRT()) { |
158 | // MSVCRT doesn't have powi; fall back to pow |
159 | setLibcallName(RTLIB::POWI_F32, nullptr); |
160 | setLibcallName(RTLIB::POWI_F64, nullptr); |
161 | } |
162 | |
163 | if (Subtarget.isTargetDarwin()) { |
164 | // Darwin should use _setjmp/_longjmp instead of setjmp/longjmp. |
165 | setUseUnderscoreSetJmp(false); |
166 | setUseUnderscoreLongJmp(false); |
167 | } else if (Subtarget.isTargetWindowsGNU()) { |
168 | // MS runtime is weird: it exports _setjmp, but longjmp! |
169 | setUseUnderscoreSetJmp(true); |
170 | setUseUnderscoreLongJmp(false); |
171 | } else { |
172 | setUseUnderscoreSetJmp(true); |
173 | setUseUnderscoreLongJmp(true); |
174 | } |
175 | |
176 | // If we don't have cmpxchg8b (meaning this is a 386/486), limit atomic size to |
177 | // 32 bits so the AtomicExpandPass will expand it so we don't need cmpxchg8b. |
178 | // FIXME: Should we be limiting the atomic size on other configs? Default is |
179 | // 1024. |
180 | if (!Subtarget.hasCmpxchg8b()) |
181 | setMaxAtomicSizeInBitsSupported(32); |
182 | |
183 | // Set up the register classes. |
184 | addRegisterClass(MVT::i8, &X86::GR8RegClass); |
185 | addRegisterClass(MVT::i16, &X86::GR16RegClass); |
186 | addRegisterClass(MVT::i32, &X86::GR32RegClass); |
187 | if (Subtarget.is64Bit()) |
188 | addRegisterClass(MVT::i64, &X86::GR64RegClass); |
189 | |
190 | for (MVT VT : MVT::integer_valuetypes()) |
191 | setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); |
192 | |
193 | // We don't accept any truncstore of integer registers. |
194 | setTruncStoreAction(MVT::i64, MVT::i32, Expand); |
195 | setTruncStoreAction(MVT::i64, MVT::i16, Expand); |
196 | setTruncStoreAction(MVT::i64, MVT::i8 , Expand); |
197 | setTruncStoreAction(MVT::i32, MVT::i16, Expand); |
198 | setTruncStoreAction(MVT::i32, MVT::i8 , Expand); |
199 | setTruncStoreAction(MVT::i16, MVT::i8, Expand); |
200 | |
201 | setTruncStoreAction(MVT::f64, MVT::f32, Expand); |
202 | |
203 | // SETOEQ and SETUNE require checking two conditions. |
204 | setCondCodeAction(ISD::SETOEQ, MVT::f32, Expand); |
205 | setCondCodeAction(ISD::SETOEQ, MVT::f64, Expand); |
206 | setCondCodeAction(ISD::SETOEQ, MVT::f80, Expand); |
207 | setCondCodeAction(ISD::SETUNE, MVT::f32, Expand); |
208 | setCondCodeAction(ISD::SETUNE, MVT::f64, Expand); |
209 | setCondCodeAction(ISD::SETUNE, MVT::f80, Expand); |
210 | |
211 | // Integer absolute. |
212 | if (Subtarget.hasCMov()) { |
213 | setOperationAction(ISD::ABS , MVT::i16 , Custom); |
214 | setOperationAction(ISD::ABS , MVT::i32 , Custom); |
215 | } |
216 | setOperationAction(ISD::ABS , MVT::i64 , Custom); |
217 | |
218 | // Funnel shifts. |
219 | for (auto ShiftOp : {ISD::FSHL, ISD::FSHR}) { |
220 | setOperationAction(ShiftOp , MVT::i16 , Custom); |
221 | setOperationAction(ShiftOp , MVT::i32 , Custom); |
222 | if (Subtarget.is64Bit()) |
223 | setOperationAction(ShiftOp , MVT::i64 , Custom); |
224 | } |
225 | |
226 | // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this |
227 | // operation. |
228 | setOperationAction(ISD::UINT_TO_FP , MVT::i1 , Promote); |
229 | setOperationAction(ISD::UINT_TO_FP , MVT::i8 , Promote); |
230 | setOperationAction(ISD::UINT_TO_FP , MVT::i16 , Promote); |
231 | |
232 | if (!Subtarget.useSoftFloat()) { |
233 | // We have an algorithm for SSE2->double, and we turn this into a |
234 | // 64-bit FILD followed by conditional FADD for other targets. |
235 | setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom); |
236 | // We have an algorithm for SSE2, and we turn this into a 64-bit |
237 | // FILD or VCVTUSI2SS/SD for other targets. |
238 | setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Custom); |
239 | } else { |
240 | setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Expand); |
241 | } |
242 | |
243 | // Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have |
244 | // this operation. |
245 | setOperationAction(ISD::SINT_TO_FP , MVT::i1 , Promote); |
246 | setOperationAction(ISD::SINT_TO_FP , MVT::i8 , Promote); |
247 | |
248 | if (!Subtarget.useSoftFloat()) { |
249 | // SSE has no i16 to fp conversion, only i32. |
250 | if (X86ScalarSSEf32) { |
251 | setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote); |
252 | // f32 and f64 cases are Legal, f80 case is not |
253 | setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom); |
254 | } else { |
255 | setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Custom); |
256 | setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom); |
257 | } |
258 | } else { |
259 | setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote); |
260 | setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Expand); |
261 | } |
262 | |
263 | // Promote i1/i8 FP_TO_SINT to larger FP_TO_SINT's, as X86 doesn't have |
264 | // this operation. |
265 | setOperationAction(ISD::FP_TO_SINT , MVT::i1 , Promote); |
266 | setOperationAction(ISD::FP_TO_SINT , MVT::i8 , Promote); |
267 | |
268 | if (!Subtarget.useSoftFloat()) { |
269 | // In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64 |
270 | // are Legal, f80 is custom lowered. |
271 | setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Custom); |
272 | setOperationAction(ISD::SINT_TO_FP , MVT::i64 , Custom); |
273 | |
274 | setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Custom); |
275 | setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom); |
276 | } else { |
277 | setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Promote); |
278 | setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Expand); |
279 | setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Expand); |
280 | } |
281 | |
282 | // Handle FP_TO_UINT by promoting the destination to a larger signed |
283 | // conversion. |
284 | setOperationAction(ISD::FP_TO_UINT , MVT::i1 , Promote); |
285 | setOperationAction(ISD::FP_TO_UINT , MVT::i8 , Promote); |
286 | setOperationAction(ISD::FP_TO_UINT , MVT::i16 , Promote); |
287 | |
288 | if (!Subtarget.useSoftFloat()) { |
289 | setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); |
290 | setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom); |
291 | } |
292 | |
293 | // TODO: when we have SSE, these could be more efficient, by using movd/movq. |
294 | if (!X86ScalarSSEf64) { |
295 | setOperationAction(ISD::BITCAST , MVT::f32 , Expand); |
296 | setOperationAction(ISD::BITCAST , MVT::i32 , Expand); |
297 | if (Subtarget.is64Bit()) { |
298 | setOperationAction(ISD::BITCAST , MVT::f64 , Expand); |
299 | // Without SSE, i64->f64 goes through memory. |
300 | setOperationAction(ISD::BITCAST , MVT::i64 , Expand); |
301 | } |
302 | } else if (!Subtarget.is64Bit()) |
303 | setOperationAction(ISD::BITCAST , MVT::i64 , Custom); |
304 | |
305 | // Scalar integer divide and remainder are lowered to use operations that |
306 | // produce two results, to match the available instructions. This exposes |
307 | // the two-result form to trivial CSE, which is able to combine x/y and x%y |
308 | // into a single instruction. |
309 | // |
310 | // Scalar integer multiply-high is also lowered to use two-result |
311 | // operations, to match the available instructions. However, plain multiply |
312 | // (low) operations are left as Legal, as there are single-result |
313 | // instructions for this in x86. Using the two-result multiply instructions |
314 | // when both high and low results are needed must be arranged by dagcombine. |
315 | for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) { |
316 | setOperationAction(ISD::MULHS, VT, Expand); |
317 | setOperationAction(ISD::MULHU, VT, Expand); |
318 | setOperationAction(ISD::SDIV, VT, Expand); |
319 | setOperationAction(ISD::UDIV, VT, Expand); |
320 | setOperationAction(ISD::SREM, VT, Expand); |
321 | setOperationAction(ISD::UREM, VT, Expand); |
322 | } |
323 | |
324 | setOperationAction(ISD::BR_JT , MVT::Other, Expand); |
325 | setOperationAction(ISD::BRCOND , MVT::Other, Custom); |
326 | for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128, |
327 | MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) { |
328 | setOperationAction(ISD::BR_CC, VT, Expand); |
329 | setOperationAction(ISD::SELECT_CC, VT, Expand); |
330 | } |
331 | if (Subtarget.is64Bit()) |
332 | setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal); |
333 | setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16 , Legal); |
334 | setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal); |
335 | setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand); |
336 | |
337 | setOperationAction(ISD::FREM , MVT::f32 , Expand); |
338 | setOperationAction(ISD::FREM , MVT::f64 , Expand); |
339 | setOperationAction(ISD::FREM , MVT::f80 , Expand); |
340 | setOperationAction(ISD::FREM , MVT::f128 , Expand); |
341 | setOperationAction(ISD::FLT_ROUNDS_ , MVT::i32 , Custom); |
342 | |
343 | // Promote the i8 variants and force them on up to i32 which has a shorter |
344 | // encoding. |
345 | setOperationPromotedToType(ISD::CTTZ , MVT::i8 , MVT::i32); |
346 | setOperationPromotedToType(ISD::CTTZ_ZERO_UNDEF, MVT::i8 , MVT::i32); |
347 | if (!Subtarget.hasBMI()) { |
348 | setOperationAction(ISD::CTTZ , MVT::i16 , Custom); |
349 | setOperationAction(ISD::CTTZ , MVT::i32 , Custom); |
350 | setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16 , Legal); |
351 | setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32 , Legal); |
352 | if (Subtarget.is64Bit()) { |
353 | setOperationAction(ISD::CTTZ , MVT::i64 , Custom); |
354 | setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Legal); |
355 | } |
356 | } |
357 | |
358 | if (Subtarget.hasLZCNT()) { |
359 | // When promoting the i8 variants, force them to i32 for a shorter |
360 | // encoding. |
361 | setOperationPromotedToType(ISD::CTLZ , MVT::i8 , MVT::i32); |
362 | setOperationPromotedToType(ISD::CTLZ_ZERO_UNDEF, MVT::i8 , MVT::i32); |
363 | } else { |
364 | setOperationAction(ISD::CTLZ , MVT::i8 , Custom); |
365 | setOperationAction(ISD::CTLZ , MVT::i16 , Custom); |
366 | setOperationAction(ISD::CTLZ , MVT::i32 , Custom); |
367 | setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8 , Custom); |
368 | setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16 , Custom); |
369 | setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32 , Custom); |
370 | if (Subtarget.is64Bit()) { |
371 | setOperationAction(ISD::CTLZ , MVT::i64 , Custom); |
372 | setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Custom); |
373 | } |
374 | } |
375 | |
376 | // Special handling for half-precision floating point conversions. |
377 | // If we don't have F16C support, then lower half float conversions |
378 | // into library calls. |
379 | if (Subtarget.useSoftFloat() || !Subtarget.hasF16C()) { |
380 | setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand); |
381 | setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand); |
382 | } |
383 | |
384 | // There's never any support for operations beyond MVT::f32. |
385 | setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand); |
386 | setOperationAction(ISD::FP16_TO_FP, MVT::f80, Expand); |
387 | setOperationAction(ISD::FP16_TO_FP, MVT::f128, Expand); |
388 | setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand); |
389 | setOperationAction(ISD::FP_TO_FP16, MVT::f80, Expand); |
390 | setOperationAction(ISD::FP_TO_FP16, MVT::f128, Expand); |
391 | |
392 | setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand); |
393 | setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand); |
394 | setLoadExtAction(ISD::EXTLOAD, MVT::f80, MVT::f16, Expand); |
395 | setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f16, Expand); |
396 | setTruncStoreAction(MVT::f32, MVT::f16, Expand); |
397 | setTruncStoreAction(MVT::f64, MVT::f16, Expand); |
398 | setTruncStoreAction(MVT::f80, MVT::f16, Expand); |
399 | setTruncStoreAction(MVT::f128, MVT::f16, Expand); |
400 | |
401 | if (Subtarget.hasPOPCNT()) { |
402 | setOperationPromotedToType(ISD::CTPOP, MVT::i8, MVT::i32); |
403 | } else { |
404 | setOperationAction(ISD::CTPOP , MVT::i8 , Expand); |
405 | setOperationAction(ISD::CTPOP , MVT::i16 , Expand); |
406 | setOperationAction(ISD::CTPOP , MVT::i32 , Expand); |
407 | if (Subtarget.is64Bit()) |
408 | setOperationAction(ISD::CTPOP , MVT::i64 , Expand); |
409 | else |
410 | setOperationAction(ISD::CTPOP , MVT::i64 , Custom); |
411 | } |
412 | |
413 | setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom); |
414 | |
415 | if (!Subtarget.hasMOVBE()) |
416 | setOperationAction(ISD::BSWAP , MVT::i16 , Expand); |
417 | |
418 | // These should be promoted to a larger select which is supported. |
419 | setOperationAction(ISD::SELECT , MVT::i1 , Promote); |
420 | // X86 wants to expand cmov itself. |
421 | for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128 }) { |
422 | setOperationAction(ISD::SELECT, VT, Custom); |
423 | setOperationAction(ISD::SETCC, VT, Custom); |
424 | } |
425 | for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) { |
426 | if (VT == MVT::i64 && !Subtarget.is64Bit()) |
427 | continue; |
428 | setOperationAction(ISD::SELECT, VT, Custom); |
429 | setOperationAction(ISD::SETCC, VT, Custom); |
430 | } |
431 | |
432 | // Custom action for SELECT MMX and expand action for SELECT_CC MMX |
433 | setOperationAction(ISD::SELECT, MVT::x86mmx, Custom); |
434 | setOperationAction(ISD::SELECT_CC, MVT::x86mmx, Expand); |
435 | |
436 | setOperationAction(ISD::EH_RETURN , MVT::Other, Custom); |
437 | // NOTE: EH_SJLJ_SETJMP/_LONGJMP are not recommended, since |
438 | // LLVM/Clang supports zero-cost DWARF and SEH exception handling. |
439 | setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom); |
440 | setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom); |
441 | setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom); |
442 | if (TM.Options.ExceptionModel == ExceptionHandling::SjLj) |
443 | setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume"); |
444 | |
445 | // Darwin ABI issue. |
446 | for (auto VT : { MVT::i32, MVT::i64 }) { |
447 | if (VT == MVT::i64 && !Subtarget.is64Bit()) |
448 | continue; |
449 | setOperationAction(ISD::ConstantPool , VT, Custom); |
450 | setOperationAction(ISD::JumpTable , VT, Custom); |
451 | setOperationAction(ISD::GlobalAddress , VT, Custom); |
452 | setOperationAction(ISD::GlobalTLSAddress, VT, Custom); |
453 | setOperationAction(ISD::ExternalSymbol , VT, Custom); |
454 | setOperationAction(ISD::BlockAddress , VT, Custom); |
455 | } |
456 | |
457 | // 64-bit shl, sra, srl (iff 32-bit x86) |
458 | for (auto VT : { MVT::i32, MVT::i64 }) { |
459 | if (VT == MVT::i64 && !Subtarget.is64Bit()) |
460 | continue; |
461 | setOperationAction(ISD::SHL_PARTS, VT, Custom); |
462 | setOperationAction(ISD::SRA_PARTS, VT, Custom); |
463 | setOperationAction(ISD::SRL_PARTS, VT, Custom); |
464 | } |
465 | |
466 | if (Subtarget.hasSSEPrefetch() || Subtarget.has3DNow()) |
467 | setOperationAction(ISD::PREFETCH , MVT::Other, Legal); |
468 | |
469 | setOperationAction(ISD::ATOMIC_FENCE , MVT::Other, Custom); |
470 | |
471 | // Expand certain atomics |
472 | for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) { |
473 | setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Custom); |
474 | setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom); |
475 | setOperationAction(ISD::ATOMIC_LOAD_ADD, VT, Custom); |
476 | setOperationAction(ISD::ATOMIC_LOAD_OR, VT, Custom); |
477 | setOperationAction(ISD::ATOMIC_LOAD_XOR, VT, Custom); |
478 | setOperationAction(ISD::ATOMIC_LOAD_AND, VT, Custom); |
479 | setOperationAction(ISD::ATOMIC_STORE, VT, Custom); |
480 | } |
481 | |
482 | if (!Subtarget.is64Bit()) |
483 | setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Custom); |
484 | |
485 | if (Subtarget.hasCmpxchg16b()) { |
486 | setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom); |
487 | } |
488 | |
489 | // FIXME - use subtarget debug flags |
490 | if (!Subtarget.isTargetDarwin() && !Subtarget.isTargetELF() && |
491 | !Subtarget.isTargetCygMing() && !Subtarget.isTargetWin64() && |
492 | TM.Options.ExceptionModel != ExceptionHandling::SjLj) { |
493 | setOperationAction(ISD::EH_LABEL, MVT::Other, Expand); |
494 | } |
495 | |
496 | setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom); |
497 | setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i64, Custom); |
498 | |
499 | setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom); |
500 | setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom); |
501 | |
502 | setOperationAction(ISD::TRAP, MVT::Other, Legal); |
503 | setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal); |
504 | |
505 | // VASTART needs to be custom lowered to use the VarArgsFrameIndex |
506 | setOperationAction(ISD::VASTART , MVT::Other, Custom); |
507 | setOperationAction(ISD::VAEND , MVT::Other, Expand); |
508 | bool Is64Bit = Subtarget.is64Bit(); |
509 | setOperationAction(ISD::VAARG, MVT::Other, Is64Bit ? Custom : Expand); |
510 | setOperationAction(ISD::VACOPY, MVT::Other, Is64Bit ? Custom : Expand); |
511 | |
512 | setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); |
513 | setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); |
514 | |
515 | setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom); |
516 | |
517 | // GC_TRANSITION_START and GC_TRANSITION_END need custom lowering. |
518 | setOperationAction(ISD::GC_TRANSITION_START, MVT::Other, Custom); |
519 | setOperationAction(ISD::GC_TRANSITION_END, MVT::Other, Custom); |
520 | |
521 | if (!Subtarget.useSoftFloat() && X86ScalarSSEf64) { |
522 | // f32 and f64 use SSE. |
523 | // Set up the FP register classes. |
524 | addRegisterClass(MVT::f32, Subtarget.hasAVX512() ? &X86::FR32XRegClass |
525 | : &X86::FR32RegClass); |
526 | addRegisterClass(MVT::f64, Subtarget.hasAVX512() ? &X86::FR64XRegClass |
527 | : &X86::FR64RegClass); |
528 | |
529 | // Disable f32->f64 extload as we can only generate this in one instruction |
530 | // under optsize. So it's easier to pattern match (fpext (load)) for that |
531 | // case instead of needing to emit 2 instructions for extload in the |
532 | // non-optsize case. |
533 | setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand); |
534 | |
535 | for (auto VT : { MVT::f32, MVT::f64 }) { |
536 | // Use ANDPD to simulate FABS. |
537 | setOperationAction(ISD::FABS, VT, Custom); |
538 | |
539 | // Use XORP to simulate FNEG. |
540 | setOperationAction(ISD::FNEG, VT, Custom); |
541 | |
542 | // Use ANDPD and ORPD to simulate FCOPYSIGN. |
543 | setOperationAction(ISD::FCOPYSIGN, VT, Custom); |
544 | |
545 | // These might be better off as horizontal vector ops. |
546 | setOperationAction(ISD::FADD, VT, Custom); |
547 | setOperationAction(ISD::FSUB, VT, Custom); |
548 | |
549 | // We don't support sin/cos/fmod |
550 | setOperationAction(ISD::FSIN , VT, Expand); |
551 | setOperationAction(ISD::FCOS , VT, Expand); |
552 | setOperationAction(ISD::FSINCOS, VT, Expand); |
553 | } |
554 | |
555 | // Lower this to MOVMSK plus an AND. |
556 | setOperationAction(ISD::FGETSIGN, MVT::i64, Custom); |
557 | setOperationAction(ISD::FGETSIGN, MVT::i32, Custom); |
558 | |
559 | } else if (!Subtarget.useSoftFloat() && X86ScalarSSEf32 && (UseX87 || Is64Bit)) { |
560 | // Use SSE for f32, x87 for f64. |
561 | // Set up the FP register classes. |
562 | addRegisterClass(MVT::f32, &X86::FR32RegClass); |
563 | if (UseX87) |
564 | addRegisterClass(MVT::f64, &X86::RFP64RegClass); |
565 | |
566 | // Use ANDPS to simulate FABS. |
567 | setOperationAction(ISD::FABS , MVT::f32, Custom); |
568 | |
569 | // Use XORP to simulate FNEG. |
570 | setOperationAction(ISD::FNEG , MVT::f32, Custom); |
571 | |
572 | if (UseX87) |
573 | setOperationAction(ISD::UNDEF, MVT::f64, Expand); |
574 | |
575 | // Use ANDPS and ORPS to simulate FCOPYSIGN. |
576 | if (UseX87) |
577 | setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); |
578 | setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom); |
579 | |
580 | // We don't support sin/cos/fmod |
581 | setOperationAction(ISD::FSIN , MVT::f32, Expand); |
582 | setOperationAction(ISD::FCOS , MVT::f32, Expand); |
583 | setOperationAction(ISD::FSINCOS, MVT::f32, Expand); |
584 | |
585 | if (UseX87) { |
586 | // Always expand sin/cos functions even though x87 has an instruction. |
587 | setOperationAction(ISD::FSIN, MVT::f64, Expand); |
588 | setOperationAction(ISD::FCOS, MVT::f64, Expand); |
589 | setOperationAction(ISD::FSINCOS, MVT::f64, Expand); |
590 | } |
591 | } else if (UseX87) { |
592 | // f32 and f64 in x87. |
593 | // Set up the FP register classes. |
594 | addRegisterClass(MVT::f64, &X86::RFP64RegClass); |
595 | addRegisterClass(MVT::f32, &X86::RFP32RegClass); |
596 | |
597 | for (auto VT : { MVT::f32, MVT::f64 }) { |
598 | setOperationAction(ISD::UNDEF, VT, Expand); |
599 | setOperationAction(ISD::FCOPYSIGN, VT, Expand); |
600 | |
601 | // Always expand sin/cos functions even though x87 has an instruction. |
602 | setOperationAction(ISD::FSIN , VT, Expand); |
603 | setOperationAction(ISD::FCOS , VT, Expand); |
604 | setOperationAction(ISD::FSINCOS, VT, Expand); |
605 | } |
606 | } |
607 | |
608 | // Expand FP32 immediates into loads from the stack, save special cases. |
609 | if (isTypeLegal(MVT::f32)) { |
610 | if (UseX87 && (getRegClassFor(MVT::f32) == &X86::RFP32RegClass)) { |
611 | addLegalFPImmediate(APFloat(+0.0f)); // FLD0 |
612 | addLegalFPImmediate(APFloat(+1.0f)); // FLD1 |
613 | addLegalFPImmediate(APFloat(-0.0f)); // FLD0/FCHS |
614 | addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS |
615 | } else // SSE immediates. |
616 | addLegalFPImmediate(APFloat(+0.0f)); // xorps |
617 | } |
618 | // Expand FP64 immediates into loads from the stack, save special cases. |
619 | if (isTypeLegal(MVT::f64)) { |
620 | if (UseX87 && getRegClassFor(MVT::f64) == &X86::RFP64RegClass) { |
621 | addLegalFPImmediate(APFloat(+0.0)); // FLD0 |
622 | addLegalFPImmediate(APFloat(+1.0)); // FLD1 |
623 | addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS |
624 | addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS |
625 | } else // SSE immediates. |
626 | addLegalFPImmediate(APFloat(+0.0)); // xorpd |
627 | } |
628 | |
629 | // We don't support FMA. |
630 | setOperationAction(ISD::FMA, MVT::f64, Expand); |
631 | setOperationAction(ISD::FMA, MVT::f32, Expand); |
632 | |
633 | // f80 always uses X87. |
634 | if (UseX87) { |
635 | addRegisterClass(MVT::f80, &X86::RFP80RegClass); |
636 | setOperationAction(ISD::UNDEF, MVT::f80, Expand); |
637 | setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand); |
638 | { |
639 | APFloat TmpFlt = APFloat::getZero(APFloat::x87DoubleExtended()); |
640 | addLegalFPImmediate(TmpFlt); // FLD0 |
641 | TmpFlt.changeSign(); |
642 | addLegalFPImmediate(TmpFlt); // FLD0/FCHS |
643 | |
644 | bool ignored; |
645 | APFloat TmpFlt2(+1.0); |
646 | TmpFlt2.convert(APFloat::x87DoubleExtended(), APFloat::rmNearestTiesToEven, |
647 | &ignored); |
648 | addLegalFPImmediate(TmpFlt2); // FLD1 |
649 | TmpFlt2.changeSign(); |
650 | addLegalFPImmediate(TmpFlt2); // FLD1/FCHS |
651 | } |
652 | |
653 | // Always expand sin/cos functions even though x87 has an instruction. |
654 | setOperationAction(ISD::FSIN , MVT::f80, Expand); |
655 | setOperationAction(ISD::FCOS , MVT::f80, Expand); |
656 | setOperationAction(ISD::FSINCOS, MVT::f80, Expand); |
657 | |
658 | setOperationAction(ISD::FFLOOR, MVT::f80, Expand); |
659 | setOperationAction(ISD::FCEIL, MVT::f80, Expand); |
660 | setOperationAction(ISD::FTRUNC, MVT::f80, Expand); |
661 | setOperationAction(ISD::FRINT, MVT::f80, Expand); |
662 | setOperationAction(ISD::FNEARBYINT, MVT::f80, Expand); |
663 | setOperationAction(ISD::FMA, MVT::f80, Expand); |
664 | setOperationAction(ISD::LROUND, MVT::f80, Expand); |
665 | setOperationAction(ISD::LLROUND, MVT::f80, Expand); |
666 | setOperationAction(ISD::LRINT, MVT::f80, Expand); |
667 | setOperationAction(ISD::LLRINT, MVT::f80, Expand); |
668 | } |
669 | |
670 | // f128 uses xmm registers, but most operations require libcalls. |
671 | if (!Subtarget.useSoftFloat() && Subtarget.is64Bit() && Subtarget.hasSSE1()) { |
672 | addRegisterClass(MVT::f128, Subtarget.hasVLX() ? &X86::VR128XRegClass |
673 | : &X86::VR128RegClass); |
674 | |
675 | addLegalFPImmediate(APFloat::getZero(APFloat::IEEEquad())); // xorps |
676 | |
677 | setOperationAction(ISD::FADD, MVT::f128, Custom); |
678 | setOperationAction(ISD::FSUB, MVT::f128, Custom); |
679 | setOperationAction(ISD::FDIV, MVT::f128, Custom); |
680 | setOperationAction(ISD::FMUL, MVT::f128, Custom); |
681 | setOperationAction(ISD::FMA, MVT::f128, Expand); |
682 | |
683 | setOperationAction(ISD::FABS, MVT::f128, Custom); |
684 | setOperationAction(ISD::FNEG, MVT::f128, Custom); |
685 | setOperationAction(ISD::FCOPYSIGN, MVT::f128, Custom); |
686 | |
687 | setOperationAction(ISD::FSIN, MVT::f128, Expand); |
688 | setOperationAction(ISD::FCOS, MVT::f128, Expand); |
689 | setOperationAction(ISD::FSINCOS, MVT::f128, Expand); |
690 | setOperationAction(ISD::FSQRT, MVT::f128, Expand); |
691 | |
692 | setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom); |
693 | // We need to custom handle any FP_ROUND with an f128 input, but |
694 | // LegalizeDAG uses the result type to know when to run a custom handler. |
695 | // So we have to list all legal floating point result types here. |
696 | if (isTypeLegal(MVT::f32)) { |
697 | setOperationAction(ISD::FP_ROUND, MVT::f32, Custom); |
698 | setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Custom); |
699 | } |
700 | if (isTypeLegal(MVT::f64)) { |
701 | setOperationAction(ISD::FP_ROUND, MVT::f64, Custom); |
702 | setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Custom); |
703 | } |
704 | if (isTypeLegal(MVT::f80)) { |
705 | setOperationAction(ISD::FP_ROUND, MVT::f80, Custom); |
706 | setOperationAction(ISD::STRICT_FP_ROUND, MVT::f80, Custom); |
707 | } |
708 | |
709 | setOperationAction(ISD::SETCC, MVT::f128, Custom); |
710 | |
711 | setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f32, Expand); |
712 | setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f64, Expand); |
713 | setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f80, Expand); |
714 | setTruncStoreAction(MVT::f128, MVT::f32, Expand); |
715 | setTruncStoreAction(MVT::f128, MVT::f64, Expand); |
716 | setTruncStoreAction(MVT::f128, MVT::f80, Expand); |
717 | } |
718 | |
719 | // Always use a library call for pow. |
720 | setOperationAction(ISD::FPOW , MVT::f32 , Expand); |
721 | setOperationAction(ISD::FPOW , MVT::f64 , Expand); |
722 | setOperationAction(ISD::FPOW , MVT::f80 , Expand); |
723 | setOperationAction(ISD::FPOW , MVT::f128 , Expand); |
724 | |
725 | setOperationAction(ISD::FLOG, MVT::f80, Expand); |
726 | setOperationAction(ISD::FLOG2, MVT::f80, Expand); |
727 | setOperationAction(ISD::FLOG10, MVT::f80, Expand); |
728 | setOperationAction(ISD::FEXP, MVT::f80, Expand); |
729 | setOperationAction(ISD::FEXP2, MVT::f80, Expand); |
730 | setOperationAction(ISD::FMINNUM, MVT::f80, Expand); |
731 | setOperationAction(ISD::FMAXNUM, MVT::f80, Expand); |
732 | |
733 | // Some FP actions are always expanded for vector types. |
734 | for (auto VT : { MVT::v4f32, MVT::v8f32, MVT::v16f32, |
735 | MVT::v2f64, MVT::v4f64, MVT::v8f64 }) { |
736 | setOperationAction(ISD::FSIN, VT, Expand); |
737 | setOperationAction(ISD::FSINCOS, VT, Expand); |
738 | setOperationAction(ISD::FCOS, VT, Expand); |
739 | setOperationAction(ISD::FREM, VT, Expand); |
740 | setOperationAction(ISD::FCOPYSIGN, VT, Expand); |
741 | setOperationAction(ISD::FPOW, VT, Expand); |
742 | setOperationAction(ISD::FLOG, VT, Expand); |
743 | setOperationAction(ISD::FLOG2, VT, Expand); |
744 | setOperationAction(ISD::FLOG10, VT, Expand); |
745 | setOperationAction(ISD::FEXP, VT, Expand); |
746 | setOperationAction(ISD::FEXP2, VT, Expand); |
747 | } |
748 | |
749 | // First set operation action for all vector types to either promote |
750 | // (for widening) or expand (for scalarization). Then we will selectively |
751 | // turn on ones that can be effectively codegen'd. |
752 | for (MVT VT : MVT::fixedlen_vector_valuetypes()) { |
753 | setOperationAction(ISD::SDIV, VT, Expand); |
754 | setOperationAction(ISD::UDIV, VT, Expand); |
755 | setOperationAction(ISD::SREM, VT, Expand); |
756 | setOperationAction(ISD::UREM, VT, Expand); |
757 | setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT,Expand); |
758 | setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand); |
759 | setOperationAction(ISD::EXTRACT_SUBVECTOR, VT,Expand); |
760 | setOperationAction(ISD::INSERT_SUBVECTOR, VT,Expand); |
761 | setOperationAction(ISD::FMA, VT, Expand); |
762 | setOperationAction(ISD::FFLOOR, VT, Expand); |
763 | setOperationAction(ISD::FCEIL, VT, Expand); |
764 | setOperationAction(ISD::FTRUNC, VT, Expand); |
765 | setOperationAction(ISD::FRINT, VT, Expand); |
766 | setOperationAction(ISD::FNEARBYINT, VT, Expand); |
767 | setOperationAction(ISD::SMUL_LOHI, VT, Expand); |
768 | setOperationAction(ISD::MULHS, VT, Expand); |
769 | setOperationAction(ISD::UMUL_LOHI, VT, Expand); |
770 | setOperationAction(ISD::MULHU, VT, Expand); |
771 | setOperationAction(ISD::SDIVREM, VT, Expand); |
772 | setOperationAction(ISD::UDIVREM, VT, Expand); |
773 | setOperationAction(ISD::CTPOP, VT, Expand); |
774 | setOperationAction(ISD::CTTZ, VT, Expand); |
775 | setOperationAction(ISD::CTLZ, VT, Expand); |
776 | setOperationAction(ISD::ROTL, VT, Expand); |
777 | setOperationAction(ISD::ROTR, VT, Expand); |
778 | setOperationAction(ISD::BSWAP, VT, Expand); |
779 | setOperationAction(ISD::SETCC, VT, Expand); |
780 | setOperationAction(ISD::FP_TO_UINT, VT, Expand); |
781 | setOperationAction(ISD::FP_TO_SINT, VT, Expand); |
782 | setOperationAction(ISD::UINT_TO_FP, VT, Expand); |
783 | setOperationAction(ISD::SINT_TO_FP, VT, Expand); |
784 | setOperationAction(ISD::SIGN_EXTEND_INREG, VT,Expand); |
785 | setOperationAction(ISD::TRUNCATE, VT, Expand); |
786 | setOperationAction(ISD::SIGN_EXTEND, VT, Expand); |
787 | setOperationAction(ISD::ZERO_EXTEND, VT, Expand); |
788 | setOperationAction(ISD::ANY_EXTEND, VT, Expand); |
789 | setOperationAction(ISD::SELECT_CC, VT, Expand); |
790 | for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) { |
791 | setTruncStoreAction(InnerVT, VT, Expand); |
792 | |
793 | setLoadExtAction(ISD::SEXTLOAD, InnerVT, VT, Expand); |
794 | setLoadExtAction(ISD::ZEXTLOAD, InnerVT, VT, Expand); |
795 | |
796 | // N.b. ISD::EXTLOAD legality is basically ignored except for i1-like |
797 | // types; we have to deal with them whether we ask for Expansion or not. |
798 | // Setting Expand causes its own optimisation problems though, so leave |
799 | // them legal. |
800 | if (VT.getVectorElementType() == MVT::i1) |
801 | setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand); |
802 | |
803 | // EXTLOAD for MVT::f16 vectors is not legal because f16 vectors are |
804 | // split/scalarized right now. |
805 | if (VT.getVectorElementType() == MVT::f16) |
806 | setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand); |
807 | } |
808 | } |
809 | |
810 | // FIXME: In order to prevent SSE instructions being expanded to MMX ones |
811 | // with -msoft-float, disable use of MMX as well. |
812 | if (!Subtarget.useSoftFloat() && Subtarget.hasMMX()) { |
813 | addRegisterClass(MVT::x86mmx, &X86::VR64RegClass); |
814 | // No operations on x86mmx supported, everything uses intrinsics. |
815 | } |
816 | |
817 | if (!Subtarget.useSoftFloat() && Subtarget.hasSSE1()) { |
818 | addRegisterClass(MVT::v4f32, Subtarget.hasVLX() ? &X86::VR128XRegClass |
819 | : &X86::VR128RegClass); |
820 | |
821 | setOperationAction(ISD::FNEG, MVT::v4f32, Custom); |
822 | setOperationAction(ISD::FABS, MVT::v4f32, Custom); |
823 | setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Custom); |
824 | setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom); |
825 | setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom); |
826 | setOperationAction(ISD::VSELECT, MVT::v4f32, Custom); |
827 | setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom); |
828 | setOperationAction(ISD::SELECT, MVT::v4f32, Custom); |
829 | setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom); |
830 | |
831 | setOperationAction(ISD::LOAD, MVT::v2f32, Custom); |
832 | setOperationAction(ISD::STORE, MVT::v2f32, Custom); |
833 | |
834 | setOperationAction(ISD::STRICT_FP_ROUND, MVT::v4f32, Custom); |
835 | } |
836 | |
837 | if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) { |
838 | addRegisterClass(MVT::v2f64, Subtarget.hasVLX() ? &X86::VR128XRegClass |
839 | : &X86::VR128RegClass); |
840 | |
841 | // FIXME: Unfortunately, -soft-float and -no-implicit-float mean XMM |
842 | // registers cannot be used even for integer operations. |
843 | addRegisterClass(MVT::v16i8, Subtarget.hasVLX() ? &X86::VR128XRegClass |
844 | : &X86::VR128RegClass); |
845 | addRegisterClass(MVT::v8i16, Subtarget.hasVLX() ? &X86::VR128XRegClass |
846 | : &X86::VR128RegClass); |
847 | addRegisterClass(MVT::v4i32, Subtarget.hasVLX() ? &X86::VR128XRegClass |
848 | : &X86::VR128RegClass); |
849 | addRegisterClass(MVT::v2i64, Subtarget.hasVLX() ? &X86::VR128XRegClass |
850 | : &X86::VR128RegClass); |
851 | |
852 | for (auto VT : { MVT::v2i8, MVT::v4i8, MVT::v8i8, |
853 | MVT::v2i16, MVT::v4i16, MVT::v2i32 }) { |
854 | setOperationAction(ISD::SDIV, VT, Custom); |
855 | setOperationAction(ISD::SREM, VT, Custom); |
856 | setOperationAction(ISD::UDIV, VT, Custom); |
857 | setOperationAction(ISD::UREM, VT, Custom); |
858 | } |
859 | |
860 | setOperationAction(ISD::MUL, MVT::v2i8, Custom); |
861 | setOperationAction(ISD::MUL, MVT::v4i8, Custom); |
862 | setOperationAction(ISD::MUL, MVT::v8i8, Custom); |
863 | |
864 | setOperationAction(ISD::MUL, MVT::v16i8, Custom); |
865 | setOperationAction(ISD::MUL, MVT::v4i32, Custom); |
866 | setOperationAction(ISD::MUL, MVT::v2i64, Custom); |
867 | setOperationAction(ISD::MULHU, MVT::v4i32, Custom); |
868 | setOperationAction(ISD::MULHS, MVT::v4i32, Custom); |
869 | setOperationAction(ISD::MULHU, MVT::v16i8, Custom); |
870 | setOperationAction(ISD::MULHS, MVT::v16i8, Custom); |
871 | setOperationAction(ISD::MULHU, MVT::v8i16, Legal); |
872 | setOperationAction(ISD::MULHS, MVT::v8i16, Legal); |
873 | setOperationAction(ISD::MUL, MVT::v8i16, Legal); |
874 | setOperationAction(ISD::FNEG, MVT::v2f64, Custom); |
875 | setOperationAction(ISD::FABS, MVT::v2f64, Custom); |
876 | setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Custom); |
877 | |
878 | for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) { |
879 | setOperationAction(ISD::SMAX, VT, VT == MVT::v8i16 ? Legal : Custom); |
880 | setOperationAction(ISD::SMIN, VT, VT == MVT::v8i16 ? Legal : Custom); |
881 | setOperationAction(ISD::UMAX, VT, VT == MVT::v16i8 ? Legal : Custom); |
882 | setOperationAction(ISD::UMIN, VT, VT == MVT::v16i8 ? Legal : Custom); |
883 | } |
884 | |
885 | setOperationAction(ISD::UADDSAT, MVT::v16i8, Legal); |
886 | setOperationAction(ISD::SADDSAT, MVT::v16i8, Legal); |
887 | setOperationAction(ISD::USUBSAT, MVT::v16i8, Legal); |
888 | setOperationAction(ISD::SSUBSAT, MVT::v16i8, Legal); |
889 | setOperationAction(ISD::UADDSAT, MVT::v8i16, Legal); |
890 | setOperationAction(ISD::SADDSAT, MVT::v8i16, Legal); |
891 | setOperationAction(ISD::USUBSAT, MVT::v8i16, Legal); |
892 | setOperationAction(ISD::SSUBSAT, MVT::v8i16, Legal); |
893 | setOperationAction(ISD::UADDSAT, MVT::v4i32, Custom); |
894 | setOperationAction(ISD::USUBSAT, MVT::v4i32, Custom); |
895 | setOperationAction(ISD::UADDSAT, MVT::v2i64, Custom); |
896 | setOperationAction(ISD::USUBSAT, MVT::v2i64, Custom); |
897 | |
898 | setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom); |
899 | setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom); |
900 | setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom); |
901 | |
902 | for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) { |
903 | setOperationAction(ISD::SETCC, VT, Custom); |
904 | setOperationAction(ISD::CTPOP, VT, Custom); |
905 | setOperationAction(ISD::ABS, VT, Custom); |
906 | |
907 | // The condition codes aren't legal in SSE/AVX and under AVX512 we use |
908 | // setcc all the way to isel and prefer SETGT in some isel patterns. |
909 | setCondCodeAction(ISD::SETLT, VT, Custom); |
910 | setCondCodeAction(ISD::SETLE, VT, Custom); |
911 | } |
912 | |
913 | for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) { |
914 | setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom); |
915 | setOperationAction(ISD::BUILD_VECTOR, VT, Custom); |
916 | setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); |
917 | setOperationAction(ISD::VSELECT, VT, Custom); |
918 | setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); |
919 | } |
920 | |
921 | for (auto VT : { MVT::v2f64, MVT::v2i64 }) { |
922 | setOperationAction(ISD::BUILD_VECTOR, VT, Custom); |
923 | setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); |
924 | setOperationAction(ISD::VSELECT, VT, Custom); |
925 | |
926 | if (VT == MVT::v2i64 && !Subtarget.is64Bit()) |
927 | continue; |
928 | |
929 | setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); |
930 | setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); |
931 | } |
932 | |
933 | // Custom lower v2i64 and v2f64 selects. |
934 | setOperationAction(ISD::SELECT, MVT::v2f64, Custom); |
935 | setOperationAction(ISD::SELECT, MVT::v2i64, Custom); |
936 | setOperationAction(ISD::SELECT, MVT::v4i32, Custom); |
937 | setOperationAction(ISD::SELECT, MVT::v8i16, Custom); |
938 | setOperationAction(ISD::SELECT, MVT::v16i8, Custom); |
939 | |
940 | setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal); |
941 | setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Custom); |
942 | |
943 | // Custom legalize these to avoid over promotion or custom promotion. |
944 | setOperationAction(ISD::FP_TO_SINT, MVT::v2i8, Custom); |
945 | setOperationAction(ISD::FP_TO_SINT, MVT::v4i8, Custom); |
946 | setOperationAction(ISD::FP_TO_SINT, MVT::v8i8, Custom); |
947 | setOperationAction(ISD::FP_TO_SINT, MVT::v2i16, Custom); |
948 | setOperationAction(ISD::FP_TO_SINT, MVT::v4i16, Custom); |
949 | setOperationAction(ISD::FP_TO_UINT, MVT::v2i8, Custom); |
950 | setOperationAction(ISD::FP_TO_UINT, MVT::v4i8, Custom); |
951 | setOperationAction(ISD::FP_TO_UINT, MVT::v8i8, Custom); |
952 | setOperationAction(ISD::FP_TO_UINT, MVT::v2i16, Custom); |
953 | setOperationAction(ISD::FP_TO_UINT, MVT::v4i16, Custom); |
954 | |
955 | // By marking FP_TO_SINT v8i16 as Custom, we trick type legalization into |
956 | // promoting v8i8 FP_TO_UINT into FP_TO_SINT. When the v8i16 FP_TO_SINT is |
957 | // split again based on the input type, this will cause an AssertSExt i16 to |
958 | // be emitted instead of an AssertZExt. This will allow packssdw followed by |
959 | // packuswb to be used to truncate to v8i8. This is necessary since packusdw |
960 | // isn't available until sse4.1. |
961 | setOperationAction(ISD::FP_TO_SINT, MVT::v8i16, Custom); |
962 | |
963 | setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal); |
964 | setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom); |
965 | |
966 | setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom); |
967 | |
968 | // Fast v2f32 UINT_TO_FP( v2i32 ) custom conversion. |
969 | setOperationAction(ISD::UINT_TO_FP, MVT::v2f32, Custom); |
970 | |
971 | setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom); |
972 | setOperationAction(ISD::FP_ROUND, MVT::v2f32, Custom); |
973 | |
974 | // We want to legalize this to an f64 load rather than an i64 load on |
975 | // 64-bit targets and two 32-bit loads on a 32-bit target. Similar for |
976 | // store. |
977 | setOperationAction(ISD::LOAD, MVT::v2i32, Custom); |
978 | setOperationAction(ISD::LOAD, MVT::v4i16, Custom); |
979 | setOperationAction(ISD::LOAD, MVT::v8i8, Custom); |
980 | setOperationAction(ISD::STORE, MVT::v2i32, Custom); |
981 | setOperationAction(ISD::STORE, MVT::v4i16, Custom); |
982 | setOperationAction(ISD::STORE, MVT::v8i8, Custom); |
983 | |
984 | setOperationAction(ISD::BITCAST, MVT::v2i32, Custom); |
985 | setOperationAction(ISD::BITCAST, MVT::v4i16, Custom); |
986 | setOperationAction(ISD::BITCAST, MVT::v8i8, Custom); |
987 | if (!Subtarget.hasAVX512()) |
988 | setOperationAction(ISD::BITCAST, MVT::v16i1, Custom); |
989 | |
990 | setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v2i64, Custom); |
991 | setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i32, Custom); |
992 | setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i16, Custom); |
993 | |
994 | setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom); |
995 | |
996 | setOperationAction(ISD::TRUNCATE, MVT::v2i8, Custom); |
997 | setOperationAction(ISD::TRUNCATE, MVT::v2i16, Custom); |
998 | setOperationAction(ISD::TRUNCATE, MVT::v2i32, Custom); |
999 | setOperationAction(ISD::TRUNCATE, MVT::v4i8, Custom); |
1000 | setOperationAction(ISD::TRUNCATE, MVT::v4i16, Custom); |
1001 | setOperationAction(ISD::TRUNCATE, MVT::v8i8, Custom); |
1002 | |
1003 | // In the customized shift lowering, the legal v4i32/v2i64 cases |
1004 | // in AVX2 will be recognized. |
1005 | for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) { |
1006 | setOperationAction(ISD::SRL, VT, Custom); |
1007 | setOperationAction(ISD::SHL, VT, Custom); |
1008 | setOperationAction(ISD::SRA, VT, Custom); |
1009 | } |
1010 | |
1011 | setOperationAction(ISD::ROTL, MVT::v4i32, Custom); |
1012 | setOperationAction(ISD::ROTL, MVT::v8i16, Custom); |
1013 | |
1014 | // With AVX512, expanding (and promoting the shifts) is better. |
1015 | if (!Subtarget.hasAVX512()) |
1016 | setOperationAction(ISD::ROTL, MVT::v16i8, Custom); |
1017 | } |
1018 | |
1019 | if (!Subtarget.useSoftFloat() && Subtarget.hasSSSE3()) { |
1020 | setOperationAction(ISD::ABS, MVT::v16i8, Legal); |
1021 | setOperationAction(ISD::ABS, MVT::v8i16, Legal); |
1022 | setOperationAction(ISD::ABS, MVT::v4i32, Legal); |
1023 | setOperationAction(ISD::BITREVERSE, MVT::v16i8, Custom); |
1024 | setOperationAction(ISD::CTLZ, MVT::v16i8, Custom); |
1025 | setOperationAction(ISD::CTLZ, MVT::v8i16, Custom); |
1026 | setOperationAction(ISD::CTLZ, MVT::v4i32, Custom); |
1027 | setOperationAction(ISD::CTLZ, MVT::v2i64, Custom); |
1028 | |
1029 | // These might be better off as horizontal vector ops. |
1030 | setOperationAction(ISD::ADD, MVT::i16, Custom); |
1031 | setOperationAction(ISD::ADD, MVT::i32, Custom); |
1032 | setOperationAction(ISD::SUB, MVT::i16, Custom); |
1033 | setOperationAction(ISD::SUB, MVT::i32, Custom); |
1034 | } |
1035 | |
1036 | if (!Subtarget.useSoftFloat() && Subtarget.hasSSE41()) { |
1037 | for (MVT RoundedTy : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64}) { |
1038 | setOperationAction(ISD::FFLOOR, RoundedTy, Legal); |
1039 | setOperationAction(ISD::FCEIL, RoundedTy, Legal); |
1040 | setOperationAction(ISD::FTRUNC, RoundedTy, Legal); |
1041 | setOperationAction(ISD::FRINT, RoundedTy, Legal); |
1042 | setOperationAction(ISD::FNEARBYINT, RoundedTy, Legal); |
1043 | } |
1044 | |
1045 | setOperationAction(ISD::SMAX, MVT::v16i8, Legal); |
1046 | setOperationAction(ISD::SMAX, MVT::v4i32, Legal); |
1047 | setOperationAction(ISD::UMAX, MVT::v8i16, Legal); |
1048 | setOperationAction(ISD::UMAX, MVT::v4i32, Legal); |
1049 | setOperationAction(ISD::SMIN, MVT::v16i8, Legal); |
1050 | setOperationAction(ISD::SMIN, MVT::v4i32, Legal); |
1051 | setOperationAction(ISD::UMIN, MVT::v8i16, Legal); |
1052 | setOperationAction(ISD::UMIN, MVT::v4i32, Legal); |
1053 | |
1054 | // FIXME: Do we need to handle scalar-to-vector here? |
1055 | setOperationAction(ISD::MUL, MVT::v4i32, Legal); |
1056 | |
1057 | // We directly match byte blends in the backend as they match the VSELECT |
1058 | // condition form. |
1059 | setOperationAction(ISD::VSELECT, MVT::v16i8, Legal); |
1060 | |
1061 | // SSE41 brings specific instructions for doing vector sign extend even in |
1062 | // cases where we don't have SRA. |
1063 | for (auto VT : { MVT::v8i16, MVT::v4i32, MVT::v2i64 }) { |
1064 | setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Legal); |
1065 | setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Legal); |
1066 | } |
1067 | |
1068 | // SSE41 also has vector sign/zero extending loads, PMOV[SZ]X |
1069 | for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) { |
1070 | setLoadExtAction(LoadExtOp, MVT::v8i16, MVT::v8i8, Legal); |
1071 | setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i8, Legal); |
1072 | setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i8, Legal); |
1073 | setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i16, Legal); |
1074 | setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i16, Legal); |
1075 | setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i32, Legal); |
1076 | } |
1077 | |
1078 | // i8 vectors are custom because the source register and source |
1079 | // memory operand types are not the same width. |
1080 | setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom); |
1081 | } |
1082 | |
1083 | if (!Subtarget.useSoftFloat() && Subtarget.hasXOP()) { |
1084 | for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, |
1085 | MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) |
1086 | setOperationAction(ISD::ROTL, VT, Custom); |
1087 | |
1088 | // XOP can efficiently perform BITREVERSE with VPPERM. |
1089 | for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) |
1090 | setOperationAction(ISD::BITREVERSE, VT, Custom); |
1091 | |
1092 | for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, |
1093 | MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) |
1094 | setOperationAction(ISD::BITREVERSE, VT, Custom); |
1095 | } |
1096 | |
1097 | if (!Subtarget.useSoftFloat() && Subtarget.hasAVX()) { |
1098 | bool HasInt256 = Subtarget.hasInt256(); |
1099 | |
1100 | addRegisterClass(MVT::v32i8, Subtarget.hasVLX() ? &X86::VR256XRegClass |
1101 | : &X86::VR256RegClass); |
1102 | addRegisterClass(MVT::v16i16, Subtarget.hasVLX() ? &X86::VR256XRegClass |
1103 | : &X86::VR256RegClass); |
1104 | addRegisterClass(MVT::v8i32, Subtarget.hasVLX() ? &X86::VR256XRegClass |
1105 | : &X86::VR256RegClass); |
1106 | addRegisterClass(MVT::v8f32, Subtarget.hasVLX() ? &X86::VR256XRegClass |
1107 | : &X86::VR256RegClass); |
1108 | addRegisterClass(MVT::v4i64, Subtarget.hasVLX() ? &X86::VR256XRegClass |
1109 | : &X86::VR256RegClass); |
1110 | addRegisterClass(MVT::v4f64, Subtarget.hasVLX() ? &X86::VR256XRegClass |
1111 | : &X86::VR256RegClass); |
1112 | |
1113 | for (auto VT : { MVT::v8f32, MVT::v4f64 }) { |
1114 | setOperationAction(ISD::FFLOOR, VT, Legal); |
1115 | setOperationAction(ISD::FCEIL, VT, Legal); |
1116 | setOperationAction(ISD::FTRUNC, VT, Legal); |
1117 | setOperationAction(ISD::FRINT, VT, Legal); |
1118 | setOperationAction(ISD::FNEARBYINT, VT, Legal); |
1119 | setOperationAction(ISD::FNEG, VT, Custom); |
1120 | setOperationAction(ISD::FABS, VT, Custom); |
1121 | setOperationAction(ISD::FCOPYSIGN, VT, Custom); |
1122 | } |
1123 | |
1124 | // (fp_to_int:v8i16 (v8f32 ..)) requires the result type to be promoted |
1125 | // even though v8i16 is a legal type. |
1126 | setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i16, MVT::v8i32); |
1127 | setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i16, MVT::v8i32); |
1128 | setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal); |
1129 | |
1130 | setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal); |
1131 | |
1132 | setOperationAction(ISD::STRICT_FP_ROUND, MVT::v8f32, Custom); |
1133 | |
1134 | if (!Subtarget.hasAVX512()) |
1135 | setOperationAction(ISD::BITCAST, MVT::v32i1, Custom); |
1136 | |
1137 | // In the customized shift lowering, the legal v8i32/v4i64 cases |
1138 | // in AVX2 will be recognized. |
1139 | for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) { |
1140 | setOperationAction(ISD::SRL, VT, Custom); |
1141 | setOperationAction(ISD::SHL, VT, Custom); |
1142 | setOperationAction(ISD::SRA, VT, Custom); |
1143 | } |
1144 | |
1145 | // These types need custom splitting if their input is a 128-bit vector. |
1146 | setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom); |
1147 | setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom); |
1148 | setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom); |
1149 | setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom); |
1150 | |
1151 | setOperationAction(ISD::ROTL, MVT::v8i32, Custom); |
1152 | setOperationAction(ISD::ROTL, MVT::v16i16, Custom); |
1153 | |
1154 | // With BWI, expanding (and promoting the shifts) is better. |
1155 | if (!Subtarget.hasBWI()) |
1156 | setOperationAction(ISD::ROTL, MVT::v32i8, Custom); |
1157 | |
1158 | setOperationAction(ISD::SELECT, MVT::v4f64, Custom); |
1159 | setOperationAction(ISD::SELECT, MVT::v4i64, Custom); |
1160 | setOperationAction(ISD::SELECT, MVT::v8i32, Custom); |
1161 | setOperationAction(ISD::SELECT, MVT::v16i16, Custom); |
1162 | setOperationAction(ISD::SELECT, MVT::v32i8, Custom); |
1163 | setOperationAction(ISD::SELECT, MVT::v8f32, Custom); |
1164 | |
1165 | for (auto VT : { MVT::v16i16, MVT::v8i32, MVT::v4i64 }) { |
1166 | setOperationAction(ISD::SIGN_EXTEND, VT, Custom); |
1167 | setOperationAction(ISD::ZERO_EXTEND, VT, Custom); |
1168 | setOperationAction(ISD::ANY_EXTEND, VT, Custom); |
1169 | } |
1170 | |
1171 | setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom); |
1172 | setOperationAction(ISD::TRUNCATE, MVT::v8i16, Custom); |
1173 | setOperationAction(ISD::TRUNCATE, MVT::v4i32, Custom); |
1174 | setOperationAction(ISD::BITREVERSE, MVT::v32i8, Custom); |
1175 | |
1176 | for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) { |
1177 | setOperationAction(ISD::SETCC, VT, Custom); |
1178 | setOperationAction(ISD::CTPOP, VT, Custom); |
1179 | setOperationAction(ISD::CTLZ, VT, Custom); |
1180 | |
1181 | // The condition codes aren't legal in SSE/AVX and under AVX512 we use |
1182 | // setcc all the way to isel and prefer SETGT in some isel patterns. |
1183 | setCondCodeAction(ISD::SETLT, VT, Custom); |
1184 | setCondCodeAction(ISD::SETLE, VT, Custom); |
1185 | } |
1186 | |
1187 | if (Subtarget.hasAnyFMA()) { |
1188 | for (auto VT : { MVT::f32, MVT::f64, MVT::v4f32, MVT::v8f32, |
1189 | MVT::v2f64, MVT::v4f64 }) |
1190 | setOperationAction(ISD::FMA, VT, Legal); |
1191 | } |
1192 | |
1193 | for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) { |
1194 | setOperationAction(ISD::ADD, VT, HasInt256 ? Legal : Custom); |
1195 | setOperationAction(ISD::SUB, VT, HasInt256 ? Legal : Custom); |
1196 | } |
1197 | |
1198 | setOperationAction(ISD::MUL, MVT::v4i64, Custom); |
1199 | setOperationAction(ISD::MUL, MVT::v8i32, HasInt256 ? Legal : Custom); |
1200 | setOperationAction(ISD::MUL, MVT::v16i16, HasInt256 ? Legal : Custom); |
1201 | setOperationAction(ISD::MUL, MVT::v32i8, Custom); |
1202 | |
1203 | setOperationAction(ISD::MULHU, MVT::v8i32, Custom); |
1204 | setOperationAction(ISD::MULHS, MVT::v8i32, Custom); |
1205 | setOperationAction(ISD::MULHU, MVT::v16i16, HasInt256 ? Legal : Custom); |
1206 | setOperationAction(ISD::MULHS, MVT::v16i16, HasInt256 ? Legal : Custom); |
1207 | setOperationAction(ISD::MULHU, MVT::v32i8, Custom); |
1208 | setOperationAction(ISD::MULHS, MVT::v32i8, Custom); |
1209 | |
1210 | setOperationAction(ISD::ABS, MVT::v4i64, Custom); |
1211 | setOperationAction(ISD::SMAX, MVT::v4i64, Custom); |
1212 | setOperationAction(ISD::UMAX, MVT::v4i64, Custom); |
1213 | setOperationAction(ISD::SMIN, MVT::v4i64, Custom); |
1214 | setOperationAction(ISD::UMIN, MVT::v4i64, Custom); |
1215 | |
1216 | setOperationAction(ISD::UADDSAT, MVT::v32i8, HasInt256 ? Legal : Custom); |
1217 | setOperationAction(ISD::SADDSAT, MVT::v32i8, HasInt256 ? Legal : Custom); |
1218 | setOperationAction(ISD::USUBSAT, MVT::v32i8, HasInt256 ? Legal : Custom); |
1219 | setOperationAction(ISD::SSUBSAT, MVT::v32i8, HasInt256 ? Legal : Custom); |
1220 | setOperationAction(ISD::UADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom); |
1221 | setOperationAction(ISD::SADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom); |
1222 | setOperationAction(ISD::USUBSAT, MVT::v16i16, HasInt256 ? Legal : Custom); |
1223 | setOperationAction(ISD::SSUBSAT, MVT::v16i16, HasInt256 ? Legal : Custom); |
1224 | |
1225 | for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32 }) { |
1226 | setOperationAction(ISD::ABS, VT, HasInt256 ? Legal : Custom); |
1227 | setOperationAction(ISD::SMAX, VT, HasInt256 ? Legal : Custom); |
1228 | setOperationAction(ISD::UMAX, VT, HasInt256 ? Legal : Custom); |
1229 | setOperationAction(ISD::SMIN, VT, HasInt256 ? Legal : Custom); |
1230 | setOperationAction(ISD::UMIN, VT, HasInt256 ? Legal : Custom); |
1231 | } |
1232 | |
1233 | for (auto VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64}) { |
1234 | setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom); |
1235 | setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom); |
1236 | } |
1237 | |
1238 | if (HasInt256) { |
1239 | // The custom lowering for UINT_TO_FP for v8i32 becomes interesting |
1240 | // when we have a 256-bit-wide blend with immediate.
1241 | setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Custom); |
1242 | |
1243 | // AVX2 also has wider vector sign/zero extending loads, VPMOV[SZ]X |
1244 | for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) { |
1245 | setLoadExtAction(LoadExtOp, MVT::v16i16, MVT::v16i8, Legal); |
1246 | setLoadExtAction(LoadExtOp, MVT::v8i32, MVT::v8i8, Legal); |
1247 | setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i8, Legal); |
1248 | setLoadExtAction(LoadExtOp, MVT::v8i32, MVT::v8i16, Legal); |
1249 | setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i16, Legal); |
1250 | setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i32, Legal); |
1251 | } |
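// A rough illustration (assuming a plain AVX2 target): because these
// combinations are Legal, IR such as
//   %b = load <8 x i8>, <8 x i8>* %p
//   %e = sext <8 x i8> %b to <8 x i32>
// can be selected as a single VPMOVSXBD with a memory operand instead of a
// separate load followed by an extend.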
1252 | } |
1253 | |
1254 | for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64, |
1255 | MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 }) { |
1256 | setOperationAction(ISD::MLOAD, VT, Subtarget.hasVLX() ? Legal : Custom); |
1257 | setOperationAction(ISD::MSTORE, VT, Legal); |
1258 | } |
1259 | |
1260 | // Extract subvector is special because the value type |
1261 | // (result) is 128-bit but the source is 256-bit wide. |
1262 | for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, |
1263 | MVT::v4f32, MVT::v2f64 }) { |
1264 | setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal); |
1265 | } |
1266 | |
1267 | // Custom lower several nodes for 256-bit types. |
1268 | for (MVT VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, |
1269 | MVT::v8f32, MVT::v4f64 }) { |
1270 | setOperationAction(ISD::BUILD_VECTOR, VT, Custom); |
1271 | setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); |
1272 | setOperationAction(ISD::VSELECT, VT, Custom); |
1273 | setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); |
1274 | setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); |
1275 | setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom); |
1276 | setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal); |
1277 | setOperationAction(ISD::CONCAT_VECTORS, VT, Custom); |
1278 | setOperationAction(ISD::STORE, VT, Custom); |
1279 | } |
1280 | |
1281 | if (HasInt256) { |
1282 | setOperationAction(ISD::VSELECT, MVT::v32i8, Legal); |
1283 | |
1284 | // Custom legalize 2x32 to get a little better code. |
1285 | setOperationAction(ISD::MGATHER, MVT::v2f32, Custom); |
1286 | setOperationAction(ISD::MGATHER, MVT::v2i32, Custom); |
1287 | |
1288 | for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64, |
1289 | MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 }) |
1290 | setOperationAction(ISD::MGATHER, VT, Custom); |
1291 | } |
1292 | } |
1293 | |
1294 | // This block controls legalization of the mask vector sizes that are |
1295 | // available with AVX512. 512-bit vectors are in a separate block controlled |
1296 | // by useAVX512Regs. |
1297 | if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) { |
1298 | addRegisterClass(MVT::v1i1, &X86::VK1RegClass); |
1299 | addRegisterClass(MVT::v2i1, &X86::VK2RegClass); |
1300 | addRegisterClass(MVT::v4i1, &X86::VK4RegClass); |
1301 | addRegisterClass(MVT::v8i1, &X86::VK8RegClass); |
1302 | addRegisterClass(MVT::v16i1, &X86::VK16RegClass); |
1303 | |
1304 | setOperationAction(ISD::SELECT, MVT::v1i1, Custom); |
1305 | setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v1i1, Custom); |
1306 | setOperationAction(ISD::BUILD_VECTOR, MVT::v1i1, Custom); |
1307 | |
1308 | setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i1, MVT::v8i32); |
1309 | setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i1, MVT::v8i32); |
1310 | setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v4i1, MVT::v4i32); |
1311 | setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v4i1, MVT::v4i32); |
1312 | setOperationAction(ISD::FP_TO_SINT, MVT::v2i1, Custom); |
1313 | setOperationAction(ISD::FP_TO_UINT, MVT::v2i1, Custom); |
1314 | |
1315 | // There is no byte-sized k-register load or store without AVX512DQ.
1316 | if (!Subtarget.hasDQI()) { |
1317 | setOperationAction(ISD::LOAD, MVT::v1i1, Custom); |
1318 | setOperationAction(ISD::LOAD, MVT::v2i1, Custom); |
1319 | setOperationAction(ISD::LOAD, MVT::v4i1, Custom); |
1320 | setOperationAction(ISD::LOAD, MVT::v8i1, Custom); |
1321 | |
1322 | setOperationAction(ISD::STORE, MVT::v1i1, Custom); |
1323 | setOperationAction(ISD::STORE, MVT::v2i1, Custom); |
1324 | setOperationAction(ISD::STORE, MVT::v4i1, Custom); |
1325 | setOperationAction(ISD::STORE, MVT::v8i1, Custom); |
1326 | } |
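// A rough sketch of the fallback (AVX512F without DQ): storing a v8i1 mask
// cannot use KMOVB, which is an AVX512DQ instruction, so the custom lowering
// typically moves the mask to a GPR with KMOVW and stores the low byte.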
1327 | |
1328 | // Extends of v16i1/v8i1/v4i1/v2i1 to 128-bit vectors. |
1329 | for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) { |
1330 | setOperationAction(ISD::SIGN_EXTEND, VT, Custom); |
1331 | setOperationAction(ISD::ZERO_EXTEND, VT, Custom); |
1332 | setOperationAction(ISD::ANY_EXTEND, VT, Custom); |
1333 | } |
1334 | |
1335 | for (auto VT : { MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v16i1 }) { |
1336 | setOperationAction(ISD::ADD, VT, Custom); |
1337 | setOperationAction(ISD::SUB, VT, Custom); |
1338 | setOperationAction(ISD::MUL, VT, Custom); |
1339 | setOperationAction(ISD::SETCC, VT, Custom); |
1340 | setOperationAction(ISD::SELECT, VT, Custom); |
1341 | setOperationAction(ISD::TRUNCATE, VT, Custom); |
1342 | setOperationAction(ISD::UADDSAT, VT, Custom); |
1343 | setOperationAction(ISD::SADDSAT, VT, Custom); |
1344 | setOperationAction(ISD::USUBSAT, VT, Custom); |
1345 | setOperationAction(ISD::SSUBSAT, VT, Custom); |
1346 | |
1347 | setOperationAction(ISD::BUILD_VECTOR, VT, Custom); |
1348 | setOperationAction(ISD::CONCAT_VECTORS, VT, Custom); |
1349 | setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); |
1350 | setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom); |
1351 | setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); |
1352 | setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); |
1353 | setOperationAction(ISD::VSELECT, VT, Expand); |
1354 | } |
1355 | |
1356 | for (auto VT : { MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1 }) |
1357 | setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom); |
1358 | } |
1359 | |
1360 | // This block controls legalization for 512-bit operations with 32/64 bit |
1361 | // elements. 512-bits can be disabled based on prefer-vector-width and |
1362 | // required-vector-width function attributes. |
1363 | if (!Subtarget.useSoftFloat() && Subtarget.useAVX512Regs()) { |
1364 | addRegisterClass(MVT::v16i32, &X86::VR512RegClass); |
1365 | addRegisterClass(MVT::v16f32, &X86::VR512RegClass); |
1366 | addRegisterClass(MVT::v8i64, &X86::VR512RegClass); |
1367 | addRegisterClass(MVT::v8f64, &X86::VR512RegClass); |
1368 | |
1369 | for (auto ExtType : {ISD::ZEXTLOAD, ISD::SEXTLOAD}) { |
1370 | setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i8, Legal); |
1371 | setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i16, Legal); |
1372 | setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i8, Legal); |
1373 | setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i16, Legal); |
1374 | setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i32, Legal); |
1375 | } |
1376 | |
1377 | for (MVT VT : { MVT::v16f32, MVT::v8f64 }) { |
1378 | setOperationAction(ISD::FNEG, VT, Custom); |
1379 | setOperationAction(ISD::FABS, VT, Custom); |
1380 | setOperationAction(ISD::FMA, VT, Legal); |
1381 | setOperationAction(ISD::FCOPYSIGN, VT, Custom); |
1382 | } |
1383 | |
1384 | setOperationAction(ISD::FP_TO_SINT, MVT::v16i32, Legal); |
1385 | setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v16i16, MVT::v16i32); |
1386 | setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v16i8, MVT::v16i32); |
1387 | setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v16i1, MVT::v16i32); |
1388 | setOperationAction(ISD::FP_TO_UINT, MVT::v16i32, Legal); |
1389 | setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v16i1, MVT::v16i32); |
1390 | setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v16i8, MVT::v16i32); |
1391 | setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v16i16, MVT::v16i32); |
1392 | setOperationAction(ISD::SINT_TO_FP, MVT::v16i32, Legal); |
1393 | setOperationAction(ISD::UINT_TO_FP, MVT::v16i32, Legal); |
1394 | |
1395 | setOperationAction(ISD::STRICT_FP_ROUND, MVT::v16f32, Custom); |
1396 | |
1397 | setTruncStoreAction(MVT::v8i64, MVT::v8i8, Legal); |
1398 | setTruncStoreAction(MVT::v8i64, MVT::v8i16, Legal); |
1399 | setTruncStoreAction(MVT::v8i64, MVT::v8i32, Legal); |
1400 | setTruncStoreAction(MVT::v16i32, MVT::v16i8, Legal); |
1401 | setTruncStoreAction(MVT::v16i32, MVT::v16i16, Legal); |
1402 | |
1403 | // With 512-bit vectors and no VLX, we prefer to widen MLOAD/MSTORE |
1404 | // to 512-bit rather than use the AVX2 instructions so that we can use |
1405 | // k-masks. |
1406 | if (!Subtarget.hasVLX()) { |
1407 | for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64, |
1408 | MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64}) { |
1409 | setOperationAction(ISD::MLOAD, VT, Custom); |
1410 | setOperationAction(ISD::MSTORE, VT, Custom); |
1411 | } |
1412 | } |
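// Illustrative example (AVX512F, no VLX): a masked load of <8 x float> is
// widened to a <16 x float> masked load whose upper mask bits are zero, so
// it can use a k-register predicate rather than the AVX2 VMASKMOVPS form.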
1413 | |
1414 | setOperationAction(ISD::TRUNCATE, MVT::v8i32, Custom); |
1415 | setOperationAction(ISD::TRUNCATE, MVT::v16i16, Custom); |
1416 | setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom); |
1417 | setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom); |
1418 | setOperationAction(ISD::ANY_EXTEND, MVT::v16i32, Custom); |
1419 | setOperationAction(ISD::ANY_EXTEND, MVT::v8i64, Custom); |
1420 | setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom); |
1421 | setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom); |
1422 | |
1423 | // Need to custom widen this if we don't have AVX512BW. |
1424 | setOperationAction(ISD::ANY_EXTEND, MVT::v8i8, Custom); |
1425 | setOperationAction(ISD::ZERO_EXTEND, MVT::v8i8, Custom); |
1426 | setOperationAction(ISD::SIGN_EXTEND, MVT::v8i8, Custom); |
1427 | |
1428 | for (auto VT : { MVT::v16f32, MVT::v8f64 }) { |
1429 | setOperationAction(ISD::FFLOOR, VT, Legal); |
1430 | setOperationAction(ISD::FCEIL, VT, Legal); |
1431 | setOperationAction(ISD::FTRUNC, VT, Legal); |
1432 | setOperationAction(ISD::FRINT, VT, Legal); |
1433 | setOperationAction(ISD::FNEARBYINT, VT, Legal); |
1434 | |
1435 | setOperationAction(ISD::SELECT, VT, Custom); |
1436 | } |
1437 | |
1438 | // Without BWI we need to use custom lowering to handle MVT::v64i8 input. |
1439 | for (auto VT : {MVT::v16i32, MVT::v8i64, MVT::v64i8}) { |
1440 | setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom); |
1441 | setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom); |
1442 | } |
1443 | |
1444 | setOperationAction(ISD::CONCAT_VECTORS, MVT::v8f64, Custom); |
1445 | setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i64, Custom); |
1446 | setOperationAction(ISD::CONCAT_VECTORS, MVT::v16f32, Custom); |
1447 | setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i32, Custom); |
1448 | |
1449 | setOperationAction(ISD::MUL, MVT::v8i64, Custom); |
1450 | setOperationAction(ISD::MUL, MVT::v16i32, Legal); |
1451 | |
1452 | setOperationAction(ISD::MULHU, MVT::v16i32, Custom); |
1453 | setOperationAction(ISD::MULHS, MVT::v16i32, Custom); |
1454 | |
1455 | for (auto VT : { MVT::v16i32, MVT::v8i64 }) { |
1456 | setOperationAction(ISD::SMAX, VT, Legal); |
1457 | setOperationAction(ISD::UMAX, VT, Legal); |
1458 | setOperationAction(ISD::SMIN, VT, Legal); |
1459 | setOperationAction(ISD::UMIN, VT, Legal); |
1460 | setOperationAction(ISD::ABS, VT, Legal); |
1461 | setOperationAction(ISD::SRL, VT, Custom); |
1462 | setOperationAction(ISD::SHL, VT, Custom); |
1463 | setOperationAction(ISD::SRA, VT, Custom); |
1464 | setOperationAction(ISD::CTPOP, VT, Custom); |
1465 | setOperationAction(ISD::ROTL, VT, Custom); |
1466 | setOperationAction(ISD::ROTR, VT, Custom); |
1467 | setOperationAction(ISD::SETCC, VT, Custom); |
1468 | setOperationAction(ISD::SELECT, VT, Custom); |
1469 | |
1470 | // The condition codes aren't legal in SSE/AVX and under AVX512 we use |
1471 | // setcc all the way to isel and prefer SETGT in some isel patterns. |
1472 | setCondCodeAction(ISD::SETLT, VT, Custom); |
1473 | setCondCodeAction(ISD::SETLE, VT, Custom); |
1474 | } |
1475 | |
1476 | if (Subtarget.hasDQI()) { |
1477 | setOperationAction(ISD::SINT_TO_FP, MVT::v8i64, Legal); |
1478 | setOperationAction(ISD::UINT_TO_FP, MVT::v8i64, Legal); |
1479 | setOperationAction(ISD::FP_TO_SINT, MVT::v8i64, Legal); |
1480 | setOperationAction(ISD::FP_TO_UINT, MVT::v8i64, Legal); |
1481 | |
1482 | setOperationAction(ISD::MUL, MVT::v8i64, Legal); |
1483 | } |
1484 | |
1485 | if (Subtarget.hasCDI()) { |
1486 | // Non-VLX subtargets extend 128/256-bit vectors to use the 512-bit version.
1487 | for (auto VT : { MVT::v16i32, MVT::v8i64} ) { |
1488 | setOperationAction(ISD::CTLZ, VT, Legal); |
1489 | } |
1490 | } // Subtarget.hasCDI() |
1491 | |
1492 | if (Subtarget.hasVPOPCNTDQ()) { |
1493 | for (auto VT : { MVT::v16i32, MVT::v8i64 }) |
1494 | setOperationAction(ISD::CTPOP, VT, Legal); |
1495 | } |
1496 | |
1497 | // Extract subvector is special because the value type |
1498 | // (result) is 256-bit but the source is 512-bit wide. |
1499 | // 128-bit was made Legal under AVX1. |
1500 | for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, |
1501 | MVT::v8f32, MVT::v4f64 }) |
1502 | setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal); |
1503 | |
1504 | for (auto VT : { MVT::v16i32, MVT::v8i64, MVT::v16f32, MVT::v8f64 }) { |
1505 | setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); |
1506 | setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); |
1507 | setOperationAction(ISD::BUILD_VECTOR, VT, Custom); |
1508 | setOperationAction(ISD::VSELECT, VT, Custom); |
1509 | setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); |
1510 | setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom); |
1511 | setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal); |
1512 | setOperationAction(ISD::MLOAD, VT, Legal); |
1513 | setOperationAction(ISD::MSTORE, VT, Legal); |
1514 | setOperationAction(ISD::MGATHER, VT, Custom); |
1515 | setOperationAction(ISD::MSCATTER, VT, Custom); |
1516 | } |
1517 | if (!Subtarget.hasBWI()) { |
1518 | // Need to custom split v32i16/v64i8 bitcasts. |
1519 | setOperationAction(ISD::BITCAST, MVT::v32i16, Custom); |
1520 | setOperationAction(ISD::BITCAST, MVT::v64i8, Custom); |
1521 | |
1522 | // Better to split these into two 256-bit ops. |
1523 | setOperationAction(ISD::BITREVERSE, MVT::v8i64, Custom); |
1524 | setOperationAction(ISD::BITREVERSE, MVT::v16i32, Custom); |
1525 | } |
1526 | |
1527 | if (Subtarget.hasVBMI2()) { |
1528 | for (auto VT : { MVT::v16i32, MVT::v8i64 }) { |
1529 | setOperationAction(ISD::FSHL, VT, Custom); |
1530 | setOperationAction(ISD::FSHR, VT, Custom); |
1531 | } |
1532 | } |
1533 | } // has AVX-512
1534 | |
1535 | // This block controls legalization for operations that don't have |
1536 | // pre-AVX512 equivalents. Without VLX we use 512-bit operations for |
1537 | // narrower widths. |
1538 | if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) { |
1539 | // These operations are handled on non-VLX by artificially widening in |
1540 | // isel patterns. |
1541 | // TODO: Custom widen in lowering on non-VLX and drop the isel patterns? |
1542 | |
1543 | setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Legal); |
1544 | setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal); |
1545 | setOperationAction(ISD::FP_TO_UINT, MVT::v2i32, Custom); |
1546 | setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Legal); |
1547 | setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal); |
1548 | |
1549 | for (auto VT : { MVT::v2i64, MVT::v4i64 }) { |
1550 | setOperationAction(ISD::SMAX, VT, Legal); |
1551 | setOperationAction(ISD::UMAX, VT, Legal); |
1552 | setOperationAction(ISD::SMIN, VT, Legal); |
1553 | setOperationAction(ISD::UMIN, VT, Legal); |
1554 | setOperationAction(ISD::ABS, VT, Legal); |
1555 | } |
1556 | |
1557 | for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) { |
1558 | setOperationAction(ISD::ROTL, VT, Custom); |
1559 | setOperationAction(ISD::ROTR, VT, Custom); |
1560 | } |
1561 | |
1562 | // Custom legalize 2x32 to get a little better code. |
1563 | setOperationAction(ISD::MSCATTER, MVT::v2f32, Custom); |
1564 | setOperationAction(ISD::MSCATTER, MVT::v2i32, Custom); |
1565 | |
1566 | for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64, |
1567 | MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 }) |
1568 | setOperationAction(ISD::MSCATTER, VT, Custom); |
1569 | |
1570 | if (Subtarget.hasDQI()) { |
1571 | for (auto VT : { MVT::v2i64, MVT::v4i64 }) { |
1572 | setOperationAction(ISD::SINT_TO_FP, VT, Legal); |
1573 | setOperationAction(ISD::UINT_TO_FP, VT, Legal); |
1574 | setOperationAction(ISD::FP_TO_SINT, VT, Legal); |
1575 | setOperationAction(ISD::FP_TO_UINT, VT, Legal); |
1576 | |
1577 | setOperationAction(ISD::MUL, VT, Legal); |
1578 | } |
1579 | } |
1580 | |
1581 | if (Subtarget.hasCDI()) { |
1582 | for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) { |
1583 | setOperationAction(ISD::CTLZ, VT, Legal); |
1584 | } |
1585 | } // Subtarget.hasCDI() |
1586 | |
1587 | if (Subtarget.hasVPOPCNTDQ()) { |
1588 | for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) |
1589 | setOperationAction(ISD::CTPOP, VT, Legal); |
1590 | } |
1591 | } |
1592 | |
1593 | // This block controls legalization of v32i1/v64i1, which are available with
1594 | // AVX512BW. 512-bit v32i16 and v64i8 vector legalization is controlled with |
1595 | // useBWIRegs. |
1596 | if (!Subtarget.useSoftFloat() && Subtarget.hasBWI()) { |
1597 | addRegisterClass(MVT::v32i1, &X86::VK32RegClass); |
1598 | addRegisterClass(MVT::v64i1, &X86::VK64RegClass); |
1599 | |
1600 | for (auto VT : { MVT::v32i1, MVT::v64i1 }) { |
1601 | setOperationAction(ISD::ADD, VT, Custom); |
1602 | setOperationAction(ISD::SUB, VT, Custom); |
1603 | setOperationAction(ISD::MUL, VT, Custom); |
1604 | setOperationAction(ISD::VSELECT, VT, Expand); |
1605 | setOperationAction(ISD::UADDSAT, VT, Custom); |
1606 | setOperationAction(ISD::SADDSAT, VT, Custom); |
1607 | setOperationAction(ISD::USUBSAT, VT, Custom); |
1608 | setOperationAction(ISD::SSUBSAT, VT, Custom); |
1609 | |
1610 | setOperationAction(ISD::TRUNCATE, VT, Custom); |
1611 | setOperationAction(ISD::SETCC, VT, Custom); |
1612 | setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); |
1613 | setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); |
1614 | setOperationAction(ISD::SELECT, VT, Custom); |
1615 | setOperationAction(ISD::BUILD_VECTOR, VT, Custom); |
1616 | setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); |
1617 | } |
1618 | |
1619 | setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i1, Custom); |
1620 | setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i1, Custom); |
1621 | setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32i1, Custom); |
1622 | setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64i1, Custom); |
1623 | for (auto VT : { MVT::v16i1, MVT::v32i1 }) |
1624 | setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom); |
1625 | |
1626 | // Extends from v32i1 masks to 256-bit vectors. |
1627 | setOperationAction(ISD::SIGN_EXTEND, MVT::v32i8, Custom); |
1628 | setOperationAction(ISD::ZERO_EXTEND, MVT::v32i8, Custom); |
1629 | setOperationAction(ISD::ANY_EXTEND, MVT::v32i8, Custom); |
1630 | } |
1631 | |
1632 | // This block controls legalization for v32i16 and v64i8. 512-bits can be |
1633 | // disabled based on prefer-vector-width and required-vector-width function |
1634 | // attributes. |
1635 | if (!Subtarget.useSoftFloat() && Subtarget.useBWIRegs()) { |
1636 | addRegisterClass(MVT::v32i16, &X86::VR512RegClass); |
1637 | addRegisterClass(MVT::v64i8, &X86::VR512RegClass); |
1638 | |
1639 | // Extends from v64i1 masks to 512-bit vectors. |
1640 | setOperationAction(ISD::SIGN_EXTEND, MVT::v64i8, Custom); |
1641 | setOperationAction(ISD::ZERO_EXTEND, MVT::v64i8, Custom); |
1642 | setOperationAction(ISD::ANY_EXTEND, MVT::v64i8, Custom); |
1643 | |
1644 | setOperationAction(ISD::MUL, MVT::v32i16, Legal); |
1645 | setOperationAction(ISD::MUL, MVT::v64i8, Custom); |
1646 | setOperationAction(ISD::MULHS, MVT::v32i16, Legal); |
1647 | setOperationAction(ISD::MULHU, MVT::v32i16, Legal); |
1648 | setOperationAction(ISD::MULHS, MVT::v64i8, Custom); |
1649 | setOperationAction(ISD::MULHU, MVT::v64i8, Custom); |
1650 | setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i16, Custom); |
1651 | setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i8, Custom); |
1652 | setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32i16, Legal); |
1653 | setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64i8, Legal); |
1654 | setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v32i16, Custom); |
1655 | setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v64i8, Custom); |
1656 | setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v32i16, Custom); |
1657 | setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v64i8, Custom); |
1658 | setOperationAction(ISD::SIGN_EXTEND, MVT::v32i16, Custom); |
1659 | setOperationAction(ISD::ZERO_EXTEND, MVT::v32i16, Custom); |
1660 | setOperationAction(ISD::ANY_EXTEND, MVT::v32i16, Custom); |
1661 | setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v32i16, Custom); |
1662 | setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v64i8, Custom); |
1663 | setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v32i16, Custom); |
1664 | setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v64i8, Custom); |
1665 | setOperationAction(ISD::TRUNCATE, MVT::v32i8, Custom); |
1666 | setOperationAction(ISD::BITREVERSE, MVT::v64i8, Custom); |
1667 | |
1668 | setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v32i16, Custom); |
1669 | setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, MVT::v32i16, Custom); |
1670 | |
1671 | setTruncStoreAction(MVT::v32i16, MVT::v32i8, Legal); |
1672 | |
1673 | for (auto VT : { MVT::v64i8, MVT::v32i16 }) { |
1674 | setOperationAction(ISD::BUILD_VECTOR, VT, Custom); |
1675 | setOperationAction(ISD::VSELECT, VT, Custom); |
1676 | setOperationAction(ISD::ABS, VT, Legal); |
1677 | setOperationAction(ISD::SRL, VT, Custom); |
1678 | setOperationAction(ISD::SHL, VT, Custom); |
1679 | setOperationAction(ISD::SRA, VT, Custom); |
1680 | setOperationAction(ISD::MLOAD, VT, Legal); |
1681 | setOperationAction(ISD::MSTORE, VT, Legal); |
1682 | setOperationAction(ISD::CTPOP, VT, Custom); |
1683 | setOperationAction(ISD::CTLZ, VT, Custom); |
1684 | setOperationAction(ISD::SMAX, VT, Legal); |
1685 | setOperationAction(ISD::UMAX, VT, Legal); |
1686 | setOperationAction(ISD::SMIN, VT, Legal); |
1687 | setOperationAction(ISD::UMIN, VT, Legal); |
1688 | setOperationAction(ISD::SETCC, VT, Custom); |
1689 | setOperationAction(ISD::UADDSAT, VT, Legal); |
1690 | setOperationAction(ISD::SADDSAT, VT, Legal); |
1691 | setOperationAction(ISD::USUBSAT, VT, Legal); |
1692 | setOperationAction(ISD::SSUBSAT, VT, Legal); |
1693 | setOperationAction(ISD::SELECT, VT, Custom); |
1694 | |
1695 | // The condition codes aren't legal in SSE/AVX and under AVX512 we use |
1696 | // setcc all the way to isel and prefer SETGT in some isel patterns. |
1697 | setCondCodeAction(ISD::SETLT, VT, Custom); |
1698 | setCondCodeAction(ISD::SETLE, VT, Custom); |
1699 | } |
1700 | |
1701 | for (auto ExtType : {ISD::ZEXTLOAD, ISD::SEXTLOAD}) { |
1702 | setLoadExtAction(ExtType, MVT::v32i16, MVT::v32i8, Legal); |
1703 | } |
1704 | |
1705 | if (Subtarget.hasBITALG()) { |
1706 | for (auto VT : { MVT::v64i8, MVT::v32i16 }) |
1707 | setOperationAction(ISD::CTPOP, VT, Legal); |
1708 | } |
1709 | |
1710 | if (Subtarget.hasVBMI2()) { |
1711 | setOperationAction(ISD::FSHL, MVT::v32i16, Custom); |
1712 | setOperationAction(ISD::FSHR, MVT::v32i16, Custom); |
1713 | } |
1714 | } |
1715 | |
1716 | if (!Subtarget.useSoftFloat() && Subtarget.hasBWI()) { |
1717 | for (auto VT : { MVT::v32i8, MVT::v16i8, MVT::v16i16, MVT::v8i16 }) { |
1718 | setOperationAction(ISD::MLOAD, VT, Subtarget.hasVLX() ? Legal : Custom); |
1719 | setOperationAction(ISD::MSTORE, VT, Subtarget.hasVLX() ? Legal : Custom); |
1720 | } |
1721 | |
1722 | // These operations are handled on non-VLX by artificially widening in |
1723 | // isel patterns. |
1724 | // TODO: Custom widen in lowering on non-VLX and drop the isel patterns? |
1725 | |
1726 | if (Subtarget.hasBITALG()) { |
1727 | for (auto VT : { MVT::v16i8, MVT::v32i8, MVT::v8i16, MVT::v16i16 }) |
1728 | setOperationAction(ISD::CTPOP, VT, Legal); |
1729 | } |
1730 | } |
1731 | |
1732 | if (!Subtarget.useSoftFloat() && Subtarget.hasVLX()) { |
1733 | setTruncStoreAction(MVT::v4i64, MVT::v4i8, Legal); |
1734 | setTruncStoreAction(MVT::v4i64, MVT::v4i16, Legal); |
1735 | setTruncStoreAction(MVT::v4i64, MVT::v4i32, Legal); |
1736 | setTruncStoreAction(MVT::v8i32, MVT::v8i8, Legal); |
1737 | setTruncStoreAction(MVT::v8i32, MVT::v8i16, Legal); |
1738 | |
1739 | setTruncStoreAction(MVT::v2i64, MVT::v2i8, Legal); |
1740 | setTruncStoreAction(MVT::v2i64, MVT::v2i16, Legal); |
1741 | setTruncStoreAction(MVT::v2i64, MVT::v2i32, Legal); |
1742 | setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal); |
1743 | setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal); |
1744 | |
1745 | if (Subtarget.hasDQI()) { |
1746 | // Fast v2f32 SINT_TO_FP( v2i64 ) custom conversion. |
1747 | // v2f32 UINT_TO_FP is already custom under SSE2. |
1748 | setOperationAction(ISD::SINT_TO_FP, MVT::v2f32, Custom); |
1749 | assert(isOperationCustom(ISD::UINT_TO_FP, MVT::v2f32) &&
1750 |        "Unexpected operation action!");
1751 | // v2i64 FP_TO_S/UINT(v2f32) custom conversion. |
1752 | setOperationAction(ISD::FP_TO_SINT, MVT::v2f32, Custom); |
1753 | setOperationAction(ISD::FP_TO_UINT, MVT::v2f32, Custom); |
1754 | } |
1755 | |
1756 | if (Subtarget.hasBWI()) { |
1757 | setTruncStoreAction(MVT::v16i16, MVT::v16i8, Legal); |
1758 | setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal); |
1759 | } |
1760 | |
1761 | if (Subtarget.hasVBMI2()) { |
1762 | // TODO: Make these legal even without VLX? |
1763 | for (auto VT : { MVT::v8i16, MVT::v4i32, MVT::v2i64, |
1764 | MVT::v16i16, MVT::v8i32, MVT::v4i64 }) { |
1765 | setOperationAction(ISD::FSHL, VT, Custom); |
1766 | setOperationAction(ISD::FSHR, VT, Custom); |
1767 | } |
1768 | } |
1769 | |
1770 | setOperationAction(ISD::TRUNCATE, MVT::v16i32, Custom); |
1771 | setOperationAction(ISD::TRUNCATE, MVT::v8i64, Custom); |
1772 | setOperationAction(ISD::TRUNCATE, MVT::v16i64, Custom); |
1773 | } |
1774 | |
1775 | // We want to custom lower some of our intrinsics. |
1776 | setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); |
1777 | setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); |
1778 | setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); |
1779 | if (!Subtarget.is64Bit()) { |
1780 | setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom); |
1781 | } |
1782 | |
1783 | // Only custom-lower 64-bit SADDO and friends on 64-bit because we don't |
1784 | // handle type legalization for these operations here. |
1785 | // |
1786 | // FIXME: We really should do custom legalization for addition and |
1787 | // subtraction on x86-32 once PR3203 is fixed. We really can't do much better |
1788 | // than generic legalization for 64-bit multiplication-with-overflow, though. |
1789 | for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) { |
1790 | if (VT == MVT::i64 && !Subtarget.is64Bit()) |
1791 | continue; |
1792 | // Add/Sub/Mul with overflow operations are custom lowered. |
1793 | setOperationAction(ISD::SADDO, VT, Custom); |
1794 | setOperationAction(ISD::UADDO, VT, Custom); |
1795 | setOperationAction(ISD::SSUBO, VT, Custom); |
1796 | setOperationAction(ISD::USUBO, VT, Custom); |
1797 | setOperationAction(ISD::SMULO, VT, Custom); |
1798 | setOperationAction(ISD::UMULO, VT, Custom); |
1799 | |
1800 | // Support carry in as value rather than glue. |
1801 | setOperationAction(ISD::ADDCARRY, VT, Custom); |
1802 | setOperationAction(ISD::SUBCARRY, VT, Custom); |
1803 | setOperationAction(ISD::SETCCCARRY, VT, Custom); |
1804 | } |
1805 | |
1806 | if (!Subtarget.is64Bit()) { |
1807 | // These libcalls are not available in 32-bit. |
1808 | setLibcallName(RTLIB::SHL_I128, nullptr); |
1809 | setLibcallName(RTLIB::SRL_I128, nullptr); |
1810 | setLibcallName(RTLIB::SRA_I128, nullptr); |
1811 | setLibcallName(RTLIB::MUL_I128, nullptr); |
1812 | } |
1813 | |
1814 | // Combine sin / cos into _sincos_stret if it is available. |
1815 | if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr && |
1816 | getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) { |
1817 | setOperationAction(ISD::FSINCOS, MVT::f64, Custom); |
1818 | setOperationAction(ISD::FSINCOS, MVT::f32, Custom); |
1819 | } |
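// For example (assuming a Darwin-style runtime that provides these calls),
// computing both llvm.sin.f32 and llvm.cos.f32 of the same value can be
// folded into one __sincosf_stret call that returns both results at once.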
1820 | |
1821 | if (Subtarget.isTargetWin64()) { |
1822 | setOperationAction(ISD::SDIV, MVT::i128, Custom); |
1823 | setOperationAction(ISD::UDIV, MVT::i128, Custom); |
1824 | setOperationAction(ISD::SREM, MVT::i128, Custom); |
1825 | setOperationAction(ISD::UREM, MVT::i128, Custom); |
1826 | setOperationAction(ISD::SDIVREM, MVT::i128, Custom); |
1827 | setOperationAction(ISD::UDIVREM, MVT::i128, Custom); |
1828 | } |
1829 | |
1830 | // On 32-bit MSVC, `fmodf(f32)` is not defined - only `fmod(f64)`
1831 | // is. We should promote the value to 64-bits to solve this. |
1832 | // This is what the CRT headers do - `fmodf` is an inline header |
1833 | // function casting to f64 and calling `fmod`. |
1834 | if (Subtarget.is32Bit() && |
1835 | (Subtarget.isTargetWindowsMSVC() || Subtarget.isTargetWindowsItanium())) |
1836 | for (ISD::NodeType Op : |
1837 | {ISD::FCEIL, ISD::FCOS, ISD::FEXP, ISD::FFLOOR, ISD::FREM, ISD::FLOG, |
1838 | ISD::FLOG10, ISD::FPOW, ISD::FSIN}) |
1839 | if (isOperationExpand(Op, MVT::f32)) |
1840 | setOperationAction(Op, MVT::f32, Promote); |
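// E.g. 'frem float %a, %b' on 32-bit MSVC is now promoted: both operands are
// extended to double, fmod is called, and the result is truncated back to
// float, mirroring the CRT's inline fmodf wrapper.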
1841 | |
1842 | // We have target-specific dag combine patterns for the following nodes: |
1843 | setTargetDAGCombine(ISD::VECTOR_SHUFFLE); |
1844 | setTargetDAGCombine(ISD::SCALAR_TO_VECTOR); |
1845 | setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT); |
1846 | setTargetDAGCombine(ISD::CONCAT_VECTORS); |
1847 | setTargetDAGCombine(ISD::INSERT_SUBVECTOR); |
1848 | setTargetDAGCombine(ISD::EXTRACT_SUBVECTOR); |
1849 | setTargetDAGCombine(ISD::BITCAST); |
1850 | setTargetDAGCombine(ISD::VSELECT); |
1851 | setTargetDAGCombine(ISD::SELECT); |
1852 | setTargetDAGCombine(ISD::SHL); |
1853 | setTargetDAGCombine(ISD::SRA); |
1854 | setTargetDAGCombine(ISD::SRL); |
1855 | setTargetDAGCombine(ISD::OR); |
1856 | setTargetDAGCombine(ISD::AND); |
1857 | setTargetDAGCombine(ISD::ADD); |
1858 | setTargetDAGCombine(ISD::FADD); |
1859 | setTargetDAGCombine(ISD::FSUB); |
1860 | setTargetDAGCombine(ISD::FNEG); |
1861 | setTargetDAGCombine(ISD::FMA); |
1862 | setTargetDAGCombine(ISD::FMINNUM); |
1863 | setTargetDAGCombine(ISD::FMAXNUM); |
1864 | setTargetDAGCombine(ISD::SUB); |
1865 | setTargetDAGCombine(ISD::LOAD); |
1866 | setTargetDAGCombine(ISD::MLOAD); |
1867 | setTargetDAGCombine(ISD::STORE); |
1868 | setTargetDAGCombine(ISD::MSTORE); |
1869 | setTargetDAGCombine(ISD::TRUNCATE); |
1870 | setTargetDAGCombine(ISD::ZERO_EXTEND); |
1871 | setTargetDAGCombine(ISD::ANY_EXTEND); |
1872 | setTargetDAGCombine(ISD::SIGN_EXTEND); |
1873 | setTargetDAGCombine(ISD::SIGN_EXTEND_INREG); |
1874 | setTargetDAGCombine(ISD::ANY_EXTEND_VECTOR_INREG); |
1875 | setTargetDAGCombine(ISD::SIGN_EXTEND_VECTOR_INREG); |
1876 | setTargetDAGCombine(ISD::ZERO_EXTEND_VECTOR_INREG); |
1877 | setTargetDAGCombine(ISD::SINT_TO_FP); |
1878 | setTargetDAGCombine(ISD::UINT_TO_FP); |
1879 | setTargetDAGCombine(ISD::SETCC); |
1880 | setTargetDAGCombine(ISD::MUL); |
1881 | setTargetDAGCombine(ISD::XOR); |
1882 | setTargetDAGCombine(ISD::MSCATTER); |
1883 | setTargetDAGCombine(ISD::MGATHER); |
1884 | |
1885 | computeRegisterProperties(Subtarget.getRegisterInfo()); |
1886 | |
1887 | MaxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores |
1888 | MaxStoresPerMemsetOptSize = 8; |
1889 | MaxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores |
1890 | MaxStoresPerMemcpyOptSize = 4; |
1891 | MaxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores |
1892 | MaxStoresPerMemmoveOptSize = 4; |
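// Illustrative effect of these limits: a constant-size @llvm.memset is
// expanded inline only while it fits in at most 16 stores (8 at OptSize);
// e.g. a 64-byte zeroing memset on an SSE2 target becomes four 16-byte
// vector stores rather than a libcall.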
1893 | |
1894 | // TODO: These control memcmp expansion in CGP and could be raised higher, but |
1895 | // that needs to be benchmarked and balanced with the potential use of vector
1896 | // load/store types (PR33329, PR33914). |
1897 | MaxLoadsPerMemcmp = 2; |
1898 | MaxLoadsPerMemcmpOptSize = 2; |
1899 | |
1900 | // Set loop alignment to 2^ExperimentalPrefLoopAlignment bytes (default: 2^4). |
1901 | setPrefLoopAlignment(Align(1ULL << ExperimentalPrefLoopAlignment)); |
1902 | |
1903 | // An out-of-order CPU can speculatively execute past a predictable branch, |
1904 | // but a conditional move could be stalled by an expensive earlier operation. |
1905 | PredictableSelectIsExpensive = Subtarget.getSchedModel().isOutOfOrder(); |
1906 | EnableExtLdPromotion = true; |
1907 | setPrefFunctionAlignment(Align(16)); |
1908 | |
1909 | verifyIntrinsicTables(); |
1910 | } |
1911 | |
1912 | // This has so far only been implemented for 64-bit MachO. |
1913 | bool X86TargetLowering::useLoadStackGuardNode() const { |
1914 | return Subtarget.isTargetMachO() && Subtarget.is64Bit(); |
1915 | } |
1916 | |
1917 | bool X86TargetLowering::useStackGuardXorFP() const { |
1918 | // Currently only MSVC CRTs XOR the frame pointer into the stack guard value. |
1919 | return Subtarget.getTargetTriple().isOSMSVCRT(); |
1920 | } |
1921 | |
1922 | SDValue X86TargetLowering::emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val, |
1923 | const SDLoc &DL) const { |
1924 | EVT PtrTy = getPointerTy(DAG.getDataLayout()); |
1925 | unsigned XorOp = Subtarget.is64Bit() ? X86::XOR64_FP : X86::XOR32_FP; |
1926 | MachineSDNode *Node = DAG.getMachineNode(XorOp, DL, PtrTy, Val); |
1927 | return SDValue(Node, 0); |
1928 | } |
1929 | |
1930 | TargetLoweringBase::LegalizeTypeAction |
1931 | X86TargetLowering::getPreferredVectorAction(MVT VT) const { |
1932 | if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI()) |
1933 | return TypeSplitVector; |
1934 | |
1935 | if (VT.getVectorNumElements() != 1 && |
1936 | VT.getVectorElementType() != MVT::i1) |
1937 | return TypeWidenVector; |
1938 | |
1939 | return TargetLoweringBase::getPreferredVectorAction(VT); |
1940 | } |
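// For instance, on an AVX512F-only (KNL-like) target, <32 x i1> is split
// into two <16 x i1> halves here, while something like <3 x i32> is widened
// to <4 x i32>.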
1941 | |
1942 | MVT X86TargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context, |
1943 | CallingConv::ID CC, |
1944 | EVT VT) const { |
1945 | // v32i1 vectors should be promoted to v32i8 to match avx2. |
1946 | if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI()) |
1947 | return MVT::v32i8; |
1948 | // Break wide or odd vXi1 vectors into scalars to match avx2 behavior. |
1949 | if (VT.isVector() && VT.getVectorElementType() == MVT::i1 && |
1950 | Subtarget.hasAVX512() && |
1951 | (!isPowerOf2_32(VT.getVectorNumElements()) || |
1952 | (VT.getVectorNumElements() > 16 && !Subtarget.hasBWI()) || |
1953 | (VT.getVectorNumElements() > 64 && Subtarget.hasBWI()))) |
1954 | return MVT::i8; |
1955 | // FIXME: Should we just make these types legal and custom split operations? |
1956 | if ((VT == MVT::v32i16 || VT == MVT::v64i8) && |
1957 | Subtarget.hasAVX512() && !Subtarget.hasBWI() && !EnableOldKNLABI) |
1958 | return MVT::v16i32; |
1959 | return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT); |
1960 | } |
1961 | |
1962 | unsigned X86TargetLowering::getNumRegistersForCallingConv(LLVMContext &Context, |
1963 | CallingConv::ID CC, |
1964 | EVT VT) const { |
1965 | // v32i1 vectors should be promoted to v32i8 to match avx2. |
1966 | if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI()) |
1967 | return 1; |
1968 | // Break wide or odd vXi1 vectors into scalars to match avx2 behavior. |
1969 | if (VT.isVector() && VT.getVectorElementType() == MVT::i1 && |
1970 | Subtarget.hasAVX512() && |
1971 | (!isPowerOf2_32(VT.getVectorNumElements()) || |
1972 | (VT.getVectorNumElements() > 16 && !Subtarget.hasBWI()) || |
1973 | (VT.getVectorNumElements() > 64 && Subtarget.hasBWI()))) |
1974 | return VT.getVectorNumElements(); |
1975 | // FIXME: Should we just make these types legal and custom split operations? |
1976 | if ((VT == MVT::v32i16 || VT == MVT::v64i8) && |
1977 | Subtarget.hasAVX512() && !Subtarget.hasBWI() && !EnableOldKNLABI) |
1978 | return 1; |
1979 | return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT); |
1980 | } |
1981 | |
1982 | unsigned X86TargetLowering::getVectorTypeBreakdownForCallingConv( |
1983 | LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, |
1984 | unsigned &NumIntermediates, MVT &RegisterVT) const { |
1985 | // Break wide or odd vXi1 vectors into scalars to match avx2 behavior. |
1986 | if (VT.isVector() && VT.getVectorElementType() == MVT::i1 && |
1987 | Subtarget.hasAVX512() && |
1988 | (!isPowerOf2_32(VT.getVectorNumElements()) || |
1989 | (VT.getVectorNumElements() > 16 && !Subtarget.hasBWI()) || |
1990 | (VT.getVectorNumElements() > 64 && Subtarget.hasBWI()))) { |
1991 | RegisterVT = MVT::i8; |
1992 | IntermediateVT = MVT::i1; |
1993 | NumIntermediates = VT.getVectorNumElements(); |
1994 | return NumIntermediates; |
1995 | } |
1996 | |
1997 | return TargetLowering::getVectorTypeBreakdownForCallingConv(Context, CC, VT, IntermediateVT, |
1998 | NumIntermediates, RegisterVT); |
1999 | } |
2000 | |
2001 | EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL, |
2002 | LLVMContext& Context, |
2003 | EVT VT) const { |
2004 | if (!VT.isVector()) |
2005 | return MVT::i8; |
2006 | |
2007 | if (Subtarget.hasAVX512()) { |
2008 | const unsigned NumElts = VT.getVectorNumElements(); |
2009 | |
2010 | // Figure out what this type will be legalized to. |
2011 | EVT LegalVT = VT; |
2012 | while (getTypeAction(Context, LegalVT) != TypeLegal) |
2013 | LegalVT = getTypeToTransformTo(Context, LegalVT); |
2014 | |
2015 | // If we got a 512-bit vector then we'll definitely have a vXi1 compare. |
2016 | if (LegalVT.getSimpleVT().is512BitVector()) |
2017 | return EVT::getVectorVT(Context, MVT::i1, NumElts); |
2018 | |
2019 | if (LegalVT.getSimpleVT().isVector() && Subtarget.hasVLX()) { |
2020 | // If we legalized to less than a 512-bit vector, then we will use a vXi1 |
2021 | // compare for vXi32/vXi64 for sure. If we have BWI we will also support |
2022 | // vXi16/vXi8. |
2023 | MVT EltVT = LegalVT.getSimpleVT().getVectorElementType(); |
2024 | if (Subtarget.hasBWI() || EltVT.getSizeInBits() >= 32) |
2025 | return EVT::getVectorVT(Context, MVT::i1, NumElts); |
2026 | } |
2027 | } |
2028 | |
2029 | return VT.changeVectorElementTypeToInteger(); |
2030 | } |
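// Example: with AVX512VL, a compare of <8 x i32> gets an <8 x i1> result
// type here (matching k-register compares); without VLX the same compare
// keeps an <8 x i32> result because the operands legalize to 256 bits.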
2031 | |
2032 | /// Helper for getByValTypeAlignment to determine |
2033 | /// the desired ByVal argument alignment. |
2034 | static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign) { |
2035 | if (MaxAlign == 16) |
2036 | return; |
2037 | if (VectorType *VTy = dyn_cast<VectorType>(Ty)) { |
2038 | if (VTy->getBitWidth() == 128) |
2039 | MaxAlign = 16; |
2040 | } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) { |
2041 | unsigned EltAlign = 0; |
2042 | getMaxByValAlign(ATy->getElementType(), EltAlign); |
2043 | if (EltAlign > MaxAlign) |
2044 | MaxAlign = EltAlign; |
2045 | } else if (StructType *STy = dyn_cast<StructType>(Ty)) { |
2046 | for (auto *EltTy : STy->elements()) { |
2047 | unsigned EltAlign = 0; |
2048 | getMaxByValAlign(EltTy, EltAlign); |
2049 | if (EltAlign > MaxAlign) |
2050 | MaxAlign = EltAlign; |
2051 | if (MaxAlign == 16) |
2052 | break; |
2053 | } |
2054 | } |
2055 | } |
2056 | |
2057 | /// Return the desired alignment for ByVal aggregate |
2058 | /// function arguments in the caller parameter area. For X86, aggregates |
2059 | /// that contain SSE vectors are placed at 16-byte boundaries while the rest |
2060 | /// are at 4-byte boundaries. |
2061 | unsigned X86TargetLowering::getByValTypeAlignment(Type *Ty, |
2062 | const DataLayout &DL) const { |
2063 | if (Subtarget.is64Bit()) { |
2064 | // Max of 8 and alignment of type. |
2065 | unsigned TyAlign = DL.getABITypeAlignment(Ty); |
2066 | if (TyAlign > 8) |
2067 | return TyAlign; |
2068 | return 8; |
2069 | } |
2070 | |
2071 | unsigned Align = 4; |
2072 | if (Subtarget.hasSSE1()) |
2073 | getMaxByValAlign(Ty, Align); |
2074 | return Align; |
2075 | } |
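// E.g. on 32-bit x86 with SSE1, a byval struct containing a <4 x float>
// member is aligned to 16 bytes, while an aggregate of plain integers keeps
// the default 4-byte alignment.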
2076 | |
2077 | /// Returns the target specific optimal type for load |
2078 | /// and store operations as a result of memset, memcpy, and memmove |
2079 | /// lowering. If DstAlign is zero, it means the destination alignment can
2080 | /// satisfy any constraint. Similarly, if SrcAlign is zero it means there is
2081 | /// no need to check it against an alignment requirement,
2082 | /// probably because the source does not need to be loaded. If 'IsMemset' is |
2083 | /// true, that means it's expanding a memset. If 'ZeroMemset' is true, that |
2084 | /// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy |
2085 | /// source is constant so it does not need to be loaded. |
2086 | /// It returns EVT::Other if the type should be determined using generic |
2087 | /// target-independent logic. |
2088 | /// For vector ops we check that the overall size isn't larger than our |
2089 | /// preferred vector width. |
2090 | EVT X86TargetLowering::getOptimalMemOpType( |
2091 | uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool IsMemset, |
2092 | bool ZeroMemset, bool MemcpyStrSrc, |
2093 | const AttributeList &FuncAttributes) const { |
2094 | if (!FuncAttributes.hasFnAttribute(Attribute::NoImplicitFloat)) { |
2095 | if (Size >= 16 && (!Subtarget.isUnalignedMem16Slow() || |
2096 | ((DstAlign == 0 || DstAlign >= 16) && |
2097 | (SrcAlign == 0 || SrcAlign >= 16)))) { |
2098 | // FIXME: Check if unaligned 64-byte accesses are slow. |
2099 | if (Size >= 64 && Subtarget.hasAVX512() && |
2100 | (Subtarget.getPreferVectorWidth() >= 512)) { |
2101 | return Subtarget.hasBWI() ? MVT::v64i8 : MVT::v16i32; |
2102 | } |
2103 | // FIXME: Check if unaligned 32-byte accesses are slow. |
2104 | if (Size >= 32 && Subtarget.hasAVX() && |
2105 | (Subtarget.getPreferVectorWidth() >= 256)) { |
2106 | // Although this isn't a well-supported type for AVX1, we'll let |
2107 | // legalization and shuffle lowering produce the optimal codegen. If we |
2108 | // choose an optimal type with a vector element larger than a byte, |
2109 | // getMemsetStores() may create an intermediate splat (using an integer |
2110 | // multiply) before we splat as a vector. |
2111 | return MVT::v32i8; |
2112 | } |
2113 | if (Subtarget.hasSSE2() && (Subtarget.getPreferVectorWidth() >= 128)) |
2114 | return MVT::v16i8; |
2115 | // TODO: Can SSE1 handle a byte vector? |
2116 | // If we have SSE1 registers we should be able to use them. |
2117 | if (Subtarget.hasSSE1() && (Subtarget.is64Bit() || Subtarget.hasX87()) && |
2118 | (Subtarget.getPreferVectorWidth() >= 128)) |
2119 | return MVT::v4f32; |
2120 | } else if ((!IsMemset || ZeroMemset) && !MemcpyStrSrc && Size >= 8 && |
2121 | !Subtarget.is64Bit() && Subtarget.hasSSE2()) { |
2122 | // Do not use f64 to lower memcpy if source is string constant. It's |
2123 | // better to use i32 to avoid the loads. |
2124 | // Also, do not use f64 to lower memset unless this is a memset of zeros. |
2125 | // The gymnastics of splatting a byte value into an XMM register and then |
2126 | // only using 8-byte stores (because this is a CPU with slow unaligned |
2127 | // 16-byte accesses) makes that a loser. |
2128 | return MVT::f64; |
2129 | } |
2130 | } |
2131 | // This is a compromise. If we reach here, unaligned accesses may be slow on |
2132 | // this target. However, creating smaller, aligned accesses could be even |
2133 | // slower and would certainly be a lot more code. |
2134 | if (Subtarget.is64Bit() && Size >= 8) |
2135 | return MVT::i64; |
2136 | return MVT::i32; |
2137 | } |
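// Worked example (assuming an AVX2 target with fast unaligned accesses and
// no NoImplicitFloat attribute): a 64-byte memcpy returns MVT::v32i8 here,
// so the copy is emitted as two 32-byte vector loads and stores.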
2138 | |
2139 | bool X86TargetLowering::isSafeMemOpType(MVT VT) const { |
2140 | if (VT == MVT::f32) |
2141 | return X86ScalarSSEf32; |
2142 | else if (VT == MVT::f64) |
2143 | return X86ScalarSSEf64; |
2144 | return true; |
2145 | } |
2146 | |
2147 | bool X86TargetLowering::allowsMisalignedMemoryAccesses( |
2148 | EVT VT, unsigned, unsigned Align, MachineMemOperand::Flags Flags, |
2149 | bool *Fast) const { |
2150 | if (Fast) { |
2151 | switch (VT.getSizeInBits()) { |
2152 | default: |
2153 | // 8-byte and under are always assumed to be fast. |
2154 | *Fast = true; |
2155 | break; |
2156 | case 128: |
2157 | *Fast = !Subtarget.isUnalignedMem16Slow(); |
2158 | break; |
2159 | case 256: |
2160 | *Fast = !Subtarget.isUnalignedMem32Slow(); |
2161 | break; |
2162 | // TODO: What about AVX-512 (512-bit) accesses? |
2163 | } |
2164 | } |
2165 | // NonTemporal vector memory ops must be aligned. |
2166 | if (!!(Flags & MachineMemOperand::MONonTemporal) && VT.isVector()) { |
2167 | // NT loads can only be vector-aligned, so if it's less aligned than the
2168 | // minimum vector size (which we can split the vector down to), we might as |
2169 | // well use a regular unaligned vector load. |
2170 | // We don't have any NT loads pre-SSE41. |
2171 | if (!!(Flags & MachineMemOperand::MOLoad)) |
2172 | return (Align < 16 || !Subtarget.hasSSE41()); |
2173 | return false; |
2174 | } |
2175 | // Misaligned accesses of any size are always allowed. |
2176 | return true; |
2177 | } |
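// For instance, with SSE4.1 a non-temporal <4 x i32> load that is only
// 8-byte aligned is reported as an allowed misaligned access here, so it is
// emitted as a regular unaligned vector load rather than MOVNTDQA.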
2178 | |
2179 | /// Return the entry encoding for a jump table in the |
2180 | /// current function. The returned value is a member of the |
2181 | /// MachineJumpTableInfo::JTEntryKind enum. |
2182 | unsigned X86TargetLowering::getJumpTableEncoding() const { |
2183 | // In GOT pic mode, each entry in the jump table is emitted as a @GOTOFF |
2184 | // symbol. |
2185 | if (isPositionIndependent() && Subtarget.isPICStyleGOT()) |
2186 | return MachineJumpTableInfo::EK_Custom32; |
2187 | |
2188 | // Otherwise, use the normal jump table encoding heuristics. |
2189 | return TargetLowering::getJumpTableEncoding(); |
2190 | } |
2191 | |
2192 | bool X86TargetLowering::useSoftFloat() const { |
2193 | return Subtarget.useSoftFloat(); |
2194 | } |
2195 | |
2196 | void X86TargetLowering::markLibCallAttributes(MachineFunction *MF, unsigned CC, |
2197 | ArgListTy &Args) const { |
2198 | |
2199 | // Only relabel X86-32 for C / Stdcall CCs. |
2200 | if (Subtarget.is64Bit()) |
2201 | return; |
2202 | if (CC != CallingConv::C && CC != CallingConv::X86_StdCall) |
2203 | return; |
2204 | unsigned ParamRegs = 0; |
2205 | if (auto *M = MF->getFunction().getParent()) |
2206 | ParamRegs = M->getNumberRegisterParameters(); |
2207 | |
2208 | // Mark the first N integer arguments as being passed in registers.
2209 | for (unsigned Idx = 0; Idx < Args.size(); Idx++) { |
2210 | Type *T = Args[Idx].Ty; |
2211 | if (T->isIntOrPtrTy()) |
2212 | if (MF->getDataLayout().getTypeAllocSize(T) <= 8) { |
2213 | unsigned numRegs = 1; |
2214 | if (MF->getDataLayout().getTypeAllocSize(T) > 4) |
2215 | numRegs = 2; |
2216 | if (ParamRegs < numRegs) |
2217 | return; |
2218 | ParamRegs -= numRegs; |
2219 | Args[Idx].IsInReg = true; |
2220 | } |
2221 | } |
2222 | } |
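// A sketch of the effect (assuming a module built with -mregparm=2): for a
// libcall such as __udivdi3(i64, i64), only the first argument is marked
// IsInReg because the 64-bit value consumes both available parameter
// registers.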
2223 | |
2224 | const MCExpr * |
2225 | X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI, |
2226 | const MachineBasicBlock *MBB, |
2227 | unsigned uid,MCContext &Ctx) const{ |
2228 | assert(isPositionIndependent() && Subtarget.isPICStyleGOT());
2229 | // In 32-bit ELF systems, our jump table entries are formed with @GOTOFF |
2230 | // entries. |
2231 | return MCSymbolRefExpr::create(MBB->getSymbol(), |
2232 | MCSymbolRefExpr::VK_GOTOFF, Ctx); |
2233 | } |
2234 | |
2235 | /// Returns relocation base for the given PIC jumptable. |
2236 | SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table, |
2237 | SelectionDAG &DAG) const { |
2238 | if (!Subtarget.is64Bit()) |
2239 | // This doesn't have SDLoc associated with it, but is not really the |
2240 | // same as a Register. |
2241 | return DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), |
2242 | getPointerTy(DAG.getDataLayout())); |
2243 | return Table; |
2244 | } |
2245 | |
2246 | /// This returns the relocation base for the given PIC jumptable, |
2247 | /// the same as getPICJumpTableRelocBase, but as an MCExpr. |
2248 | const MCExpr *X86TargetLowering:: |
2249 | getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, |
2250 | MCContext &Ctx) const { |
2251 | // X86-64 uses RIP relative addressing based on the jump table label. |
2252 | if (Subtarget.isPICStyleRIPRel()) |
2253 | return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx); |
2254 | |
2255 | // Otherwise, the reference is relative to the PIC base. |
2256 | return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx); |
2257 | } |
2258 | |
2259 | std::pair<const TargetRegisterClass *, uint8_t> |
2260 | X86TargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI, |
2261 | MVT VT) const { |
2262 | const TargetRegisterClass *RRC = nullptr; |
2263 | uint8_t Cost = 1; |
2264 | switch (VT.SimpleTy) { |
2265 | default: |
2266 | return TargetLowering::findRepresentativeClass(TRI, VT); |
2267 | case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64: |
2268 | RRC = Subtarget.is64Bit() ? &X86::GR64RegClass : &X86::GR32RegClass; |
2269 | break; |
2270 | case MVT::x86mmx: |
2271 | RRC = &X86::VR64RegClass; |
2272 | break; |
2273 | case MVT::f32: case MVT::f64: |
2274 | case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64: |
2275 | case MVT::v4f32: case MVT::v2f64: |
2276 | case MVT::v32i8: case MVT::v16i16: case MVT::v8i32: case MVT::v4i64: |
2277 | case MVT::v8f32: case MVT::v4f64: |
2278 | case MVT::v64i8: case MVT::v32i16: case MVT::v16i32: case MVT::v8i64: |
2279 | case MVT::v16f32: case MVT::v8f64: |
2280 | RRC = &X86::VR128XRegClass; |
2281 | break; |
2282 | } |
2283 | return std::make_pair(RRC, Cost); |
2284 | } |
2285 | |
2286 | unsigned X86TargetLowering::getAddressSpace() const { |
2287 | if (Subtarget.is64Bit()) |
2288 | return (getTargetMachine().getCodeModel() == CodeModel::Kernel) ? 256 : 257; |
2289 | return 256; |
2290 | } |
2291 | |
2292 | static bool hasStackGuardSlotTLS(const Triple &TargetTriple) { |
2293 | return TargetTriple.isOSGlibc() || TargetTriple.isOSFuchsia() || |
2294 | (TargetTriple.isAndroid() && !TargetTriple.isAndroidVersionLT(17)); |
2295 | } |
2296 | |
2297 | static Constant* SegmentOffset(IRBuilder<> &IRB, |
2298 | unsigned Offset, unsigned AddressSpace) { |
2299 | return ConstantExpr::getIntToPtr( |
2300 | ConstantInt::get(Type::getInt32Ty(IRB.getContext()), Offset), |
2301 | Type::getInt8PtrTy(IRB.getContext())->getPointerTo(AddressSpace)); |
2302 | } |
2303 | |
2304 | Value *X86TargetLowering::getIRStackGuard(IRBuilder<> &IRB) const { |
2305 | // glibc, bionic, and Fuchsia have a special slot for the stack guard in |
2306 | // tcbhead_t; use it instead of the usual global variable (see |
2307 | // sysdeps/{i386,x86_64}/nptl/tls.h) |
2308 | if (hasStackGuardSlotTLS(Subtarget.getTargetTriple())) { |
2309 | if (Subtarget.isTargetFuchsia()) { |
2310 | // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value. |
2311 | return SegmentOffset(IRB, 0x10, getAddressSpace()); |
2312 | } else { |
2313 | // %fs:0x28, unless we're using a Kernel code model, in which case
2314 | // it's %gs:0x28; %gs:0x14 on i386.
2315 | unsigned Offset = (Subtarget.is64Bit()) ? 0x28 : 0x14; |
2316 | return SegmentOffset(IRB, Offset, getAddressSpace()); |
2317 | } |
2318 | } |
2319 | |
2320 | return TargetLowering::getIRStackGuard(IRB); |
2321 | } |
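// E.g. on x86-64 glibc this yields inttoptr(i32 40) in address space 257,
// i.e. %fs:0x28, so the stack protector loads the guard straight from TLS
// instead of going through the __stack_chk_guard global.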
2322 | |
2323 | void X86TargetLowering::insertSSPDeclarations(Module &M) const { |
2324 | // MSVC CRT provides functionalities for stack protection. |
2325 | if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() || |
2326 | Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) { |
2327 | // MSVC CRT has a global variable holding security cookie. |
2328 | M.getOrInsertGlobal("__security_cookie", |
2329 | Type::getInt8PtrTy(M.getContext())); |
2330 | |
2331 | // MSVC CRT has a function to validate security cookie. |
2332 | FunctionCallee SecurityCheckCookie = M.getOrInsertFunction( |
2333 | "__security_check_cookie", Type::getVoidTy(M.getContext()), |
2334 | Type::getInt8PtrTy(M.getContext())); |
2335 | if (Function *F = dyn_cast<Function>(SecurityCheckCookie.getCallee())) { |
2336 | F->setCallingConv(CallingConv::X86_FastCall); |
2337 | F->addAttribute(1, Attribute::AttrKind::InReg); |
2338 | } |
2339 | return; |
2340 | } |
2341 | // glibc, bionic, and Fuchsia have a special slot for the stack guard. |
2342 | if (hasStackGuardSlotTLS(Subtarget.getTargetTriple())) |
2343 | return; |
2344 | TargetLowering::insertSSPDeclarations(M); |
2345 | } |
2346 | |
2347 | Value *X86TargetLowering::getSDagStackGuard(const Module &M) const { |
2348 | // MSVC CRT has a global variable holding security cookie. |
2349 | if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() || |
2350 | Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) { |
2351 | return M.getGlobalVariable("__security_cookie"); |
2352 | } |
2353 | return TargetLowering::getSDagStackGuard(M); |
2354 | } |
2355 | |
2356 | Function *X86TargetLowering::getSSPStackGuardCheck(const Module &M) const { |
2357 | // MSVC CRT has a function to validate security cookie. |
2358 | if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() || |
2359 | Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) { |
2360 | return M.getFunction("__security_check_cookie"); |
2361 | } |
2362 | return TargetLowering::getSSPStackGuardCheck(M); |
2363 | } |
2364 | |
2365 | Value *X86TargetLowering::getSafeStackPointerLocation(IRBuilder<> &IRB) const { |
2366 | if (Subtarget.getTargetTriple().isOSContiki()) |
2367 | return getDefaultSafeStackPointerLocation(IRB, false); |
2368 | |
2369 | // Android provides a fixed TLS slot for the SafeStack pointer. See the |
2370 | // definition of TLS_SLOT_SAFESTACK in |
2371 | // https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h |
2372 | if (Subtarget.isTargetAndroid()) { |
2373 | // %fs:0x48, unless we're using a Kernel code model, in which case |
2374 | // it's %gs:0x48. %gs:0x24 on i386. |
2375 | unsigned Offset = (Subtarget.is64Bit()) ? 0x48 : 0x24; |
2376 | return SegmentOffset(IRB, Offset, getAddressSpace()); |
2377 | } |
2378 | |
2379 | // Fuchsia is similar. |
2380 | if (Subtarget.isTargetFuchsia()) { |
2381 | // <zircon/tls.h> defines ZX_TLS_UNSAFE_SP_OFFSET with this value. |
2382 | return SegmentOffset(IRB, 0x18, getAddressSpace()); |
2383 | } |
2384 | |
2385 | return TargetLowering::getSafeStackPointerLocation(IRB); |
2386 | } |
2387 | |
2388 | bool X86TargetLowering::isNoopAddrSpaceCast(unsigned SrcAS, |
2389 | unsigned DestAS) const { |
2390 | assert(SrcAS != DestAS && "Expected different address spaces!"); |
2391 | |
2392 | return SrcAS < 256 && DestAS < 256; |
2393 | } |
2394 | |
2395 | //===----------------------------------------------------------------------===// |
2396 | // Return Value Calling Convention Implementation |
2397 | //===----------------------------------------------------------------------===// |
2398 | |
2399 | bool X86TargetLowering::CanLowerReturn( |
2400 | CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg, |
2401 | const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const { |
2402 | SmallVector<CCValAssign, 16> RVLocs; |
2403 | CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context); |
2404 | return CCInfo.CheckReturn(Outs, RetCC_X86); |
2405 | } |
2406 | |
2407 | const MCPhysReg *X86TargetLowering::getScratchRegisters(CallingConv::ID) const { |
2408 | static const MCPhysReg ScratchRegs[] = { X86::R11, 0 }; |
2409 | return ScratchRegs; |
2410 | } |
2411 | |
2412 | /// Lowers mask values (v*i1) to the local register values. |
2413 | /// \returns the DAG node after lowering to the register type. |
2414 | static SDValue lowerMasksToReg(const SDValue &ValArg, const EVT &ValLoc, |
2415 | const SDLoc &Dl, SelectionDAG &DAG) { |
2416 | EVT ValVT = ValArg.getValueType(); |
2417 | |
2418 | if (ValVT == MVT::v1i1) |
2419 | return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, Dl, ValLoc, ValArg, |
2420 | DAG.getIntPtrConstant(0, Dl)); |
2421 | |
2422 | if ((ValVT == MVT::v8i1 && (ValLoc == MVT::i8 || ValLoc == MVT::i32)) || |
2423 | (ValVT == MVT::v16i1 && (ValLoc == MVT::i16 || ValLoc == MVT::i32))) { |
2424 | // Two stage lowering might be required |
2425 | // bitcast: v8i1 -> i8 / v16i1 -> i16 |
2426 | // anyextend: i8 -> i32 / i16 -> i32 |
2427 | EVT TempValLoc = ValVT == MVT::v8i1 ? MVT::i8 : MVT::i16; |
2428 | SDValue ValToCopy = DAG.getBitcast(TempValLoc, ValArg); |
2429 | if (ValLoc == MVT::i32) |
2430 | ValToCopy = DAG.getNode(ISD::ANY_EXTEND, Dl, ValLoc, ValToCopy); |
2431 | return ValToCopy; |
2432 | } |
2433 | |
2434 | if ((ValVT == MVT::v32i1 && ValLoc == MVT::i32) || |
2435 | (ValVT == MVT::v64i1 && ValLoc == MVT::i64)) { |
2436 | // One stage lowering is required |
2437 | // bitcast: v32i1 -> i32 / v64i1 -> i64 |
2438 | return DAG.getBitcast(ValLoc, ValArg); |
2439 | } |
2440 | |
2441 | return DAG.getNode(ISD::ANY_EXTEND, Dl, ValLoc, ValArg); |
2442 | } |
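// Conceptual model of the two-stage path above, using plain integers instead
// of DAG nodes (hypothetical helper, not used by the lowering): a v8i1 mask is
// "bitcast" to i8 and then widened to i32; an any-extend leaves the upper bits
// unspecified, so a zero-extension is one valid realization of it.
static unsigned packMask8ToI32(unsigned char MaskBits /* packed v8i1 */) {
  unsigned char AsI8 = MaskBits;      // bitcast: v8i1 -> i8
  return static_cast<unsigned>(AsI8); // any-extend: i8 -> i32
}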
2443 | |
2444 | /// Breaks v64i1 value into two registers and adds the new node to the DAG |
2445 | static void Passv64i1ArgInRegs( |
2446 | const SDLoc &Dl, SelectionDAG &DAG, SDValue &Arg, |
2447 | SmallVectorImpl<std::pair<unsigned, SDValue>> &RegsToPass, CCValAssign &VA, |
2448 | CCValAssign &NextVA, const X86Subtarget &Subtarget) { |
2449 | assert(Subtarget.hasBWI() && "Expected AVX512BW target!"); |
2450 | assert(Subtarget.is32Bit() && "Expecting 32 bit target"); |
2451 | assert(Arg.getValueType() == MVT::i64 && "Expecting 64 bit value"); |
2452 | assert(VA.isRegLoc() && NextVA.isRegLoc() && |
2453 | "The value should reside in two registers"); |
2454 | |
2455 | // Before splitting the value we cast it to i64 |
2456 | Arg = DAG.getBitcast(MVT::i64, Arg); |
2457 | |
2458 | // Splitting the value into two i32 types |
2459 | SDValue Lo, Hi; |
2460 | Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32, Arg, |
2461 | DAG.getConstant(0, Dl, MVT::i32)); |
2462 | Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32, Arg, |
2463 | DAG.getConstant(1, Dl, MVT::i32)); |
2464 | |
2465 | // Attach the two i32 types into corresponding registers |
2466 | RegsToPass.push_back(std::make_pair(VA.getLocReg(), Lo)); |
2467 | RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), Hi)); |
2468 | } |
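// Plain-C++ model of the split above (hypothetical helper): the v64i1 mask is
// bitcast to i64 and its two i32 halves are handed to consecutive registers.
static void splitI64(unsigned long long Mask, unsigned &Lo, unsigned &Hi) {
  Lo = static_cast<unsigned>(Mask);       // EXTRACT_ELEMENT 0
  Hi = static_cast<unsigned>(Mask >> 32); // EXTRACT_ELEMENT 1
}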
2469 | |
2470 | SDValue |
2471 | X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, |
2472 | bool isVarArg, |
2473 | const SmallVectorImpl<ISD::OutputArg> &Outs, |
2474 | const SmallVectorImpl<SDValue> &OutVals, |
2475 | const SDLoc &dl, SelectionDAG &DAG) const { |
2476 | MachineFunction &MF = DAG.getMachineFunction(); |
2477 | X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>(); |
2478 | |
2479 | // In some cases we need to disable registers from the default CSR list. |
2480 | // For example, when they are used for argument passing. |
2481 | bool ShouldDisableCalleeSavedRegister = |
2482 | CallConv == CallingConv::X86_RegCall || |
2483 | MF.getFunction().hasFnAttribute("no_caller_saved_registers"); |
2484 | |
2485 | if (CallConv == CallingConv::X86_INTR && !Outs.empty()) |
2486 | report_fatal_error("X86 interrupts may not return any value"); |
2487 | |
2488 | SmallVector<CCValAssign, 16> RVLocs; |
2489 | CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.getContext()); |
2490 | CCInfo.AnalyzeReturn(Outs, RetCC_X86); |
2491 | |
2492 | SDValue Flag; |
2493 | SmallVector<SDValue, 6> RetOps; |
2494 | RetOps.push_back(Chain); // Operand #0 = Chain (updated below) |
2495 | // Operand #1 = Bytes To Pop |
2496 | RetOps.push_back(DAG.getTargetConstant(FuncInfo->getBytesToPopOnReturn(), dl, |
2497 | MVT::i32)); |
2498 | |
2499 | // Copy the result values into the output registers. |
2500 | for (unsigned I = 0, OutsIndex = 0, E = RVLocs.size(); I != E; |
2501 | ++I, ++OutsIndex) { |
2502 | CCValAssign &VA = RVLocs[I]; |
2503 | assert(VA.isRegLoc() && "Can only return in registers!"); |
2504 | |
2505 | // Add the register to the CalleeSaveDisableRegs list. |
2506 | if (ShouldDisableCalleeSavedRegister) |
2507 | MF.getRegInfo().disableCalleeSavedRegister(VA.getLocReg()); |
2508 | |
2509 | SDValue ValToCopy = OutVals[OutsIndex]; |
2510 | EVT ValVT = ValToCopy.getValueType(); |
2511 | |
2512 | // Promote values to the appropriate types. |
2513 | if (VA.getLocInfo() == CCValAssign::SExt) |
2514 | ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy); |
2515 | else if (VA.getLocInfo() == CCValAssign::ZExt) |
2516 | ValToCopy = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), ValToCopy); |
2517 | else if (VA.getLocInfo() == CCValAssign::AExt) { |
2518 | if (ValVT.isVector() && ValVT.getVectorElementType() == MVT::i1) |
2519 | ValToCopy = lowerMasksToReg(ValToCopy, VA.getLocVT(), dl, DAG); |
2520 | else |
2521 | ValToCopy = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ValToCopy); |
2522 | } |
2523 | else if (VA.getLocInfo() == CCValAssign::BCvt) |
2524 | ValToCopy = DAG.getBitcast(VA.getLocVT(), ValToCopy); |
2525 | |
2526 | assert(VA.getLocInfo() != CCValAssign::FPExt && |
2527 | "Unexpected FP-extend for return value."); |
2528 | |
2529 | // If this is x86-64, and we disabled SSE, we can't return FP values, |
2530 | // or SSE or MMX vectors. |
2531 | if ((ValVT == MVT::f32 || ValVT == MVT::f64 || |
2532 | VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) && |
2533 | (Subtarget.is64Bit() && !Subtarget.hasSSE1())) { |
2534 | errorUnsupported(DAG, dl, "SSE register return with SSE disabled"); |
2535 | VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts. |
2536 | } else if (ValVT == MVT::f64 && |
2537 | (Subtarget.is64Bit() && !Subtarget.hasSSE2())) { |
2538 | // Likewise we can't return F64 values with SSE1 only. gcc does so, but |
2539 | // llvm-gcc has never done it right and no one has noticed, so this |
2540 | // should be OK for now. |
2541 | errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled"); |
2542 | VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts. |
2543 | } |
2544 | |
2545 | // Returns in ST0/ST1 are handled specially: these are pushed as operands to |
2546 | // the RET instruction and handled by the FP Stackifier. |
2547 | if (VA.getLocReg() == X86::FP0 || |
2548 | VA.getLocReg() == X86::FP1) { |
2549 | // If this is a copy from an xmm register to ST(0), use an FPExtend to |
2550 | // change the value to the FP stack register class. |
2551 | if (isScalarFPTypeInSSEReg(VA.getValVT())) |
2552 | ValToCopy = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f80, ValToCopy); |
2553 | RetOps.push_back(ValToCopy); |
2554 | // Don't emit a copytoreg. |
2555 | continue; |
2556 | } |
2557 | |
2558 | // 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64 |
2559 | // which is returned in RAX / RDX. |
2560 | if (Subtarget.is64Bit()) { |
2561 | if (ValVT == MVT::x86mmx) { |
2562 | if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) { |
2563 | ValToCopy = DAG.getBitcast(MVT::i64, ValToCopy); |
2564 | ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, |
2565 | ValToCopy); |
2566 | // If we don't have SSE2 available, convert to v4f32 so the generated |
2567 | // register is legal. |
2568 | if (!Subtarget.hasSSE2()) |
2569 | ValToCopy = DAG.getBitcast(MVT::v4f32, ValToCopy); |
2570 | } |
2571 | } |
2572 | } |
2573 | |
2574 | SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; |
2575 | |
2576 | if (VA.needsCustom()) { |
2577 | assert(VA.getValVT() == MVT::v64i1 && |
2578 | "Currently the only custom case is when we split v64i1 to 2 regs"); |
2579 | |
2580 | Passv64i1ArgInRegs(dl, DAG, ValToCopy, RegsToPass, VA, RVLocs[++I], |
2581 | Subtarget); |
2582 | |
2583 | assert(2 == RegsToPass.size() && |
2584 | "Expecting two registers after Pass64BitArgInRegs"); |
2585 | |
2586 | // Add the second register to the CalleeSaveDisableRegs list. |
2587 | if (ShouldDisableCalleeSavedRegister) |
2588 | MF.getRegInfo().disableCalleeSavedRegister(RVLocs[I].getLocReg()); |
2589 | } else { |
2590 | RegsToPass.push_back(std::make_pair(VA.getLocReg(), ValToCopy)); |
2591 | } |
2592 | |
2593 | // Add nodes to the DAG and add the values into the RetOps list |
2594 | for (auto &Reg : RegsToPass) { |
2595 | Chain = DAG.getCopyToReg(Chain, dl, Reg.first, Reg.second, Flag); |
2596 | Flag = Chain.getValue(1); |
2597 | RetOps.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType())); |
2598 | } |
2599 | } |
2600 | |
2601 | // The Swift calling convention does not require that we copy the sret argument |
2602 | // into %rax/%eax for the return, and SRetReturnReg is not set for Swift. |
2603 | |
2604 | // All x86 ABIs require that for returning structs by value we copy |
2605 | // the sret argument into %rax/%eax (depending on ABI) for the return. |
2606 | // We saved the argument into a virtual register in the entry block, |
2607 | // so now we copy the value out and into %rax/%eax. |
2608 | // |
2609 | // Checking Function.hasStructRetAttr() here is insufficient because the IR |
2610 | // may not have an explicit sret argument. If FuncInfo.CanLowerReturn is |
2611 | // false, then an sret argument may be implicitly inserted in the SelDAG. In |
2612 | // either case FuncInfo->setSRetReturnReg() will have been called. |
2613 | if (unsigned SRetReg = FuncInfo->getSRetReturnReg()) { |
2614 | // When we have both sret and another return value, we should use the |
2615 | // original Chain stored in RetOps[0], instead of the current Chain updated |
2616 | // in the above loop. If we only have sret, RetOps[0] equals Chain. |
2617 | |
2618 | // For the case of sret and another return value, we have |
2619 | // Chain_0 at the function entry |
2620 | // Chain_1 = getCopyToReg(Chain_0) in the above loop |
2621 | // If we use Chain_1 in getCopyFromReg, we will have |
2622 | // Val = getCopyFromReg(Chain_1) |
2623 | // Chain_2 = getCopyToReg(Chain_1, Val) from below |
2624 | |
2625 | // getCopyToReg(Chain_0) will be glued together with |
2626 | // getCopyToReg(Chain_1, Val) into Unit A, getCopyFromReg(Chain_1) will be |
2627 | // in Unit B, and we will have cyclic dependency between Unit A and Unit B: |
2628 | // Data dependency from Unit B to Unit A due to usage of Val in |
2629 | // getCopyToReg(Chain_1, Val) |
2630 | // Chain dependency from Unit A to Unit B |
2631 | |
2632 | // So here, we use RetOps[0] (i.e Chain_0) for getCopyFromReg. |
2633 | SDValue Val = DAG.getCopyFromReg(RetOps[0], dl, SRetReg, |
2634 | getPointerTy(MF.getDataLayout())); |
2635 | |
2636 | unsigned RetValReg |
2637 | = (Subtarget.is64Bit() && !Subtarget.isTarget64BitILP32()) ? |
2638 | X86::RAX : X86::EAX; |
2639 | Chain = DAG.getCopyToReg(Chain, dl, RetValReg, Val, Flag); |
2640 | Flag = Chain.getValue(1); |
2641 | |
2642 | // RAX/EAX now acts like a return value. |
2643 | RetOps.push_back( |
2644 | DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout()))); |
2645 | |
2646 | // Add the returned register to the CalleeSaveDisableRegs list. |
2647 | if (ShouldDisableCalleeSavedRegister) |
2648 | MF.getRegInfo().disableCalleeSavedRegister(RetValReg); |
2649 | } |
2650 | |
2651 | const X86RegisterInfo *TRI = Subtarget.getRegisterInfo(); |
2652 | const MCPhysReg *I = |
2653 | TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction()); |
2654 | if (I) { |
2655 | for (; *I; ++I) { |
2656 | if (X86::GR64RegClass.contains(*I)) |
2657 | RetOps.push_back(DAG.getRegister(*I, MVT::i64)); |
2658 | else |
2659 | llvm_unreachable("Unexpected register class in CSRsViaCopy!")::llvm::llvm_unreachable_internal("Unexpected register class in CSRsViaCopy!" , "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp" , 2659); |
2660 | } |
2661 | } |
2662 | |
2663 | RetOps[0] = Chain; // Update chain. |
2664 | |
2665 | // Add the flag if we have it. |
2666 | if (Flag.getNode()) |
2667 | RetOps.push_back(Flag); |
2668 | |
2669 | X86ISD::NodeType opcode = X86ISD::RET_FLAG; |
2670 | if (CallConv == CallingConv::X86_INTR) |
2671 | opcode = X86ISD::IRET; |
2672 | return DAG.getNode(opcode, dl, MVT::Other, RetOps); |
2673 | } |
2674 | |
2675 | bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const { |
2676 | if (N->getNumValues() != 1 || !N->hasNUsesOfValue(1, 0)) |
2677 | return false; |
2678 | |
2679 | SDValue TCChain = Chain; |
2680 | SDNode *Copy = *N->use_begin(); |
2681 | if (Copy->getOpcode() == ISD::CopyToReg) { |
2682 | // If the copy has a glue operand, we conservatively assume it isn't safe to |
2683 | // perform a tail call. |
2684 | if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue) |
2685 | return false; |
2686 | TCChain = Copy->getOperand(0); |
2687 | } else if (Copy->getOpcode() != ISD::FP_EXTEND) |
2688 | return false; |
2689 | |
2690 | bool HasRet = false; |
2691 | for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end(); |
2692 | UI != UE; ++UI) { |
2693 | if (UI->getOpcode() != X86ISD::RET_FLAG) |
2694 | return false; |
2695 | // If we are returning more than one value, we can definitely not |
2696 | // make a tail call; see PR19530. |
2697 | if (UI->getNumOperands() > 4) |
2698 | return false; |
2699 | if (UI->getNumOperands() == 4 && |
2700 | UI->getOperand(UI->getNumOperands()-1).getValueType() != MVT::Glue) |
2701 | return false; |
2702 | HasRet = true; |
2703 | } |
2704 | |
2705 | if (!HasRet) |
2706 | return false; |
2707 | |
2708 | Chain = TCChain; |
2709 | return true; |
2710 | } |
2711 | |
2712 | EVT X86TargetLowering::getTypeForExtReturn(LLVMContext &Context, EVT VT, |
2713 | ISD::NodeType ExtendKind) const { |
2714 | MVT ReturnMVT = MVT::i32; |
2715 | |
2716 | bool Darwin = Subtarget.getTargetTriple().isOSDarwin(); |
2717 | if (VT == MVT::i1 || (!Darwin && (VT == MVT::i8 || VT == MVT::i16))) { |
2718 | // The ABI does not require i1, i8 or i16 to be extended. |
2719 | // |
2720 | // On Darwin, there is code in the wild relying on Clang's old behaviour of |
2721 | // always extending i8/i16 return values, so keep doing that for now. |
2722 | // (PR26665). |
2723 | ReturnMVT = MVT::i8; |
2724 | } |
2725 | |
2726 | EVT MinVT = getRegisterType(Context, ReturnMVT); |
2727 | return VT.bitsLT(MinVT) ? MinVT : VT; |
2728 | } |
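// Sketch of the widening rule above in terms of bit widths (hypothetical
// helper; Darwin keeps promoting i8/i16 returns to i32 for compatibility,
// see PR26665):
static unsigned extReturnBits(unsigned ValueBits, bool IsDarwin) {
  unsigned MinBits =
      (ValueBits == 1 || (!IsDarwin && ValueBits <= 16)) ? 8 : 32;
  return ValueBits < MinBits ? MinBits : ValueBits;
}
// e.g. extReturnBits(1, false) == 8, extReturnBits(8, true) == 32,
//      extReturnBits(16, false) == 16.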
2729 | |
2730 | /// Reads two 32 bit registers and creates a 64 bit mask value. |
2731 | /// \param VA The current 32 bit value that needs to be assigned. |
2732 | /// \param NextVA The next 32 bit value that needs to be assigned. |
2733 | /// \param Root The parent DAG node. |
2734 | /// \param [in,out] InFlag Represents the SDValue in the parent DAG node used |
2735 | /// for glue purposes. If the DAG is already using a |
2736 | /// physical register instead of a virtual one, we should |
2737 | /// glue our new SDValue to the InFlag SDValue. |
2738 | /// \return A new SDValue of size 64 bits. |
2739 | static SDValue getv64i1Argument(CCValAssign &VA, CCValAssign &NextVA, |
2740 | SDValue &Root, SelectionDAG &DAG, |
2741 | const SDLoc &Dl, const X86Subtarget &Subtarget, |
2742 | SDValue *InFlag = nullptr) { |
2743 | assert((Subtarget.hasBWI()) && "Expected AVX512BW target!"); |
2744 | assert(Subtarget.is32Bit() && "Expecting 32 bit target"); |
2745 | assert(VA.getValVT() == MVT::v64i1 && |
2746 | "Expecting first location of 64 bit width type"); |
2747 | assert(NextVA.getValVT() == VA.getValVT() && |
2748 | "The locations should have the same type"); |
2749 | assert(VA.isRegLoc() && NextVA.isRegLoc() && |
2750 | "The values should reside in two registers"); |
2751 | |
2752 | SDValue Lo, Hi; |
2753 | SDValue ArgValueLo, ArgValueHi; |
2754 | |
2755 | MachineFunction &MF = DAG.getMachineFunction(); |
2756 | const TargetRegisterClass *RC = &X86::GR32RegClass; |
2757 | |
2758 | // Read a 32 bit value from the registers. |
2759 | if (nullptr == InFlag) { |
2760 | // When no physical register is present, |
2761 | // create an intermediate virtual register. |
2762 | unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); |
2763 | ArgValueLo = DAG.getCopyFromReg(Root, Dl, Reg, MVT::i32); |
2764 | Reg = MF.addLiveIn(NextVA.getLocReg(), RC); |
2765 | ArgValueHi = DAG.getCopyFromReg(Root, Dl, Reg, MVT::i32); |
2766 | } else { |
2767 | // When a physical register is available read the value from it and glue |
2768 | // the reads together. |
2769 | ArgValueLo = |
2770 | DAG.getCopyFromReg(Root, Dl, VA.getLocReg(), MVT::i32, *InFlag); |
2771 | *InFlag = ArgValueLo.getValue(2); |
2772 | ArgValueHi = |
2773 | DAG.getCopyFromReg(Root, Dl, NextVA.getLocReg(), MVT::i32, *InFlag); |
2774 | *InFlag = ArgValueHi.getValue(2); |
2775 | } |
2776 | |
2777 | // Convert the i32 type into v32i1 type. |
2778 | Lo = DAG.getBitcast(MVT::v32i1, ArgValueLo); |
2779 | |
2780 | // Convert the i32 type into v32i1 type. |
2781 | Hi = DAG.getBitcast(MVT::v32i1, ArgValueHi); |
2782 | |
2783 | // Concatenate the two values together. |
2784 | return DAG.getNode(ISD::CONCAT_VECTORS, Dl, MVT::v64i1, Lo, Hi); |
2785 | } |
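// Inverse of the split sketched after Passv64i1ArgInRegs (hypothetical
// helper): two 32-bit register reads are reassembled into one 64-bit mask,
// with the first location supplying the low half.
static unsigned long long joinI32Pair(unsigned Lo, unsigned Hi) {
  return (static_cast<unsigned long long>(Hi) << 32) | Lo;
}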
2786 | |
2787 | /// The function will lower a register of various sizes (8/16/32/64 bits) |
2788 | /// to a mask value of the expected size (v8i1/v16i1/v32i1/v64i1). |
2789 | /// \returns a DAG node containing the operand after lowering to the mask type. |
2790 | static SDValue lowerRegToMasks(const SDValue &ValArg, const EVT &ValVT, |
2791 | const EVT &ValLoc, const SDLoc &Dl, |
2792 | SelectionDAG &DAG) { |
2793 | SDValue ValReturned = ValArg; |
2794 | |
2795 | if (ValVT == MVT::v1i1) |
2796 | return DAG.getNode(ISD::SCALAR_TO_VECTOR, Dl, MVT::v1i1, ValReturned); |
2797 | |
2798 | if (ValVT == MVT::v64i1) { |
2799 | // On 32 bit targets this case is handled by getv64i1Argument. |
2800 | assert(ValLoc == MVT::i64 && "Expecting only i64 locations"); |
2801 | // On 64 bit targets there is no need to truncate the value; a bitcast suffices. |
2802 | } else { |
2803 | MVT maskLen; |
2804 | switch (ValVT.getSimpleVT().SimpleTy) { |
2805 | case MVT::v8i1: |
2806 | maskLen = MVT::i8; |
2807 | break; |
2808 | case MVT::v16i1: |
2809 | maskLen = MVT::i16; |
2810 | break; |
2811 | case MVT::v32i1: |
2812 | maskLen = MVT::i32; |
2813 | break; |
2814 | default: |
2815 | llvm_unreachable("Expecting a vector of i1 types")::llvm::llvm_unreachable_internal("Expecting a vector of i1 types" , "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp" , 2815); |
2816 | } |
2817 | |
2818 | ValReturned = DAG.getNode(ISD::TRUNCATE, Dl, maskLen, ValReturned); |
2819 | } |
2820 | return DAG.getBitcast(ValVT, ValReturned); |
2821 | } |
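// Conceptual model of the path above for the v16i1 case (hypothetical
// helper): the wider location value is truncated to the mask width and each
// remaining bit is one mask lane.
static bool maskLaneFromI32(unsigned Loc, unsigned Lane /* 0..15 */) {
  unsigned short Truncated = static_cast<unsigned short>(Loc); // TRUNCATE i32 -> i16
  return (Truncated >> Lane) & 1u;                             // read one v16i1 lane
}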
2822 | |
2823 | /// Lower the result values of a call into the |
2824 | /// appropriate copies out of the corresponding physical registers. |
2825 | /// |
2826 | SDValue X86TargetLowering::LowerCallResult( |
2827 | SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, |
2828 | const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, |
2829 | SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, |
2830 | uint32_t *RegMask) const { |
2831 | |
2832 | const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); |
2833 | // Assign locations to each value returned by this call. |
2834 | SmallVector<CCValAssign, 16> RVLocs; |
2835 | bool Is64Bit = Subtarget.is64Bit(); |
2836 | CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs, |
2837 | *DAG.getContext()); |
2838 | CCInfo.AnalyzeCallResult(Ins, RetCC_X86); |
2839 | |
2840 | // Copy all of the result registers out of their specified physreg. |
2841 | for (unsigned I = 0, InsIndex = 0, E = RVLocs.size(); I != E; |
2842 | ++I, ++InsIndex) { |
2843 | CCValAssign &VA = RVLocs[I]; |
2844 | EVT CopyVT = VA.getLocVT(); |
2845 | |
2846 | // In some calling conventions we need to remove the used registers |
2847 | // from the register mask. |
2848 | if (RegMask) { |
2849 | for (MCSubRegIterator SubRegs(VA.getLocReg(), TRI, /*IncludeSelf=*/true); |
2850 | SubRegs.isValid(); ++SubRegs) |
2851 | RegMask[*SubRegs / 32] &= ~(1u << (*SubRegs % 32)); |
2852 | } |
2853 | |
2854 | // If this is x86-64, and we disabled SSE, we can't return FP values |
2855 | if ((CopyVT == MVT::f32 || CopyVT == MVT::f64 || CopyVT == MVT::f128) && |
2856 | ((Is64Bit || Ins[InsIndex].Flags.isInReg()) && !Subtarget.hasSSE1())) { |
2857 | errorUnsupported(DAG, dl, "SSE register return with SSE disabled"); |
2858 | VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts. |
2859 | } else if (CopyVT == MVT::f64 && |
2860 | (Is64Bit && !Subtarget.hasSSE2())) { |
2861 | errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled"); |
2862 | VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts. |
2863 | } |
2864 | |
2865 | // If we prefer to use the value in xmm registers, copy it out as f80 and |
2866 | // use a truncate to move it from fp stack reg to xmm reg. |
2867 | bool RoundAfterCopy = false; |
2868 | if ((VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) && |
2869 | isScalarFPTypeInSSEReg(VA.getValVT())) { |
2870 | if (!Subtarget.hasX87()) |
2871 | report_fatal_error("X87 register return with X87 disabled"); |
2872 | CopyVT = MVT::f80; |
2873 | RoundAfterCopy = (CopyVT != VA.getLocVT()); |
2874 | } |
2875 | |
2876 | SDValue Val; |
2877 | if (VA.needsCustom()) { |
2878 | assert(VA.getValVT() == MVT::v64i1 && |
2879 | "Currently the only custom case is when we split v64i1 to 2 regs"); |
2880 | Val = |
2881 | getv64i1Argument(VA, RVLocs[++I], Chain, DAG, dl, Subtarget, &InFlag); |
2882 | } else { |
2883 | Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), CopyVT, InFlag) |
2884 | .getValue(1); |
2885 | Val = Chain.getValue(0); |
2886 | InFlag = Chain.getValue(2); |
2887 | } |
2888 | |
2889 | if (RoundAfterCopy) |
2890 | Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val, |
2891 | // This truncation won't change the value. |
2892 | DAG.getIntPtrConstant(1, dl)); |
2893 | |
2894 | if (VA.isExtInLoc() && (VA.getValVT().getScalarType() == MVT::i1)) { |
2895 | if (VA.getValVT().isVector() && |
2896 | ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) || |
2897 | (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) { |
2898 | // promoting a mask type (v*i1) into a register of type i64/i32/i16/i8 |
2899 | Val = lowerRegToMasks(Val, VA.getValVT(), VA.getLocVT(), dl, DAG); |
2900 | } else |
2901 | Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val); |
2902 | } |
2903 | |
2904 | InVals.push_back(Val); |
2905 | } |
2906 | |
2907 | return Chain; |
2908 | } |
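// The register-mask update in the loop above clears one bit per returned
// (sub)register in a packed array of 32-bit words. Standalone model of that
// bit arithmetic (hypothetical helper; Reg stands for a physical register
// number):
static void clearRegFromMask(unsigned *RegMask, unsigned Reg) {
  RegMask[Reg / 32] &= ~(1u << (Reg % 32));
}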
2909 | |
2910 | //===----------------------------------------------------------------------===// |
2911 | // C & StdCall & Fast Calling Convention implementation |
2912 | //===----------------------------------------------------------------------===// |
2913 | // The StdCall calling convention is the standard for many Windows API |
2914 | // routines. It differs from the C calling convention only slightly: the |
2915 | // callee, not the caller, cleans up the stack, and symbols are decorated |
2916 | // differently. It doesn't support any vector arguments. |
2917 | // For info on fast calling convention see Fast Calling Convention (tail call) |
2918 | // implementation LowerX86_32FastCCCallTo. |
2919 | |
2920 | /// callIsStructReturn - Determines whether a call uses struct return |
2921 | /// semantics. |
2922 | enum StructReturnType { |
2923 | NotStructReturn, |
2924 | RegStructReturn, |
2925 | StackStructReturn |
2926 | }; |
2927 | static StructReturnType |
2928 | callIsStructReturn(ArrayRef<ISD::OutputArg> Outs, bool IsMCU) { |
2929 | if (Outs.empty()) |
2930 | return NotStructReturn; |
2931 | |
2932 | const ISD::ArgFlagsTy &Flags = Outs[0].Flags; |
2933 | if (!Flags.isSRet()) |
2934 | return NotStructReturn; |
2935 | if (Flags.isInReg() || IsMCU) |
2936 | return RegStructReturn; |
2937 | return StackStructReturn; |
2938 | } |
2939 | |
2940 | /// Determines whether a function uses struct return semantics. |
2941 | static StructReturnType |
2942 | argsAreStructReturn(ArrayRef<ISD::InputArg> Ins, bool IsMCU) { |
2943 | if (Ins.empty()) |
2944 | return NotStructReturn; |
2945 | |
2946 | const ISD::ArgFlagsTy &Flags = Ins[0].Flags; |
2947 | if (!Flags.isSRet()) |
2948 | return NotStructReturn; |
2949 | if (Flags.isInReg() || IsMCU) |
2950 | return RegStructReturn; |
2951 | return StackStructReturn; |
2952 | } |
2953 | |
2954 | /// Make a copy of an aggregate at the address specified by "Src" to the address |
2955 | /// "Dst" with size and alignment information specified by the byval parameter |
2956 | /// attribute. The copy will be passed as a byval function parameter. |
2957 | static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst, |
2958 | SDValue Chain, ISD::ArgFlagsTy Flags, |
2959 | SelectionDAG &DAG, const SDLoc &dl) { |
2960 | SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32); |
2961 | |
2962 | return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(), |
2963 | /*isVolatile*/false, /*AlwaysInline=*/true, |
2964 | /*isTailCall*/false, |
2965 | MachinePointerInfo(), MachinePointerInfo()); |
2966 | } |
2967 | |
2968 | /// Return true if the calling convention is one that we can guarantee TCO for. |
2969 | static bool canGuaranteeTCO(CallingConv::ID CC) { |
2970 | return (CC == CallingConv::Fast || CC == CallingConv::GHC || |
2971 | CC == CallingConv::X86_RegCall || CC == CallingConv::HiPE || |
2972 | CC == CallingConv::HHVM || CC == CallingConv::Tail); |
2973 | } |
2974 | |
2975 | /// Return true if we might ever do TCO for calls with this calling convention. |
2976 | static bool mayTailCallThisCC(CallingConv::ID CC) { |
2977 | switch (CC) { |
2978 | // C calling conventions: |
2979 | case CallingConv::C: |
2980 | case CallingConv::Win64: |
2981 | case CallingConv::X86_64_SysV: |
2982 | // Callee pop conventions: |
2983 | case CallingConv::X86_ThisCall: |
2984 | case CallingConv::X86_StdCall: |
2985 | case CallingConv::X86_VectorCall: |
2986 | case CallingConv::X86_FastCall: |
2987 | // Swift: |
2988 | case CallingConv::Swift: |
2989 | return true; |
2990 | default: |
2991 | return canGuaranteeTCO(CC); |
2992 | } |
2993 | } |
2994 | |
2995 | /// Return true if the function is being made into a tailcall target by |
2996 | /// changing its ABI. |
2997 | static bool shouldGuaranteeTCO(CallingConv::ID CC, bool GuaranteedTailCallOpt) { |
2998 | return (GuaranteedTailCallOpt && canGuaranteeTCO(CC)) || CC == CallingConv::Tail; |
2999 | } |
3000 | |
3001 | bool X86TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const { |
3002 | auto Attr = |
3003 | CI->getParent()->getParent()->getFnAttribute("disable-tail-calls"); |
3004 | if (!CI->isTailCall() || Attr.getValueAsString() == "true") |
3005 | return false; |
3006 | |
3007 | ImmutableCallSite CS(CI); |
3008 | CallingConv::ID CalleeCC = CS.getCallingConv(); |
3009 | if (!mayTailCallThisCC(CalleeCC)) |
3010 | return false; |
3011 | |
3012 | return true; |
3013 | } |
3014 | |
3015 | SDValue |
3016 | X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv, |
3017 | const SmallVectorImpl<ISD::InputArg> &Ins, |
3018 | const SDLoc &dl, SelectionDAG &DAG, |
3019 | const CCValAssign &VA, |
3020 | MachineFrameInfo &MFI, unsigned i) const { |
3021 | // Create the nodes corresponding to a load from this parameter slot. |
3022 | ISD::ArgFlagsTy Flags = Ins[i].Flags; |
3023 | bool AlwaysUseMutable = shouldGuaranteeTCO( |
3024 | CallConv, DAG.getTarget().Options.GuaranteedTailCallOpt); |
3025 | bool isImmutable = !AlwaysUseMutable && !Flags.isByVal(); |
3026 | EVT ValVT; |
3027 | MVT PtrVT = getPointerTy(DAG.getDataLayout()); |
3028 | |
3029 | // If the value is passed by pointer, we have the address passed instead of |
3030 | // the value itself. No need to extend if the mask value and location share |
3031 | // the same absolute size. |
3032 | bool ExtendedInMem = |
3033 | VA.isExtInLoc() && VA.getValVT().getScalarType() == MVT::i1 && |
3034 | VA.getValVT().getSizeInBits() != VA.getLocVT().getSizeInBits(); |
3035 | |
3036 | if (VA.getLocInfo() == CCValAssign::Indirect || ExtendedInMem) |
3037 | ValVT = VA.getLocVT(); |
3038 | else |
3039 | ValVT = VA.getValVT(); |
3040 | |
3041 | // FIXME: For now, all byval parameter objects are marked mutable. This can be |
3042 | // changed with more analysis. |
3043 | // In the case of tail call optimization, mark all arguments mutable, since |
3044 | // they could be overwritten by the lowering of arguments of a tail call. |
3045 | if (Flags.isByVal()) { |
3046 | unsigned Bytes = Flags.getByValSize(); |
3047 | if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects. |
3048 | |
3049 | // FIXME: For now, all byval parameter objects are marked as aliasing. This |
3050 | // can be improved with deeper analysis. |
3051 | int FI = MFI.CreateFixedObject(Bytes, VA.getLocMemOffset(), isImmutable, |
3052 | /*isAliased=*/true); |
3053 | return DAG.getFrameIndex(FI, PtrVT); |
3054 | } |
3055 | |
3056 | // This is an argument in memory. We might be able to perform copy elision. |
3057 | // If the argument is passed directly in memory without any extension, then we |
3058 | // can perform copy elision. Large vector types, for example, may be passed |
3059 | // indirectly by pointer. |
3060 | if (Flags.isCopyElisionCandidate() && |
3061 | VA.getLocInfo() != CCValAssign::Indirect && !ExtendedInMem) { |
3062 | EVT ArgVT = Ins[i].ArgVT; |
3063 | SDValue PartAddr; |
3064 | if (Ins[i].PartOffset == 0) { |
3065 | // If this is a one-part value or the first part of a multi-part value, |
3066 | // create a stack object for the entire argument value type and return a |
3067 | // load from our portion of it. This assumes that if the first part of an |
3068 | // argument is in memory, the rest will also be in memory. |
3069 | int FI = MFI.CreateFixedObject(ArgVT.getStoreSize(), VA.getLocMemOffset(), |
3070 | /*IsImmutable=*/false); |
3071 | PartAddr = DAG.getFrameIndex(FI, PtrVT); |
3072 | return DAG.getLoad( |
3073 | ValVT, dl, Chain, PartAddr, |
3074 | MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)); |
3075 | } else { |
3076 | // This is not the first piece of an argument in memory. See if there is |
3077 | // already a fixed stack object including this offset. If so, assume it |
3078 | // was created by the PartOffset == 0 branch above and create a load from |
3079 | // the appropriate offset into it. |
3080 | int64_t PartBegin = VA.getLocMemOffset(); |
3081 | int64_t PartEnd = PartBegin + ValVT.getSizeInBits() / 8; |
3082 | int FI = MFI.getObjectIndexBegin(); |
3083 | for (; MFI.isFixedObjectIndex(FI); ++FI) { |
3084 | int64_t ObjBegin = MFI.getObjectOffset(FI); |
3085 | int64_t ObjEnd = ObjBegin + MFI.getObjectSize(FI); |
3086 | if (ObjBegin <= PartBegin && PartEnd <= ObjEnd) |
3087 | break; |
3088 | } |
3089 | if (MFI.isFixedObjectIndex(FI)) { |
3090 | SDValue Addr = |
3091 | DAG.getNode(ISD::ADD, dl, PtrVT, DAG.getFrameIndex(FI, PtrVT), |
3092 | DAG.getIntPtrConstant(Ins[i].PartOffset, dl)); |
3093 | return DAG.getLoad( |
3094 | ValVT, dl, Chain, Addr, |
3095 | MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI, |
3096 | Ins[i].PartOffset)); |
3097 | } |
3098 | } |
3099 | } |
3100 | |
3101 | int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8, |
3102 | VA.getLocMemOffset(), isImmutable); |
3103 | |
3104 | // Set SExt or ZExt flag. |
3105 | if (VA.getLocInfo() == CCValAssign::ZExt) { |
3106 | MFI.setObjectZExt(FI, true); |
3107 | } else if (VA.getLocInfo() == CCValAssign::SExt) { |
3108 | MFI.setObjectSExt(FI, true); |
3109 | } |
3110 | |
3111 | SDValue FIN = DAG.getFrameIndex(FI, PtrVT); |
3112 | SDValue Val = DAG.getLoad( |
3113 | ValVT, dl, Chain, FIN, |
3114 | MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)); |
3115 | return ExtendedInMem |
3116 | ? (VA.getValVT().isVector() |
3117 | ? DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VA.getValVT(), Val) |
3118 | : DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val)) |
3119 | : Val; |
3120 | } |
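// The copy-elision search in LowerMemArgument looks for an existing fixed
// stack object that fully contains the byte range of the current argument
// part. A plain-C++ model of that containment scan (hypothetical type and
// helper; the real code iterates MachineFrameInfo fixed objects):
struct FixedObjRange { long long Begin; long long Size; };

static int findContainingFixedObject(const FixedObjRange *Objs, int NumObjs,
                                     long long PartBegin, long long PartEnd) {
  for (int FI = 0; FI != NumObjs; ++FI) {
    long long ObjBegin = Objs[FI].Begin;
    long long ObjEnd = ObjBegin + Objs[FI].Size;
    if (ObjBegin <= PartBegin && PartEnd <= ObjEnd)
      return FI; // this object already covers the part; load from it
  }
  return -1; // no covering object; create a fresh fixed object instead
}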
3121 | |
3122 | // FIXME: Get this from tablegen. |
3123 | static ArrayRef<MCPhysReg> get64BitArgumentGPRs(CallingConv::ID CallConv, |
3124 | const X86Subtarget &Subtarget) { |
3125 | assert(Subtarget.is64Bit()); |
3126 | |
3127 | if (Subtarget.isCallingConvWin64(CallConv)) { |
3128 | static const MCPhysReg GPR64ArgRegsWin64[] = { |
3129 | X86::RCX, X86::RDX, X86::R8, X86::R9 |
3130 | }; |
3131 | return makeArrayRef(std::begin(GPR64ArgRegsWin64), std::end(GPR64ArgRegsWin64)); |
3132 | } |
3133 | |
3134 | static const MCPhysReg GPR64ArgRegs64Bit[] = { |
3135 | X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9 |
3136 | }; |
3137 | return makeArrayRef(std::begin(GPR64ArgRegs64Bit), std::end(GPR64ArgRegs64Bit)); |
3138 | } |
3139 | |
3140 | // FIXME: Get this from tablegen. |
3141 | static ArrayRef<MCPhysReg> get64BitArgumentXMMs(MachineFunction &MF, |
3142 | CallingConv::ID CallConv, |
3143 | const X86Subtarget &Subtarget) { |
3144 | assert(Subtarget.is64Bit()); |
3145 | if (Subtarget.isCallingConvWin64(CallConv)) { |
3146 | // The XMM registers which might contain var arg parameters are shadowed |
3147 | // in their paired GPRs, so we only need to save the GPRs to their home |
3148 | // slots. |
3149 | // TODO: __vectorcall will change this. |
3150 | return None; |
3151 | } |
3152 | |
3153 | const Function &F = MF.getFunction(); |
3154 | bool NoImplicitFloatOps = F.hasFnAttribute(Attribute::NoImplicitFloat); |
3155 | bool isSoftFloat = Subtarget.useSoftFloat(); |
3156 | assert(!(isSoftFloat && NoImplicitFloatOps) && |
3157 | "SSE register cannot be used when SSE is disabled!"); |
3158 | if (isSoftFloat || NoImplicitFloatOps || !Subtarget.hasSSE1()) |
3159 | // Kernel mode asks for SSE to be disabled, so there are no XMM argument |
3160 | // registers. |
3161 | return None; |
3162 | |
3163 | static const MCPhysReg XMMArgRegs64Bit[] = { |
3164 | X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3, |
3165 | X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7 |
3166 | }; |
3167 | return makeArrayRef(std::begin(XMMArgRegs64Bit), std::end(XMMArgRegs64Bit)); |
3168 | } |
3169 | |
3170 | #ifndef NDEBUG |
3171 | static bool isSortedByValueNo(ArrayRef<CCValAssign> ArgLocs) { |
3172 | return std::is_sorted(ArgLocs.begin(), ArgLocs.end(), |
3173 | [](const CCValAssign &A, const CCValAssign &B) -> bool { |
3174 | return A.getValNo() < B.getValNo(); |
3175 | }); |
3176 | } |
3177 | #endif |
3178 | |
3179 | SDValue X86TargetLowering::LowerFormalArguments( |
3180 | SDValue Chain, CallingConv::ID CallConv, bool isVarArg, |
3181 | const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, |
3182 | SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { |
3183 | MachineFunction &MF = DAG.getMachineFunction(); |
3184 | X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>(); |
3185 | const TargetFrameLowering &TFI = *Subtarget.getFrameLowering(); |
3186 | |
3187 | const Function &F = MF.getFunction(); |
3188 | if (F.hasExternalLinkage() && Subtarget.isTargetCygMing() && |
3189 | F.getName() == "main") |
3190 | FuncInfo->setForceFramePointer(true); |
3191 | |
3192 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
3193 | bool Is64Bit = Subtarget.is64Bit(); |
3194 | bool IsWin64 = Subtarget.isCallingConvWin64(CallConv); |
3195 | |
3196 | assert( |
3197 | !(isVarArg && canGuaranteeTCO(CallConv)) && |
3198 | "Var args not supported with calling conv' regcall, fastcc, ghc or hipe"); |
3199 | |
3200 | // Assign locations to all of the incoming arguments. |
3201 | SmallVector<CCValAssign, 16> ArgLocs; |
3202 | CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext()); |
3203 | |
3204 | // Allocate shadow area for Win64. |
3205 | if (IsWin64) |
3206 | CCInfo.AllocateStack(32, 8); |
3207 | |
3208 | CCInfo.AnalyzeArguments(Ins, CC_X86); |
3209 | |
3210 | // In vectorcall calling convention a second pass is required for the HVA |
3211 | // types. |
3212 | if (CallingConv::X86_VectorCall == CallConv) { |
3213 | CCInfo.AnalyzeArgumentsSecondPass(Ins, CC_X86); |
3214 | } |
3215 | |
3216 | // The next loop assumes that the locations are in the same order as the |
3217 | // input arguments. |
3218 | assert(isSortedByValueNo(ArgLocs) && |
3219 | "Argument Location list must be sorted before lowering"); |
3220 | |
3221 | SDValue ArgValue; |
3222 | for (unsigned I = 0, InsIndex = 0, E = ArgLocs.size(); I != E; |
3223 | ++I, ++InsIndex) { |
3224 | assert(InsIndex < Ins.size() && "Invalid Ins index"); |
3225 | CCValAssign &VA = ArgLocs[I]; |
3226 | |
3227 | if (VA.isRegLoc()) { |
3228 | EVT RegVT = VA.getLocVT(); |
3229 | if (VA.needsCustom()) { |
3230 | assert( |
3231 | VA.getValVT() == MVT::v64i1 && |
3232 | "Currently the only custom case is when we split v64i1 to 2 regs"); |
3233 | |
3234 | // v64i1 values, in regcall calling convention, that are |
3235 | // compiled to 32 bit arch, are split up into two registers. |
3236 | ArgValue = |
3237 | getv64i1Argument(VA, ArgLocs[++I], Chain, DAG, dl, Subtarget); |
3238 | } else { |
3239 | const TargetRegisterClass *RC; |
3240 | if (RegVT == MVT::i8) |
3241 | RC = &X86::GR8RegClass; |
3242 | else if (RegVT == MVT::i16) |
3243 | RC = &X86::GR16RegClass; |
3244 | else if (RegVT == MVT::i32) |
3245 | RC = &X86::GR32RegClass; |
3246 | else if (Is64Bit && RegVT == MVT::i64) |
3247 | RC = &X86::GR64RegClass; |
3248 | else if (RegVT == MVT::f32) |
3249 | RC = Subtarget.hasAVX512() ? &X86::FR32XRegClass : &X86::FR32RegClass; |
3250 | else if (RegVT == MVT::f64) |
3251 | RC = Subtarget.hasAVX512() ? &X86::FR64XRegClass : &X86::FR64RegClass; |
3252 | else if (RegVT == MVT::f80) |
3253 | RC = &X86::RFP80RegClass; |
3254 | else if (RegVT == MVT::f128) |
3255 | RC = &X86::VR128RegClass; |
3256 | else if (RegVT.is512BitVector()) |
3257 | RC = &X86::VR512RegClass; |
3258 | else if (RegVT.is256BitVector()) |
3259 | RC = Subtarget.hasVLX() ? &X86::VR256XRegClass : &X86::VR256RegClass; |
3260 | else if (RegVT.is128BitVector()) |
3261 | RC = Subtarget.hasVLX() ? &X86::VR128XRegClass : &X86::VR128RegClass; |
3262 | else if (RegVT == MVT::x86mmx) |
3263 | RC = &X86::VR64RegClass; |
3264 | else if (RegVT == MVT::v1i1) |
3265 | RC = &X86::VK1RegClass; |
3266 | else if (RegVT == MVT::v8i1) |
3267 | RC = &X86::VK8RegClass; |
3268 | else if (RegVT == MVT::v16i1) |
3269 | RC = &X86::VK16RegClass; |
3270 | else if (RegVT == MVT::v32i1) |
3271 | RC = &X86::VK32RegClass; |
3272 | else if (RegVT == MVT::v64i1) |
3273 | RC = &X86::VK64RegClass; |
3274 | else |
3275 | llvm_unreachable("Unknown argument type!")::llvm::llvm_unreachable_internal("Unknown argument type!", "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/X86/X86ISelLowering.cpp" , 3275); |
3276 | |
3277 | unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); |
3278 | ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT); |
3279 | } |
3280 | |
3281 | // If this is an 8 or 16-bit value, it is really passed promoted to 32 |
3282 | // bits. Insert an assert[sz]ext to capture this, then truncate to the |
3283 | // right size. |
3284 | if (VA.getLocInfo() == CCValAssign::SExt) |
3285 | ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue, |
3286 | DAG.getValueType(VA.getValVT())); |
3287 | else if (VA.getLocInfo() == CCValAssign::ZExt) |
3288 | ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue, |
3289 | DAG.getValueType(VA.getValVT())); |
3290 | else if (VA.getLocInfo() == CCValAssign::BCvt) |
3291 | ArgValue = DAG.getBitcast(VA.getValVT(), ArgValue); |
3292 | |
3293 | if (VA.isExtInLoc()) { |
3294 | // Handle MMX values passed in XMM regs. |
3295 | if (RegVT.isVector() && VA.getValVT().getScalarType() != MVT::i1) |
3296 | ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(), ArgValue); |
3297 | else if (VA.getValVT().isVector() && |
3298 | VA.getValVT().getScalarType() == MVT::i1 && |
3299 | ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) || |
3300 | (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) { |
3301 | // Promoting a mask type (v*i1) into a register of type i64/i32/i16/i8 |
3302 | ArgValue = lowerRegToMasks(ArgValue, VA.getValVT(), RegVT, dl, DAG); |
3303 | } else |
3304 | ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue); |
3305 | } |
3306 | } else { |
3307 | assert(VA.isMemLoc()); |
3308 | ArgValue = |
3309 | LowerMemArgument(Chain, CallConv, Ins, dl, DAG, VA, MFI, InsIndex); |
3310 | } |
3311 | |
3312 | // If value is passed via pointer - do a load. |
3313 | if (VA.getLocInfo() == CCValAssign::Indirect && !Ins[I].Flags.isByVal()) |
3314 | ArgValue = |
3315 | DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue, MachinePointerInfo()); |
3316 | |
3317 | InVals.push_back(ArgValue); |
3318 | } |
3319 | |
3320 | for (unsigned I = 0, E = Ins.size(); I != E; ++I) { |
3321 | // Swift calling convention does not require we copy the sret argument |
3322 | // into %rax/%eax for the return. We don't set SRetReturnReg for Swift. |
3323 | if (CallConv == CallingConv::Swift) |
3324 | continue; |
3325 | |
3326 | // All x86 ABIs require that for returning structs by value we copy the |
3327 | // sret argument into %rax/%eax (depending on ABI) for the return. Save |
3328 | // the argument into a virtual register so that we can access it from the |
3329 | // return points. |
3330 | if (Ins[I].Flags.isSRet()) { |
3331 | unsigned Reg = FuncInfo->getSRetReturnReg(); |
3332 | if (!Reg) { |
3333 | MVT PtrTy = getPointerTy(DAG.getDataLayout()); |
3334 | Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy)); |
3335 | FuncInfo->setSRetReturnReg(Reg); |
3336 | } |
3337 | SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[I]); |
3338 | Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain); |
3339 | break; |
3340 | } |
3341 | } |
3342 | |
3343 | unsigned StackSize = CCInfo.getNextStackOffset(); |
3344 | // Align stack specially for tail calls. |
3345 | if (shouldGuaranteeTCO(CallConv, |
3346 | MF.getTarget().Options.GuaranteedTailCallOpt)) |
3347 | StackSize = GetAlignedArgumentStackSize(StackSize, DAG); |
3348 | |
3349 | // If the function takes a variable number of arguments, make a frame index for
3350 | // the start of the first vararg value... for expansion of llvm.va_start. We |
3351 | // can skip this if there are no va_start calls. |
3352 | if (MFI.hasVAStart() && |
3353 | (Is64Bit || (CallConv != CallingConv::X86_FastCall && |
3354 | CallConv != CallingConv::X86_ThisCall))) { |
3355 | FuncInfo->setVarArgsFrameIndex(MFI.CreateFixedObject(1, StackSize, true)); |
3356 | } |
3357 | |
3358 | // Figure out if XMM registers are in use. |
3359 | assert(!(Subtarget.useSoftFloat() &&
3360 | F.hasFnAttribute(Attribute::NoImplicitFloat)) &&
3361 | "SSE register cannot be used when SSE is disabled!");
3362 | |
3363 | // 64-bit calling conventions support varargs and register parameters, so we |
3364 | // have to do extra work to spill them in the prologue. |
3365 | if (Is64Bit && isVarArg && MFI.hasVAStart()) { |
3366 | // Find the first unallocated argument registers. |
3367 | ArrayRef<MCPhysReg> ArgGPRs = get64BitArgumentGPRs(CallConv, Subtarget); |
3368 | ArrayRef<MCPhysReg> ArgXMMs = get64BitArgumentXMMs(MF, CallConv, Subtarget); |
3369 | unsigned NumIntRegs = CCInfo.getFirstUnallocated(ArgGPRs); |
3370 | unsigned NumXMMRegs = CCInfo.getFirstUnallocated(ArgXMMs); |
3371 | assert(!(NumXMMRegs && !Subtarget.hasSSE1()) &&
3372 | "SSE register cannot be used when SSE is disabled!");
3373 | |
3374 | // Gather all the live in physical registers. |
3375 | SmallVector<SDValue, 6> LiveGPRs; |
3376 | SmallVector<SDValue, 8> LiveXMMRegs; |
3377 | SDValue ALVal; |
3378 | for (MCPhysReg Reg : ArgGPRs.slice(NumIntRegs)) { |
3379 | unsigned GPR = MF.addLiveIn(Reg, &X86::GR64RegClass); |
3380 | LiveGPRs.push_back( |
3381 | DAG.getCopyFromReg(Chain, dl, GPR, MVT::i64)); |
3382 | } |
3383 | if (!ArgXMMs.empty()) { |
3384 | unsigned AL = MF.addLiveIn(X86::AL, &X86::GR8RegClass); |
3385 | ALVal = DAG.getCopyFromReg(Chain, dl, AL, MVT::i8); |
3386 | for (MCPhysReg Reg : ArgXMMs.slice(NumXMMRegs)) { |
3387 | unsigned XMMReg = MF.addLiveIn(Reg, &X86::VR128RegClass); |
3388 | LiveXMMRegs.push_back( |
3389 | DAG.getCopyFromReg(Chain, dl, XMMReg, MVT::v4f32)); |
3390 | } |
3391 | } |
3392 | |
3393 | if (IsWin64) { |
3394 | // Get to the caller-allocated home save location. Add 8 to account |
3395 | // for the return address. |
3396 | int HomeOffset = TFI.getOffsetOfLocalArea() + 8; |
3397 | FuncInfo->setRegSaveFrameIndex( |
3398 | MFI.CreateFixedObject(1, NumIntRegs * 8 + HomeOffset, false)); |
3399 | // Fixup to set vararg frame on shadow area (4 x i64). |
3400 | if (NumIntRegs < 4) |
3401 | FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex()); |
3402 | } else { |
3403 | // For X86-64, if there are vararg parameters that are passed via |
3404 | // registers, then we must store them to their spots on the stack so |
3405 | // they may be loaded by dereferencing the result of va_next. |
3406 | FuncInfo->setVarArgsGPOffset(NumIntRegs * 8); |
3407 | FuncInfo->setVarArgsFPOffset(ArgGPRs.size() * 8 + NumXMMRegs * 16); |
3408 | FuncInfo->setRegSaveFrameIndex(MFI.CreateStackObject( |
3409 | ArgGPRs.size() * 8 + ArgXMMs.size() * 16, 16, false)); |
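     | // For example, on SysV x86-64 with SSE there are 6 argument GPRs and 8
     | // argument XMMs, so the register save area is 6*8 + 8*16 = 176 bytes. If 2
     | // GPRs and 1 XMM were used by named arguments, gp_offset starts at 16 and
     | // fp_offset at 6*8 + 1*16 = 64.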
3410 | } |
3411 | |
3412 | // Store the integer parameter registers. |
3413 | SmallVector<SDValue, 8> MemOps; |
3414 | SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), |
3415 | getPointerTy(DAG.getDataLayout())); |
3416 | unsigned Offset = FuncInfo->getVarArgsGPOffset(); |
3417 | for (SDValue Val : LiveGPRs) { |
3418 | SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()), |
3419 | RSFIN, DAG.getIntPtrConstant(Offset, dl)); |
3420 | SDValue Store = |
3421 | DAG.getStore(Val.getValue(1), dl, Val, FIN, |
3422 | MachinePointerInfo::getFixedStack( |
3423 | DAG.getMachineFunction(), |
3424 | FuncInfo->getRegSaveFrameIndex(), Offset)); |
3425 | MemOps.push_back(Store); |
3426 | Offset += 8; |
3427 | } |
3428 | |
3429 | if (!ArgXMMs.empty() && NumXMMRegs != ArgXMMs.size()) { |
3430 | // Now store the XMM (fp + vector) parameter registers. |
3431 | SmallVector<SDValue, 12> SaveXMMOps; |
3432 | SaveXMMOps.push_back(Chain); |
3433 | SaveXMMOps.push_back(ALVal); |
3434 | SaveXMMOps.push_back(DAG.getIntPtrConstant( |
3435 | FuncInfo->getRegSaveFrameIndex(), dl)); |
3436 | SaveXMMOps.push_back(DAG.getIntPtrConstant( |
3437 | FuncInfo->getVarArgsFPOffset(), dl)); |
3438 | SaveXMMOps.insert(SaveXMMOps.end(), LiveXMMRegs.begin(), |
3439 | LiveXMMRegs.end()); |
3440 | MemOps.push_back(DAG.getNode(X86ISD::VASTART_SAVE_XMM_REGS, dl, |
3441 | MVT::Other, SaveXMMOps)); |
3442 | } |
3443 | |
3444 | if (!MemOps.empty()) |
3445 | Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps); |
3446 | } |
3447 | |
3448 | if (isVarArg && MFI.hasMustTailInVarArgFunc()) { |
3449 | // Find the largest legal vector type. |
3450 | MVT VecVT = MVT::Other; |
3451 | // FIXME: Only some x86_32 calling conventions support AVX512. |
3452 | if (Subtarget.useAVX512Regs() && |
3453 | (Is64Bit || (CallConv == CallingConv::X86_VectorCall || |
3454 | CallConv == CallingConv::Intel_OCL_BI))) |
3455 | VecVT = MVT::v16f32; |
3456 | else if (Subtarget.hasAVX()) |
3457 | VecVT = MVT::v8f32; |
3458 | else if (Subtarget.hasSSE2()) |
3459 | VecVT = MVT::v4f32; |
3460 | |
3461 | // We forward some GPRs and some vector types. |
3462 | SmallVector<MVT, 2> RegParmTypes; |
3463 | MVT IntVT = Is64Bit ? MVT::i64 : MVT::i32; |
3464 | RegParmTypes.push_back(IntVT); |
3465 | if (VecVT != MVT::Other) |
3466 | RegParmTypes.push_back(VecVT); |
3467 | |
3468 | // Compute the set of forwarded registers. The rest are scratch. |
3469 | SmallVectorImpl<ForwardedRegister> &Forwards = |
3470 | FuncInfo->getForwardedMustTailRegParms(); |
3471 | CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, CC_X86); |
3472 | |
3473 | // Conservatively forward AL on x86_64, since it might be used for varargs. |
3474 | if (Is64Bit && !CCInfo.isAllocated(X86::AL)) { |
3475 | unsigned ALVReg = MF.addLiveIn(X86::AL, &X86::GR8RegClass); |
3476 | Forwards.push_back(ForwardedRegister(ALVReg, X86::AL, MVT::i8)); |
3477 | } |
3478 | |
3479 | // Copy all forwards from physical to virtual registers. |
3480 | for (ForwardedRegister &FR : Forwards) { |
3481 | // FIXME: Can we use a less constrained schedule? |
3482 | SDValue RegVal = DAG.getCopyFromReg(Chain, dl, FR.VReg, FR.VT); |
3483 | FR.VReg = MF.getRegInfo().createVirtualRegister(getRegClassFor(FR.VT)); |
3484 | Chain = DAG.getCopyToReg(Chain, dl, FR.VReg, RegVal); |
3485 | } |
3486 | } |
3487 | |
3488 | // Some CCs need callee pop. |
3489 | if (X86::isCalleePop(CallConv, Is64Bit, isVarArg, |
3490 | MF.getTarget().Options.GuaranteedTailCallOpt)) { |
3491 | FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything. |
3492 | } else if (CallConv == CallingConv::X86_INTR && Ins.size() == 2) { |
3493 | // X86 interrupts must pop the error code (and the alignment padding) if |
3494 | // present. |
3495 | FuncInfo->setBytesToPopOnReturn(Is64Bit ? 16 : 4); |
3496 | } else { |
3497 | FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing. |
3498 | // If this is an sret function, the return should pop the hidden pointer. |
3499 | if (!Is64Bit && !canGuaranteeTCO(CallConv) && |
3500 | !Subtarget.getTargetTriple().isOSMSVCRT() && |
3501 | argsAreStructReturn(Ins, Subtarget.isTargetMCU()) == StackStructReturn) |
3502 | FuncInfo->setBytesToPopOnReturn(4); |
3503 | } |
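     | // For example, a 32-bit stdcall or fastcall callee pops all of its stack
     | // arguments (BytesToPopOnReturn == StackSize above), while a 32-bit cdecl
     | // sret function on a non-MSVC target returns with "ret 4" to pop only the
     | // hidden struct-return pointer.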
3504 | |
3505 | if (!Is64Bit) { |
3506 | // RegSaveFrameIndex is X86-64 only. |
3507 | FuncInfo->setRegSaveFrameIndex(0xAAAAAAA); |
3508 | if (CallConv == CallingConv::X86_FastCall || |
3509 | CallConv == CallingConv::X86_ThisCall) |
3510 | // fastcc functions can't have varargs. |
3511 | FuncInfo->setVarArgsFrameIndex(0xAAAAAAA); |
3512 | } |
3513 | |
3514 | FuncInfo->setArgumentStackSize(StackSize); |
3515 | |
3516 | if (WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo()) { |
3517 | EHPersonality Personality = classifyEHPersonality(F.getPersonalityFn()); |
3518 | if (Personality == EHPersonality::CoreCLR) { |
3519 | assert(Is64Bit);
3520 | // TODO: Add a mechanism to frame lowering that will allow us to indicate |
3521 | // that we'd prefer this slot be allocated towards the bottom of the frame |
3522 | // (i.e. near the stack pointer after allocating the frame). Every |
3523 | // funclet needs a copy of this slot in its (mostly empty) frame, and the |
3524 | // offset from the bottom of this and each funclet's frame must be the |
3525 | // same, so the size of funclets' (mostly empty) frames is dictated by |
3526 | // how far this slot is from the bottom (since they allocate just enough |
3527 | // space to accommodate holding this slot at the correct offset). |
3528 | int PSPSymFI = MFI.CreateStackObject(8, 8, /*isSS=*/false); |
3529 | EHInfo->PSPSymFrameIdx = PSPSymFI; |
3530 | } |
3531 | } |
3532 | |
3533 | if (CallConv == CallingConv::X86_RegCall || |
3534 | F.hasFnAttribute("no_caller_saved_registers")) { |
3535 | MachineRegisterInfo &MRI = MF.getRegInfo(); |
3536 | for (std::pair<unsigned, unsigned> Pair : MRI.liveins()) |
3537 | MRI.disableCalleeSavedRegister(Pair.first); |
3538 | } |
3539 | |
3540 | return Chain; |
3541 | } |
3542 | |
3543 | SDValue X86TargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, |
3544 | SDValue Arg, const SDLoc &dl, |
3545 | SelectionDAG &DAG, |
3546 | const CCValAssign &VA, |
3547 | ISD::ArgFlagsTy Flags) const { |
3548 | unsigned LocMemOffset = VA.getLocMemOffset(); |
3549 | SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl); |
3550 | PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()), |
3551 | StackPtr, PtrOff); |
3552 | if (Flags.isByVal()) |
3553 | return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl); |
3554 | |
3555 | return DAG.getStore( |
3556 | Chain, dl, Arg, PtrOff, |
3557 | MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset)); |
3558 | } |
3559 | |
3560 | /// Emit a load of return address if tail call |
3561 | /// optimization is performed and it is required. |
3562 | SDValue X86TargetLowering::EmitTailCallLoadRetAddr( |
3563 | SelectionDAG &DAG, SDValue &OutRetAddr, SDValue Chain, bool IsTailCall, |
3564 | bool Is64Bit, int FPDiff, const SDLoc &dl) const { |
3565 | // Adjust the Return address stack slot. |
3566 | EVT VT = getPointerTy(DAG.getDataLayout()); |
3567 | OutRetAddr = getReturnAddressFrameIndex(DAG); |
3568 | |
3569 | // Load the "old" Return address. |
3570 | OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, MachinePointerInfo()); |
3571 | return SDValue(OutRetAddr.getNode(), 1); |
3572 | } |
3573 | |
3574 | /// Emit a store of the return address if tail call |
3575 | /// optimization is performed and it is required (FPDiff!=0). |
3576 | static SDValue EmitTailCallStoreRetAddr(SelectionDAG &DAG, MachineFunction &MF, |
3577 | SDValue Chain, SDValue RetAddrFrIdx, |
3578 | EVT PtrVT, unsigned SlotSize, |
3579 | int FPDiff, const SDLoc &dl) { |
3580 | // Store the return address to the appropriate stack slot. |
3581 | if (!FPDiff) return Chain; |
3582 | // Calculate the new stack slot for the return address. |
3583 | int NewReturnAddrFI = |
3584 | MF.getFrameInfo().CreateFixedObject(SlotSize, (int64_t)FPDiff - SlotSize, |
3585 | false); |
3586 | SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, PtrVT); |
3587 | Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx, |
3588 | MachinePointerInfo::getFixedStack( |
3589 | DAG.getMachineFunction(), NewReturnAddrFI)); |
3590 | return Chain; |
3591 | } |
3592 | |
3593 | /// Returns a vector_shuffle mask for a movs{s|d}, movd
3594 | /// operation of the specified width.
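     | /// For v4f32, for instance, the mask is <4, 1, 2, 3>: lane 0 is taken from V2
     | /// and the remaining lanes from V1, which matches the movss semantics.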
3595 | static SDValue getMOVL(SelectionDAG &DAG, const SDLoc &dl, MVT VT, SDValue V1, |
3596 | SDValue V2) { |
3597 | unsigned NumElems = VT.getVectorNumElements(); |
3598 | SmallVector<int, 8> Mask; |
3599 | Mask.push_back(NumElems); |
3600 | for (unsigned i = 1; i != NumElems; ++i) |
3601 | Mask.push_back(i); |
3602 | return DAG.getVectorShuffle(VT, dl, V1, V2, Mask); |
3603 | } |
3604 | |
3605 | SDValue |
3606 | X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, |
3607 | SmallVectorImpl<SDValue> &InVals) const { |
3608 | SelectionDAG &DAG = CLI.DAG; |
3609 | SDLoc &dl = CLI.DL; |
3610 | SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; |
3611 | SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; |
3612 | SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; |
3613 | SDValue Chain = CLI.Chain; |
3614 | SDValue Callee = CLI.Callee; |
3615 | CallingConv::ID CallConv = CLI.CallConv; |
3616 | bool &isTailCall = CLI.IsTailCall; |
3617 | bool isVarArg = CLI.IsVarArg; |
3618 | |
3619 | MachineFunction &MF = DAG.getMachineFunction(); |
3620 | bool Is64Bit = Subtarget.is64Bit(); |
3621 | bool IsWin64 = Subtarget.isCallingConvWin64(CallConv); |
3622 | StructReturnType SR = callIsStructReturn(Outs, Subtarget.isTargetMCU()); |
3623 | bool IsSibcall = false; |
3624 | bool IsGuaranteeTCO = MF.getTarget().Options.GuaranteedTailCallOpt || |
3625 | CallConv == CallingConv::Tail; |
3626 | X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>(); |
3627 | auto Attr = MF.getFunction().getFnAttribute("disable-tail-calls"); |
3628 | const auto *CI = dyn_cast_or_null<CallInst>(CLI.CS.getInstruction()); |
3629 | const Function *Fn = CI ? CI->getCalledFunction() : nullptr; |
3630 | bool HasNCSR = (CI && CI->hasFnAttr("no_caller_saved_registers")) || |
3631 | (Fn && Fn->hasFnAttribute("no_caller_saved_registers")); |
3632 | const auto *II = dyn_cast_or_null<InvokeInst>(CLI.CS.getInstruction()); |
3633 | bool HasNoCfCheck = |
3634 | (CI && CI->doesNoCfCheck()) || (II && II->doesNoCfCheck()); |
3635 | const Module *M = MF.getMMI().getModule(); |
3636 | Metadata *IsCFProtectionSupported = M->getModuleFlag("cf-protection-branch"); |
3637 | |
3638 | MachineFunction::CallSiteInfo CSInfo; |
3639 | |
3640 | if (CallConv == CallingConv::X86_INTR) |
3641 | report_fatal_error("X86 interrupts may not be called directly"); |
3642 | |
3643 | if (Attr.getValueAsString() == "true") |
3644 | isTailCall = false; |
3645 | |
3646 | if (Subtarget.isPICStyleGOT() && !IsGuaranteeTCO) { |
3647 | // If we are using a GOT, disable tail calls to external symbols with |
3648 | // default visibility. Tail calling such a symbol requires using a GOT |
3649 | // relocation, which forces early binding of the symbol. This breaks code |
3650 | // that requires lazy function symbol resolution. Using musttail or
3651 | // GuaranteedTailCallOpt will override this. |
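     | // For illustration: tail-jumping to an external "foo" with default visibility
     | // would have to go through foo's GOT entry, forcing the symbol to be bound at
     | // load time rather than lazily through the PLT stub.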
3652 | GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee); |
3653 | if (!G || (!G->getGlobal()->hasLocalLinkage() && |
3654 | G->getGlobal()->hasDefaultVisibility())) |
3655 | isTailCall = false; |
3656 | } |
3657 | |
3658 | bool IsMustTail = CLI.CS && CLI.CS.isMustTailCall(); |
3659 | if (IsMustTail) { |
3660 | // Force this to be a tail call. The verifier rules are enough to ensure |
3661 | // that we can lower this successfully without moving the return address |
3662 | // around. |
3663 | isTailCall = true; |
3664 | } else if (isTailCall) { |
3665 | // Check if it's really possible to do a tail call. |
3666 | isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, |
3667 | isVarArg, SR != NotStructReturn, |
3668 | MF.getFunction().hasStructRetAttr(), CLI.RetTy, |
3669 | Outs, OutVals, Ins, DAG); |
3670 | |
3671 | // Sibcalls are automatically detected tailcalls which do not require |
3672 | // ABI changes. |
3673 | if (!IsGuaranteeTCO && isTailCall) |
3674 | IsSibcall = true; |
3675 | |
3676 | if (isTailCall) |
3677 | ++NumTailCalls; |
3678 | } |
3679 | |
3680 | assert(!(isVarArg && canGuaranteeTCO(CallConv)) &&
3681 | "Var args not supported with calling convention fastcc, ghc or hipe");
3682 | |
3683 | // Analyze operands of the call, assigning locations to each operand. |
3684 | SmallVector<CCValAssign, 16> ArgLocs; |
3685 | CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext()); |
3686 | |
3687 | // Allocate shadow area for Win64. |
3688 | if (IsWin64) |
3689 | CCInfo.AllocateStack(32, 8); |
3690 | |
3691 | CCInfo.AnalyzeArguments(Outs, CC_X86); |
3692 | |
3693 | // In vectorcall calling convention a second pass is required for the HVA |
3694 | // types. |
3695 | if (CallingConv::X86_VectorCall == CallConv) { |
3696 | CCInfo.AnalyzeArgumentsSecondPass(Outs, CC_X86); |
3697 | } |
3698 | |
3699 | // Get a count of how many bytes are to be pushed on the stack. |
3700 | unsigned NumBytes = CCInfo.getAlignedCallFrameSize(); |
3701 | if (IsSibcall) |
3702 | // This is a sibcall. The memory operands are already in place in the
3703 | // caller's incoming argument stack area.
3704 | NumBytes = 0; |
3705 | else if (IsGuaranteeTCO && canGuaranteeTCO(CallConv)) |
3706 | NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG); |
3707 | |
3708 | int FPDiff = 0; |
3709 | if (isTailCall && !IsSibcall && !IsMustTail) { |
3710 | // Lower arguments at fp - stackoffset + fpdiff. |
3711 | unsigned NumBytesCallerPushed = X86Info->getBytesToPopOnReturn(); |
3712 | |
3713 | FPDiff = NumBytesCallerPushed - NumBytes; |
3714 | |
3715 | // Record how far the return-address stack slot has to move, but only if
3716 | // this call moves it further than any previously recorded delta.
3717 | if (FPDiff < X86Info->getTCReturnAddrDelta()) |
3718 | X86Info->setTCReturnAddrDelta(FPDiff); |
3719 | } |
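     | // Rough example: if this function pops 16 bytes of its own stack arguments on
     | // return but the tail-called function needs 32 bytes, FPDiff = 16 - 32 = -16,
     | // and EmitTailCallStoreRetAddr below re-stores the return address at offset
     | // FPDiff - SlotSize, i.e. 16 bytes lower than its original slot.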
3720 | |
3721 | unsigned NumBytesToPush = NumBytes; |
3722 | unsigned NumBytesToPop = NumBytes; |
3723 | |
3724 | // If we have an inalloca argument, all stack space has already been allocated |
3725 | // for us and is right at the top of the stack. We don't support multiple
3726 | // arguments passed in memory when using inalloca. |
3727 | if (!Outs.empty() && Outs.back().Flags.isInAlloca()) { |
3728 | NumBytesToPush = 0; |
3729 | if (!ArgLocs.back().isMemLoc()) |
3730 | report_fatal_error("cannot use inalloca attribute on a register " |
3731 | "parameter"); |
3732 | if (ArgLocs.back().getLocMemOffset() != 0) |
3733 | report_fatal_error("any parameter with the inalloca attribute must be " |
3734 | "the only memory argument"); |
3735 | } |
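     | // Sketch of the 32-bit IR this expects (names are illustrative):
     | //   %mem = alloca inalloca <{ %struct.S }>
     | //   ...initialize %mem...
     | //   call void @f(<{ %struct.S }>* inalloca %mem)
     | // The alloca already performed the stack adjustment, so nothing is pushed
     | // here (NumBytesToPush stays 0).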
3736 | |
3737 | if (!IsSibcall) |
3738 | Chain = DAG.getCALLSEQ_START(Chain, NumBytesToPush, |
3739 | NumBytes - NumBytesToPush, dl); |
3740 | |
3741 | SDValue RetAddrFrIdx; |
3742 | // Load return address for tail calls. |
3743 | if (isTailCall && FPDiff) |
3744 | Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall, |
3745 | Is64Bit, FPDiff, dl); |
3746 | |
3747 | SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; |
3748 | SmallVector<SDValue, 8> MemOpChains; |
3749 | SDValue StackPtr; |
3750 | |
3751 | // The next loop assumes that the locations are in the same order as the
3752 | // input arguments. |
3753 | assert(isSortedByValueNo(ArgLocs) &&
3754 | "Argument Location list must be sorted before lowering");
3755 | |
3756 | // Walk the register/memloc assignments, inserting copies/loads. In the case |
3757 | // of tail call optimization, arguments are handled later.
3758 | const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); |
3759 | for (unsigned I = 0, OutIndex = 0, E = ArgLocs.size(); I != E; |
3760 | ++I, ++OutIndex) { |
3761 | assert(OutIndex < Outs.size() && "Invalid Out index");
3762 | // Skip inalloca arguments, they have already been written. |
3763 | ISD::ArgFlagsTy Flags = Outs[OutIndex].Flags; |
3764 | if (Flags.isInAlloca()) |
3765 | continue; |
3766 | |
3767 | CCValAssign &VA = ArgLocs[I]; |
3768 | EVT RegVT = VA.getLocVT(); |
3769 | SDValue Arg = OutVals[OutIndex]; |
3770 | bool isByVal = Flags.isByVal(); |
3771 | |
3772 | // Promote the value if needed. |
3773 | switch (VA.getLocInfo()) { |
3774 | default: llvm_unreachable("Unknown loc info!");
3775 | case CCValAssign::Full: break; |
3776 | case CCValAssign::SExt: |
3777 | Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg); |
3778 | break; |
3779 | case CCValAssign::ZExt: |
3780 | Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg); |
3781 | break; |
3782 | case CCValAssign::AExt: |
3783 | if (Arg.getValueType().isVector() && |
3784 | Arg.getValueType().getVectorElementType() == MVT::i1) |
3785 | Arg = lowerMasksToReg(Arg, RegVT, dl, DAG); |
3786 | else if (RegVT.is128BitVector()) { |
3787 | // Special case: passing MMX values in XMM registers. |
3788 | Arg = DAG.getBitcast(MVT::i64, Arg); |
3789 | Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg); |
3790 | Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg); |
3791 | } else |
3792 | Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg); |
3793 | break; |
3794 | case CCValAssign::BCvt: |
3795 | Arg = DAG.getBitcast(RegVT, Arg); |
3796 | break; |
3797 | case CCValAssign::Indirect: { |
3798 | if (isByVal) { |
3799 | // Memcpy the argument to a temporary stack slot to prevent |
3800 | // the caller from seeing any modifications the callee may make |
3801 | // as guaranteed by the `byval` attribute. |
3802 | int FrameIdx = MF.getFrameInfo().CreateStackObject( |
3803 | Flags.getByValSize(), std::max(16, (int)Flags.getByValAlign()), |
3804 | false); |
3805 | SDValue StackSlot = |
3806 | DAG.getFrameIndex(FrameIdx, getPointerTy(DAG.getDataLayout())); |
3807 | Chain = |
3808 | CreateCopyOfByValArgument(Arg, StackSlot, Chain, Flags, DAG, dl); |
3809 | // From now on treat this as a regular pointer |
3810 | Arg = StackSlot; |
3811 | isByVal = false; |
3812 | } else { |
3813 | // Store the argument. |
3814 | SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT()); |
3815 | int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex(); |
3816 | Chain = DAG.getStore( |
3817 | Chain, dl, Arg, SpillSlot, |
3818 | MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)); |
3819 | Arg = SpillSlot; |
3820 | } |
3821 | break; |
3822 | } |
3823 | } |
3824 | |
3825 | if (VA.needsCustom()) { |
3826 | assert(VA.getValVT() == MVT::v64i1 &&
3827 | "Currently the only custom case is when we split v64i1 to 2 regs");
3828 | // Split v64i1 value into two registers |
3829 | Passv64i1ArgInRegs(dl, DAG, Arg, RegsToPass, VA, ArgLocs[++I], Subtarget); |
3830 | } else if (VA.isRegLoc()) { |
3831 | RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); |
3832 | const TargetOptions &Options = DAG.getTarget().Options; |
3833 | if (Options.EnableDebugEntryValues) |
3834 | CSInfo.emplace_back(VA.getLocReg(), I); |
3835 | if (isVarArg && IsWin64) { |
3836 | // The Win64 ABI requires an argument passed in an XMM register to also be
3837 | // copied to the corresponding shadow GPR if the callee is a varargs function.
3838 | unsigned ShadowReg = 0; |
3839 | switch (VA.getLocReg()) { |
3840 | case X86::XMM0: ShadowReg = X86::RCX; break; |
3841 | case X86::XMM1: ShadowReg = X86::RDX; break; |
3842 | case X86::XMM2: ShadowReg = X86::R8; break; |
3843 | case X86::XMM3: ShadowReg = X86::R9; break; |
3844 | } |
3845 | if (ShadowReg) |
3846 | RegsToPass.push_back(std::make_pair(ShadowReg, Arg)); |
3847 | } |
3848 | } else if (!IsSibcall && (!isTailCall || isByVal)) { |
3849 | assert(VA.isMemLoc());
3850 | if (!StackPtr.getNode()) |
3851 | StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(), |
3852 | getPointerTy(DAG.getDataLayout())); |
3853 | MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg, |
3854 | dl, DAG, VA, Flags)); |
3855 | } |
3856 | } |
3857 | |
3858 | if (!MemOpChains.empty()) |
3859 | Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains); |
3860 | |
3861 | if (Subtarget.isPICStyleGOT()) { |
3862 | // ELF / PIC requires the GOT pointer to be in the EBX register before
3863 | // making function calls via the PLT.
3864 | if (!isTailCall) { |
3865 | RegsToPass.push_back(std::make_pair( |
3866 | unsigned(X86::EBX), DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), |
3867 | getPointerTy(DAG.getDataLayout())))); |
3868 | } else { |
3869 | // If we are tail calling and generating PIC/GOT style code load the |
3870 | // address of the callee into ECX. The value in ecx is used as target of |
3871 | // the tail jump. This is done to circumvent the ebx/callee-saved problem |
3872 | // for tail calls on PIC/GOT architectures. Normally we would just put the |
3873 | // address of GOT into ebx and then call target@PLT. But for tail calls |
3874 | // ebx would be restored (since ebx is callee saved) before jumping to the |
3875 | // target@PLT. |
3876 | |
3877 | // Note: The actual moving to ECX is done further down. |
3878 | GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee); |
3879 | if (G && !G->getGlobal()->hasLocalLinkage() && |
3880 | G->getGlobal()->hasDefaultVisibility()) |
3881 | Callee = LowerGlobalAddress(Callee, DAG); |
3882 | else if (isa<ExternalSymbolSDNode>(Callee)) |
3883 | Callee = LowerExternalSymbol(Callee, DAG); |
3884 | } |
3885 | } |
3886 | |
3887 | if (Is64Bit && isVarArg && !IsWin64 && !IsMustTail) { |
3888 | // From AMD64 ABI document: |
3889 | // For calls that may call functions that use varargs or stdargs |
3890 | // (prototype-less calls or calls to functions containing ellipsis (...) in |
3891 | // the declaration) %al is used as hidden argument to specify the number |
3892 | // of SSE registers used. The contents of %al do not need to match exactly |
3893 | // the number of registers, but must be an upper bound on the number of SSE
3894 | // registers used and is in the range 0 - 8 inclusive. |
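     | // For example, a call like printf("%f\n", x) that passes one double in XMM0
     | // will have AL set to 1 here; a varargs call with no SSE arguments gets AL = 0.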
3895 | |
3896 | // Count the number of XMM registers allocated. |
3897 | static const MCPhysReg XMMArgRegs[] = { |
3898 | X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3, |
3899 | X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7 |
3900 | }; |
3901 | unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs); |
3902 | assert((Subtarget.hasSSE1() || !NumXMMRegs)
3903 | && "SSE registers cannot be used when SSE is disabled");
3904 | |
3905 | RegsToPass.push_back(std::make_pair(unsigned(X86::AL), |
3906 | DAG.getConstant(NumXMMRegs, dl, |
3907 | MVT::i8))); |
3908 | } |
3909 | |
3910 | if (isVarArg && IsMustTail) { |
3911 | const auto &Forwards = X86Info->getForwardedMustTailRegParms(); |
3912 | for (const auto &F : Forwards) { |
3913 | SDValue Val = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT); |
3914 | RegsToPass.push_back(std::make_pair(unsigned(F.PReg), Val)); |
3915 | } |
3916 | } |
3917 | |
3918 | // For tail calls lower the arguments to the 'real' stack slots. Sibcalls |
3919 | // don't need this because the eligibility check rejects calls that require |
3920 | // shuffling arguments passed in memory. |
3921 | if (!IsSibcall && isTailCall) { |
3922 | // Force all the incoming stack arguments to be loaded from the stack |
3923 | // before any new outgoing arguments are stored to the stack, because the |
3924 | // outgoing stack slots may alias the incoming argument stack slots, and |
3925 | // the alias isn't otherwise explicit. This is slightly more conservative |
3926 | // than necessary, because it means that each store effectively depends |
3927 | // on every argument instead of just those arguments it would clobber. |
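     | // For example, when f(a, b) tail calls g(b, a), storing b into g's first
     | // stack slot would clobber a's incoming slot before a is read, so all
     | // incoming stack arguments are loaded before any outgoing store is emitted.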
3928 | SDValue ArgChain = DAG.getStackArgumentTokenFactor(Chain); |
3929 | |
3930 | SmallVector<SDValue, 8> MemOpChains2; |
3931 | SDValue FIN; |
3932 | int FI = 0; |
3933 | for (unsigned I = 0, OutsIndex = 0, E = ArgLocs.size(); I != E; |
3934 | ++I, ++OutsIndex) { |
3935 | CCValAssign &VA = ArgLocs[I]; |
3936 | |
3937 | if (VA.isRegLoc()) { |
3938 | if (VA.needsCustom()) { |
3939 | assert((CallConv == CallingConv::X86_RegCall) &&
3940 | "Expecting custom case only in regcall calling convention");
3941 | // This means that we are in a special case where one argument was
3942 | // passed through two register locations - skip the next location.
3943 | ++I; |
3944 | } |
3945 | |
3946 | continue; |
3947 | } |
3948 | |
3949 | assert(VA.isMemLoc());
3950 | SDValue Arg = OutVals[OutsIndex]; |
3951 | ISD::ArgFlagsTy Flags = Outs[OutsIndex].Flags; |
3952 | // Skip inalloca arguments. They don't require any work. |
3953 | if (Flags.isInAlloca()) |
3954 | continue; |
3955 | // Create frame index. |
3956 | int32_t Offset = VA.getLocMemOffset()+FPDiff; |
3957 | uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8; |
3958 | FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true); |
3959 | FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); |
3960 | |
3961 | if (Flags.isByVal()) { |
3962 | // Copy relative to framepointer. |
3963 | SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset(), dl); |
3964 | if (!StackPtr.getNode()) |
3965 | StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(), |
3966 | getPointerTy(DAG.getDataLayout())); |
3967 | Source = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()), |
3968 | StackPtr, Source); |
3969 | |
3970 | MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN, |
3971 | ArgChain, |
3972 | Flags, DAG, dl)); |
3973 | } else { |
3974 | // Store relative to framepointer. |
3975 | MemOpChains2.push_back(DAG.getStore( |
3976 | ArgChain, dl, Arg, FIN, |
3977 | MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI))); |
3978 | } |
3979 | } |
3980 | |
3981 | if (!MemOpChains2.empty()) |
3982 | Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2); |
3983 | |
3984 | // Store the return address to the appropriate stack slot. |
3985 | Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx, |
3986 | getPointerTy(DAG.getDataLayout()), |
3987 | RegInfo->getSlotSize(), FPDiff, dl); |
3988 | } |
3989 | |
3990 | // Build a sequence of copy-to-reg nodes chained together with token chain |
3991 | // and flag operands which copy the outgoing args into registers. |
3992 | SDValue InFlag; |
3993 | for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { |
3994 | Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, |
3995 | RegsToPass[i].second, InFlag); |
3996 | InFlag = Chain.getValue(1); |
3997 | } |
3998 | |
3999 | if (DAG.getTarget().getCodeModel() == CodeModel::Large) { |
4000 | assert(Is64Bit && "Large code model is only legal in 64-bit mode.");
4001 | // In the 64-bit large code model, we have to make all calls |
4002 | // through a register, since the call instruction's 32-bit |
4003 | // pc-relative offset may not be large enough to hold the whole |
4004 | // address. |
4005 | } else if (Callee->getOpcode() == ISD::GlobalAddress || |
4006 | Callee->getOpcode() == ISD::ExternalSymbol) { |
4007 | // Lower direct calls to global addresses and external symbols. Setting |
4008 | // ForCall to true here has the effect of removing WrapperRIP when possible |
4009 | // to allow direct calls to be selected without first materializing the |
4010 | // address into a register. |
4011 | Callee = LowerGlobalOrExternal(Callee, DAG, /*ForCall=*/true); |
4012 | } else if (Subtarget.isTarget64BitILP32() && |
4013 | Callee->getValueType(0) == MVT::i32) { |
4014 | // Zero-extend the 32-bit Callee address into a 64-bit one, per the x32 ABI.
4015 | Callee = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Callee); |
4016 | } |
4017 | |
4018 | // Returns a chain & a flag for retval copy to use. |
4019 | SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); |
4020 | SmallVector<SDValue, 8> Ops; |
4021 | |
4022 | if (!IsSibcall && isTailCall) { |
4023 | Chain = DAG.getCALLSEQ_END(Chain, |
4024 | DAG.getIntPtrConstant(NumBytesToPop, dl, true), |
4025 | DAG.getIntPtrConstant(0, dl, true), InFlag, dl); |
4026 | InFlag = Chain.getValue(1); |
4027 | } |
4028 | |
4029 | Ops.push_back(Chain); |
4030 | Ops.push_back(Callee); |
4031 | |
4032 | if (isTailCall) |
4033 | Ops.push_back(DAG.getConstant(FPDiff, dl, MVT::i32)); |
4034 | |
4035 | // Add argument registers to the end of the list so that they are known live |
4036 | // into the call. |
4037 | for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) |
4038 | Ops.push_back(DAG.getRegister(RegsToPass[i].first, |
4039 | RegsToPass[i].second.getValueType())); |
4040 | |
4041 | // Add a register mask operand representing the call-preserved registers. |
4042 | // If HasNCSR is asserted (attribute NoCallerSavedRegisters exists) then we |
4043 | // set X86_INTR calling convention because it has the same CSR mask |
4044 | // (same preserved registers). |
4045 | const uint32_t *Mask = RegInfo->getCallPreservedMask( |
4046 | MF, HasNCSR ? (CallingConv::ID)CallingConv::X86_INTR : CallConv); |
4047 | assert(Mask && "Missing call preserved mask for calling convention");
4048 | |
4049 | // If this is an invoke in a 32-bit function using a funclet-based |
4050 | // personality, assume the function clobbers all registers. If an exception |
4051 | // is thrown, the runtime will not restore CSRs. |
4052 | // FIXME: Model this more precisely so that we can register allocate across |
4053 | // the normal edge and spill and fill across the exceptional edge. |
4054 | if (!Is64Bit && CLI.CS && CLI.CS.isInvoke()) { |
4055 | const Function &CallerFn = MF.getFunction(); |
4056 | EHPersonality Pers = |
4057 | CallerFn.hasPersonalityFn() |
4058 | ? classifyEHPersonality(CallerFn.getPersonalityFn()) |
4059 | : EHPersonality::Unknown; |
4060 | if (isFuncletEHPersonality(Pers)) |
4061 | Mask = RegInfo->getNoPreservedMask(); |
4062 | } |
4063 | |
4064 | // Define a new register mask from the existing mask. |
4065 | uint32_t *RegMask = nullptr; |
4066 | |
4067 | // In some calling conventions we need to remove the used physical registers |
4068 | // from the reg mask. |
4069 | if (CallConv == CallingConv::X86_RegCall || HasNCSR) { |
4070 | const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); |
4071 | |
4072 | // Allocate a new Reg Mask and copy Mask. |
4073 | RegMask = MF.allocateRegMask(); |
4074 | unsigned RegMaskSize = MachineOperand::getRegMaskSize(TRI->getNumRegs()); |
4075 | memcpy(RegMask, Mask, sizeof(RegMask[0]) * RegMaskSize); |
4076 | |
4077 | // Make sure all sub registers of the argument registers are reset |
4078 | // in the RegMask. |
4079 | for (auto const &RegPair : RegsToPass) |
4080 | for (MCSubRegIterator SubRegs(RegPair.first, TRI, /*IncludeSelf=*/true); |
4081 | SubRegs.isValid(); ++SubRegs) |
4082 | RegMask[*SubRegs / 32] &= ~(1u << (*SubRegs % 32)); |
4083 | |
4084 | // Create the RegMask Operand according to our updated mask. |
4085 | Ops.push_back(DAG.getRegisterMask(RegMask)); |
4086 | } else { |
4087 | // Create the RegMask Operand according to the static mask. |
4088 | Ops.push_back(DAG.getRegisterMask(Mask)); |
4089 | } |
4090 | |
4091 | if (InFlag.getNode()) |
4092 | Ops.push_back(InFlag); |
4093 | |
4094 | if (isTailCall) { |
4095 | // We used to do: |
4096 | //// If this is the first return lowered for this function, add the regs |
4097 | //// to the liveout set for the function. |
4098 | // This isn't right, although it's probably harmless on x86; liveouts |
4099 | // should be computed from returns not tail calls. Consider a void |
4100 | // function making a tail call to a function returning int. |
4101 | MF.getFrameInfo().setHasTailCall(); |
4102 | SDValue Ret = DAG.getNode(X86ISD::TC_RETURN, dl, NodeTys, Ops); |
4103 | DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo)); |
4104 | return Ret; |
4105 | } |
4106 | |
4107 | if (HasNoCfCheck && IsCFProtectionSupported) { |
4108 | Chain = DAG.getNode(X86ISD::NT_CALL, dl, NodeTys, Ops); |
4109 | } else { |
4110 | Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, Ops); |
4111 | } |
4112 | InFlag = Chain.getValue(1); |
4113 | DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo)); |
4114 | |
4115 | // Save heapallocsite metadata. |
4116 | if (CLI.CS) |
4117 | if (MDNode *HeapAlloc = CLI.CS->getMetadata("heapallocsite")) |
4118 | DAG.addHeapAllocSite(Chain.getNode(), HeapAlloc); |
4119 | |
4120 | // Create the CALLSEQ_END node. |
4121 | unsigned NumBytesForCalleeToPop; |
4122 | if (X86::isCalleePop(CallConv, Is64Bit, isVarArg, |
4123 | DAG.getTarget().Options.GuaranteedTailCallOpt)) |
4124 | NumBytesForCalleeToPop = NumBytes; // Callee pops everything |
4125 | else if (!Is64Bit && !canGuaranteeTCO(CallConv) && |
4126 | !Subtarget.getTargetTriple().isOSMSVCRT() && |
4127 | SR == StackStructReturn) |
4128 | // If this is a call to a struct-return function, the callee |
4129 | // pops the hidden struct pointer, so we have to push it back. |
4130 | // This is common for Darwin/X86, Linux & Mingw32 targets. |
4131 | // For MSVC Win32 targets, the caller pops the hidden struct pointer. |
4132 | NumBytesForCalleeToPop = 4; |
4133 | else |
4134 | NumBytesForCalleeToPop = 0; // Callee pops nothing. |
4135 | |
4136 | if (CLI.DoesNotReturn && !getTargetMachine().Options.TrapUnreachable) { |
4137 | // No need to reset the stack after the call if the call doesn't return. To |
4138 | // make the MI verify, we'll pretend the callee does it for us. |
4139 | NumBytesForCalleeToPop = NumBytes; |
4140 | } |
4141 | |
4142 | // Returns a flag for retval copy to use. |
4143 | if (!IsSibcall) { |
4144 | Chain = DAG.getCALLSEQ_END(Chain, |
4145 | DAG.getIntPtrConstant(NumBytesToPop, dl, true), |
4146 | DAG.getIntPtrConstant(NumBytesForCalleeToPop, dl, |
4147 | true), |
4148 | InFlag, dl); |
4149 | InFlag = Chain.getValue(1); |
4150 | } |
4151 | |
4152 | // Handle result values, copying them out of physregs into vregs that we |
4153 | // return. |
4154 | return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG, |
4155 | InVals, RegMask); |
4156 | } |
4157 | |
4158 | //===----------------------------------------------------------------------===// |
4159 | // Fast Calling Convention (tail call) implementation |
4160 | //===----------------------------------------------------------------------===// |
4161 | |
4162 | // Like stdcall, the callee cleans up the arguments, except that ECX is
4163 | // reserved for storing the address of the tail-called function. Only 2 registers are
4164 | // free for argument passing (inreg). Tail call optimization is performed |
4165 | // provided: |
4166 | // * tailcallopt is enabled |
4167 | // * caller/callee are fastcc |
4168 | // On X86_64 architecture with GOT-style position independent code only local |
4169 | // (within module) calls are supported at the moment. |
4170 | // To keep the stack aligned according to the platform ABI, the function
4171 | // GetAlignedArgumentStackSize ensures that the argument delta is always a
4172 | // multiple of the stack alignment. (Dynamic linkers need this - darwin's dyld for example)
4173 | // If a tail called function callee has more arguments than the caller the |
4174 | // caller needs to make sure that there is room to move the RETADDR to. This is |
4175 | // achieved by reserving an area the size of the argument delta right after the |
4176 | // original RETADDR, but before the saved framepointer or the spilled registers |
4177 | // e.g. caller(arg1, arg2) calls callee(arg1, arg2,arg3,arg4) |
4178 | // stack layout: |
4179 | // arg1 |
4180 | // arg2 |
4181 | // RETADDR |
4182 | // [ new RETADDR |
4183 | // move area ] |
4184 | // (possible EBP) |
4185 | // ESI |
4186 | // EDI |
4187 | // local1 .. |
4188 | |
4189 | /// Align the stack size so that, once the return-address slot is included, it
4190 | /// meets a 16-byte alignment requirement (e.g. 16n + 12 with a 4-byte slot).
4191 | unsigned |
4192 | X86TargetLowering::GetAlignedArgumentStackSize(const unsigned StackSize, |
4193 | SelectionDAG &DAG) const { |
4194 | const Align StackAlignment(Subtarget.getFrameLowering()->getStackAlignment()); |
4195 | const uint64_t SlotSize = Subtarget.getRegisterInfo()->getSlotSize(); |
4196 | assert(StackSize % SlotSize == 0 &&
4197 | "StackSize must be a multiple of SlotSize");
4198 | return alignTo(StackSize + SlotSize, StackAlignment) - SlotSize; |
4199 | } |
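     | // Worked example, assuming an 8-byte slot and 16-byte stack alignment:
     | // StackSize = 24 -> alignTo(32, 16) - 8 = 24 (already fine), and
     | // StackSize = 32 -> alignTo(40, 16) - 8 = 40, so that pushing the return
     | // address brings the stack back to a 16-byte boundary.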
4200 | |
4201 | /// Return true if the given stack call argument is already available in the |
4202 | /// same position (relatively) of the caller's incoming argument stack. |
4203 | static |
4204 | bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags, |
4205 | MachineFrameInfo &MFI, const MachineRegisterInfo *MRI, |
4206 | const X86InstrInfo *TII, const CCValAssign &VA) { |
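     | // Illustration: for "int f(int x) { return g(x); }" compiled as a sibcall on
     | // 32-bit x86, x is a load from the caller's fixed, immutable stack object at
     | // the same offset and size g expects, so this returns true and no store is
     | // needed for that argument.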
4207 | unsigned Bytes = Arg.getValueSizeInBits() / 8; |
4208 | |
4209 | for (;;) { |
4210 | // Look through nodes that don't alter the bits of the incoming value. |
4211 | unsigned Op = Arg.getOpcode(); |
4212 | if (Op == ISD::ZERO_EXTEND || Op == ISD::ANY_EXTEND || Op == ISD::BITCAST) { |
4213 | Arg = Arg.getOperand(0); |
4214 | continue; |
4215 | } |
4216 | if (Op == ISD::TRUNCATE) { |
4217 | const SDValue &TruncInput = Arg.getOperand(0); |
4218 | if (TruncInput.getOpcode() == ISD::AssertZext && |
4219 | cast<VTSDNode>(TruncInput.getOperand(1))->getVT() == |
4220 | Arg.getValueType()) { |
4221 | Arg = TruncInput.getOperand(0); |
4222 | continue; |
4223 | } |
4224 | } |
4225 | break; |
4226 | } |
4227 | |
4228 | int FI = INT_MAX;
4229 | if (Arg.getOpcode() == ISD::CopyFromReg) { |
4230 | unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg(); |
4231 | if (!Register::isVirtualRegister(VR)) |
4232 | return false; |
4233 | MachineInstr *Def = MRI->getVRegDef(VR); |
4234 | if (!Def) |
4235 | return false; |
4236 | if (!Flags.isByVal()) { |
4237 | if (!TII->isLoadFromStackSlot(*Def, FI)) |
4238 | return false; |
4239 | } else { |
4240 | unsigned Opcode = Def->getOpcode(); |
4241 | if ((Opcode == X86::LEA32r || Opcode == X86::LEA64r || |
4242 | Opcode == X86::LEA64_32r) && |
4243 | Def->getOperand(1).isFI()) { |
4244 | FI = Def->getOperand(1).getIndex(); |
4245 | Bytes = Flags.getByValSize(); |
4246 | } else |
4247 | return false; |
4248 | } |
4249 | } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) { |
4250 | if (Flags.isByVal()) |
4251 | // ByVal argument is passed in as a pointer but it's now being |
4252 | // dereferenced. e.g. |
4253 | // define @foo(%struct.X* %A) { |
4254 | // tail call @bar(%struct.X* byval %A) |
4255 | // } |
4256 | return false; |
4257 | SDValue Ptr = Ld->getBasePtr(); |
4258 | FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr); |
4259 | if (!FINode) |
4260 | return false; |
4261 | FI = FINode->getIndex(); |
4262 | } else if (Arg.getOpcode() == ISD::FrameIndex && Flags.isByVal()) { |
4263 | FrameIndexSDNode *FINode = cast<FrameIndexSDNode>(Arg); |
4264 | FI = FINode->getIndex(); |
4265 | Bytes = Flags.getByValSize(); |
4266 | } else |
4267 | return false; |
4268 | |
4269 | assert(FI != INT_MAX);
4270 | if (!MFI.isFixedObjectIndex(FI)) |
4271 | return false; |
4272 | |
4273 | if (Offset != MFI.getObjectOffset(FI)) |
4274 | return false; |
4275 | |
4276 | // If this is not byval, check that the argument stack object is immutable. |
4277 | // inalloca and argument copy elision can create mutable argument stack |
4278 | // objects. Byval objects can be mutated, but a byval call intends to pass the |
4279 | // mutated memory. |
4280 | if (!Flags.isByVal() && !MFI.isImmutableObjectIndex(FI)) |
4281 | return false; |
4282 | |
4283 | if (VA.getLocVT().getSizeInBits() > Arg.getValueSizeInBits()) { |
4284 | // If the argument location is wider than the argument type, check that any |
4285 | // extension flags match. |
4286 | if (Flags.isZExt() != MFI.isObjectZExt(FI) || |
4287 | Flags.isSExt() != MFI.isObjectSExt(FI)) { |
4288 | return false; |
4289 | } |
4290 | } |
4291 | |
4292 | return Bytes == MFI.getObjectSize(FI); |
4293 | } |
4294 | |
4295 | /// Check whether the call is eligible for tail call optimization. Targets |
4296 | /// that want to do tail call optimization should implement this function. |
4297 | bool X86TargetLowering::IsEligibleForTailCallOptimization( |
4298 | SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg, |
4299 | bool isCalleeStructRet, bool isCallerStructRet, Type *RetTy, |
4300 | const SmallVectorImpl<ISD::OutputArg> &Outs, |
4301 | const SmallVectorImpl<SDValue> &OutVals, |
4302 | const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const { |
4303 | if (!mayTailCallThisCC(CalleeCC)) |
4304 | return false; |
4305 | |
4306 | // If -tailcallopt is specified, make fastcc functions tail-callable. |
4307 | MachineFunction &MF = DAG.getMachineFunction(); |
4308 | const Function &CallerF = MF.getFunction(); |
4309 | |
4310 | // If the function return type is x86_fp80 and the callee return type is not, |
4311 | // then the FP_EXTEND of the call result is not a nop. It's not safe to |
4312 | // perform a tailcall optimization here. |
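     | // For instance, if the caller is "long double f()" and it tail-returns the
     | // result of a callee declared to return double, the f64 result would need an
     | // FP_EXTEND to x86_fp80 after the call, so the call is rejected here.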
4313 | if (CallerF.getReturnType()->isX86_FP80Ty() && !RetTy->isX86_FP80Ty()) |
4314 | return false; |
4315 | |
4316 | CallingConv::ID CallerCC = CallerF.getCallingConv(); |
4317 | bool CCMatch = CallerCC == CalleeCC; |
4318 | bool IsCalleeWin64 = Subtarget.isCallingConvWin64(CalleeCC); |
4319 | bool IsCallerWin64 = Subtarget.isCallingConvWin64(CallerCC); |
4320 | bool IsGuaranteeTCO = DAG.getTarget().Options.GuaranteedTailCallOpt || |
4321 | CalleeCC == CallingConv::Tail; |
4322 | |
4323 | // Win64 functions have extra shadow space for argument homing. Don't do the |
4324 | // sibcall if the caller and callee have mismatched expectations for this |
4325 | // space. |
4326 | if (IsCalleeWin64 != IsCallerWin64) |
4327 | return false; |
4328 | |
4329 | if (IsGuaranteeTCO) { |
4330 | if (canGuaranteeTCO(CalleeCC) && CCMatch) |
4331 | return true; |
4332 | return false; |
4333 | } |
4334 | |
4335 | // Look for obvious safe cases to perform tail call optimization that do not |
4336 | // require ABI changes. This is what gcc calls sibcall. |
4337 | |
4338 | // Can't do sibcall if stack needs to be dynamically re-aligned. PEI needs to |
4339 | // emit a special epilogue. |
4340 | const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); |
4341 | if (RegInfo->needsStackRealignment(MF)) |
4342 | return false; |
4343 | |
4344 | // Also avoid sibcall optimization if either caller or callee uses struct |
4345 | // return semantics. |
4346 | if (isCalleeStructRet || isCallerStructRet) |
4347 | return false; |
4348 | |
4349 | // Do not sibcall optimize vararg calls unless all arguments are passed via |
4350 | // registers. |
4351 | LLVMContext &C = *DAG.getContext(); |
4352 | if (isVarArg && !Outs.empty()) { |
4353 | // Optimizing for varargs on Win64 is unlikely to be safe without |
4354 | // additional testing. |
4355 | if (IsCalleeWin64 || IsCallerWin64) |
4356 | return false; |
4357 | |
4358 | SmallVector<CCValAssign, 16> ArgLocs; |
4359 | CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C); |
4360 | |
4361 | CCInfo.AnalyzeCallOperands(Outs, CC_X86); |
4362 | for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) |
4363 | if (!ArgLocs[i].isRegLoc()) |
4364 | return false; |
4365 | } |
4366 | |
4367 | // If the call result is in ST0 / ST1, it needs to be popped off the x87 |
4368 | // stack. Therefore, if it's not used by the call it is not safe to optimize |
4369 | // this into a sibcall. |
4370 | bool Unused = false; |
4371 | for (unsigned i = 0, e = Ins.size(); i != e; ++i) { |
4372 | if (!Ins[i].Used) { |
4373 | Unused = true; |
4374 | break; |
4375 | } |
4376 | } |
4377 | if (Unused) { |
4378 | SmallVector<CCValAssign, 16> RVLocs; |
4379 | CCState CCInfo(CalleeCC, false, MF, RVLocs, C); |
4380 | CCInfo.AnalyzeCallResult(Ins, RetCC_X86); |
4381 | for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) { |
4382 | CCValAssign &VA = RVLocs[i]; |
4383 | if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) |
4384 | return false; |
4385 | } |
4386 | } |
4387 | |
4388 | // Check that the call results are passed in the same way. |
4389 | if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins, |
4390 | RetCC_X86, RetCC_X86)) |
4391 | return false; |
4392 | // The callee has to preserve all registers the caller needs to preserve. |
4393 | const X86RegisterInfo *TRI = Subtarget.getRegisterInfo(); |
4394 | const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC); |
4395 | if (!CCMatch) { |
4396 | const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC); |
4397 | if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved)) |
4398 | return false; |
4399 | } |
4400 | |
4401 | unsigned StackArgsSize = 0; |
4402 | |
4403 | // If the callee takes no arguments then go on to check the results of the |
4404 | // call. |
4405 | if (!Outs.empty()) { |
4406 | // Check if stack adjustment is needed. For now, do not do this if any |
4407 | // argument is passed on the stack. |
4408 | SmallVector<CCValAssign, 16> ArgLocs; |
4409 | CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C); |
4410 | |
4411 | // Allocate shadow area for Win64 |
4412 | if (IsCalleeWin64) |
4413 | CCInfo.AllocateStack(32, 8); |
4414 | |
4415 | CCInfo.AnalyzeCallOperands(Outs, CC_X86); |
4416 | StackArgsSize = CCInfo.getNextStackOffset(); |
4417 | |
4418 | if (CCInfo.getNextStackOffset()) { |
4419 | // Check whether the arguments are already laid out in the same way as
4420 | // the caller's fixed stack objects. |
4421 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
4422 | const MachineRegisterInfo *MRI = &MF.getRegInfo(); |
4423 | const X86InstrInfo *TII = Subtarget.getInstrInfo(); |
4424 | for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { |
4425 | CCValAssign &VA = ArgLocs[i]; |
4426 | SDValue Arg = OutVals[i]; |
4427 | ISD::ArgFlagsTy Flags = Outs[i].Flags; |
4428 | if (VA.getLocInfo() == CCValAssign::Indirect) |
4429 | return false; |
4430 | if (!VA.isRegLoc()) { |
4431 | if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags, |
4432 | MFI, MRI, TII, VA)) |
4433 | return false; |
4434 | } |
4435 | } |
4436 | } |
4437 | |
4438 | bool PositionIndependent = isPositionIndependent(); |
4439 | // If the tailcall address may be in a register, then make sure it's |
4440 | // possible to register allocate for it. In 32-bit, the call address can |
4441 | // only target EAX, EDX, or ECX since the tail call must be scheduled after |
4442 | // callee-saved registers are restored. These happen to be the same |
4443 | // registers used to pass 'inreg' arguments so watch out for those. |
4444 | if (!Subtarget.is64Bit() && ((!isa<GlobalAddressSDNode>(Callee) && |
4445 | !isa<ExternalSymbolSDNode>(Callee)) || |
4446 | PositionIndependent)) { |
4447 | unsigned NumInRegs = 0; |
4448 | // In PIC we need an extra register to formulate the address computation |
4449 | // for the callee. |
4450 | unsigned MaxInRegs = PositionIndependent ? 2 : 3; |
4451 | |
4452 | for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { |
4453 | CCValAssign &VA = ArgLocs[i]; |
4454 | if (!VA.isRegLoc()) |
4455 | continue; |
4456 | Register Reg = VA.getLocReg(); |
4457 | switch (Reg) { |
4458 | default: break; |
4459 | case X86::EAX: case X86::EDX: case X86::ECX: |
4460 | if (++NumInRegs == MaxInRegs) |
4461 | return false; |
4462 | break; |
4463 | } |
4464 | } |
4465 | } |
4466 | |
4467 | const MachineRegisterInfo &MRI = MF.getRegInfo(); |
4468 | if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals)) |
4469 | return false; |
4470 | } |
4471 | |
4472 | bool CalleeWillPop = |
4473 | X86::isCalleePop(CalleeCC, Subtarget.is64Bit(), isVarArg, |
4474 | MF.getTarget().Options.GuaranteedTailCallOpt); |
4475 | |
4476 | if (unsigned BytesToPop = |
4477 | MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn()) { |
4478 | // If we have bytes to pop, the callee must pop them. |
4479 | bool CalleePopMatches = CalleeWillPop && BytesToPop == StackArgsSize; |
4480 | if (!CalleePopMatches) |
4481 | return false; |
4482 | } else if (CalleeWillPop && StackArgsSize > 0) { |
4483 | // If we don't have bytes to pop, make sure the callee doesn't pop any. |
4484 | return false; |
4485 | } |
4486 | |
4487 | return true; |
4488 | } |
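
// A minimal sketch of the callee-pop consistency rule checked just above,
// written against plain integers rather than the LLVM machine-function APIs
// (the helper name and parameters are invented for illustration): the caller's
// pending pop bytes must be popped by the callee and exactly match its stack
// argument size, and a popping callee must not pop bytes the caller never
// expected to be popped.

#include <cstdint>

static bool sibcallPopRuleHolds(uint32_t CallerBytesToPop, bool CalleeWillPop,
                                uint32_t CalleeStackArgsSize) {
  if (CallerBytesToPop != 0) {
    // Caller's 'ret imm' will pop bytes, so the callee must pop the same amount.
    return CalleeWillPop && CallerBytesToPop == CalleeStackArgsSize;
  }
  // Caller pops nothing; a callee popping stack arguments would unbalance it.
  return !(CalleeWillPop && CalleeStackArgsSize > 0);
}
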
4489 | |
4490 | FastISel * |
4491 | X86TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo, |
4492 | const TargetLibraryInfo *libInfo) const { |
4493 | return X86::createFastISel(funcInfo, libInfo); |
4494 | } |
4495 | |
4496 | //===----------------------------------------------------------------------===// |
4497 | // Other Lowering Hooks |
4498 | //===----------------------------------------------------------------------===// |
4499 | |
4500 | static bool MayFoldLoad(SDValue Op) { |
4501 | return Op.hasOneUse() && ISD::isNormalLoad(Op.getNode()); |
4502 | } |
4503 | |
4504 | static bool MayFoldIntoStore(SDValue Op) { |
4505 | return Op.hasOneUse() && ISD::isNormalStore(*Op.getNode()->use_begin()); |
4506 | } |
4507 | |
4508 | static bool MayFoldIntoZeroExtend(SDValue Op) { |
4509 | if (Op.hasOneUse()) { |
4510 | unsigned Opcode = Op.getNode()->use_begin()->getOpcode(); |
4511 | return (ISD::ZERO_EXTEND == Opcode); |
4512 | } |
4513 | return false; |
4514 | } |
4515 | |
4516 | static bool isTargetShuffle(unsigned Opcode) { |
4517 | switch(Opcode) { |
4518 | default: return false; |
4519 | case X86ISD::BLENDI: |
4520 | case X86ISD::PSHUFB: |
4521 | case X86ISD::PSHUFD: |
4522 | case X86ISD::PSHUFHW: |
4523 | case X86ISD::PSHUFLW: |
4524 | case X86ISD::SHUFP: |
4525 | case X86ISD::INSERTPS: |
4526 | case X86ISD::EXTRQI: |
4527 | case X86ISD::INSERTQI: |
4528 | case X86ISD::PALIGNR: |
4529 | case X86ISD::VSHLDQ: |
4530 | case X86ISD::VSRLDQ: |
4531 | case X86ISD::MOVLHPS: |
4532 | case X86ISD::MOVHLPS: |
4533 | case X86ISD::MOVSHDUP: |
4534 | case X86ISD::MOVSLDUP: |
4535 | case X86ISD::MOVDDUP: |
4536 | case X86ISD::MOVSS: |
4537 | case X86ISD::MOVSD: |
4538 | case X86ISD::UNPCKL: |
4539 | case X86ISD::UNPCKH: |
4540 | case X86ISD::VBROADCAST: |
4541 | case X86ISD::VPERMILPI: |
4542 | case X86ISD::VPERMILPV: |
4543 | case X86ISD::VPERM2X128: |
4544 | case X86ISD::SHUF128: |
4545 | case X86ISD::VPERMIL2: |
4546 | case X86ISD::VPERMI: |
4547 | case X86ISD::VPPERM: |
4548 | case X86ISD::VPERMV: |
4549 | case X86ISD::VPERMV3: |
4550 | case X86ISD::VZEXT_MOVL: |
4551 | return true; |
4552 | } |
4553 | } |
4554 | |
4555 | static bool isTargetShuffleVariableMask(unsigned Opcode) { |
4556 | switch (Opcode) { |
4557 | default: return false; |
4558 | // Target Shuffles. |
4559 | case X86ISD::PSHUFB: |
4560 | case X86ISD::VPERMILPV: |
4561 | case X86ISD::VPERMIL2: |
4562 | case X86ISD::VPPERM: |
4563 | case X86ISD::VPERMV: |
4564 | case X86ISD::VPERMV3: |
4565 | return true; |
4566 | // 'Faux' Target Shuffles. |
4567 | case ISD::OR: |
4568 | case ISD::AND: |
4569 | case X86ISD::ANDNP: |
4570 | return true; |
4571 | } |
4572 | } |
4573 | |
4574 | SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const { |
4575 | MachineFunction &MF = DAG.getMachineFunction(); |
4576 | const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); |
4577 | X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>(); |
4578 | int ReturnAddrIndex = FuncInfo->getRAIndex(); |
4579 | |
4580 | if (ReturnAddrIndex == 0) { |
4581 | // Set up a frame object for the return address. |
4582 | unsigned SlotSize = RegInfo->getSlotSize(); |
4583 | ReturnAddrIndex = MF.getFrameInfo().CreateFixedObject(SlotSize, |
4584 | -(int64_t)SlotSize, |
4585 | false); |
4586 | FuncInfo->setRAIndex(ReturnAddrIndex); |
4587 | } |
4588 | |
4589 | return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy(DAG.getDataLayout())); |
4590 | } |
4591 | |
4592 | bool X86::isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M, |
4593 | bool hasSymbolicDisplacement) { |
4594 | // Offset should fit into a 32-bit immediate field.
4595 | if (!isInt<32>(Offset)) |
4596 | return false; |
4597 | |
4598 | // If we don't have a symbolic displacement, we don't have any extra
4599 | // restrictions.
4600 | if (!hasSymbolicDisplacement) |
4601 | return true; |
4602 | |
4603 | // FIXME: Some tweaks might be needed for medium code model. |
4604 | if (M != CodeModel::Small && M != CodeModel::Kernel) |
4605 | return false; |
4606 | |
4607 | // For the small code model we assume that the latest object ends 16MB below
4608 | // the 31-bit boundary. We may also accept fairly large negative constants,
4609 | // knowing that all objects are in the positive half of the address space.
4610 | if (M == CodeModel::Small && Offset < 16*1024*1024) |
4611 | return true; |
4612 | |
4613 | // For the kernel code model we know that all objects reside in the negative
4614 | // half of the 32-bit address space. We must not accept negative offsets, since
4615 | // they may fall out of range, but we may accept fairly large positive ones.
4616 | if (M == CodeModel::Kernel && Offset >= 0) |
4617 | return true; |
4618 | |
4619 | return false; |
4620 | } |
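
// A minimal sketch of the displacement rule above using only standard C++
// (the enum and helper are hypothetical, not the LLVM types): any offset must
// fit a signed 32-bit field; with a symbolic displacement, the small code
// model additionally requires the offset to stay 16MB below the 2^31 boundary
// and the kernel code model requires a non-negative offset.

#include <cstdint>
#include <limits>

enum class CM { Small, Kernel, Other };

static bool offsetSuitable(int64_t Offset, CM Model, bool HasSymbol) {
  if (Offset < std::numeric_limits<int32_t>::min() ||
      Offset > std::numeric_limits<int32_t>::max())
    return false;                          // must fit a 32-bit displacement
  if (!HasSymbol)
    return true;                           // no extra restrictions
  if (Model == CM::Small)
    return Offset < 16 * 1024 * 1024;      // objects end well below 2^31
  if (Model == CM::Kernel)
    return Offset >= 0;                    // objects live in the negative 2GB
  return false;                            // medium/large: be conservative
}
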
4621 | |
4622 | /// Determines whether the callee is required to pop its own arguments. |
4623 | /// Callee pop is necessary to support tail calls. |
4624 | bool X86::isCalleePop(CallingConv::ID CallingConv, |
4625 | bool is64Bit, bool IsVarArg, bool GuaranteeTCO) { |
4626 | // If GuaranteeTCO is true, we force some calls to be callee pop so that we |
4627 | // can guarantee TCO. |
4628 | if (!IsVarArg && shouldGuaranteeTCO(CallingConv, GuaranteeTCO)) |
4629 | return true; |
4630 | |
4631 | switch (CallingConv) { |
4632 | default: |
4633 | return false; |
4634 | case CallingConv::X86_StdCall: |
4635 | case CallingConv::X86_FastCall: |
4636 | case CallingConv::X86_ThisCall: |
4637 | case CallingConv::X86_VectorCall: |
4638 | return !is64Bit; |
4639 | } |
4640 | } |
4641 | |
4642 | /// Return true if the condition is a signed comparison operation.
4643 | static bool isX86CCSigned(unsigned X86CC) { |
4644 | switch (X86CC) { |
4645 | default: |
4646 | llvm_unreachable("Invalid integer condition!");
4647 | case X86::COND_E: |
4648 | case X86::COND_NE: |
4649 | case X86::COND_B: |
4650 | case X86::COND_A: |
4651 | case X86::COND_BE: |
4652 | case X86::COND_AE: |
4653 | return false; |
4654 | case X86::COND_G: |
4655 | case X86::COND_GE: |
4656 | case X86::COND_L: |
4657 | case X86::COND_LE: |
4658 | return true; |
4659 | } |
4660 | } |
4661 | |
4662 | static X86::CondCode TranslateIntegerX86CC(ISD::CondCode SetCCOpcode) { |
4663 | switch (SetCCOpcode) { |
4664 | default: llvm_unreachable("Invalid integer condition!");
4665 | case ISD::SETEQ: return X86::COND_E; |
4666 | case ISD::SETGT: return X86::COND_G; |
4667 | case ISD::SETGE: return X86::COND_GE; |
4668 | case ISD::SETLT: return X86::COND_L; |
4669 | case ISD::SETLE: return X86::COND_LE; |
4670 | case ISD::SETNE: return X86::COND_NE; |
4671 | case ISD::SETULT: return X86::COND_B; |
4672 | case ISD::SETUGT: return X86::COND_A; |
4673 | case ISD::SETULE: return X86::COND_BE; |
4674 | case ISD::SETUGE: return X86::COND_AE; |
4675 | } |
4676 | } |
4677 | |
4678 | /// Do a one-to-one translation of a ISD::CondCode to the X86-specific |
4679 | /// condition code, returning the condition code and the LHS/RHS of the |
4680 | /// comparison to make. |
4681 | static X86::CondCode TranslateX86CC(ISD::CondCode SetCCOpcode, const SDLoc &DL, |
4682 | bool isFP, SDValue &LHS, SDValue &RHS, |
4683 | SelectionDAG &DAG) { |
4684 | if (!isFP) { |
4685 | if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) { |
4686 | if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnesValue()) { |
4687 | // X > -1 -> X == 0, jump !sign. |
4688 | RHS = DAG.getConstant(0, DL, RHS.getValueType()); |
4689 | return X86::COND_NS; |
4690 | } |
4691 | if (SetCCOpcode == ISD::SETLT && RHSC->isNullValue()) { |
4692 | // X < 0 -> X == 0, jump on sign. |
4693 | return X86::COND_S; |
4694 | } |
4695 | if (SetCCOpcode == ISD::SETGE && RHSC->isNullValue()) { |
4696 | // X >= 0 -> X == 0, jump on !sign. |
4697 | return X86::COND_NS; |
4698 | } |
4699 | if (SetCCOpcode == ISD::SETLT && RHSC->getAPIntValue() == 1) { |
4700 | // X < 1 -> X <= 0 |
4701 | RHS = DAG.getConstant(0, DL, RHS.getValueType()); |
4702 | return X86::COND_LE; |
4703 | } |
4704 | } |
4705 | |
4706 | return TranslateIntegerX86CC(SetCCOpcode); |
4707 | } |
4708 | |
4709 | // First determine if it is required or is profitable to flip the operands. |
4710 | |
4711 | // If LHS is a foldable load, but RHS is not, flip the condition. |
4712 | if (ISD::isNON_EXTLoad(LHS.getNode()) && |
4713 | !ISD::isNON_EXTLoad(RHS.getNode())) { |
4714 | SetCCOpcode = getSetCCSwappedOperands(SetCCOpcode); |
4715 | std::swap(LHS, RHS); |
4716 | } |
4717 | |
4718 | switch (SetCCOpcode) { |
4719 | default: break; |
4720 | case ISD::SETOLT: |
4721 | case ISD::SETOLE: |
4722 | case ISD::SETUGT: |
4723 | case ISD::SETUGE: |
4724 | std::swap(LHS, RHS); |
4725 | break; |
4726 | } |
4727 | |
4728 | // On a floating point condition, the flags are set as follows: |
4729 | // ZF PF CF op |
4730 | // 0 | 0 | 0 | X > Y |
4731 | // 0 | 0 | 1 | X < Y |
4732 | // 1 | 0 | 0 | X == Y |
4733 | // 1 | 1 | 1 | unordered |
4734 | switch (SetCCOpcode) { |
4735 | default: llvm_unreachable("Condcode should be pre-legalized away");
4736 | case ISD::SETUEQ: |
4737 | case ISD::SETEQ: return X86::COND_E; |
4738 | case ISD::SETOLT: // flipped |
4739 | case ISD::SETOGT: |
4740 | case ISD::SETGT: return X86::COND_A; |
4741 | case ISD::SETOLE: // flipped |
4742 | case ISD::SETOGE: |
4743 | case ISD::SETGE: return X86::COND_AE; |
4744 | case ISD::SETUGT: // flipped |
4745 | case ISD::SETULT: |
4746 | case ISD::SETLT: return X86::COND_B; |
4747 | case ISD::SETUGE: // flipped |
4748 | case ISD::SETULE: |
4749 | case ISD::SETLE: return X86::COND_BE; |
4750 | case ISD::SETONE: |
4751 | case ISD::SETNE: return X86::COND_NE; |
4752 | case ISD::SETUO: return X86::COND_P; |
4753 | case ISD::SETO: return X86::COND_NP; |
4754 | case ISD::SETOEQ: |
4755 | case ISD::SETUNE: return X86::COND_INVALID; |
4756 | } |
4757 | } |
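
// A minimal sketch of the flag table above: after a ucomiss/ucomisd-style
// compare, each of the unsigned-flavoured condition codes used here reduces to
// a simple predicate over ZF/PF/CF. The enum and helper are invented for
// illustration and are not LLVM APIs.

enum class X86FPCond { E, NE, A, AE, B, BE, P, NP };

struct EFlags { bool ZF, PF, CF; };

static bool condHolds(X86FPCond CC, EFlags F) {
  switch (CC) {
  case X86FPCond::E:  return F.ZF;            // equal (also set when unordered)
  case X86FPCond::NE: return !F.ZF;           // not equal
  case X86FPCond::A:  return !F.CF && !F.ZF;  // X > Y
  case X86FPCond::AE: return !F.CF;           // X >= Y
  case X86FPCond::B:  return F.CF;            // X < Y (or unordered)
  case X86FPCond::BE: return F.CF || F.ZF;    // X <= Y (or unordered)
  case X86FPCond::P:  return F.PF;            // unordered
  case X86FPCond::NP: return !F.PF;           // ordered
  }
  return false;
}
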
4758 | |
4759 | /// Is there a floating point cmov for the specific X86 condition code? |
4760 | /// Current x86 isa includes the following FP cmov instructions: |
4761 | /// fcmovb, fcmovbe, fcmove, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu.
4762 | static bool hasFPCMov(unsigned X86CC) { |
4763 | switch (X86CC) { |
4764 | default: |
4765 | return false; |
4766 | case X86::COND_B: |
4767 | case X86::COND_BE: |
4768 | case X86::COND_E: |
4769 | case X86::COND_P: |
4770 | case X86::COND_A: |
4771 | case X86::COND_AE: |
4772 | case X86::COND_NE: |
4773 | case X86::COND_NP: |
4774 | return true; |
4775 | } |
4776 | } |
4777 | |
4778 | |
4779 | bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, |
4780 | const CallInst &I, |
4781 | MachineFunction &MF, |
4782 | unsigned Intrinsic) const { |
4783 | |
4784 | const IntrinsicData* IntrData = getIntrinsicWithChain(Intrinsic); |
4785 | if (!IntrData) |
4786 | return false; |
4787 | |
4788 | Info.flags = MachineMemOperand::MONone; |
4789 | Info.offset = 0; |
4790 | |
4791 | switch (IntrData->Type) { |
4792 | case TRUNCATE_TO_MEM_VI8: |
4793 | case TRUNCATE_TO_MEM_VI16: |
4794 | case TRUNCATE_TO_MEM_VI32: { |
4795 | Info.opc = ISD::INTRINSIC_VOID; |
4796 | Info.ptrVal = I.getArgOperand(0); |
4797 | MVT VT = MVT::getVT(I.getArgOperand(1)->getType()); |
4798 | MVT ScalarVT = MVT::INVALID_SIMPLE_VALUE_TYPE; |
4799 | if (IntrData->Type == TRUNCATE_TO_MEM_VI8) |
4800 | ScalarVT = MVT::i8; |
4801 | else if (IntrData->Type == TRUNCATE_TO_MEM_VI16) |
4802 | ScalarVT = MVT::i16; |
4803 | else if (IntrData->Type == TRUNCATE_TO_MEM_VI32) |
4804 | ScalarVT = MVT::i32; |
4805 | |
4806 | Info.memVT = MVT::getVectorVT(ScalarVT, VT.getVectorNumElements()); |
4807 | Info.align = Align::None(); |
4808 | Info.flags |= MachineMemOperand::MOStore; |
4809 | break; |
4810 | } |
4811 | case GATHER: |
4812 | case GATHER_AVX2: { |
4813 | Info.opc = ISD::INTRINSIC_W_CHAIN; |
4814 | Info.ptrVal = nullptr; |
4815 | MVT DataVT = MVT::getVT(I.getType()); |
4816 | MVT IndexVT = MVT::getVT(I.getArgOperand(2)->getType()); |
4817 | unsigned NumElts = std::min(DataVT.getVectorNumElements(), |
4818 | IndexVT.getVectorNumElements()); |
4819 | Info.memVT = MVT::getVectorVT(DataVT.getVectorElementType(), NumElts); |
4820 | Info.align = Align::None(); |
4821 | Info.flags |= MachineMemOperand::MOLoad; |
4822 | break; |
4823 | } |
4824 | case SCATTER: { |
4825 | Info.opc = ISD::INTRINSIC_VOID; |
4826 | Info.ptrVal = nullptr; |
4827 | MVT DataVT = MVT::getVT(I.getArgOperand(3)->getType()); |
4828 | MVT IndexVT = MVT::getVT(I.getArgOperand(2)->getType()); |
4829 | unsigned NumElts = std::min(DataVT.getVectorNumElements(), |
4830 | IndexVT.getVectorNumElements()); |
4831 | Info.memVT = MVT::getVectorVT(DataVT.getVectorElementType(), NumElts); |
4832 | Info.align = Align::None(); |
4833 | Info.flags |= MachineMemOperand::MOStore; |
4834 | break; |
4835 | } |
4836 | default: |
4837 | return false; |
4838 | } |
4839 | |
4840 | return true; |
4841 | } |
4842 | |
4843 | /// Returns true if the target can instruction select the |
4844 | /// specified FP immediate natively. If false, the legalizer will |
4845 | /// materialize the FP immediate as a load from a constant pool. |
4846 | bool X86TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT, |
4847 | bool ForCodeSize) const { |
4848 | for (unsigned i = 0, e = LegalFPImmediates.size(); i != e; ++i) { |
4849 | if (Imm.bitwiseIsEqual(LegalFPImmediates[i])) |
4850 | return true; |
4851 | } |
4852 | return false; |
4853 | } |
4854 | |
4855 | bool X86TargetLowering::shouldReduceLoadWidth(SDNode *Load, |
4856 | ISD::LoadExtType ExtTy, |
4857 | EVT NewVT) const { |
4858 | assert(cast<LoadSDNode>(Load)->isSimple() && "illegal to narrow");
4859 | |
4860 | // "ELF Handling for Thread-Local Storage" specifies that R_X86_64_GOTTPOFF |
4861 | // relocations must target a movq or addq instruction: don't let the load shrink.
4862 | SDValue BasePtr = cast<LoadSDNode>(Load)->getBasePtr(); |
4863 | if (BasePtr.getOpcode() == X86ISD::WrapperRIP) |
4864 | if (const auto *GA = dyn_cast<GlobalAddressSDNode>(BasePtr.getOperand(0))) |
4865 | return GA->getTargetFlags() != X86II::MO_GOTTPOFF; |
4866 | |
4867 | // If this is (1) an AVX vector load with (2) multiple uses and (3) all of
4868 | // those uses are extracted directly into a store, then the extract + store |
4869 | // can be store-folded. Therefore, it's probably not worth splitting the load. |
4870 | EVT VT = Load->getValueType(0); |
4871 | if ((VT.is256BitVector() || VT.is512BitVector()) && !Load->hasOneUse()) { |
4872 | for (auto UI = Load->use_begin(), UE = Load->use_end(); UI != UE; ++UI) { |
4873 | // Skip uses of the chain value. Result 0 of the node is the load value. |
4874 | if (UI.getUse().getResNo() != 0) |
4875 | continue; |
4876 | |
4877 | // If this use is not an extract + store, it's probably worth splitting. |
4878 | if (UI->getOpcode() != ISD::EXTRACT_SUBVECTOR || !UI->hasOneUse() || |
4879 | UI->use_begin()->getOpcode() != ISD::STORE) |
4880 | return true; |
4881 | } |
4882 | // All non-chain uses are extract + store. |
4883 | return false; |
4884 | } |
4885 | |
4886 | return true; |
4887 | } |
4888 | |
4889 | /// Returns true if it is beneficial to convert a load of a constant |
4890 | /// to just the constant itself. |
4891 | bool X86TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm, |
4892 | Type *Ty) const { |
4893 | assert(Ty->isIntegerTy());
4894 | |
4895 | unsigned BitSize = Ty->getPrimitiveSizeInBits(); |
4896 | if (BitSize == 0 || BitSize > 64) |
4897 | return false; |
4898 | return true; |
4899 | } |
4900 | |
4901 | bool X86TargetLowering::reduceSelectOfFPConstantLoads(EVT CmpOpVT) const { |
4902 | // If we are using XMM registers in the ABI and the condition of the select is |
4903 | // a floating-point compare and we have blendv or conditional move, then it is |
4904 | // cheaper to select instead of doing a cross-register move and creating a |
4905 | // load that depends on the compare result. |
4906 | bool IsFPSetCC = CmpOpVT.isFloatingPoint() && CmpOpVT != MVT::f128; |
4907 | return !IsFPSetCC || !Subtarget.isTarget64BitLP64() || !Subtarget.hasAVX(); |
4908 | } |
4909 | |
4910 | bool X86TargetLowering::convertSelectOfConstantsToMath(EVT VT) const { |
4911 | // TODO: It might be a win to ease or lift this restriction, but the generic |
4912 | // folds in DAGCombiner conflict with vector folds for an AVX512 target. |
4913 | if (VT.isVector() && Subtarget.hasAVX512()) |
4914 | return false; |
4915 | |
4916 | return true; |
4917 | } |
4918 | |
4919 | bool X86TargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT, |
4920 | SDValue C) const { |
4921 | // TODO: We handle scalars using custom code, but generic combining could make |
4922 | // that unnecessary. |
4923 | APInt MulC; |
4924 | if (!ISD::isConstantSplatVector(C.getNode(), MulC)) |
4925 | return false; |
4926 | |
4927 | // Find the type this will be legalized to. Otherwise we might prematurely
4928 | // convert this to shl+add/sub and then still have to type legalize those ops. |
4929 | // Another choice would be to defer the decision for illegal types until |
4930 | // after type legalization. But constant splat vectors of i64 can't make it |
4931 | // through type legalization on 32-bit targets so we would need to special |
4932 | // case vXi64. |
4933 | while (getTypeAction(Context, VT) != TypeLegal) |
4934 | VT = getTypeToTransformTo(Context, VT); |
4935 | |
4936 | // If vector multiply is legal, assume that's faster than shl + add/sub. |
4937 | // TODO: Multiply is a complex op with higher latency and lower throughput in |
4938 | // most implementations, so this check could be loosened based on type |
4939 | // and/or a CPU attribute. |
4940 | if (isOperationLegal(ISD::MUL, VT)) |
4941 | return false; |
4942 | |
4943 | // shl+add, shl+sub, shl+add+neg |
4944 | return (MulC + 1).isPowerOf2() || (MulC - 1).isPowerOf2() || |
4945 | (1 - MulC).isPowerOf2() || (-(MulC + 1)).isPowerOf2(); |
4946 | } |
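
// A minimal sketch of the power-of-two test above on plain 64-bit values
// (hypothetical helper, not the APInt API): a multiply by C folds to one shift
// plus one add/sub/neg exactly when C+1, C-1, 1-C or -(C+1) is a power of two,
// e.g. x*9 == (x<<3)+x, x*7 == (x<<3)-x, x*-7 == x-(x<<3), x*-9 == -((x<<3)+x).

#include <cstdint>

static bool isPow2(uint64_t V) { return V != 0 && (V & (V - 1)) == 0; }

static bool mulDecomposable(int64_t C) {
  uint64_t U = static_cast<uint64_t>(C);   // wrap-around arithmetic is intended
  return isPow2(U + 1) ||                  // C == 2^k - 1  -> shl + sub
         isPow2(U - 1) ||                  // C == 2^k + 1  -> shl + add
         isPow2(1 - U) ||                  // C == 1 - 2^k  -> sub of shl
         isPow2(0 - (U + 1));              // C == -(2^k+1) -> shl + add + neg
}
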
4947 | |
4948 | bool X86TargetLowering::shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT, |
4949 | bool IsSigned) const { |
4950 | // f80 UINT_TO_FP is more efficient using Strict code if FCMOV is available. |
4951 | return !IsSigned && FpVT == MVT::f80 && Subtarget.hasCMov(); |
4952 | } |
4953 | |
4954 | bool X86TargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, |
4955 | unsigned Index) const { |
4956 | if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT)) |
4957 | return false; |
4958 | |
4959 | // Mask vectors support all subregister combinations and operations that |
4960 | // extract half of a vector.
4961 | if (ResVT.getVectorElementType() == MVT::i1) |
4962 | return Index == 0 || ((ResVT.getSizeInBits() == SrcVT.getSizeInBits()*2) && |
4963 | (Index == ResVT.getVectorNumElements())); |
4964 | |
4965 | return (Index % ResVT.getVectorNumElements()) == 0; |
4966 | } |
4967 | |
4968 | bool X86TargetLowering::shouldScalarizeBinop(SDValue VecOp) const { |
4969 | unsigned Opc = VecOp.getOpcode(); |
4970 | |
4971 | // Assume target opcodes can't be scalarized. |
4972 | // TODO - do we have any exceptions? |
4973 | if (Opc >= ISD::BUILTIN_OP_END) |
4974 | return false; |
4975 | |
4976 | // If the vector op is not supported, try to convert to scalar. |
4977 | EVT VecVT = VecOp.getValueType(); |
4978 | if (!isOperationLegalOrCustomOrPromote(Opc, VecVT)) |
4979 | return true; |
4980 | |
4981 | // If the vector op is supported, but the scalar op is not, the transform may |
4982 | // not be worthwhile. |
4983 | EVT ScalarVT = VecVT.getScalarType(); |
4984 | return isOperationLegalOrCustomOrPromote(Opc, ScalarVT); |
4985 | } |
4986 | |
4987 | bool X86TargetLowering::shouldFormOverflowOp(unsigned Opcode, EVT VT) const { |
4988 | // TODO: Allow vectors? |
4989 | if (VT.isVector()) |
4990 | return false; |
4991 | return VT.isSimple() || !isOperationExpand(Opcode, VT); |
4992 | } |
4993 | |
4994 | bool X86TargetLowering::isCheapToSpeculateCttz() const { |
4995 | // Speculate cttz only if we can directly use TZCNT. |
4996 | return Subtarget.hasBMI(); |
4997 | } |
4998 | |
4999 | bool X86TargetLowering::isCheapToSpeculateCtlz() const { |
5000 | // Speculate ctlz only if we can directly use LZCNT. |
5001 | return Subtarget.hasLZCNT(); |
5002 | } |
5003 | |
5004 | bool X86TargetLowering::isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT, |
5005 | const SelectionDAG &DAG, |
5006 | const MachineMemOperand &MMO) const { |
5007 | if (!Subtarget.hasAVX512() && !LoadVT.isVector() && BitcastVT.isVector() && |
5008 | BitcastVT.getVectorElementType() == MVT::i1) |
5009 | return false; |
5010 | |
5011 | if (!Subtarget.hasDQI() && BitcastVT == MVT::v8i1 && LoadVT == MVT::i8) |
5012 | return false; |
5013 | |
5014 | // If both types are legal vectors, it's always ok to convert them. |
5015 | if (LoadVT.isVector() && BitcastVT.isVector() && |
5016 | isTypeLegal(LoadVT) && isTypeLegal(BitcastVT)) |
5017 | return true; |
5018 | |
5019 | return TargetLowering::isLoadBitCastBeneficial(LoadVT, BitcastVT, DAG, MMO); |
5020 | } |
5021 | |
5022 | bool X86TargetLowering::canMergeStoresTo(unsigned AddressSpace, EVT MemVT, |
5023 | const SelectionDAG &DAG) const { |
5024 | // Do not merge to a float/vector value size (e.g. 128 bits) if the
5025 | // NoImplicitFloat attribute is set.
5026 | bool NoFloat = DAG.getMachineFunction().getFunction().hasFnAttribute( |
5027 | Attribute::NoImplicitFloat); |
5028 | |
5029 | if (NoFloat) { |
5030 | unsigned MaxIntSize = Subtarget.is64Bit() ? 64 : 32; |
5031 | return (MemVT.getSizeInBits() <= MaxIntSize); |
5032 | } |
5033 | // Make sure we don't merge greater than our preferred vector |
5034 | // width. |
5035 | if (MemVT.getSizeInBits() > Subtarget.getPreferVectorWidth()) |
5036 | return false; |
5037 | return true; |
5038 | } |
5039 | |
5040 | bool X86TargetLowering::isCtlzFast() const { |
5041 | return Subtarget.hasFastLZCNT(); |
5042 | } |
5043 | |
5044 | bool X86TargetLowering::isMaskAndCmp0FoldingBeneficial( |
5045 | const Instruction &AndI) const { |
5046 | return true; |
5047 | } |
5048 | |
5049 | bool X86TargetLowering::hasAndNotCompare(SDValue Y) const { |
5050 | EVT VT = Y.getValueType(); |
5051 | |
5052 | if (VT.isVector()) |
5053 | return false; |
5054 | |
5055 | if (!Subtarget.hasBMI()) |
5056 | return false; |
5057 | |
5058 | // There are only 32-bit and 64-bit forms for 'andn'. |
5059 | if (VT != MVT::i32 && VT != MVT::i64) |
5060 | return false; |
5061 | |
5062 | return !isa<ConstantSDNode>(Y); |
5063 | } |
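
// A minimal sketch of the pattern this hook enables: with BMI available, the
// "and-not compare" below can be selected as a single ANDN feeding the flags,
// instead of a NOT followed by TEST. Plain C++; the helper name is made up.

#include <cstdint>

static bool andNotIsNonZero(uint64_t X, uint64_t Y) {
  return (~X & Y) != 0;   // candidate for ANDN + flags when BMI is present
}
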
5064 | |
5065 | bool X86TargetLowering::hasAndNot(SDValue Y) const { |
5066 | EVT VT = Y.getValueType(); |
5067 | |
5068 | if (!VT.isVector()) |
5069 | return hasAndNotCompare(Y); |
5070 | |
5071 | // Vector. |
5072 | |
5073 | if (!Subtarget.hasSSE1() || VT.getSizeInBits() < 128) |
5074 | return false; |
5075 | |
5076 | if (VT == MVT::v4i32) |
5077 | return true; |
5078 | |
5079 | return Subtarget.hasSSE2(); |
5080 | } |
5081 | |
5082 | bool X86TargetLowering::hasBitTest(SDValue X, SDValue Y) const { |
5083 | return X.getValueType().isScalarInteger(); // 'bt' |
5084 | } |
5085 | |
5086 | bool X86TargetLowering:: |
5087 | shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd( |
5088 | SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, |
5089 | unsigned OldShiftOpcode, unsigned NewShiftOpcode, |
5090 | SelectionDAG &DAG) const { |
5091 | // Does the baseline implementation recommend not performing the fold by default?
5092 | if (!TargetLowering::shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd( |
5093 | X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG)) |
5094 | return false; |
5095 | // For scalars this transform is always beneficial. |
5096 | if (X.getValueType().isScalarInteger()) |
5097 | return true; |
5098 | // If all the shift amounts are identical, then the transform is beneficial even
5099 | // with rudimentary SSE2 shifts. |
5100 | if (DAG.isSplatValue(Y, /*AllowUndefs=*/true)) |
5101 | return true; |
5102 | // If we have AVX2 with its powerful shift operations, then it's also good.
5103 | if (Subtarget.hasAVX2()) |
5104 | return true; |
5105 | // Pre-AVX2 vector codegen for this pattern is best for variant with 'shl'. |
5106 | return NewShiftOpcode == ISD::SHL; |
5107 | } |
5108 | |
5109 | bool X86TargetLowering::shouldFoldConstantShiftPairToMask( |
5110 | const SDNode *N, CombineLevel Level) const { |
5111 | assert(((N->getOpcode() == ISD::SHL &&
5112 |          N->getOperand(0).getOpcode() == ISD::SRL) ||
5113 |         (N->getOpcode() == ISD::SRL &&
5114 |          N->getOperand(0).getOpcode() == ISD::SHL)) &&
5115 |        "Expected shift-shift mask");
5116 | EVT VT = N->getValueType(0); |
5117 | if ((Subtarget.hasFastVectorShiftMasks() && VT.isVector()) || |
5118 | (Subtarget.hasFastScalarShiftMasks() && !VT.isVector())) { |
5119 | // Only fold if the shift values are equal - so it folds to AND. |
5120 | // TODO - we should fold if either is a non-uniform vector but we don't do |
5121 | // the fold for non-splats yet. |
5122 | return N->getOperand(1) == N->getOperand(0).getOperand(1); |
5123 | } |
5124 | return TargetLoweringBase::shouldFoldConstantShiftPairToMask(N, Level); |
5125 | } |
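
// A minimal sketch of the fold this hook controls, on plain scalars: when the
// two shift amounts are equal, a srl+shl (or shl+srl) pair is just an AND with
// a constant mask that clears the low (or high) bits. Hypothetical helpers.

#include <cassert>
#include <cstdint>

static uint32_t clearLowViaShifts(uint32_t X, unsigned C) {
  assert(C < 32 && "shift amount out of range");
  return (X >> C) << C;                    // shift-shift form
}

static uint32_t clearLowViaMask(uint32_t X, unsigned C) {
  assert(C < 32 && "shift amount out of range");
  return X & ~((1u << C) - 1u);            // equivalent AND-with-mask form
}
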
5126 | |
5127 | bool X86TargetLowering::shouldFoldMaskToVariableShiftPair(SDValue Y) const { |
5128 | EVT VT = Y.getValueType(); |
5129 | |
5130 | // For vectors, we don't have a preference, but we probably want a mask. |
5131 | if (VT.isVector()) |
5132 | return false; |
5133 | |
5134 | // 64-bit shifts on 32-bit targets produce really bad bloated code. |
5135 | if (VT == MVT::i64 && !Subtarget.is64Bit()) |
5136 | return false; |
5137 | |
5138 | return true; |
5139 | } |
5140 | |
5141 | bool X86TargetLowering::shouldExpandShift(SelectionDAG &DAG, |
5142 | SDNode *N) const { |
5143 | if (DAG.getMachineFunction().getFunction().hasMinSize() && |
5144 | !Subtarget.isOSWindows()) |
5145 | return false; |
5146 | return true; |
5147 | } |
5148 | |
5149 | bool X86TargetLowering::shouldSplatInsEltVarIndex(EVT VT) const { |
5150 | // Any legal vector type can be splatted more efficiently than |
5151 | // loading/spilling from memory. |
5152 | return isTypeLegal(VT); |
5153 | } |
5154 | |
5155 | MVT X86TargetLowering::hasFastEqualityCompare(unsigned NumBits) const { |
5156 | MVT VT = MVT::getIntegerVT(NumBits); |
5157 | if (isTypeLegal(VT)) |
5158 | return VT; |
5159 | |
5160 | // PMOVMSKB can handle this. |
5161 | if (NumBits == 128 && isTypeLegal(MVT::v16i8)) |
5162 | return MVT::v16i8; |
5163 | |
5164 | // VPMOVMSKB can handle this. |
5165 | if (NumBits == 256 && isTypeLegal(MVT::v32i8)) |
5166 | return MVT::v32i8; |
5167 | |
5168 | // TODO: Allow 64-bit type for 32-bit target. |
5169 | // TODO: 512-bit types should be allowed, but make sure that those |
5170 | // cases are handled in combineVectorSizedSetCCEquality(). |
5171 | |
5172 | return MVT::INVALID_SIMPLE_VALUE_TYPE; |
5173 | } |
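
// A minimal sketch of why v16i8 is returned for 128-bit equality: a 16-byte
// compare can be a single PCMPEQB plus PMOVMSKB instead of a chain of scalar
// compares. This uses the standard SSE2 intrinsics; the helper name is made up.

#include <emmintrin.h>

static bool bytes16Equal(const void *A, const void *B) {
  __m128i VA = _mm_loadu_si128(static_cast<const __m128i *>(A));
  __m128i VB = _mm_loadu_si128(static_cast<const __m128i *>(B));
  __m128i Eq = _mm_cmpeq_epi8(VA, VB);         // 0xFF in every equal byte lane
  return _mm_movemask_epi8(Eq) == 0xFFFF;      // all 16 lanes matched
}
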
5174 | |
5175 | /// Val is the undef sentinel value or equal to the specified value. |
5176 | static bool isUndefOrEqual(int Val, int CmpVal) { |
5177 | return ((Val == SM_SentinelUndef) || (Val == CmpVal)); |
5178 | } |
5179 | |
5180 | /// Val is either the undef or zero sentinel value. |
5181 | static bool isUndefOrZero(int Val) { |
5182 | return ((Val == SM_SentinelUndef) || (Val == SM_SentinelZero)); |
5183 | } |
5184 | |
5185 | /// Return true if every element in Mask, beginning from position Pos and ending |
5186 | /// in Pos+Size is the undef sentinel value. |
5187 | static bool isUndefInRange(ArrayRef<int> Mask, unsigned Pos, unsigned Size) { |
5188 | return llvm::all_of(Mask.slice(Pos, Size), |
5189 | [](int M) { return M == SM_SentinelUndef; }); |
5190 | } |
5191 | |
5192 | /// Return true if the mask creates a vector whose lower half is undefined. |
5193 | static bool isUndefLowerHalf(ArrayRef<int> Mask) { |
5194 | unsigned NumElts = Mask.size(); |
5195 | return isUndefInRange(Mask, 0, NumElts / 2); |
5196 | } |
5197 | |
5198 | /// Return true if the mask creates a vector whose upper half is undefined. |
5199 | static bool isUndefUpperHalf(ArrayRef<int> Mask) { |
5200 | unsigned NumElts = Mask.size(); |
5201 | return isUndefInRange(Mask, NumElts / 2, NumElts / 2); |
5202 | } |
5203 | |
5204 | /// Return true if Val falls within the specified range [Low, Hi).
5205 | static bool isInRange(int Val, int Low, int Hi) { |
5206 | return (Val >= Low && Val < Hi); |
5207 | } |
5208 | |
5209 | /// Return true if the value of any element in Mask falls within the specified |
5210 | /// range [Low, Hi).
5211 | static bool isAnyInRange(ArrayRef<int> Mask, int Low, int Hi) { |
5212 | return llvm::any_of(Mask, [Low, Hi](int M) { return isInRange(M, Low, Hi); }); |
5213 | } |
5214 | |
5215 | /// Return true if Val is undef or if its value falls within the |
5216 | /// specified range [Low, Hi).
5217 | static bool isUndefOrInRange(int Val, int Low, int Hi) { |
5218 | return (Val == SM_SentinelUndef) || isInRange(Val, Low, Hi); |
5219 | } |
5220 | |
5221 | /// Return true if every element in Mask is undef or if its value |
5222 | /// falls within the specified range [Low, Hi).
5223 | static bool isUndefOrInRange(ArrayRef<int> Mask, int Low, int Hi) { |
5224 | return llvm::all_of( |
5225 | Mask, [Low, Hi](int M) { return isUndefOrInRange(M, Low, Hi); }); |
5226 | } |
5227 | |
5228 | /// Return true if Val is undef, zero or if its value falls within the |
5229 | /// specified range [Low, Hi).
5230 | static bool isUndefOrZeroOrInRange(int Val, int Low, int Hi) { |
5231 | return isUndefOrZero(Val) || isInRange(Val, Low, Hi); |
5232 | } |
5233 | |
5234 | /// Return true if every element in Mask is undef, zero or if its value |
5235 | /// falls within the specified range [Low, Hi).
5236 | static bool isUndefOrZeroOrInRange(ArrayRef<int> Mask, int Low, int Hi) { |
5237 | return llvm::all_of( |
5238 | Mask, [Low, Hi](int M) { return isUndefOrZeroOrInRange(M, Low, Hi); }); |
5239 | } |
5240 | |
5241 | /// Return true if every element in Mask, beginning |
5242 | /// from position Pos and ending in Pos + Size, falls within the specified |
5243 | /// sequence (Low, Low + Step, ..., Low + (Size - 1) * Step) or is undef. |
5244 | static bool isSequentialOrUndefInRange(ArrayRef<int> Mask, unsigned Pos, |
5245 | unsigned Size, int Low, int Step = 1) { |
5246 | for (unsigned i = Pos, e = Pos + Size; i != e; ++i, Low += Step) |
5247 | if (!isUndefOrEqual(Mask[i], Low)) |
5248 | return false; |
5249 | return true; |
5250 | } |
5251 | |
5252 | /// Return true if every element in Mask, beginning |
5253 | /// from position Pos and ending in Pos+Size, falls within the specified |
5254 | /// sequential range [Low, Low+Size), or is undef or is zero.
5255 | static bool isSequentialOrUndefOrZeroInRange(ArrayRef<int> Mask, unsigned Pos, |
5256 | unsigned Size, int Low, |
5257 | int Step = 1) { |
5258 | for (unsigned i = Pos, e = Pos + Size; i != e; ++i, Low += Step) |
5259 | if (!isUndefOrZero(Mask[i]) && Mask[i] != Low) |
5260 | return false; |
5261 | return true; |
5262 | } |
5263 | |
5264 | /// Return true if every element in Mask, beginning |
5265 | /// from position Pos and ending in Pos+Size is undef or is zero. |
5266 | static bool isUndefOrZeroInRange(ArrayRef<int> Mask, unsigned Pos, |
5267 | unsigned Size) { |
5268 | return llvm::all_of(Mask.slice(Pos, Size), |
5269 | [](int M) { return isUndefOrZero(M); }); |
5270 | } |
5271 | |
5272 | /// Helper function to test whether a shuffle mask could be |
5273 | /// simplified by widening the elements being shuffled. |
5274 | /// |
5275 | /// Appends the mask for wider elements in WidenedMask if valid. Otherwise |
5276 | /// leaves it in an unspecified state. |
5277 | /// |
5278 | /// NOTE: This must handle normal vector shuffle masks and *target* vector |
5279 | /// shuffle masks. The latter have the special property of a '-2' representing |
5280 | /// a zeroed lane of a vector.
5281 | static bool canWidenShuffleElements(ArrayRef<int> Mask, |
5282 | SmallVectorImpl<int> &WidenedMask) { |
5283 | WidenedMask.assign(Mask.size() / 2, 0); |
5284 | for (int i = 0, Size = Mask.size(); i < Size; i += 2) { |
5285 | int M0 = Mask[i]; |
5286 | int M1 = Mask[i + 1]; |
5287 | |
5288 | // If both elements are undef, it's trivial.
5289 | if (M0 == SM_SentinelUndef && M1 == SM_SentinelUndef) { |
5290 | WidenedMask[i / 2] = SM_SentinelUndef; |
5291 | continue; |
5292 | } |
5293 | |
5294 | // Check for an undef mask and a mask value properly aligned to fit with |
5295 | // a pair of values. If we find such a case, use the non-undef mask's value. |
5296 | if (M0 == SM_SentinelUndef && M1 >= 0 && (M1 % 2) == 1) { |
5297 | WidenedMask[i / 2] = M1 / 2; |
5298 | continue; |
5299 | } |
5300 | if (M1 == SM_SentinelUndef && M0 >= 0 && (M0 % 2) == 0) { |
5301 | WidenedMask[i / 2] = M0 / 2; |
5302 | continue; |
5303 | } |
5304 | |
5305 | // When zeroing, we need to spread the zeroing across both lanes to widen. |
5306 | if (M0 == SM_SentinelZero || M1 == SM_SentinelZero) { |
5307 | if ((M0 == SM_SentinelZero || M0 == SM_SentinelUndef) && |
5308 | (M1 == SM_SentinelZero || M1 == SM_SentinelUndef)) { |
5309 | WidenedMask[i / 2] = SM_SentinelZero; |
5310 | continue; |
5311 | } |
5312 | return false; |
5313 | } |
5314 | |
5315 | // Finally check if the two mask values are adjacent and aligned with |
5316 | // a pair. |
5317 | if (M0 != SM_SentinelUndef && (M0 % 2) == 0 && (M0 + 1) == M1) { |
5318 | WidenedMask[i / 2] = M0 / 2; |
5319 | continue; |
5320 | } |
5321 | |
5322 | // Otherwise we can't safely widen the elements used in this shuffle. |
5323 | return false; |
5324 | } |
5325 | assert(WidenedMask.size() == Mask.size() / 2 &&
5326 |        "Incorrect size of mask after widening the elements!");
5327 | |
5328 | return true; |
5329 | } |
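
// A minimal sketch of the widening rule above on plain integer masks, using
// the same sentinels (-1 undef, -2 zero) but std::vector instead of the LLVM
// containers: {0, 1, 6, 7} widens to {0, 3}, while {0, 2, 4, 6} cannot widen
// because its pairs do not address adjacent, pair-aligned narrow elements.

#include <vector>

static bool widenMask(const std::vector<int> &Mask, std::vector<int> &Wide) {
  const int Undef = -1, Zero = -2;
  Wide.assign(Mask.size() / 2, 0);
  for (size_t I = 0; I + 1 < Mask.size(); I += 2) {
    int M0 = Mask[I], M1 = Mask[I + 1];
    if (M0 == Undef && M1 == Undef) { Wide[I / 2] = Undef; continue; }
    if (M0 == Undef && M1 >= 0 && (M1 % 2) == 1) { Wide[I / 2] = M1 / 2; continue; }
    if (M1 == Undef && M0 >= 0 && (M0 % 2) == 0) { Wide[I / 2] = M0 / 2; continue; }
    if (M0 == Zero || M1 == Zero) {
      // Zeroing must cover the whole pair to become one wide zero element.
      if (M0 >= 0 || M1 >= 0)
        return false;
      Wide[I / 2] = Zero;
      continue;
    }
    if (M0 >= 0 && (M0 % 2) == 0 && M0 + 1 == M1) { Wide[I / 2] = M0 / 2; continue; }
    return false;   // this pair cannot be expressed as one wide element
  }
  return true;
}
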
5330 | |
5331 | static bool canWidenShuffleElements(ArrayRef<int> Mask, |
5332 | const APInt &Zeroable, |
5333 | bool V2IsZero, |
5334 | SmallVectorImpl<int> &WidenedMask) { |
5335 | // Create an alternative mask with info about zeroable elements. |
5336 | // Here we do not set undef elements as zeroable. |
5337 | SmallVector<int, 64> ZeroableMask(Mask.begin(), Mask.end()); |
5338 | if (V2IsZero) { |
5339 | assert(!Zeroable.isNullValue() && "V2's non-undef elements are used?!");
5340 | for (int i = 0, Size = Mask.size(); i != Size; ++i) |
5341 | if (Mask[i] != SM_SentinelUndef && Zeroable[i]) |
5342 | ZeroableMask[i] = SM_SentinelZero; |
5343 | } |
5344 | return canWidenShuffleElements(ZeroableMask, WidenedMask); |
5345 | } |
5346 | |
5347 | static bool canWidenShuffleElements(ArrayRef<int> Mask) { |
5348 | SmallVector<int, 32> WidenedMask; |
5349 | return canWidenShuffleElements(Mask, WidenedMask); |
5350 | } |
5351 | |
5352 | /// Returns true if Elt is a constant zero or a floating point constant +0.0. |
5353 | bool X86::isZeroNode(SDValue Elt) { |
5354 | return isNullConstant(Elt) || isNullFPConstant(Elt); |
5355 | } |
5356 | |
5357 | // Build a vector of constants. |
5358 | // Use an UNDEF node if MaskElt == -1. |
5359 | // Split 64-bit constants when in 32-bit mode.
5360 | static SDValue getConstVector(ArrayRef<int> Values, MVT VT, SelectionDAG &DAG, |
5361 | const SDLoc &dl, bool IsMask = false) { |
5362 | |
5363 | SmallVector<SDValue, 32> Ops; |
5364 | bool Split = false; |
5365 | |
5366 | MVT ConstVecVT = VT; |
5367 | unsigned NumElts = VT.getVectorNumElements(); |
5368 | bool In64BitMode = DAG.getTargetLoweringInfo().isTypeLegal(MVT::i64); |
5369 | if (!In64BitMode && VT.getVectorElementType() == MVT::i64) { |
5370 | ConstVecVT = MVT::getVectorVT(MVT::i32, NumElts * 2); |
5371 | Split = true; |
5372 | } |
5373 | |
5374 | MVT EltVT = ConstVecVT.getVectorElementType(); |
5375 | for (unsigned i = 0; i < NumElts; ++i) { |
5376 | bool IsUndef = Values[i] < 0 && IsMask; |
5377 | SDValue OpNode = IsUndef ? DAG.getUNDEF(EltVT) : |
5378 | DAG.getConstant(Values[i], dl, EltVT); |
5379 | Ops.push_back(OpNode); |
5380 | if (Split) |
5381 | Ops.push_back(IsUndef ? DAG.getUNDEF(EltVT) : |
5382 | DAG.getConstant(0, dl, EltVT)); |
5383 | } |
5384 | SDValue ConstsNode = DAG.getBuildVector(ConstVecVT, dl, Ops); |
5385 | if (Split) |
5386 | ConstsNode = DAG.getBitcast(VT, ConstsNode); |
5387 | return ConstsNode; |
5388 | } |
5389 | |
5390 | static SDValue getConstVector(ArrayRef<APInt> Bits, APInt &Undefs, |
5391 | MVT VT, SelectionDAG &DAG, const SDLoc &dl) { |
5392 | assert(Bits.size() == Undefs.getBitWidth() &&
5393 |        "Unequal constant and undef arrays");
5394 | SmallVector<SDValue, 32> Ops; |
5395 | bool Split = false; |
5396 | |
5397 | MVT ConstVecVT = VT; |
5398 | unsigned NumElts = VT.getVectorNumElements(); |
5399 | bool In64BitMode = DAG.getTargetLoweringInfo().isTypeLegal(MVT::i64); |
5400 | if (!In64BitMode && VT.getVectorElementType() == MVT::i64) { |
5401 | ConstVecVT = MVT::getVectorVT(MVT::i32, NumElts * 2); |
5402 | Split = true; |
5403 | } |
5404 | |
5405 | MVT EltVT = ConstVecVT.getVectorElementType(); |
5406 | for (unsigned i = 0, e = Bits.size(); i != e; ++i) { |
5407 | if (Undefs[i]) { |
5408 | Ops.append(Split ? 2 : 1, DAG.getUNDEF(EltVT)); |
5409 | continue; |
5410 | } |
5411 | const APInt &V = Bits[i]; |
5412 | assert(V.getBitWidth() == VT.getScalarSizeInBits() && "Unexpected sizes");
5413 | if (Split) { |
5414 | Ops.push_back(DAG.getConstant(V.trunc(32), dl, EltVT)); |
5415 | Ops.push_back(DAG.getConstant(V.lshr(32).trunc(32), dl, EltVT)); |
5416 | } else if (EltVT == MVT::f32) { |
5417 | APFloat FV(APFloat::IEEEsingle(), V); |
5418 | Ops.push_back(DAG.getConstantFP(FV, dl, EltVT)); |
5419 | } else if (EltVT == MVT::f64) { |
5420 | APFloat FV(APFloat::IEEEdouble(), V); |
5421 | Ops.push_back(DAG.getConstantFP(FV, dl, EltVT)); |
5422 | } else { |
5423 | Ops.push_back(DAG.getConstant(V, dl, EltVT)); |
5424 | } |
5425 | } |
5426 | |
5427 | SDValue ConstsNode = DAG.getBuildVector(ConstVecVT, dl, Ops); |
5428 | return DAG.getBitcast(VT, ConstsNode); |
5429 | } |
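
// A minimal sketch of the 64-bit split above on a plain integer: when i64 is
// not legal, each 64-bit lane is emitted as two i32 lanes, low half first
// (little-endian lane order). Hypothetical helper, not the APInt API.

#include <cstdint>
#include <utility>

static std::pair<uint32_t, uint32_t> splitI64(uint64_t V) {
  uint32_t Lo = static_cast<uint32_t>(V);          // V.trunc(32)
  uint32_t Hi = static_cast<uint32_t>(V >> 32);    // V.lshr(32).trunc(32)
  return {Lo, Hi};
}
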
5430 | |
5431 | /// Returns a vector of specified type with all zero elements. |
5432 | static SDValue getZeroVector(MVT VT, const X86Subtarget &Subtarget, |
5433 | SelectionDAG &DAG, const SDLoc &dl) { |
5434 | assert((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector() ||
5435 |         VT.getVectorElementType() == MVT::i1) &&
5436 |        "Unexpected vector type");
5437 | |
5438 | // Try to build SSE/AVX zero vectors as <N x i32> bitcasted to their dest |
5439 | // type. This ensures they get CSE'd. But if the integer type is not |
5440 | // available, use a floating-point +0.0 instead. |
5441 | SDValue Vec; |
5442 | if (!Subtarget.hasSSE2() && VT.is128BitVector()) { |
5443 | Vec = DAG.getConstantFP(+0.0, dl, MVT::v4f32); |
5444 | } else if (VT.isFloatingPoint()) { |
5445 | Vec = DAG.getConstantFP(+0.0, dl, VT); |
5446 | } else if (VT.getVectorElementType() == MVT::i1) { |
5447 | assert((Subtarget.hasBWI() || VT.getVectorNumElements() <= 16) &&
5448 |        "Unexpected vector type");
5449 | Vec = DAG.getConstant(0, dl, VT); |
5450 | } else { |
5451 | unsigned Num32BitElts = VT.getSizeInBits() / 32; |
5452 | Vec = DAG.getConstant(0, dl, MVT::getVectorVT(MVT::i32, Num32BitElts)); |
5453 | } |
5454 | return DAG.getBitcast(VT, Vec); |
5455 | } |
5456 | |
5457 | static SDValue extractSubVector(SDValue Vec, unsigned IdxVal, SelectionDAG &DAG, |
5458 | const SDLoc &dl, unsigned vectorWidth) { |
5459 | EVT VT = Vec.getValueType(); |
5460 | EVT ElVT = VT.getVectorElementType(); |
5461 | unsigned Factor = VT.getSizeInBits()/vectorWidth; |
5462 | EVT ResultVT = EVT::getVectorVT(*DAG.getContext(), ElVT, |
5463 | VT.getVectorNumElements()/Factor); |
5464 | |
5465 | // Extract the relevant vectorWidth bits. Generate an EXTRACT_SUBVECTOR |
5466 | unsigned ElemsPerChunk = vectorWidth / ElVT.getSizeInBits(); |
5467 | assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2");
5468 | |
5469 | // This is the index of the first element of the vectorWidth-bit chunk |
5470 | // we want. Since ElemsPerChunk is a power of 2, we just need to clear the low bits.
5471 | IdxVal &= ~(ElemsPerChunk - 1); |
5472 | |
5473 | // If the input is a buildvector just emit a smaller one. |
5474 | if (Vec.getOpcode() == ISD::BUILD_VECTOR) |
5475 | return DAG.getBuildVector(ResultVT, dl, |
5476 | Vec->ops().slice(IdxVal, ElemsPerChunk)); |
5477 | |
5478 | SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, dl); |
5479 | return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResultVT, Vec, VecIdx); |
5480 | } |
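
// A minimal sketch of the index rounding used above: ElemsPerChunk is a power
// of two, so clearing its low bits snaps any element index to the first
// element of the 128-bit chunk that contains it (e.g. element 5 of a v8i32
// maps to chunk start 4). The helper and its parameters are invented.

#include <cassert>

static unsigned chunkFirstElement(unsigned ElemIdx, unsigned ChunkBits,
                                  unsigned EltBits) {
  unsigned ElemsPerChunk = ChunkBits / EltBits;           // e.g. 128 / 32 == 4
  assert((ElemsPerChunk & (ElemsPerChunk - 1)) == 0 &&
         "elements per chunk must be a power of 2");
  return ElemIdx & ~(ElemsPerChunk - 1);
}
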
5481 | |
5482 | /// Generate a DAG to grab 128-bits from a vector > 128 bits. This |
5483 | /// sets things up to match to an AVX VEXTRACTF128 / VEXTRACTI128 |
5484 | /// or AVX-512 VEXTRACTF32x4 / VEXTRACTI32x4 |
5485 | /// instructions or a simple subregister reference. Idx is an index in the |
5486 | /// 128 bits we want. It need not be aligned to a 128-bit boundary. That makes |
5487 | /// lowering EXTRACT_VECTOR_ELT operations easier. |
5488 | static SDValue extract128BitVector(SDValue Vec, unsigned IdxVal, |
5489 | SelectionDAG &DAG, const SDLoc &dl) { |
5490 | assert((Vec.getValueType().is256BitVector() ||
5491 |         Vec.getValueType().is512BitVector()) && "Unexpected vector size!");
5492 | return extractSubVector(Vec, IdxVal, DAG, dl, 128); |
5493 | } |
5494 | |
5495 | /// Generate a DAG to grab 256-bits from a 512-bit vector. |
5496 | static SDValue extract256BitVector(SDValue Vec, unsigned IdxVal, |
5497 | SelectionDAG &DAG, const SDLoc &dl) { |
5498 | assert(Vec.getValueType().is512BitVector() && "Unexpected vector size!");
5499 | return extractSubVector(Vec, IdxVal, DAG, dl, 256); |
5500 | } |
5501 | |
5502 | static SDValue insertSubVector(SDValue Result, SDValue Vec, unsigned IdxVal, |
5503 | SelectionDAG &DAG, const SDLoc &dl, |
5504 | unsigned vectorWidth) { |
5505 | assert((vectorWidth == 128 || vectorWidth == 256) &&
5506 |        "Unsupported vector width");
5507 | // Inserting UNDEF just returns Result unchanged.
5508 | if (Vec.isUndef()) |
5509 | return Result; |
5510 | EVT VT = Vec.getValueType(); |
5511 | EVT ElVT = VT.getVectorElementType(); |
5512 | EVT ResultVT = Result.getValueType(); |
5513 | |
5514 | // Insert the relevant vectorWidth bits. |
5515 | unsigned ElemsPerChunk = vectorWidth/ElVT.getSizeInBits(); |
5516 | assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2");
5517 | |
5518 | // This is the index of the first element of the vectorWidth-bit chunk |
5519 | // we want. Since ElemsPerChunk is a power of 2, we just need to clear the low bits.
5520 | IdxVal &= ~(ElemsPerChunk - 1); |
5521 | |
5522 | SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, dl); |
5523 | return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Result, Vec, VecIdx); |
5524 | } |
5525 | |
5526 | /// Generate a DAG to put 128 bits into a vector > 128 bits. This
5527 | /// sets things up to match an AVX VINSERTF128/VINSERTI128 or
5528 | /// AVX-512 VINSERTF32x4/VINSERTI32x4 instruction, or a
5529 | /// simple superregister reference. IdxVal is an element index within the
5530 | /// 128 bits we want. It need not be aligned to a 128-bit boundary, which
5531 | /// makes lowering INSERT_VECTOR_ELT operations easier.
5532 | static SDValue insert128BitVector(SDValue Result, SDValue Vec, unsigned IdxVal, |
5533 | SelectionDAG &DAG, const SDLoc &dl) { |
5534 | assert(Vec.getValueType().is128BitVector() && "Unexpected vector size!");
5535 | return insertSubVector(Result, Vec, IdxVal, DAG, dl, 128); |
5536 | } |
5537 | |
5538 | /// Widen a vector to a larger size with the same scalar type, with the new |
5539 | /// elements either zero or undef. |
5540 | static SDValue widenSubVector(MVT VT, SDValue Vec, bool ZeroNewElements, |
5541 | const X86Subtarget &Subtarget, SelectionDAG &DAG, |
5542 | const SDLoc &dl) { |
5543 | assert(Vec.getValueSizeInBits() < VT.getSizeInBits() &&
5544 | Vec.getValueType().getScalarType() == VT.getScalarType() &&
5545 | "Unsupported vector widening type");
5546 | SDValue Res = ZeroNewElements ? getZeroVector(VT, Subtarget, DAG, dl) |
5547 | : DAG.getUNDEF(VT); |
5548 | return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VT, Res, Vec, |
5549 | DAG.getIntPtrConstant(0, dl)); |
5550 | } |
5551 | |
5552 | /// Widen a vector to a larger size with the same scalar type, with the new |
5553 | /// elements either zero or undef. |
5554 | static SDValue widenSubVector(SDValue Vec, bool ZeroNewElements, |
5555 | const X86Subtarget &Subtarget, SelectionDAG &DAG, |
5556 | const SDLoc &dl, unsigned WideSizeInBits) { |
5557 | assert(Vec.getValueSizeInBits() < WideSizeInBits &&
5558 | (WideSizeInBits % Vec.getScalarValueSizeInBits()) == 0 &&
5559 | "Unsupported vector widening type");
5560 | unsigned WideNumElts = WideSizeInBits / Vec.getScalarValueSizeInBits(); |
5561 | MVT SVT = Vec.getSimpleValueType().getScalarType(); |
5562 | MVT VT = MVT::getVectorVT(SVT, WideNumElts); |
5563 | return widenSubVector(VT, Vec, ZeroNewElements, Subtarget, DAG, dl); |
5564 | } |
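// For illustration: widening a v4i32 value to 256 bits gives WideNumElts ==
// 256 / 32 == 8, so either overload produces an INSERT_SUBVECTOR of the
// v4i32 source at index 0 into a zero or undef v8i32, depending on
// ZeroNewElements.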
5565 | |
5566 | // Helper function to collect subvector ops that are concatenated together,
5567 | // either by ISD::CONCAT_VECTORS or an ISD::INSERT_SUBVECTOR series.
5568 | // The subvectors in Ops are guaranteed to have the same type.
5569 | static bool collectConcatOps(SDNode *N, SmallVectorImpl<SDValue> &Ops) { |
5570 | assert(Ops.empty() && "Expected an empty ops vector");
5571 | |
5572 | if (N->getOpcode() == ISD::CONCAT_VECTORS) { |
5573 | Ops.append(N->op_begin(), N->op_end()); |
5574 | return true; |
5575 | } |
5576 | |
5577 | if (N->getOpcode() == ISD::INSERT_SUBVECTOR && |
5578 | isa<ConstantSDNode>(N->getOperand(2))) { |
5579 | SDValue Src = N->getOperand(0); |
5580 | SDValue Sub = N->getOperand(1); |
5581 | const APInt &Idx = N->getConstantOperandAPInt(2); |
5582 | EVT VT = Src.getValueType(); |
5583 | EVT SubVT = Sub.getValueType(); |
5584 | |
5585 | // TODO - Handle more general insert_subvector chains. |
5586 | if (VT.getSizeInBits() == (SubVT.getSizeInBits() * 2) && |
5587 | Idx == (VT.getVectorNumElements() / 2) && |
5588 | Src.getOpcode() == ISD::INSERT_SUBVECTOR && |
5589 | Src.getOperand(1).getValueType() == SubVT && |
5590 | isNullConstant(Src.getOperand(2))) { |
5591 | Ops.push_back(Src.getOperand(1)); |
5592 | Ops.push_back(Sub); |
5593 | return true; |
5594 | } |
5595 | } |
5596 | |
5597 | return false; |
5598 | } |
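// For illustration: besides a plain CONCAT_VECTORS, the only insert chain
// recognized above is the two-half form, e.g. for v8i32:
//   insert_subvector(insert_subvector(X, A:v4i32, 0), B:v4i32, 4)
// which fills Ops with {A, B}; other insert_subvector chains are rejected
// for now (see the TODO above).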
5599 | |
5600 | // Helper for splitting the operands of an operation into legal target-sized
5601 | // pieces and applying a function to each part.
5602 | // Useful for operations that are available on SSE2 in 128-bit form, on AVX2
5603 | // in 256-bit form and on AVX512BW in 512-bit form. The argument VT is the
5604 | // type used for deciding if/how to split Ops. Ops elements do *not* have to be of type VT.
5605 | // The argument Builder is a function that will be applied to each split part:
5606 | // SDValue Builder(SelectionDAG &G, const SDLoc &DL, ArrayRef<SDValue> Ops)
5607 | template <typename F> |
5608 | SDValue SplitOpsAndApply(SelectionDAG &DAG, const X86Subtarget &Subtarget, |
5609 | const SDLoc &DL, EVT VT, ArrayRef<SDValue> Ops, |
5610 | F Builder, bool CheckBWI = true) { |
5611 | assert(Subtarget.hasSSE2() && "Target assumed to support at least SSE2");
5612 | unsigned NumSubs = 1; |
5613 | if ((CheckBWI && Subtarget.useBWIRegs()) || |
5614 | (!CheckBWI && Subtarget.useAVX512Regs())) { |
5615 | if (VT.getSizeInBits() > 512) { |
5616 | NumSubs = VT.getSizeInBits() / 512; |
5617 | assert((VT.getSizeInBits() % 512) == 0 && "Illegal vector size");
5618 | } |
5619 | } else if (Subtarget.hasAVX2()) { |
5620 | if (VT.getSizeInBits() > 256) { |
5621 | NumSubs = VT.getSizeInBits() / 256; |
5622 | assert((VT.getSizeInBits() % 256) == 0 && "Illegal vector size");
5623 | } |
5624 | } else { |
5625 | if (VT.getSizeInBits() > 128) { |
5626 | NumSubs = VT.getSizeInBits() / 128; |
5627 | assert((VT.getSizeInBits() % 128) == 0 && "Illegal vector size");
5628 | } |
5629 | } |
5630 | |
5631 | if (NumSubs == 1) |
5632 | return Builder(DAG, DL, Ops); |
5633 | |
5634 | SmallVector<SDValue, 4> Subs; |
5635 | for (unsigned i = 0; i != NumSubs; ++i) { |
5636 | SmallVector<SDValue, 2> SubOps; |
5637 | for (SDValue Op : Ops) { |
5638 | EVT OpVT = Op.getValueType(); |
5639 | unsigned NumSubElts = OpVT.getVectorNumElements() / NumSubs; |
5640 | unsigned SizeSub = OpVT.getSizeInBits() / NumSubs; |
5641 | SubOps.push_back(extractSubVector(Op, i * NumSubElts, DAG, DL, SizeSub)); |
5642 | } |
5643 | Subs.push_back(Builder(DAG, DL, SubOps)); |
5644 | } |
5645 | return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Subs); |
5646 | } |
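// Illustrative usage (a hedged sketch; AddBuilder, LHS and RHS are
// hypothetical names, not from this file): the caller supplies a lambda
// matching the Builder signature documented above, e.g.
//   auto AddBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
//                        ArrayRef<SDValue> Ops) {
//     return DAG.getNode(ISD::ADD, DL, Ops[0].getValueType(), Ops);
//   };
//   SDValue R = SplitOpsAndApply(DAG, Subtarget, DL, VT, {LHS, RHS},
//                                AddBuilder);
// On an AVX2-only target with VT == MVT::v16i32 the lambda is invoked twice
// on v8i32 halves and the two results are concatenated back into a v16i32.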
5647 | |
5648 | /// Insert i1-subvector to i1-vector. |
5649 | static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG, |
5650 | const X86Subtarget &Subtarget) { |
5651 | |
5652 | SDLoc dl(Op); |
5653 | SDValue Vec = Op.getOperand(0); |
5654 | SDValue SubVec = Op.getOperand(1); |
5655 | SDValue Idx = Op.getOperand(2); |
5656 | |
5657 | if (!isa<ConstantSDNode>(Idx)) |
5658 | return SDValue(); |
5659 | |
5660 | // Inserting undef is a nop. We can just return the original vector. |
5661 | if (SubVec.isUndef()) |
5662 | return Vec; |
5663 | |
5664 | unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue(); |
5665 | if (IdxVal == 0 && Vec.isUndef()) // the operation is legal |
5666 | return Op; |
5667 | |
5668 | MVT OpVT = Op.getSimpleValueType(); |
5669 | unsigned NumElems = OpVT.getVectorNumElements(); |
5670 | |
5671 | SDValue ZeroIdx = DAG.getIntPtrConstant(0, dl); |
5672 | |
5673 | // Extend to natively supported kshift. |
5674 | MVT WideOpVT = OpVT; |
5675 | if ((!Subtarget.hasDQI() && NumElems == 8) || NumElems < 8) |
5676 | WideOpVT = Subtarget.hasDQI() ? MVT::v8i1 : MVT::v16i1; |
5677 | |
5678 | // Inserting into the lsbs of a zero vector is legal. ISel will insert shifts |
5679 | // if necessary. |
5680 | if (IdxVal == 0 && ISD::isBuildVectorAllZeros(Vec.getNode())) { |
5681 | // May need to promote to a legal type. |
5682 | Op = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, |
5683 | DAG.getConstant(0, dl, WideOpVT), |
5684 | SubVec, Idx); |
5685 | return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, Op, ZeroIdx); |
5686 | } |
5687 | |
5688 | MVT SubVecVT = SubVec.getSimpleValueType(); |
5689 | unsigned SubVecNumElems = SubVecVT.getVectorNumElements(); |
5690 | |
5691 | assert(IdxVal + SubVecNumElems <= NumElems &&
5692 | IdxVal % SubVecVT.getSizeInBits() == 0 &&
5693 | "Unexpected index value in INSERT_SUBVECTOR");
5694 | |
5695 | SDValue Undef = DAG.getUNDEF(WideOpVT); |
5696 | |
5697 | if (IdxVal == 0) { |
5698 | // Zero the lower bits of Vec.
5699 | SDValue ShiftBits = DAG.getTargetConstant(SubVecNumElems, dl, MVT::i8); |
5700 | Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, Undef, Vec, |
5701 | ZeroIdx); |
5702 | Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec, ShiftBits); |
5703 | Vec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Vec, ShiftBits); |
5704 | // Merge them together; SubVec should be zero extended.
5705 | SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, |
5706 | DAG.getConstant(0, dl, WideOpVT), |
5707 | SubVec, ZeroIdx); |
5708 | Op = DAG.getNode(ISD::OR, dl, WideOpVT, Vec, SubVec); |
5709 | return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, Op, ZeroIdx); |
5710 | } |
5711 | |
5712 | SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, |
5713 | Undef, SubVec, ZeroIdx); |
5714 | |
5715 | if (Vec.isUndef()) { |
5716 | assert(IdxVal != 0 && "Unexpected index");
5717 | SubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, SubVec, |
5718 | DAG.getTargetConstant(IdxVal, dl, MVT::i8)); |
5719 | return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, SubVec, ZeroIdx); |
5720 | } |
5721 | |
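// If the existing vector is known to be all zeros there is nothing to merge
// with: shifting the widened subvector left to the top clears any bits above
// it, and the right shift then drops it at IdxVal with zeros everywhere else.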
5722 | if (ISD::isBuildVectorAllZeros(Vec.getNode())) { |
5723 | assert(IdxVal != 0 && "Unexpected index");
5724 | NumElems = WideOpVT.getVectorNumElements(); |
5725 | unsigned ShiftLeft = NumElems - SubVecNumElems; |
5726 | unsigned ShiftRight = NumElems - SubVecNumElems - IdxVal; |
5727 | SubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, SubVec, |
5728 | DAG.getTargetConstant(ShiftLeft, dl, MVT::i8)); |
5729 | if (ShiftRight != 0) |
5730 | SubVec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, SubVec, |
5731 | DAG.getTargetConstant(ShiftRight, dl, MVT::i8)); |
5732 | return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, SubVec, ZeroIdx); |
5733 | } |
5734 | |
5735 | // Simple case: the subvector is placed in the upper part.
5736 | if (IdxVal + SubVecNumElems == NumElems) { |
5737 | SubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, SubVec, |
5738 | DAG.getTargetConstant(IdxVal, dl, MVT::i8)); |
5739 | if (SubVecNumElems * 2 == NumElems) { |
5740 | // Special case: use the legal zero-extending insert_subvector. This allows
5741 | // isel to optimize when bits are known to be zero.
5742 | Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVecVT, Vec, ZeroIdx); |
5743 | Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, |
5744 | DAG.getConstant(0, dl, WideOpVT), |
5745 | Vec, ZeroIdx); |
5746 | } else { |
5747 | // Otherwise use explicit shifts to zero the bits. |
5748 | Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, |
5749 | Undef, Vec, ZeroIdx); |
5750 | NumElems = WideOpVT.getVectorNumElements(); |
5751 | SDValue ShiftBits = DAG.getTargetConstant(NumElems - IdxVal, dl, MVT::i8); |
5752 | Vec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Vec, ShiftBits); |
5753 | Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec, ShiftBits); |
5754 | } |
5755 | Op = DAG.getNode(ISD::OR, dl, WideOpVT, Vec, SubVec); |
5756 | return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, Op, ZeroIdx); |
5757 | } |
5758 | |
5759 | // Inserting into the middle is more complicated. |
5760 | |
5761 | NumElems = WideOpVT.getVectorNumElements(); |
5762 | |
5763 | // Widen the vector if needed. |
5764 | Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, Undef, Vec, ZeroIdx); |
5765 | |
5766 | // Clear the upper bits of the subvector and move it to its insert position. |
5767 | unsigned ShiftLeft = NumElems - SubVecNumElems; |
5768 | SubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, SubVec, |
5769 | DAG.getTargetConstant(ShiftLeft, dl, MVT::i8)); |
5770 | unsigned ShiftRight = NumElems - SubVecNumElems - IdxVal; |
5771 | SubVec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, SubVec, |
5772 | DAG.getTargetConstant(ShiftRight, dl, MVT::i8)); |
5773 | |
5774 | // Isolate the bits below the insertion point. |
5775 | unsigned LowShift = NumElems - IdxVal; |
5776 | SDValue Low = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Vec, |
5777 | DAG.getTargetConstant(LowShift, dl, MVT::i8)); |
5778 | Low = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Low, |
5779 | DAG.getTargetConstant(LowShift, dl, MVT::i8)); |
5780 | |
5781 | // Isolate the bits after the last inserted bit. |
5782 | unsigned HighShift = IdxVal + SubVecNumElems; |
5783 | SDValue High = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec, |
5784 | DAG.getTargetConstant(HighShift, dl, MVT::i8)); |
5785 | High = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, High, |
5786 | DAG.getTargetConstant(HighShift, dl, MVT::i8)); |
5787 | |
5788 | // Now OR all 3 pieces together. |
5789 | Vec = DAG.getNode(ISD::OR, dl, WideOpVT, Low, High); |
5790 | SubVec = DAG.getNode(ISD::OR, dl, WideOpVT, SubVec, Vec); |
5791 | |
5792 | // Reduce to original width if needed. |
5793 | return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, SubVec, ZeroIdx); |
5794 | } |
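// Worked example (hypothetical values): inserting a v4i1 SubVec into a v16i1
// Vec at IdxVal == 4 takes the middle path above: SubVec is shifted left by
// 12 and right by 8, landing in bits 4-7; Low keeps Vec bits 0-3 via the
// shift pair of 12; High keeps Vec bits 8-15 via the shift pair of 8; the
// three pieces are then OR'd together and extracted back to the original
// v16i1 type.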
5795 | |
5796 | static SDValue concatSubVectors(SDValue V1, SDValue V2, SelectionDAG &DAG, |
5797 | const SDLoc &dl) { |
5798 | assert(V1.getValueType() == V2.getValueType() && "subvector type mismatch");
5799 | EVT SubVT = V1.getValueType(); |
5800 | EVT SubSVT = SubVT.getScalarType(); |
5801 | unsigned SubNumElts = SubVT.getVectorNumElements(); |
5802 | unsigned SubVectorWidth = SubVT.getSizeInBits(); |
5803 | EVT VT = EVT::getVectorVT(*DAG.getContext(), SubSVT, 2 * SubNumElts); |
5804 | SDValue V = insertSubVector(DAG.getUNDEF(VT), V1, 0, DAG, dl, SubVectorWidth); |
5805 | return insertSubVector(V, V2, SubNumElts, DAG, dl, SubVectorWidth); |
5806 | } |
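// For illustration: concatenating two v4f32 values produces a v8f32 with V1
// in elements 0-3 and V2 in elements 4-7, built as two insertSubVector calls
// at indices 0 and 4.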
5807 | |
5808 | /// Returns a vector of the specified type with all bits set.
5809 | /// Always build ones vectors as <4 x i32>, <8 x i32> or <16 x i32>. |
5810 | /// Then bitcast to their original type, ensuring they get CSE'd. |
5811 | static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, const SDLoc &dl) { |
5812 | assert((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) &&
5813 | "Expected a 128/256/512-bit vector type");
5814 | |
5815 | APInt Ones = APInt::getAllOnesValue(32); |
5816 | unsigned NumElts = VT.getSizeInBits() / 32; |
5817 | SDValue Vec = DAG.getConstant(Ones, dl, MVT::getVectorVT(MVT::i32, NumElts)); |
5818 | return DAG.getBitcast(VT, Vec); |
5819 | } |
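// For illustration: for VT == MVT::v4i64 this builds an all-ones v8i32
// constant (256 / 32 == 8 lanes) and bitcasts it to v4i64; funnelling every
// request through i32 lanes lets identical all-ones constants CSE to a single
// node.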
5820 | |
5821 | // Convert *_EXTEND to *_EXTEND_VECTOR_INREG opcode. |
5822 | static unsigned getOpcode_EXTEND_VECTOR_INREG(unsigned Opcode) { |
5823 | switch (Opcode) { |
5824 | case ISD::ANY_EXTEND: |
5825 | case ISD::ANY_EXTEND_VECTOR_INREG: |
5826 | return ISD::ANY_EXTEND_VECTOR_INREG; |
5827 | case ISD::ZERO_EXTEND: |
5828 | case ISD::ZERO_EXTEND_VECTOR_INREG: |
5829 | return ISD::ZERO_EXTEND_VECTOR_INREG; |
5830 | case ISD::SIGN_EXTEND: |
5831 | case ISD::SIGN_EXTEND_VECTOR_INREG: |
5832 | return ISD::SIGN_EXTEND_VECTOR_INREG; |
5833 | } |
5834 | llvm_unreachable("Unknown opcode");
5835 | } |
5836 | |
5837 | static SDValue getExtendInVec(unsigned Opcode, const SDLoc &DL, EVT VT, |
5838 | SDValue In, SelectionDAG &DAG) { |
5839 | EVT InVT = In.getValueType(); |
5840 | assert(VT.isVector() && InVT.isVector() && "Expected vector VTs.");
5841 | assert((ISD::ANY_EXTEND == Opcode || ISD::SIGN_EXTEND == Opcode ||
5842 | ISD::ZERO_EXTEND == Opcode) &&
5843 | "Unknown extension opcode");
5844 | |
5845 | // For 256-bit vectors, we only need the lower (128-bit) input half. |
5846 | // For 512-bit vectors, we only need the lower input half or quarter. |
5847 | if (InVT.getSizeInBits() > 128) { |
5848 | assert(VT.getSizeInBits() == InVT.getSizeInBits() &&
5849 | "Expected VTs to be the same size!");