Bug Summary

File: include/llvm/ADT/SmallBitVector.h
Warning: line 121, column 3
Potential memory leak

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name X86ISelLowering.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mthread-model posix -fmath-errno -masm-verbose -mconstructor-aliases -munwind-tables -fuse-init-array -target-cpu x86-64 -dwarf-column-info -debugger-tuning=gdb -momit-leaf-frame-pointer -ffunction-sections -fdata-sections -resource-dir /usr/lib/llvm-8/lib/clang/8.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-8~svn350071/build-llvm/lib/Target/X86 -I /build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86 -I /build/llvm-toolchain-snapshot-8~svn350071/build-llvm/include -I /build/llvm-toolchain-snapshot-8~svn350071/include -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0/backward -internal-isystem /usr/include/clang/8.0.0/include/ -internal-isystem /usr/local/include -internal-isystem /usr/lib/llvm-8/lib/clang/8.0.0/include 
-internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-comment -std=c++11 -fdeprecated-macro -fdebug-compilation-dir /build/llvm-toolchain-snapshot-8~svn350071/build-llvm/lib/Target/X86 -fdebug-prefix-map=/build/llvm-toolchain-snapshot-8~svn350071=. -ferror-limit 19 -fmessage-length 0 -fvisibility-inlines-hidden -stack-protector 2 -fobjc-runtime=gcc -fdiagnostics-show-option -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -o /tmp/scan-build-2018-12-27-042839-1215-1 -x c++ /build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp -faddrsig

/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp

1//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines the interfaces that X86 uses to lower LLVM code into a
11// selection DAG.
12//
13//===----------------------------------------------------------------------===//
14
15#include "X86ISelLowering.h"
16#include "Utils/X86ShuffleDecode.h"
17#include "X86CallingConv.h"
18#include "X86FrameLowering.h"
19#include "X86InstrBuilder.h"
20#include "X86IntrinsicsInfo.h"
21#include "X86MachineFunctionInfo.h"
22#include "X86TargetMachine.h"
23#include "X86TargetObjectFile.h"
24#include "llvm/ADT/SmallBitVector.h"
25#include "llvm/ADT/SmallSet.h"
26#include "llvm/ADT/Statistic.h"
27#include "llvm/ADT/StringExtras.h"
28#include "llvm/ADT/StringSwitch.h"
29#include "llvm/Analysis/EHPersonalities.h"
30#include "llvm/CodeGen/IntrinsicLowering.h"
31#include "llvm/CodeGen/MachineFrameInfo.h"
32#include "llvm/CodeGen/MachineFunction.h"
33#include "llvm/CodeGen/MachineInstrBuilder.h"
34#include "llvm/CodeGen/MachineJumpTableInfo.h"
35#include "llvm/CodeGen/MachineModuleInfo.h"
36#include "llvm/CodeGen/MachineRegisterInfo.h"
37#include "llvm/CodeGen/TargetLowering.h"
38#include "llvm/CodeGen/WinEHFuncInfo.h"
39#include "llvm/IR/CallSite.h"
40#include "llvm/IR/CallingConv.h"
41#include "llvm/IR/Constants.h"
42#include "llvm/IR/DerivedTypes.h"
43#include "llvm/IR/DiagnosticInfo.h"
44#include "llvm/IR/Function.h"
45#include "llvm/IR/GlobalAlias.h"
46#include "llvm/IR/GlobalVariable.h"
47#include "llvm/IR/Instructions.h"
48#include "llvm/IR/Intrinsics.h"
49#include "llvm/MC/MCAsmInfo.h"
50#include "llvm/MC/MCContext.h"
51#include "llvm/MC/MCExpr.h"
52#include "llvm/MC/MCSymbol.h"
53#include "llvm/Support/CommandLine.h"
54#include "llvm/Support/Debug.h"
55#include "llvm/Support/ErrorHandling.h"
56#include "llvm/Support/KnownBits.h"
57#include "llvm/Support/MathExtras.h"
58#include "llvm/Target/TargetOptions.h"
59#include <algorithm>
60#include <bitset>
61#include <cctype>
62#include <numeric>
63using namespace llvm;
64
65#define DEBUG_TYPE "x86-isel"
66
67STATISTIC(NumTailCalls, "Number of tail calls"); // expands to: static llvm::Statistic NumTailCalls = {"x86-isel", "NumTailCalls", "Number of tail calls", {0}, {false}}
68
/// Hidden boolean command-line option "x86-experimental-vector-widening-legalization".
/// Initialized to false; per its description it selects vector type
/// legalization through widening rather than promotion.
69static cl::opt<bool> ExperimentalVectorWideningLegalization(
70 "x86-experimental-vector-widening-legalization", cl::init(false),
71 cl::desc("Enable an experimental vector type legalization through widening "
72 "rather than promotion."),
73 cl::Hidden);
74
/// Hidden integer command-line option "x86-experimental-pref-loop-alignment".
/// Initialized to 4; per its description the value is the number of low bits
/// of the loop header PC that will be zero.
75static cl::opt<int> ExperimentalPrefLoopAlignment(
76 "x86-experimental-pref-loop-alignment", cl::init(4),
77 cl::desc("Sets the preferable loop alignment for experiments "
78 "(the last x86-experimental-pref-loop-alignment bits"
79 " of the loop header PC will be 0)."),
80 cl::Hidden);
81
/// Hidden boolean command-line option "mul-constant-optimization".
/// Initialized to true; per its description it enables replacing
/// 'mul x, Const' with other instructions such as SHIFT and LEA.
82static cl::opt<bool> MulConstantOptimization(
83 "mul-constant-optimization", cl::init(true),
84 cl::desc("Replace 'mul x, Const' with more effective instructions like "
85 "SHIFT, LEA, etc."),
86 cl::Hidden);
87
88/// Call this when the user attempts to do something unsupported, like
89/// returning a double without SSE2 enabled on x86_64. This is not fatal, unlike
90/// report_fatal_error, so calling code should attempt to recover without
91/// crashing.
///
/// \param DAG Supplies both the machine function whose function is named in
///            the diagnostic and the context that receives the diagnostic.
/// \param dl  Its debug location is attached to the diagnostic.
/// \param Msg Message text handed to DiagnosticInfoUnsupported.
92static void errorUnsupported(SelectionDAG &DAG, const SDLoc &dl,
93 const char *Msg) {
94 MachineFunction &MF = DAG.getMachineFunction();
// Emit through the context's diagnose() call; nothing is returned to the
// caller, so the caller decides how to continue.
95 DAG.getContext()->diagnose(
96 DiagnosticInfoUnsupported(MF.getFunction(), Msg, dl.getDebugLoc()));
97}
98
99X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
100 const X86Subtarget &STI)
101 : TargetLowering(TM), Subtarget(STI) {
102 bool UseX87 = !Subtarget.useSoftFloat() && Subtarget.hasX87();
103 X86ScalarSSEf64 = Subtarget.hasSSE2();
104 X86ScalarSSEf32 = Subtarget.hasSSE1();
105 MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0));
106
107 // Set up the TargetLowering object.
108
109 // X86 is weird. It always uses i8 for shift amounts and setcc results.
110 setBooleanContents(ZeroOrOneBooleanContent);
111 // X86-SSE is even stranger. It uses -1 or 0 for vector masks.
112 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
113
114 // For 64-bit, since we have so many registers, use the ILP scheduler.
115 // For 32-bit, use the register pressure specific scheduling.
116 // For Atom, always use ILP scheduling.
117 if (Subtarget.isAtom())
118 setSchedulingPreference(Sched::ILP);
119 else if (Subtarget.is64Bit())
120 setSchedulingPreference(Sched::ILP);
121 else
122 setSchedulingPreference(Sched::RegPressure);
123 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
124 setStackPointerRegisterToSaveRestore(RegInfo->getStackRegister());
125
126 // Bypass expensive divides and use cheaper ones.
127 if (TM.getOptLevel() >= CodeGenOpt::Default) {
128 if (Subtarget.hasSlowDivide32())
129 addBypassSlowDiv(32, 8);
130 if (Subtarget.hasSlowDivide64() && Subtarget.is64Bit())
131 addBypassSlowDiv(64, 32);
132 }
133
134 if (Subtarget.isTargetKnownWindowsMSVC() ||
135 Subtarget.isTargetWindowsItanium()) {
136 // Setup Windows compiler runtime calls.
137 setLibcallName(RTLIB::SDIV_I64, "_alldiv");
138 setLibcallName(RTLIB::UDIV_I64, "_aulldiv");
139 setLibcallName(RTLIB::SREM_I64, "_allrem");
140 setLibcallName(RTLIB::UREM_I64, "_aullrem");
141 setLibcallName(RTLIB::MUL_I64, "_allmul");
142 setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::X86_StdCall);
143 setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::X86_StdCall);
144 setLibcallCallingConv(RTLIB::SREM_I64, CallingConv::X86_StdCall);
145 setLibcallCallingConv(RTLIB::UREM_I64, CallingConv::X86_StdCall);
146 setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::X86_StdCall);
147 }
148
149 if (Subtarget.isTargetDarwin()) {
150 // Darwin should use _setjmp/_longjmp instead of setjmp/longjmp.
151 setUseUnderscoreSetJmp(false);
152 setUseUnderscoreLongJmp(false);
153 } else if (Subtarget.isTargetWindowsGNU()) {
154 // MS runtime is weird: it exports _setjmp, but longjmp!
155 setUseUnderscoreSetJmp(true);
156 setUseUnderscoreLongJmp(false);
157 } else {
158 setUseUnderscoreSetJmp(true);
159 setUseUnderscoreLongJmp(true);
160 }
161
162 // Set up the register classes.
163 addRegisterClass(MVT::i8, &X86::GR8RegClass);
164 addRegisterClass(MVT::i16, &X86::GR16RegClass);
165 addRegisterClass(MVT::i32, &X86::GR32RegClass);
166 if (Subtarget.is64Bit())
167 addRegisterClass(MVT::i64, &X86::GR64RegClass);
168
169 for (MVT VT : MVT::integer_valuetypes())
170 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
171
172 // We don't accept any truncstore of integer registers.
173 setTruncStoreAction(MVT::i64, MVT::i32, Expand);
174 setTruncStoreAction(MVT::i64, MVT::i16, Expand);
175 setTruncStoreAction(MVT::i64, MVT::i8 , Expand);
176 setTruncStoreAction(MVT::i32, MVT::i16, Expand);
177 setTruncStoreAction(MVT::i32, MVT::i8 , Expand);
178 setTruncStoreAction(MVT::i16, MVT::i8, Expand);
179
180 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
181
182 // SETOEQ and SETUNE require checking two conditions.
183 setCondCodeAction(ISD::SETOEQ, MVT::f32, Expand);
184 setCondCodeAction(ISD::SETOEQ, MVT::f64, Expand);
185 setCondCodeAction(ISD::SETOEQ, MVT::f80, Expand);
186 setCondCodeAction(ISD::SETUNE, MVT::f32, Expand);
187 setCondCodeAction(ISD::SETUNE, MVT::f64, Expand);
188 setCondCodeAction(ISD::SETUNE, MVT::f80, Expand);
189
190 // Integer absolute.
191 if (Subtarget.hasCMov()) {
192 setOperationAction(ISD::ABS , MVT::i16 , Custom);
193 setOperationAction(ISD::ABS , MVT::i32 , Custom);
194 if (Subtarget.is64Bit())
195 setOperationAction(ISD::ABS , MVT::i64 , Custom);
196 }
197
198 // Funnel shifts.
199 for (auto ShiftOp : {ISD::FSHL, ISD::FSHR}) {
200 setOperationAction(ShiftOp , MVT::i16 , Custom);
201 setOperationAction(ShiftOp , MVT::i32 , Custom);
202 if (Subtarget.is64Bit())
203 setOperationAction(ShiftOp , MVT::i64 , Custom);
204 }
205
206 // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
207 // operation.
208 setOperationAction(ISD::UINT_TO_FP , MVT::i1 , Promote);
209 setOperationAction(ISD::UINT_TO_FP , MVT::i8 , Promote);
210 setOperationAction(ISD::UINT_TO_FP , MVT::i16 , Promote);
211
212 if (Subtarget.is64Bit()) {
213 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512())
214 // f32/f64 are legal, f80 is custom.
215 setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Custom);
216 else
217 setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote);
218 setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom);
219 } else if (!Subtarget.useSoftFloat()) {
220 // We have an algorithm for SSE2->double, and we turn this into a
221 // 64-bit FILD followed by conditional FADD for other targets.
222 setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom);
223 // We have an algorithm for SSE2, and we turn this into a 64-bit
224 // FILD or VCVTUSI2SS/SD for other targets.
225 setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Custom);
226 } else {
227 setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Expand);
228 }
229
230 // Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
231 // this operation.
232 setOperationAction(ISD::SINT_TO_FP , MVT::i1 , Promote);
233 setOperationAction(ISD::SINT_TO_FP , MVT::i8 , Promote);
234
235 if (!Subtarget.useSoftFloat()) {
236 // SSE has no i16 to fp conversion, only i32.
237 if (X86ScalarSSEf32) {
238 setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote);
239 // f32 and f64 cases are Legal, f80 case is not
240 setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom);
241 } else {
242 setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Custom);
243 setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom);
244 }
245 } else {
246 setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote);
247 setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Expand);
248 }
249
250 // Promote i1/i8 FP_TO_SINT to larger FP_TO_SINTS's, as X86 doesn't have
251 // this operation.
252 setOperationAction(ISD::FP_TO_SINT , MVT::i1 , Promote);
253 setOperationAction(ISD::FP_TO_SINT , MVT::i8 , Promote);
254
255 if (!Subtarget.useSoftFloat()) {
256 // In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64
257 // are Legal, f80 is custom lowered.
258 setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Custom);
259 setOperationAction(ISD::SINT_TO_FP , MVT::i64 , Custom);
260
261 if (X86ScalarSSEf32) {
262 setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Promote);
263 // f32 and f64 cases are Legal, f80 case is not
264 setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom);
265 } else {
266 setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Custom);
267 setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom);
268 }
269 } else {
270 setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Promote);
271 setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Expand);
272 setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Expand);
273 }
274
275 // Handle FP_TO_UINT by promoting the destination to a larger signed
276 // conversion.
277 setOperationAction(ISD::FP_TO_UINT , MVT::i1 , Promote);
278 setOperationAction(ISD::FP_TO_UINT , MVT::i8 , Promote);
279 setOperationAction(ISD::FP_TO_UINT , MVT::i16 , Promote);
280
281 if (Subtarget.is64Bit()) {
282 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
283 // FP_TO_UINT-i32/i64 is legal for f32/f64, but custom for f80.
284 setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Custom);
285 setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Custom);
286 } else {
287 setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote);
288 setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Expand);
289 }
290 } else if (!Subtarget.useSoftFloat()) {
291 // Since AVX is a superset of SSE3, only check for SSE here.
292 if (Subtarget.hasSSE1() && !Subtarget.hasSSE3())
293 // Expand FP_TO_UINT into a select.
294 // FIXME: We would like to use a Custom expander here eventually to do
295 // the optimal thing for SSE vs. the default expansion in the legalizer.
296 setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Expand);
297 else
298 // With AVX512 we can use vcvts[ds]2usi for f32/f64->i32, f80 is custom.
299 // With SSE3 we can use fisttpll to convert to a signed i64; without
300 // SSE, we're stuck with a fistpll.
301 setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Custom);
302
303 setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Custom);
304 }
305
306 // TODO: when we have SSE, these could be more efficient, by using movd/movq.
307 if (!X86ScalarSSEf64) {
308 setOperationAction(ISD::BITCAST , MVT::f32 , Expand);
309 setOperationAction(ISD::BITCAST , MVT::i32 , Expand);
310 if (Subtarget.is64Bit()) {
311 setOperationAction(ISD::BITCAST , MVT::f64 , Expand);
312 // Without SSE, i64->f64 goes through memory.
313 setOperationAction(ISD::BITCAST , MVT::i64 , Expand);
314 }
315 } else if (!Subtarget.is64Bit())
316 setOperationAction(ISD::BITCAST , MVT::i64 , Custom);
317
318 // Scalar integer divide and remainder are lowered to use operations that
319 // produce two results, to match the available instructions. This exposes
320 // the two-result form to trivial CSE, which is able to combine x/y and x%y
321 // into a single instruction.
322 //
323 // Scalar integer multiply-high is also lowered to use two-result
324 // operations, to match the available instructions. However, plain multiply
325 // (low) operations are left as Legal, as there are single-result
326 // instructions for this in x86. Using the two-result multiply instructions
327 // when both high and low results are needed must be arranged by dagcombine.
328 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
329 setOperationAction(ISD::MULHS, VT, Expand);
330 setOperationAction(ISD::MULHU, VT, Expand);
331 setOperationAction(ISD::SDIV, VT, Expand);
332 setOperationAction(ISD::UDIV, VT, Expand);
333 setOperationAction(ISD::SREM, VT, Expand);
334 setOperationAction(ISD::UREM, VT, Expand);
335 }
336
337 setOperationAction(ISD::BR_JT , MVT::Other, Expand);
338 setOperationAction(ISD::BRCOND , MVT::Other, Custom);
339 for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128,
340 MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
341 setOperationAction(ISD::BR_CC, VT, Expand);
342 setOperationAction(ISD::SELECT_CC, VT, Expand);
343 }
344 if (Subtarget.is64Bit())
345 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
346 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16 , Legal);
347 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal);
348 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);
349 setOperationAction(ISD::FP_ROUND_INREG , MVT::f32 , Expand);
350
351 setOperationAction(ISD::FREM , MVT::f32 , Expand);
352 setOperationAction(ISD::FREM , MVT::f64 , Expand);
353 setOperationAction(ISD::FREM , MVT::f80 , Expand);
354 setOperationAction(ISD::FLT_ROUNDS_ , MVT::i32 , Custom);
355
356 // Promote the i8 variants and force them on up to i32 which has a shorter
357 // encoding.
358 setOperationPromotedToType(ISD::CTTZ , MVT::i8 , MVT::i32);
359 setOperationPromotedToType(ISD::CTTZ_ZERO_UNDEF, MVT::i8 , MVT::i32);
360 if (!Subtarget.hasBMI()) {
361 setOperationAction(ISD::CTTZ , MVT::i16 , Custom);
362 setOperationAction(ISD::CTTZ , MVT::i32 , Custom);
363 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16 , Legal);
364 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32 , Legal);
365 if (Subtarget.is64Bit()) {
366 setOperationAction(ISD::CTTZ , MVT::i64 , Custom);
367 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Legal);
368 }
369 }
370
371 if (Subtarget.hasLZCNT()) {
372 // When promoting the i8 variants, force them to i32 for a shorter
373 // encoding.
374 setOperationPromotedToType(ISD::CTLZ , MVT::i8 , MVT::i32);
375 setOperationPromotedToType(ISD::CTLZ_ZERO_UNDEF, MVT::i8 , MVT::i32);
376 } else {
377 setOperationAction(ISD::CTLZ , MVT::i8 , Custom);
378 setOperationAction(ISD::CTLZ , MVT::i16 , Custom);
379 setOperationAction(ISD::CTLZ , MVT::i32 , Custom);
380 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8 , Custom);
381 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16 , Custom);
382 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32 , Custom);
383 if (Subtarget.is64Bit()) {
384 setOperationAction(ISD::CTLZ , MVT::i64 , Custom);
385 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Custom);
386 }
387 }
388
389 // Special handling for half-precision floating point conversions.
390 // If we don't have F16C support, then lower half float conversions
391 // into library calls.
392 if (Subtarget.useSoftFloat() || !Subtarget.hasF16C()) {
393 setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
394 setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
395 }
396
397 // There's never any support for operations beyond MVT::f32.
398 setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
399 setOperationAction(ISD::FP16_TO_FP, MVT::f80, Expand);
400 setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
401 setOperationAction(ISD::FP_TO_FP16, MVT::f80, Expand);
402
403 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
404 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
405 setLoadExtAction(ISD::EXTLOAD, MVT::f80, MVT::f16, Expand);
406 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
407 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
408 setTruncStoreAction(MVT::f80, MVT::f16, Expand);
409
410 if (Subtarget.hasPOPCNT()) {
411 setOperationPromotedToType(ISD::CTPOP, MVT::i8, MVT::i32);
412 } else {
413 setOperationAction(ISD::CTPOP , MVT::i8 , Expand);
414 setOperationAction(ISD::CTPOP , MVT::i16 , Expand);
415 setOperationAction(ISD::CTPOP , MVT::i32 , Expand);
416 if (Subtarget.is64Bit())
417 setOperationAction(ISD::CTPOP , MVT::i64 , Expand);
418 }
419
420 setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom);
421
422 if (!Subtarget.hasMOVBE())
423 setOperationAction(ISD::BSWAP , MVT::i16 , Expand);
424
425 // These should be promoted to a larger select which is supported.
426 setOperationAction(ISD::SELECT , MVT::i1 , Promote);
427 // X86 wants to expand cmov itself.
428 for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128 }) {
429 setOperationAction(ISD::SELECT, VT, Custom);
430 setOperationAction(ISD::SETCC, VT, Custom);
431 }
432 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
433 if (VT == MVT::i64 && !Subtarget.is64Bit())
434 continue;
435 setOperationAction(ISD::SELECT, VT, Custom);
436 setOperationAction(ISD::SETCC, VT, Custom);
437 }
438
439 // Custom action for SELECT MMX and expand action for SELECT_CC MMX
440 setOperationAction(ISD::SELECT, MVT::x86mmx, Custom);
441 setOperationAction(ISD::SELECT_CC, MVT::x86mmx, Expand);
442
443 setOperationAction(ISD::EH_RETURN , MVT::Other, Custom);
444 // NOTE: EH_SJLJ_SETJMP/_LONGJMP are not recommended, since
445 // LLVM/Clang supports zero-cost DWARF and SEH exception handling.
446 setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
447 setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
448 setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom);
449 if (TM.Options.ExceptionModel == ExceptionHandling::SjLj)
450 setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
451
452 // Darwin ABI issue.
453 for (auto VT : { MVT::i32, MVT::i64 }) {
454 if (VT == MVT::i64 && !Subtarget.is64Bit())
455 continue;
456 setOperationAction(ISD::ConstantPool , VT, Custom);
457 setOperationAction(ISD::JumpTable , VT, Custom);
458 setOperationAction(ISD::GlobalAddress , VT, Custom);
459 setOperationAction(ISD::GlobalTLSAddress, VT, Custom);
460 setOperationAction(ISD::ExternalSymbol , VT, Custom);
461 setOperationAction(ISD::BlockAddress , VT, Custom);
462 }
463
464 // 64-bit shl, sra, srl (iff 32-bit x86)
465 for (auto VT : { MVT::i32, MVT::i64 }) {
466 if (VT == MVT::i64 && !Subtarget.is64Bit())
467 continue;
468 setOperationAction(ISD::SHL_PARTS, VT, Custom);
469 setOperationAction(ISD::SRA_PARTS, VT, Custom);
470 setOperationAction(ISD::SRL_PARTS, VT, Custom);
471 }
472
473 if (Subtarget.hasSSEPrefetch() || Subtarget.has3DNow())
474 setOperationAction(ISD::PREFETCH , MVT::Other, Legal);
475
476 setOperationAction(ISD::ATOMIC_FENCE , MVT::Other, Custom);
477
478 // Expand certain atomics
479 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
480 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Custom);
481 setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);
482 setOperationAction(ISD::ATOMIC_LOAD_ADD, VT, Custom);
483 setOperationAction(ISD::ATOMIC_LOAD_OR, VT, Custom);
484 setOperationAction(ISD::ATOMIC_LOAD_XOR, VT, Custom);
485 setOperationAction(ISD::ATOMIC_LOAD_AND, VT, Custom);
486 setOperationAction(ISD::ATOMIC_STORE, VT, Custom);
487 }
488
489 if (Subtarget.hasCmpxchg16b()) {
490 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom);
491 }
492
493 // FIXME - use subtarget debug flags
494 if (!Subtarget.isTargetDarwin() && !Subtarget.isTargetELF() &&
495 !Subtarget.isTargetCygMing() && !Subtarget.isTargetWin64() &&
496 TM.Options.ExceptionModel != ExceptionHandling::SjLj) {
497 setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
498 }
499
500 setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom);
501 setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i64, Custom);
502
503 setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
504 setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
505
506 setOperationAction(ISD::TRAP, MVT::Other, Legal);
507 setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
508
509 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
510 setOperationAction(ISD::VASTART , MVT::Other, Custom);
511 setOperationAction(ISD::VAEND , MVT::Other, Expand);
512 bool Is64Bit = Subtarget.is64Bit();
513 setOperationAction(ISD::VAARG, MVT::Other, Is64Bit ? Custom : Expand);
514 setOperationAction(ISD::VACOPY, MVT::Other, Is64Bit ? Custom : Expand);
515
516 setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
517 setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
518
519 setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom);
520
521 // GC_TRANSITION_START and GC_TRANSITION_END need custom lowering.
522 setOperationAction(ISD::GC_TRANSITION_START, MVT::Other, Custom);
523 setOperationAction(ISD::GC_TRANSITION_END, MVT::Other, Custom);
524
525 if (!Subtarget.useSoftFloat() && X86ScalarSSEf64) {
526 // f32 and f64 use SSE.
527 // Set up the FP register classes.
528 addRegisterClass(MVT::f32, Subtarget.hasAVX512() ? &X86::FR32XRegClass
529 : &X86::FR32RegClass);
530 addRegisterClass(MVT::f64, Subtarget.hasAVX512() ? &X86::FR64XRegClass
531 : &X86::FR64RegClass);
532
533 for (auto VT : { MVT::f32, MVT::f64 }) {
534 // Use ANDPD to simulate FABS.
535 setOperationAction(ISD::FABS, VT, Custom);
536
537 // Use XORP to simulate FNEG.
538 setOperationAction(ISD::FNEG, VT, Custom);
539
540 // Use ANDPD and ORPD to simulate FCOPYSIGN.
541 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
542
543 // We don't support sin/cos/fmod
544 setOperationAction(ISD::FSIN , VT, Expand);
545 setOperationAction(ISD::FCOS , VT, Expand);
546 setOperationAction(ISD::FSINCOS, VT, Expand);
547 }
548
549 // Lower this to MOVMSK plus an AND.
550 setOperationAction(ISD::FGETSIGN, MVT::i64, Custom);
551 setOperationAction(ISD::FGETSIGN, MVT::i32, Custom);
552
553 } else if (!useSoftFloat() && X86ScalarSSEf32 && (UseX87 || Is64Bit)) {
554 // Use SSE for f32, x87 for f64.
555 // Set up the FP register classes.
556 addRegisterClass(MVT::f32, &X86::FR32RegClass);
557 if (UseX87)
558 addRegisterClass(MVT::f64, &X86::RFP64RegClass);
559
560 // Use ANDPS to simulate FABS.
561 setOperationAction(ISD::FABS , MVT::f32, Custom);
562
563 // Use XORP to simulate FNEG.
564 setOperationAction(ISD::FNEG , MVT::f32, Custom);
565
566 if (UseX87)
567 setOperationAction(ISD::UNDEF, MVT::f64, Expand);
568
569 // Use ANDPS and ORPS to simulate FCOPYSIGN.
570 if (UseX87)
571 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
572 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
573
574 // We don't support sin/cos/fmod
575 setOperationAction(ISD::FSIN , MVT::f32, Expand);
576 setOperationAction(ISD::FCOS , MVT::f32, Expand);
577 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
578
579 if (UseX87) {
580 // Always expand sin/cos functions even though x87 has an instruction.
581 setOperationAction(ISD::FSIN, MVT::f64, Expand);
582 setOperationAction(ISD::FCOS, MVT::f64, Expand);
583 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
584 }
585 } else if (UseX87) {
586 // f32 and f64 in x87.
587 // Set up the FP register classes.
588 addRegisterClass(MVT::f64, &X86::RFP64RegClass);
589 addRegisterClass(MVT::f32, &X86::RFP32RegClass);
590
591 for (auto VT : { MVT::f32, MVT::f64 }) {
592 setOperationAction(ISD::UNDEF, VT, Expand);
593 setOperationAction(ISD::FCOPYSIGN, VT, Expand);
594
595 // Always expand sin/cos functions even though x87 has an instruction.
596 setOperationAction(ISD::FSIN , VT, Expand);
597 setOperationAction(ISD::FCOS , VT, Expand);
598 setOperationAction(ISD::FSINCOS, VT, Expand);
599 }
600 }
601
602 // Expand FP32 immediates into loads from the stack, save special cases.
603 if (isTypeLegal(MVT::f32)) {
604 if (UseX87 && (getRegClassFor(MVT::f32) == &X86::RFP32RegClass)) {
605 addLegalFPImmediate(APFloat(+0.0f)); // FLD0
606 addLegalFPImmediate(APFloat(+1.0f)); // FLD1
607 addLegalFPImmediate(APFloat(-0.0f)); // FLD0/FCHS
608 addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS
609 } else // SSE immediates.
610 addLegalFPImmediate(APFloat(+0.0f)); // xorps
611 }
612 // Expand FP64 immediates into loads from the stack, save special cases.
613 if (isTypeLegal(MVT::f64)) {
614 if (UseX87 && getRegClassFor(MVT::f64) == &X86::RFP64RegClass) {
615 addLegalFPImmediate(APFloat(+0.0)); // FLD0
616 addLegalFPImmediate(APFloat(+1.0)); // FLD1
617 addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
618 addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
619 } else // SSE immediates.
620 addLegalFPImmediate(APFloat(+0.0)); // xorpd
621 }
622
623 // We don't support FMA.
624 setOperationAction(ISD::FMA, MVT::f64, Expand);
625 setOperationAction(ISD::FMA, MVT::f32, Expand);
626
627 // Long double always uses X87, except f128 in MMX.
628 if (UseX87) {
629 if (Subtarget.is64Bit() && Subtarget.hasMMX()) {
630 addRegisterClass(MVT::f128, Subtarget.hasVLX() ? &X86::VR128XRegClass
631 : &X86::VR128RegClass);
632 ValueTypeActions.setTypeAction(MVT::f128, TypeSoftenFloat);
633 setOperationAction(ISD::FABS , MVT::f128, Custom);
634 setOperationAction(ISD::FNEG , MVT::f128, Custom);
635 setOperationAction(ISD::FCOPYSIGN, MVT::f128, Custom);
636 }
637
638 addRegisterClass(MVT::f80, &X86::RFP80RegClass);
639 setOperationAction(ISD::UNDEF, MVT::f80, Expand);
640 setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand);
641 {
642 APFloat TmpFlt = APFloat::getZero(APFloat::x87DoubleExtended());
643 addLegalFPImmediate(TmpFlt); // FLD0
644 TmpFlt.changeSign();
645 addLegalFPImmediate(TmpFlt); // FLD0/FCHS
646
647 bool ignored;
648 APFloat TmpFlt2(+1.0);
649 TmpFlt2.convert(APFloat::x87DoubleExtended(), APFloat::rmNearestTiesToEven,
650 &ignored);
651 addLegalFPImmediate(TmpFlt2); // FLD1
652 TmpFlt2.changeSign();
653 addLegalFPImmediate(TmpFlt2); // FLD1/FCHS
654 }
655
656 // Always expand sin/cos functions even though x87 has an instruction.
657 setOperationAction(ISD::FSIN , MVT::f80, Expand);
658 setOperationAction(ISD::FCOS , MVT::f80, Expand);
659 setOperationAction(ISD::FSINCOS, MVT::f80, Expand);
660
661 setOperationAction(ISD::FFLOOR, MVT::f80, Expand);
662 setOperationAction(ISD::FCEIL, MVT::f80, Expand);
663 setOperationAction(ISD::FTRUNC, MVT::f80, Expand);
664 setOperationAction(ISD::FRINT, MVT::f80, Expand);
665 setOperationAction(ISD::FNEARBYINT, MVT::f80, Expand);
666 setOperationAction(ISD::FMA, MVT::f80, Expand);
667 }
668
669 // Always use a library call for pow.
670 setOperationAction(ISD::FPOW , MVT::f32 , Expand);
671 setOperationAction(ISD::FPOW , MVT::f64 , Expand);
672 setOperationAction(ISD::FPOW , MVT::f80 , Expand);
673
674 setOperationAction(ISD::FLOG, MVT::f80, Expand);
675 setOperationAction(ISD::FLOG2, MVT::f80, Expand);
676 setOperationAction(ISD::FLOG10, MVT::f80, Expand);
677 setOperationAction(ISD::FEXP, MVT::f80, Expand);
678 setOperationAction(ISD::FEXP2, MVT::f80, Expand);
679 setOperationAction(ISD::FMINNUM, MVT::f80, Expand);
680 setOperationAction(ISD::FMAXNUM, MVT::f80, Expand);
681
682 // Some FP actions are always expanded for vector types.
683 for (auto VT : { MVT::v4f32, MVT::v8f32, MVT::v16f32,
684 MVT::v2f64, MVT::v4f64, MVT::v8f64 }) {
685 setOperationAction(ISD::FSIN, VT, Expand);
686 setOperationAction(ISD::FSINCOS, VT, Expand);
687 setOperationAction(ISD::FCOS, VT, Expand);
688 setOperationAction(ISD::FREM, VT, Expand);
689 setOperationAction(ISD::FCOPYSIGN, VT, Expand);
690 setOperationAction(ISD::FPOW, VT, Expand);
691 setOperationAction(ISD::FLOG, VT, Expand);
692 setOperationAction(ISD::FLOG2, VT, Expand);
693 setOperationAction(ISD::FLOG10, VT, Expand);
694 setOperationAction(ISD::FEXP, VT, Expand);
695 setOperationAction(ISD::FEXP2, VT, Expand);
696 }
697
698 // First set operation action for all vector types to either promote
699 // (for widening) or expand (for scalarization). Then we will selectively
700 // turn on ones that can be effectively codegen'd.
701 for (MVT VT : MVT::vector_valuetypes()) {
702 setOperationAction(ISD::SDIV, VT, Expand);
703 setOperationAction(ISD::UDIV, VT, Expand);
704 setOperationAction(ISD::SREM, VT, Expand);
705 setOperationAction(ISD::UREM, VT, Expand);
706 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT,Expand);
707 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
708 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT,Expand);
709 setOperationAction(ISD::INSERT_SUBVECTOR, VT,Expand);
710 setOperationAction(ISD::FMA, VT, Expand);
711 setOperationAction(ISD::FFLOOR, VT, Expand);
712 setOperationAction(ISD::FCEIL, VT, Expand);
713 setOperationAction(ISD::FTRUNC, VT, Expand);
714 setOperationAction(ISD::FRINT, VT, Expand);
715 setOperationAction(ISD::FNEARBYINT, VT, Expand);
716 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
717 setOperationAction(ISD::MULHS, VT, Expand);
718 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
719 setOperationAction(ISD::MULHU, VT, Expand);
720 setOperationAction(ISD::SDIVREM, VT, Expand);
721 setOperationAction(ISD::UDIVREM, VT, Expand);
722 setOperationAction(ISD::CTPOP, VT, Expand);
723 setOperationAction(ISD::CTTZ, VT, Expand);
724 setOperationAction(ISD::CTLZ, VT, Expand);
725 setOperationAction(ISD::ROTL, VT, Expand);
726 setOperationAction(ISD::ROTR, VT, Expand);
727 setOperationAction(ISD::BSWAP, VT, Expand);
728 setOperationAction(ISD::SETCC, VT, Expand);
729 setOperationAction(ISD::FP_TO_UINT, VT, Expand);
730 setOperationAction(ISD::FP_TO_SINT, VT, Expand);
731 setOperationAction(ISD::UINT_TO_FP, VT, Expand);
732 setOperationAction(ISD::SINT_TO_FP, VT, Expand);
733 setOperationAction(ISD::SIGN_EXTEND_INREG, VT,Expand);
734 setOperationAction(ISD::TRUNCATE, VT, Expand);
735 setOperationAction(ISD::SIGN_EXTEND, VT, Expand);
736 setOperationAction(ISD::ZERO_EXTEND, VT, Expand);
737 setOperationAction(ISD::ANY_EXTEND, VT, Expand);
738 setOperationAction(ISD::SELECT_CC, VT, Expand);
739 for (MVT InnerVT : MVT::vector_valuetypes()) {
740 setTruncStoreAction(InnerVT, VT, Expand);
741
742 setLoadExtAction(ISD::SEXTLOAD, InnerVT, VT, Expand);
743 setLoadExtAction(ISD::ZEXTLOAD, InnerVT, VT, Expand);
744
745 // N.b. ISD::EXTLOAD legality is basically ignored except for i1-like
746 // types, we have to deal with them whether we ask for Expansion or not.
747 // Setting Expand causes its own optimisation problems though, so leave
748 // them legal.
749 if (VT.getVectorElementType() == MVT::i1)
750 setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
751
752 // EXTLOAD for MVT::f16 vectors is not legal because f16 vectors are
753 // split/scalarized right now.
754 if (VT.getVectorElementType() == MVT::f16)
755 setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
756 }
757 }
758
759 // FIXME: In order to prevent SSE instructions being expanded to MMX ones
760 // with -msoft-float, disable use of MMX as well.
761 if (!Subtarget.useSoftFloat() && Subtarget.hasMMX()) {
762 addRegisterClass(MVT::x86mmx, &X86::VR64RegClass);
763 // No operations on x86mmx supported, everything uses intrinsics.
764 }
765
766 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE1()) {
767 addRegisterClass(MVT::v4f32, Subtarget.hasVLX() ? &X86::VR128XRegClass
768 : &X86::VR128RegClass);
769
770 setOperationAction(ISD::FNEG, MVT::v4f32, Custom);
771 setOperationAction(ISD::FABS, MVT::v4f32, Custom);
772 setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Custom);
773 setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
774 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
775 setOperationAction(ISD::VSELECT, MVT::v4f32, Custom);
776 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
777 setOperationAction(ISD::SELECT, MVT::v4f32, Custom);
778 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom);
779 }
780
781 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) {
782 addRegisterClass(MVT::v2f64, Subtarget.hasVLX() ? &X86::VR128XRegClass
783 : &X86::VR128RegClass);
784
785 // FIXME: Unfortunately, -soft-float and -no-implicit-float mean XMM
786 // registers cannot be used even for integer operations.
787 addRegisterClass(MVT::v16i8, Subtarget.hasVLX() ? &X86::VR128XRegClass
788 : &X86::VR128RegClass);
789 addRegisterClass(MVT::v8i16, Subtarget.hasVLX() ? &X86::VR128XRegClass
790 : &X86::VR128RegClass);
791 addRegisterClass(MVT::v4i32, Subtarget.hasVLX() ? &X86::VR128XRegClass
792 : &X86::VR128RegClass);
793 addRegisterClass(MVT::v2i64, Subtarget.hasVLX() ? &X86::VR128XRegClass
794 : &X86::VR128RegClass);
795
796 for (auto VT : { MVT::v2i8, MVT::v4i8, MVT::v8i8,
797 MVT::v2i16, MVT::v4i16, MVT::v2i32 }) {
798 setOperationAction(ISD::SDIV, VT, Custom);
799 setOperationAction(ISD::SREM, VT, Custom);
800 setOperationAction(ISD::UDIV, VT, Custom);
801 setOperationAction(ISD::UREM, VT, Custom);
802 }
803
804 setOperationAction(ISD::MUL, MVT::v2i8, Custom);
805 setOperationAction(ISD::MUL, MVT::v2i16, Custom);
806 setOperationAction(ISD::MUL, MVT::v2i32, Custom);
807 setOperationAction(ISD::MUL, MVT::v4i8, Custom);
808 setOperationAction(ISD::MUL, MVT::v4i16, Custom);
809 setOperationAction(ISD::MUL, MVT::v8i8, Custom);
810
811 setOperationAction(ISD::MUL, MVT::v16i8, Custom);
812 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
813 setOperationAction(ISD::MUL, MVT::v2i64, Custom);
814 setOperationAction(ISD::MULHU, MVT::v4i32, Custom);
815 setOperationAction(ISD::MULHS, MVT::v4i32, Custom);
816 setOperationAction(ISD::MULHU, MVT::v16i8, Custom);
817 setOperationAction(ISD::MULHS, MVT::v16i8, Custom);
818 setOperationAction(ISD::MULHU, MVT::v8i16, Legal);
819 setOperationAction(ISD::MULHS, MVT::v8i16, Legal);
820 setOperationAction(ISD::MUL, MVT::v8i16, Legal);
821 setOperationAction(ISD::FNEG, MVT::v2f64, Custom);
822 setOperationAction(ISD::FABS, MVT::v2f64, Custom);
823 setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Custom);
824
825 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
826 setOperationAction(ISD::SMAX, VT, VT == MVT::v8i16 ? Legal : Custom);
827 setOperationAction(ISD::SMIN, VT, VT == MVT::v8i16 ? Legal : Custom);
828 setOperationAction(ISD::UMAX, VT, VT == MVT::v16i8 ? Legal : Custom);
829 setOperationAction(ISD::UMIN, VT, VT == MVT::v16i8 ? Legal : Custom);
830 }
831
832 setOperationAction(ISD::UADDSAT, MVT::v16i8, Legal);
833 setOperationAction(ISD::SADDSAT, MVT::v16i8, Legal);
834 setOperationAction(ISD::USUBSAT, MVT::v16i8, Legal);
835 setOperationAction(ISD::SSUBSAT, MVT::v16i8, Legal);
836 setOperationAction(ISD::UADDSAT, MVT::v8i16, Legal);
837 setOperationAction(ISD::SADDSAT, MVT::v8i16, Legal);
838 setOperationAction(ISD::USUBSAT, MVT::v8i16, Legal);
839 setOperationAction(ISD::SSUBSAT, MVT::v8i16, Legal);
840
841 if (!ExperimentalVectorWideningLegalization) {
842 // Use widening instead of promotion.
843 for (auto VT : { MVT::v8i8, MVT::v4i8, MVT::v2i8,
844 MVT::v4i16, MVT::v2i16 }) {
845 setOperationAction(ISD::UADDSAT, VT, Custom);
846 setOperationAction(ISD::SADDSAT, VT, Custom);
847 setOperationAction(ISD::USUBSAT, VT, Custom);
848 setOperationAction(ISD::SSUBSAT, VT, Custom);
849 }
850 }
851
852 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
853 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
854 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
855
856 // Provide custom widening for v2f32 setcc. This is really for VLX when
857 // setcc result type returns v2i1/v4i1 vector for v2f32/v4f32 leading to
858 // type legalization changing the result type to v4i1 during widening.
859 // It works fine for SSE2 and is probably faster so no need to qualify with
860 // VLX support.
861 setOperationAction(ISD::SETCC, MVT::v2i32, Custom);
862
863 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
864 setOperationAction(ISD::SETCC, VT, Custom);
865 setOperationAction(ISD::CTPOP, VT, Custom);
866
867 // The condition codes aren't legal in SSE/AVX and under AVX512 we use
868 // setcc all the way to isel and prefer SETGT in some isel patterns.
869 setCondCodeAction(ISD::SETLT, VT, Custom);
870 setCondCodeAction(ISD::SETLE, VT, Custom);
871 }
872
873 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
874 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
875 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
876 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
877 setOperationAction(ISD::VSELECT, VT, Custom);
878 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
879 }
880
881 // We support custom legalizing of sext and anyext loads for specific
882 // memory vector types which we can load as a scalar (or sequence of
883 // scalars) and extend in-register to a legal 128-bit vector type. For sext
884 // loads these must work with a single scalar load.
885 for (MVT VT : MVT::integer_vector_valuetypes()) {
886 if (!ExperimentalVectorWideningLegalization) {
887 // We don't want narrow result types here when widening.
888 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v4i8, Custom);
889 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v4i16, Custom);
890 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v8i8, Custom);
891 }
892 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i8, Custom);
893 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i16, Custom);
894 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i32, Custom);
895 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i8, Custom);
896 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i16, Custom);
897 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v8i8, Custom);
898 }
899
900 if (ExperimentalVectorWideningLegalization &&
901 !Subtarget.hasSSE41() && Subtarget.is64Bit()) {
902 // This lets DAG combine create sextloads that get split and scalarized.
903 // TODO: Does this make sense? What about v2i8->v2i64?
904 setLoadExtAction(ISD::SEXTLOAD, MVT::v4i64, MVT::v4i8, Custom);
905 setLoadExtAction(ISD::SEXTLOAD, MVT::v8i64, MVT::v8i8, Custom);
906 }
907
908 for (auto VT : { MVT::v2f64, MVT::v2i64 }) {
909 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
910 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
911 setOperationAction(ISD::VSELECT, VT, Custom);
912
913 if (VT == MVT::v2i64 && !Subtarget.is64Bit())
914 continue;
915
916 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
917 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
918 }
919
920 // Custom lower v2i64 and v2f64 selects.
921 setOperationAction(ISD::SELECT, MVT::v2f64, Custom);
922 setOperationAction(ISD::SELECT, MVT::v2i64, Custom);
923 setOperationAction(ISD::SELECT, MVT::v4i32, Custom);
924 setOperationAction(ISD::SELECT, MVT::v8i16, Custom);
925 setOperationAction(ISD::SELECT, MVT::v16i8, Custom);
926
927 setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
928 setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Custom);
929 setOperationAction(ISD::FP_TO_SINT, MVT::v2i16, Custom);
930
931 // Custom legalize these to avoid over promotion or custom promotion.
932 setOperationAction(ISD::FP_TO_SINT, MVT::v2i8, Custom);
933 setOperationAction(ISD::FP_TO_SINT, MVT::v4i8, Custom);
934 setOperationAction(ISD::FP_TO_SINT, MVT::v8i8, Custom);
935 setOperationAction(ISD::FP_TO_SINT, MVT::v2i16, Custom);
936 setOperationAction(ISD::FP_TO_SINT, MVT::v4i16, Custom);
937 setOperationAction(ISD::FP_TO_UINT, MVT::v2i8, Custom);
938 setOperationAction(ISD::FP_TO_UINT, MVT::v4i8, Custom);
939 setOperationAction(ISD::FP_TO_UINT, MVT::v8i8, Custom);
940 setOperationAction(ISD::FP_TO_UINT, MVT::v2i16, Custom);
941 setOperationAction(ISD::FP_TO_UINT, MVT::v4i16, Custom);
942
943 // Marking FP_TO_SINT v8i16 as Custom will trick type legalization into
944 // promoting v8i8 FP_TO_UINT into FP_TO_SINT. When the v8i16 FP_TO_SINT is
945 // split again based on the input type, this will cause an AssertSExt i16 to
946 // be emitted instead of an AssertZExt. This will allow packssdw followed by
947 // packuswb to be used to truncate to v8i8. This is necessary since packusdw
948 // isn't available until sse4.1.
949 setOperationAction(ISD::FP_TO_SINT, MVT::v8i16, Custom);
950
951 setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
952 setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
953
954 setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom);
955
956 // Fast v2f32 UINT_TO_FP( v2i32 ) custom conversion.
957 setOperationAction(ISD::UINT_TO_FP, MVT::v2f32, Custom);
958
959 setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
960 setOperationAction(ISD::FP_ROUND, MVT::v2f32, Custom);
961
962 for (MVT VT : MVT::fp_vector_valuetypes())
963 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2f32, Legal);
964
965 // We want to legalize this to an f64 load rather than an i64 load on
966 // 64-bit targets and two 32-bit loads on a 32-bit target. Similar for
967 // store.
968 setOperationAction(ISD::LOAD, MVT::v2f32, Custom);
969 setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
970 setOperationAction(ISD::LOAD, MVT::v4i16, Custom);
971 setOperationAction(ISD::LOAD, MVT::v8i8, Custom);
972 setOperationAction(ISD::STORE, MVT::v2f32, Custom);
973 setOperationAction(ISD::STORE, MVT::v2i32, Custom);
974 setOperationAction(ISD::STORE, MVT::v4i16, Custom);
975 setOperationAction(ISD::STORE, MVT::v8i8, Custom);
976
977 setOperationAction(ISD::BITCAST, MVT::v2i32, Custom);
978 setOperationAction(ISD::BITCAST, MVT::v4i16, Custom);
979 setOperationAction(ISD::BITCAST, MVT::v8i8, Custom);
980 if (!Subtarget.hasAVX512())
981 setOperationAction(ISD::BITCAST, MVT::v16i1, Custom);
982
983 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v2i64, Custom);
984 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i32, Custom);
985 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i16, Custom);
986
987 if (ExperimentalVectorWideningLegalization) {
988 setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom);
989
990 setOperationAction(ISD::TRUNCATE, MVT::v2i8, Custom);
991 setOperationAction(ISD::TRUNCATE, MVT::v2i16, Custom);
992 setOperationAction(ISD::TRUNCATE, MVT::v2i32, Custom);
993 setOperationAction(ISD::TRUNCATE, MVT::v4i8, Custom);
994 setOperationAction(ISD::TRUNCATE, MVT::v4i16, Custom);
995 setOperationAction(ISD::TRUNCATE, MVT::v8i8, Custom);
996 }
997
998 // In the customized shift lowering, the legal v4i32/v2i64 cases
999 // in AVX2 will be recognized.
1000 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1001 setOperationAction(ISD::SRL, VT, Custom);
1002 setOperationAction(ISD::SHL, VT, Custom);
1003 setOperationAction(ISD::SRA, VT, Custom);
1004 }
1005
1006 setOperationAction(ISD::ROTL, MVT::v4i32, Custom);
1007 setOperationAction(ISD::ROTL, MVT::v8i16, Custom);
1008
1009 // With AVX512, expanding (and promoting the shifts) is better.
1010 if (!Subtarget.hasAVX512())
1011 setOperationAction(ISD::ROTL, MVT::v16i8, Custom);
1012 }
1013
1014 if (!Subtarget.useSoftFloat() && Subtarget.hasSSSE3()) {
1015 setOperationAction(ISD::ABS, MVT::v16i8, Legal);
1016 setOperationAction(ISD::ABS, MVT::v8i16, Legal);
1017 setOperationAction(ISD::ABS, MVT::v4i32, Legal);
1018 setOperationAction(ISD::BITREVERSE, MVT::v16i8, Custom);
1019 setOperationAction(ISD::CTLZ, MVT::v16i8, Custom);
1020 setOperationAction(ISD::CTLZ, MVT::v8i16, Custom);
1021 setOperationAction(ISD::CTLZ, MVT::v4i32, Custom);
1022 setOperationAction(ISD::CTLZ, MVT::v2i64, Custom);
1023 }
1024
1025 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE41()) {
1026 for (MVT RoundedTy : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64}) {
1027 setOperationAction(ISD::FFLOOR, RoundedTy, Legal);
1028 setOperationAction(ISD::FCEIL, RoundedTy, Legal);
1029 setOperationAction(ISD::FTRUNC, RoundedTy, Legal);
1030 setOperationAction(ISD::FRINT, RoundedTy, Legal);
1031 setOperationAction(ISD::FNEARBYINT, RoundedTy, Legal);
1032 }
1033
1034 setOperationAction(ISD::SMAX, MVT::v16i8, Legal);
1035 setOperationAction(ISD::SMAX, MVT::v4i32, Legal);
1036 setOperationAction(ISD::UMAX, MVT::v8i16, Legal);
1037 setOperationAction(ISD::UMAX, MVT::v4i32, Legal);
1038 setOperationAction(ISD::SMIN, MVT::v16i8, Legal);
1039 setOperationAction(ISD::SMIN, MVT::v4i32, Legal);
1040 setOperationAction(ISD::UMIN, MVT::v8i16, Legal);
1041 setOperationAction(ISD::UMIN, MVT::v4i32, Legal);
1042
1043 // FIXME: Do we need to handle scalar-to-vector here?
1044 setOperationAction(ISD::MUL, MVT::v4i32, Legal);
1045
1046 // We directly match byte blends in the backend as they match the VSELECT
1047 // condition form.
1048 setOperationAction(ISD::VSELECT, MVT::v16i8, Legal);
1049
1050 // SSE41 brings specific instructions for doing vector sign extend even in
1051 // cases where we don't have SRA.
1052 for (auto VT : { MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1053 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Legal);
1054 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Legal);
1055 }
1056
1057 if (!ExperimentalVectorWideningLegalization) {
1058 // Avoid narrow result types when widening. The legal types are listed
1059 // in the next loop.
1060 for (MVT VT : MVT::integer_vector_valuetypes()) {
1061 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i8, Custom);
1062 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i16, Custom);
1063 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i32, Custom);
1064 }
1065 }
1066
1067 // SSE41 also has vector sign/zero extending loads, PMOV[SZ]X
1068 for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) {
1069 setLoadExtAction(LoadExtOp, MVT::v8i16, MVT::v8i8, Legal);
1070 setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i8, Legal);
1071 if (!ExperimentalVectorWideningLegalization)
1072 setLoadExtAction(LoadExtOp, MVT::v2i32, MVT::v2i8, Legal);
1073 setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i8, Legal);
1074 setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i16, Legal);
1075 setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i16, Legal);
1076 setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i32, Legal);
1077 }
1078
1079 // i8 vectors are custom because the source register and the source
1080 // memory operand types are not the same width.
1081 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
1082 }
1083
1084 if (!Subtarget.useSoftFloat() && Subtarget.hasXOP()) {
1085 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
1086 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 })
1087 setOperationAction(ISD::ROTL, VT, Custom);
1088
1089 // XOP can efficiently perform BITREVERSE with VPPERM.
1090 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 })
1091 setOperationAction(ISD::BITREVERSE, VT, Custom);
1092
1093 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
1094 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 })
1095 setOperationAction(ISD::BITREVERSE, VT, Custom);
1096 }
1097
1098 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX()) {
1099 bool HasInt256 = Subtarget.hasInt256();
1100
1101 addRegisterClass(MVT::v32i8, Subtarget.hasVLX() ? &X86::VR256XRegClass
1102 : &X86::VR256RegClass);
1103 addRegisterClass(MVT::v16i16, Subtarget.hasVLX() ? &X86::VR256XRegClass
1104 : &X86::VR256RegClass);
1105 addRegisterClass(MVT::v8i32, Subtarget.hasVLX() ? &X86::VR256XRegClass
1106 : &X86::VR256RegClass);
1107 addRegisterClass(MVT::v8f32, Subtarget.hasVLX() ? &X86::VR256XRegClass
1108 : &X86::VR256RegClass);
1109 addRegisterClass(MVT::v4i64, Subtarget.hasVLX() ? &X86::VR256XRegClass
1110 : &X86::VR256RegClass);
1111 addRegisterClass(MVT::v4f64, Subtarget.hasVLX() ? &X86::VR256XRegClass
1112 : &X86::VR256RegClass);
1113
1114 for (auto VT : { MVT::v8f32, MVT::v4f64 }) {
1115 setOperationAction(ISD::FFLOOR, VT, Legal);
1116 setOperationAction(ISD::FCEIL, VT, Legal);
1117 setOperationAction(ISD::FTRUNC, VT, Legal);
1118 setOperationAction(ISD::FRINT, VT, Legal);
1119 setOperationAction(ISD::FNEARBYINT, VT, Legal);
1120 setOperationAction(ISD::FNEG, VT, Custom);
1121 setOperationAction(ISD::FABS, VT, Custom);
1122 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
1123 }
1124
1125 // (fp_to_int:v8i16 (v8f32 ..)) requires the result type to be promoted
1126 // even though v8i16 is a legal type.
1127 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i16, MVT::v8i32);
1128 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i16, MVT::v8i32);
1129 setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal);
1130
1131 setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
1132 setOperationAction(ISD::FP_ROUND, MVT::v4f32, Legal);
1133
1134 if (!Subtarget.hasAVX512())
1135 setOperationAction(ISD::BITCAST, MVT::v32i1, Custom);
1136
1137 for (MVT VT : MVT::fp_vector_valuetypes())
1138 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4f32, Legal);
1139
1140 // In the customized shift lowering, the legal v8i32/v4i64 cases
1141 // in AVX2 will be recognized.
1142 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1143 setOperationAction(ISD::SRL, VT, Custom);
1144 setOperationAction(ISD::SHL, VT, Custom);
1145 setOperationAction(ISD::SRA, VT, Custom);
1146 }
1147
1148 if (ExperimentalVectorWideningLegalization) {
1149 // These types need custom splitting if their input is a 128-bit vector.
1150 setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
1151 setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
1152 setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom);
1153 setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
1154 }
1155
1156 setOperationAction(ISD::ROTL, MVT::v8i32, Custom);
1157 setOperationAction(ISD::ROTL, MVT::v16i16, Custom);
1158
1159 // With BWI, expanding (and promoting the shifts) is better.
1160 if (!Subtarget.hasBWI())
1161 setOperationAction(ISD::ROTL, MVT::v32i8, Custom);
1162
1163 setOperationAction(ISD::SELECT, MVT::v4f64, Custom);
1164 setOperationAction(ISD::SELECT, MVT::v4i64, Custom);
1165 setOperationAction(ISD::SELECT, MVT::v8i32, Custom);
1166 setOperationAction(ISD::SELECT, MVT::v16i16, Custom);
1167 setOperationAction(ISD::SELECT, MVT::v32i8, Custom);
1168 setOperationAction(ISD::SELECT, MVT::v8f32, Custom);
1169
1170 for (auto VT : { MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1171 setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
1172 setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
1173 setOperationAction(ISD::ANY_EXTEND, VT, Custom);
1174 }
1175
1176 setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom);
1177 setOperationAction(ISD::TRUNCATE, MVT::v8i16, Custom);
1178 setOperationAction(ISD::TRUNCATE, MVT::v4i32, Custom);
1179 setOperationAction(ISD::BITREVERSE, MVT::v32i8, Custom);
1180
1181 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1182 setOperationAction(ISD::SETCC, VT, Custom);
1183 setOperationAction(ISD::CTPOP, VT, Custom);
1184 setOperationAction(ISD::CTLZ, VT, Custom);
1185
1186 // TODO - remove this once 256-bit X86ISD::ANDNP is correctly split.
1187 setOperationAction(ISD::CTTZ, VT, HasInt256 ? Expand : Custom);
1188
1189 // The condition codes aren't legal in SSE/AVX and under AVX512 we use
1190 // setcc all the way to isel and prefer SETGT in some isel patterns.
1191 setCondCodeAction(ISD::SETLT, VT, Custom);
1192 setCondCodeAction(ISD::SETLE, VT, Custom);
1193 }
1194
1195 if (Subtarget.hasAnyFMA()) {
1196 for (auto VT : { MVT::f32, MVT::f64, MVT::v4f32, MVT::v8f32,
1197 MVT::v2f64, MVT::v4f64 })
1198 setOperationAction(ISD::FMA, VT, Legal);
1199 }
1200
1201 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1202 setOperationAction(ISD::ADD, VT, HasInt256 ? Legal : Custom);
1203 setOperationAction(ISD::SUB, VT, HasInt256 ? Legal : Custom);
1204 }
1205
1206 setOperationAction(ISD::MUL, MVT::v4i64, Custom);
1207 setOperationAction(ISD::MUL, MVT::v8i32, HasInt256 ? Legal : Custom);
1208 setOperationAction(ISD::MUL, MVT::v16i16, HasInt256 ? Legal : Custom);
1209 setOperationAction(ISD::MUL, MVT::v32i8, Custom);
1210
1211 setOperationAction(ISD::MULHU, MVT::v8i32, Custom);
1212 setOperationAction(ISD::MULHS, MVT::v8i32, Custom);
1213 setOperationAction(ISD::MULHU, MVT::v16i16, HasInt256 ? Legal : Custom);
1214 setOperationAction(ISD::MULHS, MVT::v16i16, HasInt256 ? Legal : Custom);
1215 setOperationAction(ISD::MULHU, MVT::v32i8, Custom);
1216 setOperationAction(ISD::MULHS, MVT::v32i8, Custom);
1217
1218 setOperationAction(ISD::SMAX, MVT::v4i64, Custom);
1219 setOperationAction(ISD::UMAX, MVT::v4i64, Custom);
1220 setOperationAction(ISD::SMIN, MVT::v4i64, Custom);
1221 setOperationAction(ISD::UMIN, MVT::v4i64, Custom);
1222
1223 setOperationAction(ISD::UADDSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
1224 setOperationAction(ISD::SADDSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
1225 setOperationAction(ISD::USUBSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
1226 setOperationAction(ISD::SSUBSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
1227 setOperationAction(ISD::UADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
1228 setOperationAction(ISD::SADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
1229 setOperationAction(ISD::USUBSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
1230 setOperationAction(ISD::SSUBSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
1231
1232 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32 }) {
1233 setOperationAction(ISD::ABS, VT, HasInt256 ? Legal : Custom);
1234 setOperationAction(ISD::SMAX, VT, HasInt256 ? Legal : Custom);
1235 setOperationAction(ISD::UMAX, VT, HasInt256 ? Legal : Custom);
1236 setOperationAction(ISD::SMIN, VT, HasInt256 ? Legal : Custom);
1237 setOperationAction(ISD::UMIN, VT, HasInt256 ? Legal : Custom);
1238 }
1239
1240 for (auto VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64}) {
1241 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom);
1242 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom);
1243 }
1244
1245 if (HasInt256) {
1246 // The custom lowering for UINT_TO_FP for v8i32 becomes interesting
1247 // when we have a 256bit-wide blend with immediate.
1248 setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Custom);
1249
1250 // AVX2 also has wider vector sign/zero extending loads, VPMOV[SZ]X
1251 for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) {
1252 setLoadExtAction(LoadExtOp, MVT::v16i16, MVT::v16i8, Legal);
1253 setLoadExtAction(LoadExtOp, MVT::v8i32, MVT::v8i8, Legal);
1254 setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i8, Legal);
1255 setLoadExtAction(LoadExtOp, MVT::v8i32, MVT::v8i16, Legal);
1256 setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i16, Legal);
1257 setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i32, Legal);
1258 }
1259 }
1260
1261 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1262 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 }) {
1263 setOperationAction(ISD::MLOAD, VT, Legal);
1264 setOperationAction(ISD::MSTORE, VT, Legal);
1265 }
1266
1267 // Extract subvector is special because the value type
1268 // (result) is 128-bit but the source is 256-bit wide.
1269 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
1270 MVT::v4f32, MVT::v2f64 }) {
1271 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
1272 }
1273
1274 // Custom lower several nodes for 256-bit types.
1275 for (MVT VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,
1276 MVT::v8f32, MVT::v4f64 }) {
1277 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1278 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1279 setOperationAction(ISD::VSELECT, VT, Custom);
1280 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1281 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1282 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
1283 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal);
1284 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1285 }
1286
1287 if (HasInt256)
1288 setOperationAction(ISD::VSELECT, MVT::v32i8, Legal);
1289
1290 if (HasInt256) {
1291 // Custom legalize 2x32 to get a little better code.
1292 setOperationAction(ISD::MGATHER, MVT::v2f32, Custom);
1293 setOperationAction(ISD::MGATHER, MVT::v2i32, Custom);
1294
1295 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1296 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 })
1297 setOperationAction(ISD::MGATHER, VT, Custom);
1298 }
1299 }
1300
1301 // This block controls legalization of the mask vector sizes that are
1302 // available with AVX512. 512-bit vectors are in a separate block controlled
1303 // by useAVX512Regs.
1304 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
1305 addRegisterClass(MVT::v1i1, &X86::VK1RegClass);
1306 addRegisterClass(MVT::v2i1, &X86::VK2RegClass);
1307 addRegisterClass(MVT::v4i1, &X86::VK4RegClass);
1308 addRegisterClass(MVT::v8i1, &X86::VK8RegClass);
1309 addRegisterClass(MVT::v16i1, &X86::VK16RegClass);
1310
1311 setOperationAction(ISD::SELECT, MVT::v1i1, Custom);
1312 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v1i1, Custom);
1313 setOperationAction(ISD::BUILD_VECTOR, MVT::v1i1, Custom);
1314
1315 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i1, MVT::v8i32);
1316 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i1, MVT::v8i32);
1317 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v4i1, MVT::v4i32);
1318 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v4i1, MVT::v4i32);
1319 setOperationAction(ISD::FP_TO_SINT, MVT::v2i1, Custom);
1320 setOperationAction(ISD::FP_TO_UINT, MVT::v2i1, Custom);
1321
1322 // There is no byte sized k-register load or store without AVX512DQ.
1323 if (!Subtarget.hasDQI()) {
1324 setOperationAction(ISD::LOAD, MVT::v1i1, Custom);
1325 setOperationAction(ISD::LOAD, MVT::v2i1, Custom);
1326 setOperationAction(ISD::LOAD, MVT::v4i1, Custom);
1327 setOperationAction(ISD::LOAD, MVT::v8i1, Custom);
1328
1329 setOperationAction(ISD::STORE, MVT::v1i1, Custom);
1330 setOperationAction(ISD::STORE, MVT::v2i1, Custom);
1331 setOperationAction(ISD::STORE, MVT::v4i1, Custom);
1332 setOperationAction(ISD::STORE, MVT::v8i1, Custom);
1333 }
1334
1335 // Extends of v16i1/v8i1/v4i1/v2i1 to 128-bit vectors.
1336 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1337 setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
1338 setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
1339 setOperationAction(ISD::ANY_EXTEND, VT, Custom);
1340 }
1341
1342 for (auto VT : { MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v16i1 }) {
1343 setOperationAction(ISD::ADD, VT, Custom);
1344 setOperationAction(ISD::SUB, VT, Custom);
1345 setOperationAction(ISD::MUL, VT, Custom);
1346 setOperationAction(ISD::SETCC, VT, Custom);
1347 setOperationAction(ISD::SELECT, VT, Custom);
1348 setOperationAction(ISD::TRUNCATE, VT, Custom);
1349 setOperationAction(ISD::UADDSAT, VT, Custom);
1350 setOperationAction(ISD::SADDSAT, VT, Custom);
1351 setOperationAction(ISD::USUBSAT, VT, Custom);
1352 setOperationAction(ISD::SSUBSAT, VT, Custom);
1353
1354 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1355 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1356 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1357 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1358 setOperationAction(ISD::VSELECT, VT, Expand);
1359 }
1360
1361 setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i1, Custom);
1362 setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i1, Custom);
1363 setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i1, Custom);
1364 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v2i1, Custom);
1365 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v4i1, Custom);
1366 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v8i1, Custom);
1367 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v16i1, Custom);
1368 for (auto VT : { MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1 })
1369 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
1370 }
1371
1372 // This block controls legalization for 512-bit operations with 32/64 bit
1373 // elements. 512-bits can be disabled based on prefer-vector-width and
1374 // required-vector-width function attributes.
1375 if (!Subtarget.useSoftFloat() && Subtarget.useAVX512Regs()) {
1376 addRegisterClass(MVT::v16i32, &X86::VR512RegClass);
1377 addRegisterClass(MVT::v16f32, &X86::VR512RegClass);
1378 addRegisterClass(MVT::v8i64, &X86::VR512RegClass);
1379 addRegisterClass(MVT::v8f64, &X86::VR512RegClass);
1380
1381 for (MVT VT : MVT::fp_vector_valuetypes())
1382 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v8f32, Legal);
1383
1384 for (auto ExtType : {ISD::ZEXTLOAD, ISD::SEXTLOAD}) {
1385 setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i8, Legal);
1386 setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i16, Legal);
1387 setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i8, Legal);
1388 setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i16, Legal);
1389 setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i32, Legal);
1390 }
1391
1392 for (MVT VT : { MVT::v16f32, MVT::v8f64 }) {
1393 setOperationAction(ISD::FNEG, VT, Custom);
1394 setOperationAction(ISD::FABS, VT, Custom);
1395 setOperationAction(ISD::FMA, VT, Legal);
1396 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
1397 }
1398
1399 setOperationAction(ISD::FP_TO_SINT, MVT::v16i32, Legal);
1400 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v16i16, MVT::v16i32);
1401 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v16i8, MVT::v16i32);
1402 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v16i1, MVT::v16i32);
1403 setOperationAction(ISD::FP_TO_UINT, MVT::v16i32, Legal);
1404 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v16i1, MVT::v16i32);
1405 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v16i8, MVT::v16i32);
1406 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v16i16, MVT::v16i32);
1407 setOperationAction(ISD::SINT_TO_FP, MVT::v16i32, Legal);
1408 setOperationAction(ISD::UINT_TO_FP, MVT::v16i32, Legal);
1409
1410 setTruncStoreAction(MVT::v8i64, MVT::v8i8, Legal);
1411 setTruncStoreAction(MVT::v8i64, MVT::v8i16, Legal);
1412 setTruncStoreAction(MVT::v8i64, MVT::v8i32, Legal);
1413 setTruncStoreAction(MVT::v16i32, MVT::v16i8, Legal);
1414 setTruncStoreAction(MVT::v16i32, MVT::v16i16, Legal);
1415
1416 if (!Subtarget.hasVLX()) {
1417 // With 512-bit vectors and no VLX, we prefer to widen MLOAD/MSTORE
1418 // to 512-bit rather than use the AVX2 instructions so that we can use
1419 // k-masks.
1420 for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1421 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64}) {
1422 setOperationAction(ISD::MLOAD, VT, Custom);
1423 setOperationAction(ISD::MSTORE, VT, Custom);
1424 }
1425 }
1426
1427 setOperationAction(ISD::TRUNCATE, MVT::v8i32, Custom);
1428 setOperationAction(ISD::TRUNCATE, MVT::v16i16, Custom);
1429 setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
1430 setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom);
1431 setOperationAction(ISD::ANY_EXTEND, MVT::v16i32, Custom);
1432 setOperationAction(ISD::ANY_EXTEND, MVT::v8i64, Custom);
1433 setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
1434 setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
1435
1436 if (ExperimentalVectorWideningLegalization) {
1437 // Need to custom widen this if we don't have AVX512BW.
1438 setOperationAction(ISD::ANY_EXTEND, MVT::v8i8, Custom);
1439 setOperationAction(ISD::ZERO_EXTEND, MVT::v8i8, Custom);
1440 setOperationAction(ISD::SIGN_EXTEND, MVT::v8i8, Custom);
1441 }
1442
1443 for (auto VT : { MVT::v16f32, MVT::v8f64 }) {
1444 setOperationAction(ISD::FFLOOR, VT, Legal);
1445 setOperationAction(ISD::FCEIL, VT, Legal);
1446 setOperationAction(ISD::FTRUNC, VT, Legal);
1447 setOperationAction(ISD::FRINT, VT, Legal);
1448 setOperationAction(ISD::FNEARBYINT, VT, Legal);
1449 }
1450
1451 // Without BWI we need to use custom lowering to handle MVT::v64i8 input.
1452 for (auto VT : {MVT::v16i32, MVT::v8i64, MVT::v64i8}) {
1453 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom);
1454 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom);
1455 }
1456
1457 setOperationAction(ISD::CONCAT_VECTORS, MVT::v8f64, Custom);
1458 setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i64, Custom);
1459 setOperationAction(ISD::CONCAT_VECTORS, MVT::v16f32, Custom);
1460 setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i32, Custom);
1461
1462 setOperationAction(ISD::MUL, MVT::v8i64, Custom);
1463 setOperationAction(ISD::MUL, MVT::v16i32, Legal);
1464
1465 setOperationAction(ISD::MULHU, MVT::v16i32, Custom);
1466 setOperationAction(ISD::MULHS, MVT::v16i32, Custom);
1467
1468 setOperationAction(ISD::SELECT, MVT::v8f64, Custom);
1469 setOperationAction(ISD::SELECT, MVT::v8i64, Custom);
1470 setOperationAction(ISD::SELECT, MVT::v16i32, Custom);
1471 setOperationAction(ISD::SELECT, MVT::v32i16, Custom);
1472 setOperationAction(ISD::SELECT, MVT::v64i8, Custom);
1473 setOperationAction(ISD::SELECT, MVT::v16f32, Custom);
1474
1475 for (auto VT : { MVT::v16i32, MVT::v8i64 }) {
1476 setOperationAction(ISD::SMAX, VT, Legal);
1477 setOperationAction(ISD::UMAX, VT, Legal);
1478 setOperationAction(ISD::SMIN, VT, Legal);
1479 setOperationAction(ISD::UMIN, VT, Legal);
1480 setOperationAction(ISD::ABS, VT, Legal);
1481 setOperationAction(ISD::SRL, VT, Custom);
1482 setOperationAction(ISD::SHL, VT, Custom);
1483 setOperationAction(ISD::SRA, VT, Custom);
1484 setOperationAction(ISD::CTPOP, VT, Custom);
1485 setOperationAction(ISD::ROTL, VT, Custom);
1486 setOperationAction(ISD::ROTR, VT, Custom);
1487 setOperationAction(ISD::SETCC, VT, Custom);
1488
1489 // The condition codes aren't legal in SSE/AVX and under AVX512 we use
1490 // setcc all the way to isel and prefer SETGT in some isel patterns.
1491 setCondCodeAction(ISD::SETLT, VT, Custom);
1492 setCondCodeAction(ISD::SETLE, VT, Custom);
1493 }
1494
1495 if (Subtarget.hasDQI()) {
1496 setOperationAction(ISD::SINT_TO_FP, MVT::v8i64, Legal);
1497 setOperationAction(ISD::UINT_TO_FP, MVT::v8i64, Legal);
1498 setOperationAction(ISD::FP_TO_SINT, MVT::v8i64, Legal);
1499 setOperationAction(ISD::FP_TO_UINT, MVT::v8i64, Legal);
1500
1501 setOperationAction(ISD::MUL, MVT::v8i64, Legal);
1502 }
1503
1504 if (Subtarget.hasCDI()) {
1505 // NonVLX sub-targets extend 128/256 vectors to use the 512 version.
1506 for (auto VT : { MVT::v16i32, MVT::v8i64} ) {
1507 setOperationAction(ISD::CTLZ, VT, Legal);
1508 }
1509 } // Subtarget.hasCDI()
1510
1511 if (Subtarget.hasVPOPCNTDQ()) {
1512 for (auto VT : { MVT::v16i32, MVT::v8i64 })
1513 setOperationAction(ISD::CTPOP, VT, Legal);
1514 }
1515
1516 // Extract subvector is special because the value type
1517 // (result) is 256-bit but the source is 512-bit wide.
1518 // 128-bit was made Legal under AVX1.
1519 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,
1520 MVT::v8f32, MVT::v4f64 })
1521 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
1522
1523 for (auto VT : { MVT::v16i32, MVT::v8i64, MVT::v16f32, MVT::v8f64 }) {
1524 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1525 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1526 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1527 setOperationAction(ISD::VSELECT, VT, Custom);
1528 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1529 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
1530 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal);
1531 setOperationAction(ISD::MLOAD, VT, Legal);
1532 setOperationAction(ISD::MSTORE, VT, Legal);
1533 setOperationAction(ISD::MGATHER, VT, Custom);
1534 setOperationAction(ISD::MSCATTER, VT, Custom);
1535 }
1536 // Need to custom split v32i16/v64i8 bitcasts.
1537 if (!Subtarget.hasBWI()) {
1538 setOperationAction(ISD::BITCAST, MVT::v32i16, Custom);
1539 setOperationAction(ISD::BITCAST, MVT::v64i8, Custom);
1540 }
1541 }// has AVX-512
1542
1543 // This block controls legalization for operations that don't have
1544 // pre-AVX512 equivalents. Without VLX we use 512-bit operations for
1545 // narrower widths.
1546 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
1547 // These operations are handled on non-VLX by artificially widening in
1548 // isel patterns.
1549 // TODO: Custom widen in lowering on non-VLX and drop the isel patterns?
1550
1551 setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Legal);
1552 setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
1553 setOperationAction(ISD::FP_TO_UINT, MVT::v2i32, Custom);
1554 setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Legal);
1555 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
1556
1557 for (auto VT : { MVT::v2i64, MVT::v4i64 }) {
1558 setOperationAction(ISD::SMAX, VT, Legal);
1559 setOperationAction(ISD::UMAX, VT, Legal);
1560 setOperationAction(ISD::SMIN, VT, Legal);
1561 setOperationAction(ISD::UMIN, VT, Legal);
1562 setOperationAction(ISD::ABS, VT, Legal);
1563 }
1564
1565 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) {
1566 setOperationAction(ISD::ROTL, VT, Custom);
1567 setOperationAction(ISD::ROTR, VT, Custom);
1568 }
1569
1570 // Custom legalize 2x32 to get a little better code.
1571 setOperationAction(ISD::MSCATTER, MVT::v2f32, Custom);
1572 setOperationAction(ISD::MSCATTER, MVT::v2i32, Custom);
1573
1574 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1575 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 })
1576 setOperationAction(ISD::MSCATTER, VT, Custom);
1577
1578 if (Subtarget.hasDQI()) {
1579 for (auto VT : { MVT::v2i64, MVT::v4i64 }) {
1580 setOperationAction(ISD::SINT_TO_FP, VT, Legal);
1581 setOperationAction(ISD::UINT_TO_FP, VT, Legal);
1582 setOperationAction(ISD::FP_TO_SINT, VT, Legal);
1583 setOperationAction(ISD::FP_TO_UINT, VT, Legal);
1584
1585 setOperationAction(ISD::MUL, VT, Legal);
1586 }
1587 }
1588
1589 if (Subtarget.hasCDI()) {
1590 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) {
1591 setOperationAction(ISD::CTLZ, VT, Legal);
1592 }
1593 } // Subtarget.hasCDI()
1594
1595 if (Subtarget.hasVPOPCNTDQ()) {
1596 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 })
1597 setOperationAction(ISD::CTPOP, VT, Legal);
1598 }
1599 }
1600
1601 // This block controls legalization of v32i1/v64i1 which are available with
1602 // AVX512BW. 512-bit v32i16 and v64i8 vector legalization is controlled with
1603 // useBWIRegs.
1604 if (!Subtarget.useSoftFloat() && Subtarget.hasBWI()) {
1605 addRegisterClass(MVT::v32i1, &X86::VK32RegClass);
1606 addRegisterClass(MVT::v64i1, &X86::VK64RegClass);
1607
1608 for (auto VT : { MVT::v32i1, MVT::v64i1 }) {
1609 setOperationAction(ISD::ADD, VT, Custom);
1610 setOperationAction(ISD::SUB, VT, Custom);
1611 setOperationAction(ISD::MUL, VT, Custom);
1612 setOperationAction(ISD::VSELECT, VT, Expand);
1613 setOperationAction(ISD::UADDSAT, VT, Custom);
1614 setOperationAction(ISD::SADDSAT, VT, Custom);
1615 setOperationAction(ISD::USUBSAT, VT, Custom);
1616 setOperationAction(ISD::SSUBSAT, VT, Custom);
1617
1618 setOperationAction(ISD::TRUNCATE, VT, Custom);
1619 setOperationAction(ISD::SETCC, VT, Custom);
1620 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1621 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1622 setOperationAction(ISD::SELECT, VT, Custom);
1623 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1624 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1625 }
1626
1627 setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i1, Custom);
1628 setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i1, Custom);
1629 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32i1, Custom);
1630 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64i1, Custom);
1631 for (auto VT : { MVT::v16i1, MVT::v32i1 })
1632 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
1633
1634 // Extends from v32i1 masks to 256-bit vectors.
1635 setOperationAction(ISD::SIGN_EXTEND, MVT::v32i8, Custom);
1636 setOperationAction(ISD::ZERO_EXTEND, MVT::v32i8, Custom);
1637 setOperationAction(ISD::ANY_EXTEND, MVT::v32i8, Custom);
1638 }
1639
1640 // This block controls legalization for v32i16 and v64i8. 512-bits can be
1641 // disabled based on prefer-vector-width and required-vector-width function
1642 // attributes.
1643 if (!Subtarget.useSoftFloat() && Subtarget.useBWIRegs()) {
1644 addRegisterClass(MVT::v32i16, &X86::VR512RegClass);
1645 addRegisterClass(MVT::v64i8, &X86::VR512RegClass);
1646
1647 // Extends from v64i1 masks to 512-bit vectors.
1648 setOperationAction(ISD::SIGN_EXTEND, MVT::v64i8, Custom);
1649 setOperationAction(ISD::ZERO_EXTEND, MVT::v64i8, Custom);
1650 setOperationAction(ISD::ANY_EXTEND, MVT::v64i8, Custom);
1651
1652 setOperationAction(ISD::MUL, MVT::v32i16, Legal);
1653 setOperationAction(ISD::MUL, MVT::v64i8, Custom);
1654 setOperationAction(ISD::MULHS, MVT::v32i16, Legal);
1655 setOperationAction(ISD::MULHU, MVT::v32i16, Legal);
1656 setOperationAction(ISD::MULHS, MVT::v64i8, Custom);
1657 setOperationAction(ISD::MULHU, MVT::v64i8, Custom);
1658 setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i16, Custom);
1659 setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i8, Custom);
1660 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32i16, Legal);
1661 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64i8, Legal);
1662 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v32i16, Custom);
1663 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v64i8, Custom);
1664 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v32i16, Custom);
1665 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v64i8, Custom);
1666 setOperationAction(ISD::SIGN_EXTEND, MVT::v32i16, Custom);
1667 setOperationAction(ISD::ZERO_EXTEND, MVT::v32i16, Custom);
1668 setOperationAction(ISD::ANY_EXTEND, MVT::v32i16, Custom);
1669 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v32i16, Custom);
1670 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v64i8, Custom);
1671 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v32i16, Custom);
1672 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v64i8, Custom);
1673 setOperationAction(ISD::TRUNCATE, MVT::v32i8, Custom);
1674 setOperationAction(ISD::BITREVERSE, MVT::v64i8, Custom);
1675
1676 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v32i16, Custom);
1677 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, MVT::v32i16, Custom);
1678
1679 setTruncStoreAction(MVT::v32i16, MVT::v32i8, Legal);
1680
1681 for (auto VT : { MVT::v64i8, MVT::v32i16 }) {
1682 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1683 setOperationAction(ISD::VSELECT, VT, Custom);
1684 setOperationAction(ISD::ABS, VT, Legal);
1685 setOperationAction(ISD::SRL, VT, Custom);
1686 setOperationAction(ISD::SHL, VT, Custom);
1687 setOperationAction(ISD::SRA, VT, Custom);
1688 setOperationAction(ISD::MLOAD, VT, Legal);
1689 setOperationAction(ISD::MSTORE, VT, Legal);
1690 setOperationAction(ISD::CTPOP, VT, Custom);
1691 setOperationAction(ISD::CTLZ, VT, Custom);
1692 setOperationAction(ISD::SMAX, VT, Legal);
1693 setOperationAction(ISD::UMAX, VT, Legal);
1694 setOperationAction(ISD::SMIN, VT, Legal);
1695 setOperationAction(ISD::UMIN, VT, Legal);
1696 setOperationAction(ISD::SETCC, VT, Custom);
1697 setOperationAction(ISD::UADDSAT, VT, Legal);
1698 setOperationAction(ISD::SADDSAT, VT, Legal);
1699 setOperationAction(ISD::USUBSAT, VT, Legal);
1700 setOperationAction(ISD::SSUBSAT, VT, Legal);
1701
1702 // The condition codes aren't legal in SSE/AVX and under AVX512 we use
1703 // setcc all the way to isel and prefer SETGT in some isel patterns.
1704 setCondCodeAction(ISD::SETLT, VT, Custom);
1705 setCondCodeAction(ISD::SETLE, VT, Custom);
1706 }
1707
1708 for (auto ExtType : {ISD::ZEXTLOAD, ISD::SEXTLOAD}) {
1709 setLoadExtAction(ExtType, MVT::v32i16, MVT::v32i8, Legal);
1710 }
1711
1712 if (Subtarget.hasBITALG()) {
1713 for (auto VT : { MVT::v64i8, MVT::v32i16 })
1714 setOperationAction(ISD::CTPOP, VT, Legal);
1715 }
1716 }
1717
1718 if (!Subtarget.useSoftFloat() && Subtarget.hasBWI()) {
1719 for (auto VT : { MVT::v32i8, MVT::v16i8, MVT::v16i16, MVT::v8i16 }) {
1720 setOperationAction(ISD::MLOAD, VT, Subtarget.hasVLX() ? Legal : Custom);
1721 setOperationAction(ISD::MSTORE, VT, Subtarget.hasVLX() ? Legal : Custom);
1722 }
1723
1724 // These operations are handled on non-VLX by artificially widening in
1725 // isel patterns.
1726 // TODO: Custom widen in lowering on non-VLX and drop the isel patterns?
1727
1728 if (Subtarget.hasBITALG()) {
1729 for (auto VT : { MVT::v16i8, MVT::v32i8, MVT::v8i16, MVT::v16i16 })
1730 setOperationAction(ISD::CTPOP, VT, Legal);
1731 }
1732 }
1733
1734 if (!Subtarget.useSoftFloat() && Subtarget.hasVLX()) {
1735 setTruncStoreAction(MVT::v4i64, MVT::v4i8, Legal);
1736 setTruncStoreAction(MVT::v4i64, MVT::v4i16, Legal);
1737 setTruncStoreAction(MVT::v4i64, MVT::v4i32, Legal);
1738 setTruncStoreAction(MVT::v8i32, MVT::v8i8, Legal);
1739 setTruncStoreAction(MVT::v8i32, MVT::v8i16, Legal);
1740
1741 setTruncStoreAction(MVT::v2i64, MVT::v2i8, Legal);
1742 setTruncStoreAction(MVT::v2i64, MVT::v2i16, Legal);
1743 setTruncStoreAction(MVT::v2i64, MVT::v2i32, Legal);
1744 setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal);
1745 setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
1746
1747 if (Subtarget.hasDQI()) {
1748 // Fast v2f32 SINT_TO_FP( v2i64 ) custom conversion.
1749 // v2f32 UINT_TO_FP is already custom under SSE2.
1750 setOperationAction(ISD::SINT_TO_FP, MVT::v2f32, Custom);
1751 assert(isOperationCustom(ISD::UINT_TO_FP, MVT::v2f32) &&((isOperationCustom(ISD::UINT_TO_FP, MVT::v2f32) && "Unexpected operation action!"
) ? static_cast<void> (0) : __assert_fail ("isOperationCustom(ISD::UINT_TO_FP, MVT::v2f32) && \"Unexpected operation action!\""
, "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 1752, __PRETTY_FUNCTION__))
1752 "Unexpected operation action!")((isOperationCustom(ISD::UINT_TO_FP, MVT::v2f32) && "Unexpected operation action!"
) ? static_cast<void> (0) : __assert_fail ("isOperationCustom(ISD::UINT_TO_FP, MVT::v2f32) && \"Unexpected operation action!\""
, "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 1752, __PRETTY_FUNCTION__))
;
1753 // v2i64 FP_TO_S/UINT(v2f32) custom conversion.
1754 setOperationAction(ISD::FP_TO_SINT, MVT::v2f32, Custom);
1755 setOperationAction(ISD::FP_TO_UINT, MVT::v2f32, Custom);
1756 }
1757
1758 if (Subtarget.hasBWI()) {
1759 setTruncStoreAction(MVT::v16i16, MVT::v16i8, Legal);
1760 setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
1761 }
1762 }
1763
1764 // We want to custom lower some of our intrinsics.
1765 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
1766 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
1767 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
1768 if (!Subtarget.is64Bit()) {
1769 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
1770 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
1771 }
1772
1773 // Only custom-lower 64-bit SADDO and friends on 64-bit because we don't
1774 // handle type legalization for these operations here.
1775 //
1776 // FIXME: We really should do custom legalization for addition and
1777 // subtraction on x86-32 once PR3203 is fixed. We really can't do much better
1778 // than generic legalization for 64-bit multiplication-with-overflow, though.
1779 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
1780 if (VT == MVT::i64 && !Subtarget.is64Bit())
1781 continue;
1782 // Add/Sub/Mul with overflow operations are custom lowered.
1783 setOperationAction(ISD::SADDO, VT, Custom);
1784 setOperationAction(ISD::UADDO, VT, Custom);
1785 setOperationAction(ISD::SSUBO, VT, Custom);
1786 setOperationAction(ISD::USUBO, VT, Custom);
1787 setOperationAction(ISD::SMULO, VT, Custom);
1788 setOperationAction(ISD::UMULO, VT, Custom);
1789
1790 // Support carry in as value rather than glue.
1791 setOperationAction(ISD::ADDCARRY, VT, Custom);
1792 setOperationAction(ISD::SUBCARRY, VT, Custom);
1793 setOperationAction(ISD::SETCCCARRY, VT, Custom);
1794 }
1795
1796 if (!Subtarget.is64Bit()) {
1797 // These libcalls are not available in 32-bit.
1798 setLibcallName(RTLIB::SHL_I128, nullptr);
1799 setLibcallName(RTLIB::SRL_I128, nullptr);
1800 setLibcallName(RTLIB::SRA_I128, nullptr);
1801 setLibcallName(RTLIB::MUL_I128, nullptr);
1802 }
1803
1804 // Combine sin / cos into _sincos_stret if it is available.
1805 if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
1806 getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) {
1807 setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
1808 setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
1809 }
1810
1811 if (Subtarget.isTargetWin64()) {
1812 setOperationAction(ISD::SDIV, MVT::i128, Custom);
1813 setOperationAction(ISD::UDIV, MVT::i128, Custom);
1814 setOperationAction(ISD::SREM, MVT::i128, Custom);
1815 setOperationAction(ISD::UREM, MVT::i128, Custom);
1816 setOperationAction(ISD::SDIVREM, MVT::i128, Custom);
1817 setOperationAction(ISD::UDIVREM, MVT::i128, Custom);
1818 }
1819
1820 // On 32 bit MSVC, `fmodf(f32)` is not defined - only `fmod(f64)`
1821 // is. We should promote the value to 64-bits to solve this.
1822 // This is what the CRT headers do - `fmodf` is an inline header
1823 // function casting to f64 and calling `fmod`.
1824 if (Subtarget.is32Bit() && (Subtarget.isTargetKnownWindowsMSVC() ||
1825 Subtarget.isTargetWindowsItanium()))
1826 for (ISD::NodeType Op :
1827 {ISD::FCEIL, ISD::FCOS, ISD::FEXP, ISD::FFLOOR, ISD::FREM, ISD::FLOG,
1828 ISD::FLOG10, ISD::FPOW, ISD::FSIN})
1829 if (isOperationExpand(Op, MVT::f32))
1830 setOperationAction(Op, MVT::f32, Promote);
1831
1832 // We have target-specific dag combine patterns for the following nodes:
1833 setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
1834 setTargetDAGCombine(ISD::SCALAR_TO_VECTOR);
1835 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
1836 setTargetDAGCombine(ISD::INSERT_SUBVECTOR);
1837 setTargetDAGCombine(ISD::EXTRACT_SUBVECTOR);
1838 setTargetDAGCombine(ISD::BITCAST);
1839 setTargetDAGCombine(ISD::VSELECT);
1840 setTargetDAGCombine(ISD::SELECT);
1841 setTargetDAGCombine(ISD::SHL);
1842 setTargetDAGCombine(ISD::SRA);
1843 setTargetDAGCombine(ISD::SRL);
1844 setTargetDAGCombine(ISD::OR);
1845 setTargetDAGCombine(ISD::AND);
1846 setTargetDAGCombine(ISD::ADD);
1847 setTargetDAGCombine(ISD::FADD);
1848 setTargetDAGCombine(ISD::FSUB);
1849 setTargetDAGCombine(ISD::FNEG);
1850 setTargetDAGCombine(ISD::FMA);
1851 setTargetDAGCombine(ISD::FMINNUM);
1852 setTargetDAGCombine(ISD::FMAXNUM);
1853 setTargetDAGCombine(ISD::SUB);
1854 setTargetDAGCombine(ISD::LOAD);
1855 setTargetDAGCombine(ISD::MLOAD);
1856 setTargetDAGCombine(ISD::STORE);
1857 setTargetDAGCombine(ISD::MSTORE);
1858 setTargetDAGCombine(ISD::TRUNCATE);
1859 setTargetDAGCombine(ISD::ZERO_EXTEND);
1860 setTargetDAGCombine(ISD::ANY_EXTEND);
1861 setTargetDAGCombine(ISD::SIGN_EXTEND);
1862 setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
1863 setTargetDAGCombine(ISD::SINT_TO_FP);
1864 setTargetDAGCombine(ISD::UINT_TO_FP);
1865 setTargetDAGCombine(ISD::SETCC);
1866 setTargetDAGCombine(ISD::MUL);
1867 setTargetDAGCombine(ISD::XOR);
1868 setTargetDAGCombine(ISD::MSCATTER);
1869 setTargetDAGCombine(ISD::MGATHER);
1870
1871 computeRegisterProperties(Subtarget.getRegisterInfo());
1872
1873 MaxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
1874 MaxStoresPerMemsetOptSize = 8;
1875 MaxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores
1876 MaxStoresPerMemcpyOptSize = 4;
1877 MaxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores
1878 MaxStoresPerMemmoveOptSize = 4;
1879
1880 // TODO: These control memcmp expansion in CGP and could be raised higher, but
1881 // that needs to be benchmarked and balanced with the potential use of vector
1882 // load/store types (PR33329, PR33914).
1883 MaxLoadsPerMemcmp = 2;
1884 MaxLoadsPerMemcmpOptSize = 2;
1885
1886 // Set loop alignment to 2^ExperimentalPrefLoopAlignment bytes (default: 2^4).
1887 setPrefLoopAlignment(ExperimentalPrefLoopAlignment);
1888
1889 // An out-of-order CPU can speculatively execute past a predictable branch,
1890 // but a conditional move could be stalled by an expensive earlier operation.
1891 PredictableSelectIsExpensive = Subtarget.getSchedModel().isOutOfOrder();
1892 EnableExtLdPromotion = true;
1893 setPrefFunctionAlignment(4); // 2^4 bytes.
1894
1895 verifyIntrinsicTables();
1896}
1897
1898// This has so far only been implemented for 64-bit MachO.
1899bool X86TargetLowering::useLoadStackGuardNode() const {
1900 return Subtarget.isTargetMachO() && Subtarget.is64Bit();
1901}
1902
1903bool X86TargetLowering::useStackGuardXorFP() const {
1904 // Currently only MSVC CRTs XOR the frame pointer into the stack guard value.
1905 return Subtarget.getTargetTriple().isOSMSVCRT();
1906}
1907
1908SDValue X86TargetLowering::emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
1909 const SDLoc &DL) const {
1910 EVT PtrTy = getPointerTy(DAG.getDataLayout());
1911 unsigned XorOp = Subtarget.is64Bit() ? X86::XOR64_FP : X86::XOR32_FP;
1912 MachineSDNode *Node = DAG.getMachineNode(XorOp, DL, PtrTy, Val);
1913 return SDValue(Node, 0);
1914}
1915
1916TargetLoweringBase::LegalizeTypeAction
1917X86TargetLowering::getPreferredVectorAction(MVT VT) const {
1918 if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI())
1919 return TypeSplitVector;
1920
1921 if (ExperimentalVectorWideningLegalization &&
1922 VT.getVectorNumElements() != 1 &&
1923 VT.getVectorElementType() != MVT::i1)
1924 return TypeWidenVector;
1925
1926 return TargetLoweringBase::getPreferredVectorAction(VT);
1927}
1928
1929MVT X86TargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
1930 CallingConv::ID CC,
1931 EVT VT) const {
1932 if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI())
1933 return MVT::v32i8;
1934 return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
1935}
1936
1937unsigned X86TargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
1938 CallingConv::ID CC,
1939 EVT VT) const {
1940 if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI())
1941 return 1;
1942 return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
1943}
1944
1945EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL,
1946 LLVMContext& Context,
1947 EVT VT) const {
1948 if (!VT.isVector())
1949 return MVT::i8;
1950
1951 if (Subtarget.hasAVX512()) {
1952 const unsigned NumElts = VT.getVectorNumElements();
1953
1954 // Figure out what this type will be legalized to.
1955 EVT LegalVT = VT;
1956 while (getTypeAction(Context, LegalVT) != TypeLegal)
1957 LegalVT = getTypeToTransformTo(Context, LegalVT);
1958
1959 // If we got a 512-bit vector then we'll definitely have a vXi1 compare.
1960 if (LegalVT.getSimpleVT().is512BitVector())
1961 return EVT::getVectorVT(Context, MVT::i1, NumElts);
1962
1963 if (LegalVT.getSimpleVT().isVector() && Subtarget.hasVLX()) {
1964 // If we legalized to less than a 512-bit vector, then we will use a vXi1
1965 // compare for vXi32/vXi64 for sure. If we have BWI we will also support
1966 // vXi16/vXi8.
1967 MVT EltVT = LegalVT.getSimpleVT().getVectorElementType();
1968 if (Subtarget.hasBWI() || EltVT.getSizeInBits() >= 32)
1969 return EVT::getVectorVT(Context, MVT::i1, NumElts);
1970 }
1971 }
1972
1973 return VT.changeVectorElementTypeToInteger();
1974}
1975
1976/// Helper for getByValTypeAlignment to determine
1977/// the desired ByVal argument alignment.
1978static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign) {
1979 if (MaxAlign == 16)
1980 return;
1981 if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
1982 if (VTy->getBitWidth() == 128)
1983 MaxAlign = 16;
1984 } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
1985 unsigned EltAlign = 0;
1986 getMaxByValAlign(ATy->getElementType(), EltAlign);
1987 if (EltAlign > MaxAlign)
1988 MaxAlign = EltAlign;
1989 } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
1990 for (auto *EltTy : STy->elements()) {
1991 unsigned EltAlign = 0;
1992 getMaxByValAlign(EltTy, EltAlign);
1993 if (EltAlign > MaxAlign)
1994 MaxAlign = EltAlign;
1995 if (MaxAlign == 16)
1996 break;
1997 }
1998 }
1999}
2000
2001/// Return the desired alignment for ByVal aggregate
2002/// function arguments in the caller parameter area. For X86, aggregates
2003/// that contain SSE vectors are placed at 16-byte boundaries while the rest
2004/// are at 4-byte boundaries.
2005unsigned X86TargetLowering::getByValTypeAlignment(Type *Ty,
2006 const DataLayout &DL) const {
2007 if (Subtarget.is64Bit()) {
2008 // Max of 8 and alignment of type.
2009 unsigned TyAlign = DL.getABITypeAlignment(Ty);
2010 if (TyAlign > 8)
2011 return TyAlign;
2012 return 8;
2013 }
2014
2015 unsigned Align = 4;
2016 if (Subtarget.hasSSE1())
2017 getMaxByValAlign(Ty, Align);
2018 return Align;
2019}
2020
2021/// Returns the target specific optimal type for load
2022/// and store operations as a result of memset, memcpy, and memmove
2023/// lowering. If DstAlign is zero that means it's safe to destination
2024/// alignment can satisfy any constraint. Similarly if SrcAlign is zero it
2025/// means there isn't a need to check it against alignment requirement,
2026/// probably because the source does not need to be loaded. If 'IsMemset' is
2027/// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
2028/// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
2029/// source is constant so it does not need to be loaded.
2030/// It returns EVT::Other if the type should be determined using generic
2031/// target-independent logic.
2032EVT
2033X86TargetLowering::getOptimalMemOpType(uint64_t Size,
2034 unsigned DstAlign, unsigned SrcAlign,
2035 bool IsMemset, bool ZeroMemset,
2036 bool MemcpyStrSrc,
2037 MachineFunction &MF) const {
2038 const Function &F = MF.getFunction();
2039 if (!F.hasFnAttribute(Attribute::NoImplicitFloat)) {
2040 if (Size >= 16 &&
2041 (!Subtarget.isUnalignedMem16Slow() ||
2042 ((DstAlign == 0 || DstAlign >= 16) &&
2043 (SrcAlign == 0 || SrcAlign >= 16)))) {
2044 // FIXME: Check if unaligned 32-byte accesses are slow.
2045 if (Size >= 32 && Subtarget.hasAVX()) {
2046 // Although this isn't a well-supported type for AVX1, we'll let
2047 // legalization and shuffle lowering produce the optimal codegen. If we
2048 // choose an optimal type with a vector element larger than a byte,
2049 // getMemsetStores() may create an intermediate splat (using an integer
2050 // multiply) before we splat as a vector.
2051 return MVT::v32i8;
2052 }
2053 if (Subtarget.hasSSE2())
2054 return MVT::v16i8;
2055 // TODO: Can SSE1 handle a byte vector?
2056 // If we have SSE1 registers we should be able to use them.
2057 if (Subtarget.hasSSE1() && (Subtarget.is64Bit() || Subtarget.hasX87()))
2058 return MVT::v4f32;
2059 } else if ((!IsMemset || ZeroMemset) && !MemcpyStrSrc && Size >= 8 &&
2060 !Subtarget.is64Bit() && Subtarget.hasSSE2()) {
2061 // Do not use f64 to lower memcpy if source is string constant. It's
2062 // better to use i32 to avoid the loads.
2063 // Also, do not use f64 to lower memset unless this is a memset of zeros.
2064 // The gymnastics of splatting a byte value into an XMM register and then
2065 // only using 8-byte stores (because this is a CPU with slow unaligned
2066 // 16-byte accesses) makes that a loser.
2067 return MVT::f64;
2068 }
2069 }
2070 // This is a compromise. If we reach here, unaligned accesses may be slow on
2071 // this target. However, creating smaller, aligned accesses could be even
2072 // slower and would certainly be a lot more code.
2073 if (Subtarget.is64Bit() && Size >= 8)
2074 return MVT::i64;
2075 return MVT::i32;
2076}
2077
2078bool X86TargetLowering::isSafeMemOpType(MVT VT) const {
2079 if (VT == MVT::f32)
2080 return X86ScalarSSEf32;
2081 else if (VT == MVT::f64)
2082 return X86ScalarSSEf64;
2083 return true;
2084}
2085
2086bool
2087X86TargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
2088 unsigned,
2089 unsigned,
2090 bool *Fast) const {
2091 if (Fast) {
2092 switch (VT.getSizeInBits()) {
2093 default:
2094 // 8-byte and under are always assumed to be fast.
2095 *Fast = true;
2096 break;
2097 case 128:
2098 *Fast = !Subtarget.isUnalignedMem16Slow();
2099 break;
2100 case 256:
2101 *Fast = !Subtarget.isUnalignedMem32Slow();
2102 break;
2103 // TODO: What about AVX-512 (512-bit) accesses?
2104 }
2105 }
2106 // Misaligned accesses of any size are always allowed.
2107 return true;
2108}
2109
2110/// Return the entry encoding for a jump table in the
2111/// current function. The returned value is a member of the
2112/// MachineJumpTableInfo::JTEntryKind enum.
2113unsigned X86TargetLowering::getJumpTableEncoding() const {
2114 // In GOT pic mode, each entry in the jump table is emitted as a @GOTOFF
2115 // symbol.
2116 if (isPositionIndependent() && Subtarget.isPICStyleGOT())
2117 return MachineJumpTableInfo::EK_Custom32;
2118
2119 // Otherwise, use the normal jump table encoding heuristics.
2120 return TargetLowering::getJumpTableEncoding();
2121}
2122
2123bool X86TargetLowering::useSoftFloat() const {
2124 return Subtarget.useSoftFloat();
2125}
2126
2127void X86TargetLowering::markLibCallAttributes(MachineFunction *MF, unsigned CC,
2128 ArgListTy &Args) const {
2129
2130 // Only relabel X86-32 for C / Stdcall CCs.
2131 if (Subtarget.is64Bit())
2132 return;
2133 if (CC != CallingConv::C && CC != CallingConv::X86_StdCall)
2134 return;
2135 unsigned ParamRegs = 0;
2136 if (auto *M = MF->getFunction().getParent())
2137 ParamRegs = M->getNumberRegisterParameters();
2138
2139 // Mark the first N int arguments as having reg
2140 for (unsigned Idx = 0; Idx < Args.size(); Idx++) {
2141 Type *T = Args[Idx].Ty;
2142 if (T->isIntOrPtrTy())
2143 if (MF->getDataLayout().getTypeAllocSize(T) <= 8) {
2144 unsigned numRegs = 1;
2145 if (MF->getDataLayout().getTypeAllocSize(T) > 4)
2146 numRegs = 2;
2147 if (ParamRegs < numRegs)
2148 return;
2149 ParamRegs -= numRegs;
2150 Args[Idx].IsInReg = true;
2151 }
2152 }
2153}
2154
2155const MCExpr *
2156X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
2157 const MachineBasicBlock *MBB,
2158 unsigned uid,MCContext &Ctx) const{
2159 assert(isPositionIndependent() && Subtarget.isPICStyleGOT())((isPositionIndependent() && Subtarget.isPICStyleGOT(
)) ? static_cast<void> (0) : __assert_fail ("isPositionIndependent() && Subtarget.isPICStyleGOT()"
, "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 2159, __PRETTY_FUNCTION__))
;
2160 // In 32-bit ELF systems, our jump table entries are formed with @GOTOFF
2161 // entries.
2162 return MCSymbolRefExpr::create(MBB->getSymbol(),
2163 MCSymbolRefExpr::VK_GOTOFF, Ctx);
2164}
2165
2166/// Returns relocation base for the given PIC jumptable.
2167SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table,
2168 SelectionDAG &DAG) const {
2169 if (!Subtarget.is64Bit())
2170 // This doesn't have SDLoc associated with it, but is not really the
2171 // same as a Register.
2172 return DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
2173 getPointerTy(DAG.getDataLayout()));
2174 return Table;
2175}
2176
2177/// This returns the relocation base for the given PIC jumptable,
2178/// the same as getPICJumpTableRelocBase, but as an MCExpr.
2179const MCExpr *X86TargetLowering::
2180getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI,
2181 MCContext &Ctx) const {
2182 // X86-64 uses RIP relative addressing based on the jump table label.
2183 if (Subtarget.isPICStyleRIPRel())
2184 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
2185
2186 // Otherwise, the reference is relative to the PIC base.
2187 return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
2188}
2189
2190std::pair<const TargetRegisterClass *, uint8_t>
2191X86TargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
2192 MVT VT) const {
2193 const TargetRegisterClass *RRC = nullptr;
2194 uint8_t Cost = 1;
2195 switch (VT.SimpleTy) {
2196 default:
2197 return TargetLowering::findRepresentativeClass(TRI, VT);
2198 case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64:
2199 RRC = Subtarget.is64Bit() ? &X86::GR64RegClass : &X86::GR32RegClass;
2200 break;
2201 case MVT::x86mmx:
2202 RRC = &X86::VR64RegClass;
2203 break;
2204 case MVT::f32: case MVT::f64:
2205 case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
2206 case MVT::v4f32: case MVT::v2f64:
2207 case MVT::v32i8: case MVT::v16i16: case MVT::v8i32: case MVT::v4i64:
2208 case MVT::v8f32: case MVT::v4f64:
2209 case MVT::v64i8: case MVT::v32i16: case MVT::v16i32: case MVT::v8i64:
2210 case MVT::v16f32: case MVT::v8f64:
2211 RRC = &X86::VR128XRegClass;
2212 break;
2213 }
2214 return std::make_pair(RRC, Cost);
2215}
2216
2217unsigned X86TargetLowering::getAddressSpace() const {
2218 if (Subtarget.is64Bit())
2219 return (getTargetMachine().getCodeModel() == CodeModel::Kernel) ? 256 : 257;
2220 return 256;
2221}
2222
2223static bool hasStackGuardSlotTLS(const Triple &TargetTriple) {
2224 return TargetTriple.isOSGlibc() || TargetTriple.isOSFuchsia() ||
2225 (TargetTriple.isAndroid() && !TargetTriple.isAndroidVersionLT(17));
2226}
2227
2228static Constant* SegmentOffset(IRBuilder<> &IRB,
2229 unsigned Offset, unsigned AddressSpace) {
2230 return ConstantExpr::getIntToPtr(
2231 ConstantInt::get(Type::getInt32Ty(IRB.getContext()), Offset),
2232 Type::getInt8PtrTy(IRB.getContext())->getPointerTo(AddressSpace));
2233}
2234
2235Value *X86TargetLowering::getIRStackGuard(IRBuilder<> &IRB) const {
2236 // glibc, bionic, and Fuchsia have a special slot for the stack guard in
2237 // tcbhead_t; use it instead of the usual global variable (see
2238 // sysdeps/{i386,x86_64}/nptl/tls.h)
2239 if (hasStackGuardSlotTLS(Subtarget.getTargetTriple())) {
2240 if (Subtarget.isTargetFuchsia()) {
2241 // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
2242 return SegmentOffset(IRB, 0x10, getAddressSpace());
2243 } else {
2244 // %fs:0x28, unless we're using a Kernel code model, in which case
2245 // it's %gs:0x28. gs:0x14 on i386.
2246 unsigned Offset = (Subtarget.is64Bit()) ? 0x28 : 0x14;
2247 return SegmentOffset(IRB, Offset, getAddressSpace());
2248 }
2249 }
2250
2251 return TargetLowering::getIRStackGuard(IRB);
2252}
2253
2254void X86TargetLowering::insertSSPDeclarations(Module &M) const {
2255 // MSVC CRT provides functionalities for stack protection.
2256 if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
2257 Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
2258 // MSVC CRT has a global variable holding security cookie.
2259 M.getOrInsertGlobal("__security_cookie",
2260 Type::getInt8PtrTy(M.getContext()));
2261
2262 // MSVC CRT has a function to validate security cookie.
2263 auto *SecurityCheckCookie = cast<Function>(
2264 M.getOrInsertFunction("__security_check_cookie",
2265 Type::getVoidTy(M.getContext()),
2266 Type::getInt8PtrTy(M.getContext())));
2267 SecurityCheckCookie->setCallingConv(CallingConv::X86_FastCall);
2268 SecurityCheckCookie->addAttribute(1, Attribute::AttrKind::InReg);
2269 return;
2270 }
2271 // glibc, bionic, and Fuchsia have a special slot for the stack guard.
2272 if (hasStackGuardSlotTLS(Subtarget.getTargetTriple()))
2273 return;
2274 TargetLowering::insertSSPDeclarations(M);
2275}
2276
2277Value *X86TargetLowering::getSDagStackGuard(const Module &M) const {
2278 // MSVC CRT has a global variable holding security cookie.
2279 if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
2280 Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
2281 return M.getGlobalVariable("__security_cookie");
2282 }
2283 return TargetLowering::getSDagStackGuard(M);
2284}
2285
2286Value *X86TargetLowering::getSSPStackGuardCheck(const Module &M) const {
2287 // MSVC CRT has a function to validate security cookie.
2288 if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
2289 Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
2290 return M.getFunction("__security_check_cookie");
2291 }
2292 return TargetLowering::getSSPStackGuardCheck(M);
2293}
2294
2295Value *X86TargetLowering::getSafeStackPointerLocation(IRBuilder<> &IRB) const {
2296 if (Subtarget.getTargetTriple().isOSContiki())
2297 return getDefaultSafeStackPointerLocation(IRB, false);
2298
2299 // Android provides a fixed TLS slot for the SafeStack pointer. See the
2300 // definition of TLS_SLOT_SAFESTACK in
2301 // https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
2302 if (Subtarget.isTargetAndroid()) {
2303 // %fs:0x48, unless we're using a Kernel code model, in which case it's %gs:
2304 // %gs:0x24 on i386
2305 unsigned Offset = (Subtarget.is64Bit()) ? 0x48 : 0x24;
2306 return SegmentOffset(IRB, Offset, getAddressSpace());
2307 }
2308
2309 // Fuchsia is similar.
2310 if (Subtarget.isTargetFuchsia()) {
2311 // <zircon/tls.h> defines ZX_TLS_UNSAFE_SP_OFFSET with this value.
2312 return SegmentOffset(IRB, 0x18, getAddressSpace());
2313 }
2314
2315 return TargetLowering::getSafeStackPointerLocation(IRB);
2316}
2317
2318bool X86TargetLowering::isNoopAddrSpaceCast(unsigned SrcAS,
2319 unsigned DestAS) const {
2320 assert(SrcAS != DestAS && "Expected different address spaces!")((SrcAS != DestAS && "Expected different address spaces!"
) ? static_cast<void> (0) : __assert_fail ("SrcAS != DestAS && \"Expected different address spaces!\""
, "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 2320, __PRETTY_FUNCTION__))
;
2321
2322 return SrcAS < 256 && DestAS < 256;
2323}
2324
2325//===----------------------------------------------------------------------===//
2326// Return Value Calling Convention Implementation
2327//===----------------------------------------------------------------------===//
2328
2329#include "X86GenCallingConv.inc"
2330
2331bool X86TargetLowering::CanLowerReturn(
2332 CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
2333 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
2334 SmallVector<CCValAssign, 16> RVLocs;
2335 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
2336 return CCInfo.CheckReturn(Outs, RetCC_X86);
2337}
2338
2339const MCPhysReg *X86TargetLowering::getScratchRegisters(CallingConv::ID) const {
2340 static const MCPhysReg ScratchRegs[] = { X86::R11, 0 };
2341 return ScratchRegs;
2342}
2343
2344/// Lowers masks values (v*i1) to the local register values
2345/// \returns DAG node after lowering to register type
2346static SDValue lowerMasksToReg(const SDValue &ValArg, const EVT &ValLoc,
2347 const SDLoc &Dl, SelectionDAG &DAG) {
2348 EVT ValVT = ValArg.getValueType();
2349
2350 if (ValVT == MVT::v1i1)
2351 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, Dl, ValLoc, ValArg,
2352 DAG.getIntPtrConstant(0, Dl));
2353
2354 if ((ValVT == MVT::v8i1 && (ValLoc == MVT::i8 || ValLoc == MVT::i32)) ||
2355 (ValVT == MVT::v16i1 && (ValLoc == MVT::i16 || ValLoc == MVT::i32))) {
2356 // Two stage lowering might be required
2357 // bitcast: v8i1 -> i8 / v16i1 -> i16
2358 // anyextend: i8 -> i32 / i16 -> i32
2359 EVT TempValLoc = ValVT == MVT::v8i1 ? MVT::i8 : MVT::i16;
2360 SDValue ValToCopy = DAG.getBitcast(TempValLoc, ValArg);
2361 if (ValLoc == MVT::i32)
2362 ValToCopy = DAG.getNode(ISD::ANY_EXTEND, Dl, ValLoc, ValToCopy);
2363 return ValToCopy;
2364 }
2365
2366 if ((ValVT == MVT::v32i1 && ValLoc == MVT::i32) ||
2367 (ValVT == MVT::v64i1 && ValLoc == MVT::i64)) {
2368 // One stage lowering is required
2369 // bitcast: v32i1 -> i32 / v64i1 -> i64
2370 return DAG.getBitcast(ValLoc, ValArg);
2371 }
2372
2373 return DAG.getNode(ISD::ANY_EXTEND, Dl, ValLoc, ValArg);
2374}
2375
2376/// Breaks v64i1 value into two registers and adds the new node to the DAG
2377static void Passv64i1ArgInRegs(
2378 const SDLoc &Dl, SelectionDAG &DAG, SDValue Chain, SDValue &Arg,
2379 SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, CCValAssign &VA,
2380 CCValAssign &NextVA, const X86Subtarget &Subtarget) {
2381 assert(Subtarget.hasBWI() && "Expected AVX512BW target!")((Subtarget.hasBWI() && "Expected AVX512BW target!") ?
static_cast<void> (0) : __assert_fail ("Subtarget.hasBWI() && \"Expected AVX512BW target!\""
, "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 2381, __PRETTY_FUNCTION__))
;
2382 assert(Subtarget.is32Bit() && "Expecting 32 bit target")((Subtarget.is32Bit() && "Expecting 32 bit target") ?
static_cast<void> (0) : __assert_fail ("Subtarget.is32Bit() && \"Expecting 32 bit target\""
, "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 2382, __PRETTY_FUNCTION__))
;
2383 assert(Arg.getValueType() == MVT::i64 && "Expecting 64 bit value")((Arg.getValueType() == MVT::i64 && "Expecting 64 bit value"
) ? static_cast<void> (0) : __assert_fail ("Arg.getValueType() == MVT::i64 && \"Expecting 64 bit value\""
, "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 2383, __PRETTY_FUNCTION__))
;
2384 assert(VA.isRegLoc() && NextVA.isRegLoc() &&((VA.isRegLoc() && NextVA.isRegLoc() && "The value should reside in two registers"
) ? static_cast<void> (0) : __assert_fail ("VA.isRegLoc() && NextVA.isRegLoc() && \"The value should reside in two registers\""
, "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 2385, __PRETTY_FUNCTION__))
2385 "The value should reside in two registers")((VA.isRegLoc() && NextVA.isRegLoc() && "The value should reside in two registers"
) ? static_cast<void> (0) : __assert_fail ("VA.isRegLoc() && NextVA.isRegLoc() && \"The value should reside in two registers\""
, "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 2385, __PRETTY_FUNCTION__))
;
2386
2387 // Before splitting the value we cast it to i64
2388 Arg = DAG.getBitcast(MVT::i64, Arg);
2389
2390 // Splitting the value into two i32 types
2391 SDValue Lo, Hi;
2392 Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32, Arg,
2393 DAG.getConstant(0, Dl, MVT::i32));
2394 Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32, Arg,
2395 DAG.getConstant(1, Dl, MVT::i32));
2396
2397 // Attach the two i32 types into corresponding registers
2398 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Lo));
2399 RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), Hi));
2400}
2401
2402SDValue
2403X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
2404 bool isVarArg,
2405 const SmallVectorImpl<ISD::OutputArg> &Outs,
2406 const SmallVectorImpl<SDValue> &OutVals,
2407 const SDLoc &dl, SelectionDAG &DAG) const {
2408 MachineFunction &MF = DAG.getMachineFunction();
2409 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
2410
2411 // In some cases we need to disable registers from the default CSR list.
2412 // For example, when they are used for argument passing.
2413 bool ShouldDisableCalleeSavedRegister =
2414 CallConv == CallingConv::X86_RegCall ||
2415 MF.getFunction().hasFnAttribute("no_caller_saved_registers");
2416
2417 if (CallConv == CallingConv::X86_INTR && !Outs.empty())
2418 report_fatal_error("X86 interrupts may not return any value");
2419
2420 SmallVector<CCValAssign, 16> RVLocs;
2421 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.getContext());
2422 CCInfo.AnalyzeReturn(Outs, RetCC_X86);
2423
2424 SDValue Flag;
2425 SmallVector<SDValue, 6> RetOps;
2426 RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
2427 // Operand #1 = Bytes To Pop
2428 RetOps.push_back(DAG.getTargetConstant(FuncInfo->getBytesToPopOnReturn(), dl,
2429 MVT::i32));
2430
2431 // Copy the result values into the output registers.
2432 for (unsigned I = 0, OutsIndex = 0, E = RVLocs.size(); I != E;
2433 ++I, ++OutsIndex) {
2434 CCValAssign &VA = RVLocs[I];
2435 assert(VA.isRegLoc() && "Can only return in registers!")((VA.isRegLoc() && "Can only return in registers!") ?
static_cast<void> (0) : __assert_fail ("VA.isRegLoc() && \"Can only return in registers!\""
, "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 2435, __PRETTY_FUNCTION__))
;
2436
2437 // Add the register to the CalleeSaveDisableRegs list.
2438 if (ShouldDisableCalleeSavedRegister)
2439 MF.getRegInfo().disableCalleeSavedRegister(VA.getLocReg());
2440
2441 SDValue ValToCopy = OutVals[OutsIndex];
2442 EVT ValVT = ValToCopy.getValueType();
2443
2444 // Promote values to the appropriate types.
2445 if (VA.getLocInfo() == CCValAssign::SExt)
2446 ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy);
2447 else if (VA.getLocInfo() == CCValAssign::ZExt)
2448 ValToCopy = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), ValToCopy);
2449 else if (VA.getLocInfo() == CCValAssign::AExt) {
2450 if (ValVT.isVector() && ValVT.getVectorElementType() == MVT::i1)
2451 ValToCopy = lowerMasksToReg(ValToCopy, VA.getLocVT(), dl, DAG);
2452 else
2453 ValToCopy = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ValToCopy);
2454 }
2455 else if (VA.getLocInfo() == CCValAssign::BCvt)
2456 ValToCopy = DAG.getBitcast(VA.getLocVT(), ValToCopy);
2457
2458 assert(VA.getLocInfo() != CCValAssign::FPExt &&((VA.getLocInfo() != CCValAssign::FPExt && "Unexpected FP-extend for return value."
) ? static_cast<void> (0) : __assert_fail ("VA.getLocInfo() != CCValAssign::FPExt && \"Unexpected FP-extend for return value.\""
, "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 2459, __PRETTY_FUNCTION__))
2459 "Unexpected FP-extend for return value.")((VA.getLocInfo() != CCValAssign::FPExt && "Unexpected FP-extend for return value."
) ? static_cast<void> (0) : __assert_fail ("VA.getLocInfo() != CCValAssign::FPExt && \"Unexpected FP-extend for return value.\""
, "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 2459, __PRETTY_FUNCTION__))
;
2460
2461 // If this is x86-64, and we disabled SSE, we can't return FP values,
2462 // or SSE or MMX vectors.
2463 if ((ValVT == MVT::f32 || ValVT == MVT::f64 ||
2464 VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) &&
2465 (Subtarget.is64Bit() && !Subtarget.hasSSE1())) {
2466 errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
2467 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
2468 } else if (ValVT == MVT::f64 &&
2469 (Subtarget.is64Bit() && !Subtarget.hasSSE2())) {
2470 // Likewise we can't return F64 values with SSE1 only. gcc does so, but
2471 // llvm-gcc has never done it right and no one has noticed, so this
2472 // should be OK for now.
2473 errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");
2474 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
2475 }
2476
2477 // Returns in ST0/ST1 are handled specially: these are pushed as operands to
2478 // the RET instruction and handled by the FP Stackifier.
2479 if (VA.getLocReg() == X86::FP0 ||
2480 VA.getLocReg() == X86::FP1) {
2481 // If this is a copy from an xmm register to ST(0), use an FPExtend to
2482 // change the value to the FP stack register class.
2483 if (isScalarFPTypeInSSEReg(VA.getValVT()))
2484 ValToCopy = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f80, ValToCopy);
2485 RetOps.push_back(ValToCopy);
2486 // Don't emit a copytoreg.
2487 continue;
2488 }
2489
2490 // 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64
2491 // which is returned in RAX / RDX.
2492 if (Subtarget.is64Bit()) {
2493 if (ValVT == MVT::x86mmx) {
2494 if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
2495 ValToCopy = DAG.getBitcast(MVT::i64, ValToCopy);
2496 ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
2497 ValToCopy);
2498 // If we don't have SSE2 available, convert to v4f32 so the generated
2499 // register is legal.
2500 if (!Subtarget.hasSSE2())
2501 ValToCopy = DAG.getBitcast(MVT::v4f32, ValToCopy);
2502 }
2503 }
2504 }
2505
2506 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
2507
2508 if (VA.needsCustom()) {
2509 assert(VA.getValVT() == MVT::v64i1 &&((VA.getValVT() == MVT::v64i1 && "Currently the only custom case is when we split v64i1 to 2 regs"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 2510, __PRETTY_FUNCTION__))
2510 "Currently the only custom case is when we split v64i1 to 2 regs")((VA.getValVT() == MVT::v64i1 && "Currently the only custom case is when we split v64i1 to 2 regs"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 2510, __PRETTY_FUNCTION__))
;
2511
2512 Passv64i1ArgInRegs(dl, DAG, Chain, ValToCopy, RegsToPass, VA, RVLocs[++I],
2513 Subtarget);
2514
2515 assert(2 == RegsToPass.size() &&((2 == RegsToPass.size() && "Expecting two registers after Pass64BitArgInRegs"
) ? static_cast<void> (0) : __assert_fail ("2 == RegsToPass.size() && \"Expecting two registers after Pass64BitArgInRegs\""
, "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 2516, __PRETTY_FUNCTION__))
2516 "Expecting two registers after Pass64BitArgInRegs")((2 == RegsToPass.size() && "Expecting two registers after Pass64BitArgInRegs"
) ? static_cast<void> (0) : __assert_fail ("2 == RegsToPass.size() && \"Expecting two registers after Pass64BitArgInRegs\""
, "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 2516, __PRETTY_FUNCTION__))
;
2517
2518 // Add the second register to the CalleeSaveDisableRegs list.
2519 if (ShouldDisableCalleeSavedRegister)
2520 MF.getRegInfo().disableCalleeSavedRegister(RVLocs[I].getLocReg());
2521 } else {
2522 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
2523 }
2524
2525 // Add nodes to the DAG and add the values into the RetOps list
2526 for (auto &Reg : RegsToPass) {
2527 Chain = DAG.getCopyToReg(Chain, dl, Reg.first, Reg.second, Flag);
2528 Flag = Chain.getValue(1);
2529 RetOps.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
2530 }
2531 }
2532
2533 // Swift calling convention does not require we copy the sret argument
2534 // into %rax/%eax for the return, and SRetReturnReg is not set for Swift.
2535
2536 // All x86 ABIs require that for returning structs by value we copy
2537 // the sret argument into %rax/%eax (depending on ABI) for the return.
2538 // We saved the argument into a virtual register in the entry block,
2539 // so now we copy the value out and into %rax/%eax.
2540 //
2541 // Checking Function.hasStructRetAttr() here is insufficient because the IR
2542 // may not have an explicit sret argument. If FuncInfo.CanLowerReturn is
2543 // false, then an sret argument may be implicitly inserted in the SelDAG. In
2544 // either case FuncInfo->setSRetReturnReg() will have been called.
2545 if (unsigned SRetReg = FuncInfo->getSRetReturnReg()) {
2546 // When we have both sret and another return value, we should use the
2547 // original Chain stored in RetOps[0], instead of the current Chain updated
2548 // in the above loop. If we only have sret, RetOps[0] equals to Chain.
2549
2550 // For the case of sret and another return value, we have
2551 // Chain_0 at the function entry
2552 // Chain_1 = getCopyToReg(Chain_0) in the above loop
2553 // If we use Chain_1 in getCopyFromReg, we will have
2554 // Val = getCopyFromReg(Chain_1)
2555 // Chain_2 = getCopyToReg(Chain_1, Val) from below
2556
2557 // getCopyToReg(Chain_0) will be glued together with
2558 // getCopyToReg(Chain_1, Val) into Unit A, getCopyFromReg(Chain_1) will be
2559 // in Unit B, and we will have cyclic dependency between Unit A and Unit B:
2560 // Data dependency from Unit B to Unit A due to usage of Val in
2561 // getCopyToReg(Chain_1, Val)
2562 // Chain dependency from Unit A to Unit B
2563
2564 // So here, we use RetOps[0] (i.e Chain_0) for getCopyFromReg.
2565 SDValue Val = DAG.getCopyFromReg(RetOps[0], dl, SRetReg,
2566 getPointerTy(MF.getDataLayout()));
2567
2568 unsigned RetValReg
2569 = (Subtarget.is64Bit() && !Subtarget.isTarget64BitILP32()) ?
2570 X86::RAX : X86::EAX;
2571 Chain = DAG.getCopyToReg(Chain, dl, RetValReg, Val, Flag);
2572 Flag = Chain.getValue(1);
2573
2574 // RAX/EAX now acts like a return value.
2575 RetOps.push_back(
2576 DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout())));
2577
2578 // Add the returned register to the CalleeSaveDisableRegs list.
2579 if (ShouldDisableCalleeSavedRegister)
2580 MF.getRegInfo().disableCalleeSavedRegister(RetValReg);
2581 }
2582
2583 const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
2584 const MCPhysReg *I =
2585 TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
2586 if (I) {
2587 for (; *I; ++I) {
2588 if (X86::GR64RegClass.contains(*I))
2589 RetOps.push_back(DAG.getRegister(*I, MVT::i64));
2590 else
2591 llvm_unreachable("Unexpected register class in CSRsViaCopy!")::llvm::llvm_unreachable_internal("Unexpected register class in CSRsViaCopy!"
, "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 2591)
;
2592 }
2593 }
2594
2595 RetOps[0] = Chain; // Update chain.
2596
2597 // Add the flag if we have it.
2598 if (Flag.getNode())
2599 RetOps.push_back(Flag);
2600
2601 X86ISD::NodeType opcode = X86ISD::RET_FLAG;
2602 if (CallConv == CallingConv::X86_INTR)
2603 opcode = X86ISD::IRET;
2604 return DAG.getNode(opcode, dl, MVT::Other, RetOps);
2605}
2606
2607bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
2608 if (N->getNumValues() != 1 || !N->hasNUsesOfValue(1, 0))
2609 return false;
2610
2611 SDValue TCChain = Chain;
2612 SDNode *Copy = *N->use_begin();
2613 if (Copy->getOpcode() == ISD::CopyToReg) {
2614 // If the copy has a glue operand, we conservatively assume it isn't safe to
2615 // perform a tail call.
2616 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
2617 return false;
2618 TCChain = Copy->getOperand(0);
2619 } else if (Copy->getOpcode() != ISD::FP_EXTEND)
2620 return false;
2621
2622 bool HasRet = false;
2623 for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
2624 UI != UE; ++UI) {
2625 if (UI->getOpcode() != X86ISD::RET_FLAG)
2626 return false;
2627 // If we are returning more than one value, we can definitely
2628 // not make a tail call see PR19530
2629 if (UI->getNumOperands() > 4)
2630 return false;
2631 if (UI->getNumOperands() == 4 &&
2632 UI->getOperand(UI->getNumOperands()-1).getValueType() != MVT::Glue)
2633 return false;
2634 HasRet = true;
2635 }
2636
2637 if (!HasRet)
2638 return false;
2639
2640 Chain = TCChain;
2641 return true;
2642}
2643
2644EVT X86TargetLowering::getTypeForExtReturn(LLVMContext &Context, EVT VT,
2645 ISD::NodeType ExtendKind) const {
2646 MVT ReturnMVT = MVT::i32;
2647
2648 bool Darwin = Subtarget.getTargetTriple().isOSDarwin();
2649 if (VT == MVT::i1 || (!Darwin && (VT == MVT::i8 || VT == MVT::i16))) {
2650 // The ABI does not require i1, i8 or i16 to be extended.
2651 //
2652 // On Darwin, there is code in the wild relying on Clang's old behaviour of
2653 // always extending i8/i16 return values, so keep doing that for now.
2654 // (PR26665).
2655 ReturnMVT = MVT::i8;
2656 }
2657
2658 EVT MinVT = getRegisterType(Context, ReturnMVT);
2659 return VT.bitsLT(MinVT) ? MinVT : VT;
2660}
2661
2662/// Reads two 32 bit registers and creates a 64 bit mask value.
2663/// \param VA The current 32 bit value that need to be assigned.
2664/// \param NextVA The next 32 bit value that need to be assigned.
2665/// \param Root The parent DAG node.
2666/// \param [in,out] InFlag Represents SDvalue in the parent DAG node for
2667/// glue purposes. In the case the DAG is already using
2668/// physical register instead of virtual, we should glue
2669/// our new SDValue to InFlag SDvalue.
2670/// \return a new SDvalue of size 64bit.
2671static SDValue getv64i1Argument(CCValAssign &VA, CCValAssign &NextVA,
2672 SDValue &Root, SelectionDAG &DAG,
2673 const SDLoc &Dl, const X86Subtarget &Subtarget,
2674 SDValue *InFlag = nullptr) {
2675 assert((Subtarget.hasBWI()) && "Expected AVX512BW target!")(((Subtarget.hasBWI()) && "Expected AVX512BW target!"
) ? static_cast<void> (0) : __assert_fail ("(Subtarget.hasBWI()) && \"Expected AVX512BW target!\""
, "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 2675, __PRETTY_FUNCTION__))
;
2676 assert(Subtarget.is32Bit() && "Expecting 32 bit target")((Subtarget.is32Bit() && "Expecting 32 bit target") ?
static_cast<void> (0) : __assert_fail ("Subtarget.is32Bit() && \"Expecting 32 bit target\""
, "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 2676, __PRETTY_FUNCTION__))
;
2677 assert(VA.getValVT() == MVT::v64i1 &&((VA.getValVT() == MVT::v64i1 && "Expecting first location of 64 bit width type"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Expecting first location of 64 bit width type\""
, "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 2678, __PRETTY_FUNCTION__))
2678 "Expecting first location of 64 bit width type")((VA.getValVT() == MVT::v64i1 && "Expecting first location of 64 bit width type"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Expecting first location of 64 bit width type\""
, "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 2678, __PRETTY_FUNCTION__))
;
2679 assert(NextVA.getValVT() == VA.getValVT() &&((NextVA.getValVT() == VA.getValVT() && "The locations should have the same type"
) ? static_cast<void> (0) : __assert_fail ("NextVA.getValVT() == VA.getValVT() && \"The locations should have the same type\""
, "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 2680, __PRETTY_FUNCTION__))
2680 "The locations should have the same type")((NextVA.getValVT() == VA.getValVT() && "The locations should have the same type"
) ? static_cast<void> (0) : __assert_fail ("NextVA.getValVT() == VA.getValVT() && \"The locations should have the same type\""
, "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 2680, __PRETTY_FUNCTION__))
;
2681 assert(VA.isRegLoc() && NextVA.isRegLoc() &&((VA.isRegLoc() && NextVA.isRegLoc() && "The values should reside in two registers"
) ? static_cast<void> (0) : __assert_fail ("VA.isRegLoc() && NextVA.isRegLoc() && \"The values should reside in two registers\""
, "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 2682, __PRETTY_FUNCTION__))
2682 "The values should reside in two registers")((VA.isRegLoc() && NextVA.isRegLoc() && "The values should reside in two registers"
) ? static_cast<void> (0) : __assert_fail ("VA.isRegLoc() && NextVA.isRegLoc() && \"The values should reside in two registers\""
, "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 2682, __PRETTY_FUNCTION__))
;
2683
2684 SDValue Lo, Hi;
2685 unsigned Reg;
2686 SDValue ArgValueLo, ArgValueHi;
2687
2688 MachineFunction &MF = DAG.getMachineFunction();
2689 const TargetRegisterClass *RC = &X86::GR32RegClass;
2690
2691 // Read a 32 bit value from the registers.
2692 if (nullptr == InFlag) {
2693 // When no physical register is present,
2694 // create an intermediate virtual register.
2695 Reg = MF.addLiveIn(VA.getLocReg(), RC);
2696 ArgValueLo = DAG.getCopyFromReg(Root, Dl, Reg, MVT::i32);
2697 Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
2698 ArgValueHi = DAG.getCopyFromReg(Root, Dl, Reg, MVT::i32);
2699 } else {
2700 // When a physical register is available read the value from it and glue
2701 // the reads together.
2702 ArgValueLo =
2703 DAG.getCopyFromReg(Root, Dl, VA.getLocReg(), MVT::i32, *InFlag);
2704 *InFlag = ArgValueLo.getValue(2);
2705 ArgValueHi =
2706 DAG.getCopyFromReg(Root, Dl, NextVA.getLocReg(), MVT::i32, *InFlag);
2707 *InFlag = ArgValueHi.getValue(2);
2708 }
2709
2710 // Convert the i32 type into v32i1 type.
2711 Lo = DAG.getBitcast(MVT::v32i1, ArgValueLo);
2712
2713 // Convert the i32 type into v32i1 type.
2714 Hi = DAG.getBitcast(MVT::v32i1, ArgValueHi);
2715
2716 // Concatenate the two values together.
2717 return DAG.getNode(ISD::CONCAT_VECTORS, Dl, MVT::v64i1, Lo, Hi);
2718}
2719
2720/// The function will lower a register of various sizes (8/16/32/64)
2721/// to a mask value of the expected size (v8i1/v16i1/v32i1/v64i1)
2722/// \returns a DAG node contains the operand after lowering to mask type.
2723static SDValue lowerRegToMasks(const SDValue &ValArg, const EVT &ValVT,
2724 const EVT &ValLoc, const SDLoc &Dl,
2725 SelectionDAG &DAG) {
2726 SDValue ValReturned = ValArg;
2727
2728 if (ValVT == MVT::v1i1)
2729 return DAG.getNode(ISD::SCALAR_TO_VECTOR, Dl, MVT::v1i1, ValReturned);
2730
2731 if (ValVT == MVT::v64i1) {
2732 // In 32 bit machine, this case is handled by getv64i1Argument
2733 assert(ValLoc == MVT::i64 && "Expecting only i64 locations")((ValLoc == MVT::i64 && "Expecting only i64 locations"
) ? static_cast<void> (0) : __assert_fail ("ValLoc == MVT::i64 && \"Expecting only i64 locations\""
, "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 2733, __PRETTY_FUNCTION__))
;
2734 // In 64 bit machine, There is no need to truncate the value only bitcast
2735 } else {
2736 MVT maskLen;
2737 switch (ValVT.getSimpleVT().SimpleTy) {
2738 case MVT::v8i1:
2739 maskLen = MVT::i8;
2740 break;
2741 case MVT::v16i1:
2742 maskLen = MVT::i16;
2743 break;
2744 case MVT::v32i1:
2745 maskLen = MVT::i32;
2746 break;
2747 default:
2748 llvm_unreachable("Expecting a vector of i1 types")::llvm::llvm_unreachable_internal("Expecting a vector of i1 types"
, "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 2748)
;
2749 }
2750
2751 ValReturned = DAG.getNode(ISD::TRUNCATE, Dl, maskLen, ValReturned);
2752 }
2753 return DAG.getBitcast(ValVT, ValReturned);
2754}
2755
2756/// Lower the result values of a call into the
2757/// appropriate copies out of appropriate physical registers.
2758///
/// The return-value locations are computed with RetCC_X86. Each location is
/// read (CopyFromReg, or getv64i1Argument for custom locations), converted
/// back to its value type where needed, and appended to \p InVals.
/// When \p RegMask is non-null, the bit of every result register and of each
/// of its sub-registers is cleared in it.
/// \returns the (possibly updated) chain.
2759SDValue X86TargetLowering::LowerCallResult(
2760 SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
2761 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
2762 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
2763 uint32_t *RegMask) const {
2764
2765 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
2766 // Assign locations to each value returned by this call.
2767 SmallVector<CCValAssign, 16> RVLocs;
2768 bool Is64Bit = Subtarget.is64Bit();
2769 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
2770 *DAG.getContext());
2771 CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
2772
2773 // Copy all of the result registers out of their specified physreg.
// I indexes RVLocs and InsIndex indexes Ins. The two can diverge: a custom
// location consumes two RVLocs entries (see the ++I below) while InsIndex
// advances only once per iteration.
2774 for (unsigned I = 0, InsIndex = 0, E = RVLocs.size(); I != E;
2775 ++I, ++InsIndex) {
2776 CCValAssign &VA = RVLocs[I];
2777 EVT CopyVT = VA.getLocVT();
2778
2779 // In some calling conventions we need to remove the used registers
2780 // from the register mask.
2781 if (RegMask) {
// RegMask is treated as an array of 32-bit words with one bit per register
// number.
2782 for (MCSubRegIterator SubRegs(VA.getLocReg(), TRI, /*IncludeSelf=*/true);
2783 SubRegs.isValid(); ++SubRegs)
2784 RegMask[*SubRegs / 32] &= ~(1u << (*SubRegs % 32));
2785 }
2786
2787 // If this is x86-64, and we disabled SSE, we can't return FP values
2788 if ((CopyVT == MVT::f32 || CopyVT == MVT::f64 || CopyVT == MVT::f128) &&
2789 ((Is64Bit || Ins[InsIndex].Flags.isInReg()) && !Subtarget.hasSSE1())) {
2790 errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
2791 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
2792 }
2793
2794 // If we prefer to use the value in xmm registers, copy it out as f80 and
2795 // use a truncate to move it from fp stack reg to xmm reg.
2796 bool RoundAfterCopy = false;
2797 if ((VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) &&
2798 isScalarFPTypeInSSEReg(VA.getValVT())) {
2799 if (!Subtarget.hasX87())
2800 report_fatal_error("X87 register return with X87 disabled");
2801 CopyVT = MVT::f80;
2802 RoundAfterCopy = (CopyVT != VA.getLocVT());
2803 }
2804
2805 SDValue Val;
2806 if (VA.needsCustom()) {
2807 assert(VA.getValVT() == MVT::v64i1 &&((VA.getValVT() == MVT::v64i1 && "Currently the only custom case is when we split v64i1 to 2 regs"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 2808, __PRETTY_FUNCTION__))
2808 "Currently the only custom case is when we split v64i1 to 2 regs")((VA.getValVT() == MVT::v64i1 && "Currently the only custom case is when we split v64i1 to 2 regs"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 2808, __PRETTY_FUNCTION__))
;
// The value occupies this location and the next one; ++I consumes the second
// entry. InFlag is passed by address so the helper can update it.
2809 Val =
2810 getv64i1Argument(VA, RVLocs[++I], Chain, DAG, dl, Subtarget, &InFlag);
2811 } else {
// Results 0, 1 and 2 of the CopyFromReg node are used as the value, the
// chain and the glue respectively.
2812 Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), CopyVT, InFlag)
2813 .getValue(1);
2814 Val = Chain.getValue(0);
2815 InFlag = Chain.getValue(2);
2816 }
2817
2818 if (RoundAfterCopy)
2819 Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val,
2820 // This truncation won't change the value.
2821 DAG.getIntPtrConstant(1, dl));
2822
// i1-based values that were extended in their location are converted back:
// vector masks held in i8/i16/i32/i64 go through lowerRegToMasks, anything
// else is truncated to the value type.
2823 if (VA.isExtInLoc() && (VA.getValVT().getScalarType() == MVT::i1)) {
2824 if (VA.getValVT().isVector() &&
2825 ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
2826 (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
2827 // promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
2828 Val = lowerRegToMasks(Val, VA.getValVT(), VA.getLocVT(), dl, DAG);
2829 } else
2830 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
2831 }
2832
2833 InVals.push_back(Val);
2834 }
2835
2836 return Chain;
2837}
2838
2839//===----------------------------------------------------------------------===//
2840// C & StdCall & Fast Calling Convention implementation
2841//===----------------------------------------------------------------------===//
2842// StdCall calling convention seems to be standard for many Windows' API
2843// routines and around. It differs from C calling convention just a little:
2844// callee should clean up the stack, not caller. Symbols should be also
2845// decorated in some fancy way :) It doesn't support any vector arguments.
2846// For info on fast calling convention see Fast Calling Convention (tail call)
2847// implementation LowerX86_32FastCCCallTo.
2848
/// Result of callIsStructReturn / argsAreStructReturn: whether a call or a
/// function uses struct return semantics, and in which form.
enum StructReturnType {
  /// No arguments, or the first argument is not marked sret.
  NotStructReturn = 0,
  /// The sret argument is also marked inreg, or the target is MCU.
  RegStructReturn = 1,
  /// Any other sret argument.
  StackStructReturn = 2
};
2856static StructReturnType
2857callIsStructReturn(ArrayRef<ISD::OutputArg> Outs, bool IsMCU) {
2858 if (Outs.empty())
2859 return NotStructReturn;
2860
2861 const ISD::ArgFlagsTy &Flags = Outs[0].Flags;
2862 if (!Flags.isSRet())
2863 return NotStructReturn;
2864 if (Flags.isInReg() || IsMCU)
2865 return RegStructReturn;
2866 return StackStructReturn;
2867}
2868
2869/// Determines whether a function uses struct return semantics.
2870static StructReturnType
2871argsAreStructReturn(ArrayRef<ISD::InputArg> Ins, bool IsMCU) {
2872 if (Ins.empty())
2873 return NotStructReturn;
2874
2875 const ISD::ArgFlagsTy &Flags = Ins[0].Flags;
2876 if (!Flags.isSRet())
2877 return NotStructReturn;
2878 if (Flags.isInReg() || IsMCU)
2879 return RegStructReturn;
2880 return StackStructReturn;
2881}
2882
2883/// Make a copy of an aggregate at address specified by "Src" to address
2884/// "Dst" with size and alignment information specified by the specific
2885/// parameter attribute. The copy will be passed as a byval function parameter.
2886static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
2887 SDValue Chain, ISD::ArgFlagsTy Flags,
2888 SelectionDAG &DAG, const SDLoc &dl) {
2889 SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
2890
2891 return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
2892 /*isVolatile*/false, /*AlwaysInline=*/true,
2893 /*isTailCall*/false,
2894 MachinePointerInfo(), MachinePointerInfo());
2895}
2896
2897/// Return true if the calling convention is one that we can guarantee TCO for.
2898static bool canGuaranteeTCO(CallingConv::ID CC) {
2899 return (CC == CallingConv::Fast || CC == CallingConv::GHC ||
2900 CC == CallingConv::X86_RegCall || CC == CallingConv::HiPE ||
2901 CC == CallingConv::HHVM);
2902}
2903
2904/// Return true if we might ever do TCO for calls with this calling convention.
2905static bool mayTailCallThisCC(CallingConv::ID CC) {
2906 switch (CC) {
2907 // C calling conventions:
2908 case CallingConv::C:
2909 case CallingConv::Win64:
2910 case CallingConv::X86_64_SysV:
2911 // Callee pop conventions:
2912 case CallingConv::X86_ThisCall:
2913 case CallingConv::X86_StdCall:
2914 case CallingConv::X86_VectorCall:
2915 case CallingConv::X86_FastCall:
2916 return true;
2917 default:
2918 return canGuaranteeTCO(CC);
2919 }
2920}
2921
2922/// Return true if the function is being made into a tailcall target by
2923/// changing its ABI.
2924static bool shouldGuaranteeTCO(CallingConv::ID CC, bool GuaranteedTailCallOpt) {
2925 return GuaranteedTailCallOpt && canGuaranteeTCO(CC);
2926}
2927
2928bool X86TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
2929 auto Attr =
2930 CI->getParent()->getParent()->getFnAttribute("disable-tail-calls");
2931 if (!CI->isTailCall() || Attr.getValueAsString() == "true")
2932 return false;
2933
2934 ImmutableCallSite CS(CI);
2935 CallingConv::ID CalleeCC = CS.getCallingConv();
2936 if (!mayTailCallThisCC(CalleeCC))
2937 return false;
2938
2939 return true;
2940}
2941
/// Build the DAG value for incoming argument \p i of \p Ins whose location
/// \p VA is in memory.
/// For byval arguments the result is a frame index of a fixed stack object.
/// Otherwise it is a load from a fixed stack object, followed by a
/// SCALAR_TO_VECTOR or TRUNCATE when an i1-based value was stored extended.
/// For the X86_INTR calling convention the object offset is overridden with
/// the offset computed below.
2942SDValue
2943X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
2944 const SmallVectorImpl<ISD::InputArg> &Ins,
2945 const SDLoc &dl, SelectionDAG &DAG,
2946 const CCValAssign &VA,
2947 MachineFrameInfo &MFI, unsigned i) const {
2948 // Create the nodes corresponding to a load from this parameter slot.
2949 ISD::ArgFlagsTy Flags = Ins[i].Flags;
2950 bool AlwaysUseMutable = shouldGuaranteeTCO(
2951 CallConv, DAG.getTarget().Options.GuaranteedTailCallOpt);
2952 bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
2953 EVT ValVT;
2954 MVT PtrVT = getPointerTy(DAG.getDataLayout());
2955
2956 // If value is passed by pointer we have address passed instead of the value
2957 // itself. No need to extend if the mask value and location share the same
2958 // absolute size.
2959 bool ExtendedInMem =
2960 VA.isExtInLoc() && VA.getValVT().getScalarType() == MVT::i1 &&
2961 VA.getValVT().getSizeInBits() != VA.getLocVT().getSizeInBits();
2962
// ValVT is the type that is actually loaded from the stack slot.
2963 if (VA.getLocInfo() == CCValAssign::Indirect || ExtendedInMem)
2964 ValVT = VA.getLocVT();
2965 else
2966 ValVT = VA.getValVT();
2967
2968 // Calculate SP offset of interrupt parameter, re-arrange the slot normally
2969 // taken by a return address.
2970 int Offset = 0;
2971 if (CallConv == CallingConv::X86_INTR) {
2972 // X86 interrupts may take one or two arguments.
2973 // On the stack there will be no return address as in regular call.
2974 // Offset of last argument need to be set to -4/-8 bytes.
2975 // Where offset of the first argument out of two, should be set to 0 bytes.
// NOTE(review): (i + 1) % Ins.size() - 1 is 0 for the first of two
// arguments and "-1" for the last argument; the subtraction appears to be
// done in unsigned arithmetic and to rely on the wrapped product converting
// to a negative int - confirm.
2976 Offset = (Subtarget.is64Bit() ? 8 : 4) * ((i + 1) % Ins.size() - 1);
2977 if (Subtarget.is64Bit() && Ins.size() == 2) {
2978 // The stack pointer needs to be realigned for 64 bit handlers with error
2979 // code, so the argument offset changes by 8 bytes.
2980 Offset += 8;
2981 }
2982 }
2983
2984 // FIXME: For now, all byval parameter objects are marked mutable. This can be
2985 // changed with more analysis.
2986 // In case of tail call optimization mark all arguments mutable. Since they
2987 // could be overwritten by lowering of arguments in case of a tail call.
2988 if (Flags.isByVal()) {
2989 unsigned Bytes = Flags.getByValSize();
2990 if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects.
2991
2992 // FIXME: For now, all byval parameter objects are marked as aliasing. This
2993 // can be improved with deeper analysis.
2994 int FI = MFI.CreateFixedObject(Bytes, VA.getLocMemOffset(), isImmutable,
2995 /*isAliased=*/true);
2996 // Adjust SP offset of interrupt parameter.
2997 if (CallConv == CallingConv::X86_INTR) {
2998 MFI.setObjectOffset(FI, Offset);
2999 }
// byval: the frame index itself is returned; no load is emitted here.
3000 return DAG.getFrameIndex(FI, PtrVT);
3001 }
3002
3003 // This is an argument in memory. We might be able to perform copy elision.
3004 if (Flags.isCopyElisionCandidate()) {
3005 EVT ArgVT = Ins[i].ArgVT;
3006 SDValue PartAddr;
3007 if (Ins[i].PartOffset == 0) {
3008 // If this is a one-part value or the first part of a multi-part value,
3009 // create a stack object for the entire argument value type and return a
3010 // load from our portion of it. This assumes that if the first part of an
3011 // argument is in memory, the rest will also be in memory.
3012 int FI = MFI.CreateFixedObject(ArgVT.getStoreSize(), VA.getLocMemOffset(),
3013 /*Immutable=*/false);
3014 PartAddr = DAG.getFrameIndex(FI, PtrVT);
3015 return DAG.getLoad(
3016 ValVT, dl, Chain, PartAddr,
3017 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
3018 } else {
3019 // This is not the first piece of an argument in memory. See if there is
3020 // already a fixed stack object including this offset. If so, assume it
3021 // was created by the PartOffset == 0 branch above and create a load from
3022 // the appropriate offset into it.
3023 int64_t PartBegin = VA.getLocMemOffset();
3024 int64_t PartEnd = PartBegin + ValVT.getSizeInBits() / 8;
3025 int FI = MFI.getObjectIndexBegin();
// Scan the fixed objects for one whose [ObjBegin, ObjEnd) range fully
// contains [PartBegin, PartEnd).
3026 for (; MFI.isFixedObjectIndex(FI); ++FI) {
3027 int64_t ObjBegin = MFI.getObjectOffset(FI);
3028 int64_t ObjEnd = ObjBegin + MFI.getObjectSize(FI);
3029 if (ObjBegin <= PartBegin && PartEnd <= ObjEnd)
3030 break;
3031 }
// FI is still a fixed-object index only if the loop stopped on a match.
3032 if (MFI.isFixedObjectIndex(FI)) {
3033 SDValue Addr =
3034 DAG.getNode(ISD::ADD, dl, PtrVT, DAG.getFrameIndex(FI, PtrVT),
3035 DAG.getIntPtrConstant(Ins[i].PartOffset, dl));
3036 return DAG.getLoad(
3037 ValVT, dl, Chain, Addr,
3038 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI,
3039 Ins[i].PartOffset));
3040 }
// No enclosing object was found: fall through to the generic path below.
3041 }
3042 }
3043
// Generic path: create a dedicated fixed object for this argument.
3044 int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8,
3045 VA.getLocMemOffset(), isImmutable);
3046
3047 // Set SExt or ZExt flag.
3048 if (VA.getLocInfo() == CCValAssign::ZExt) {
3049 MFI.setObjectZExt(FI, true);
3050 } else if (VA.getLocInfo() == CCValAssign::SExt) {
3051 MFI.setObjectSExt(FI, true);
3052 }
3053
3054 // Adjust SP offset of interrupt parameter.
3055 if (CallConv == CallingConv::X86_INTR) {
3056 MFI.setObjectOffset(FI, Offset);
3057 }
3058
3059 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3060 SDValue Val = DAG.getLoad(
3061 ValVT, dl, Chain, FIN,
3062 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
// When the i1-based value was stored extended, convert the loaded location
// value back to the value type; otherwise return the load as is.
3063 return ExtendedInMem
3064 ? (VA.getValVT().isVector()
3065 ? DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VA.getValVT(), Val)
3066 : DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val))
3067 : Val;
3068}
3069
3070// FIXME: Get this from tablegen.
3071static ArrayRef<MCPhysReg> get64BitArgumentGPRs(CallingConv::ID CallConv,
3072 const X86Subtarget &Subtarget) {
3073 assert(Subtarget.is64Bit())((Subtarget.is64Bit()) ? static_cast<void> (0) : __assert_fail
("Subtarget.is64Bit()", "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 3073, __PRETTY_FUNCTION__))
;
3074
3075 if (Subtarget.isCallingConvWin64(CallConv)) {
3076 static const MCPhysReg GPR64ArgRegsWin64[] = {
3077 X86::RCX, X86::RDX, X86::R8, X86::R9
3078 };
3079 return makeArrayRef(std::begin(GPR64ArgRegsWin64), std::end(GPR64ArgRegsWin64));
3080 }
3081
3082 static const MCPhysReg GPR64ArgRegs64Bit[] = {
3083 X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
3084 };
3085 return makeArrayRef(std::begin(GPR64ArgRegs64Bit), std::end(GPR64ArgRegs64Bit));
3086}
3087
3088// FIXME: Get this from tablegen.
3089static ArrayRef<MCPhysReg> get64BitArgumentXMMs(MachineFunction &MF,
3090 CallingConv::ID CallConv,
3091 const X86Subtarget &Subtarget) {
3092 assert(Subtarget.is64Bit())((Subtarget.is64Bit()) ? static_cast<void> (0) : __assert_fail
("Subtarget.is64Bit()", "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 3092, __PRETTY_FUNCTION__))
;
3093 if (Subtarget.isCallingConvWin64(CallConv)) {
3094 // The XMM registers which might contain var arg parameters are shadowed
3095 // in their paired GPR. So we only need to save the GPR to their home
3096 // slots.
3097 // TODO: __vectorcall will change this.
3098 return None;
3099 }
3100
3101 const Function &F = MF.getFunction();
3102 bool NoImplicitFloatOps = F.hasFnAttribute(Attribute::NoImplicitFloat);
3103 bool isSoftFloat = Subtarget.useSoftFloat();
3104 assert(!(isSoftFloat && NoImplicitFloatOps) &&((!(isSoftFloat && NoImplicitFloatOps) && "SSE register cannot be used when SSE is disabled!"
) ? static_cast<void> (0) : __assert_fail ("!(isSoftFloat && NoImplicitFloatOps) && \"SSE register cannot be used when SSE is disabled!\""
, "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 3105, __PRETTY_FUNCTION__))
3105 "SSE register cannot be used when SSE is disabled!")((!(isSoftFloat && NoImplicitFloatOps) && "SSE register cannot be used when SSE is disabled!"
) ? static_cast<void> (0) : __assert_fail ("!(isSoftFloat && NoImplicitFloatOps) && \"SSE register cannot be used when SSE is disabled!\""
, "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 3105, __PRETTY_FUNCTION__))
;
3106 if (isSoftFloat || NoImplicitFloatOps || !Subtarget.hasSSE1())
3107 // Kernel mode asks for SSE to be disabled, so there are no XMM argument
3108 // registers.
3109 return None;
3110
3111 static const MCPhysReg XMMArgRegs64Bit[] = {
3112 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
3113 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
3114 };
3115 return makeArrayRef(std::begin(XMMArgRegs64Bit), std::end(XMMArgRegs64Bit));
3116}
3117
3118#ifndef NDEBUG
3119static bool isSortedByValueNo(ArrayRef<CCValAssign> ArgLocs) {
3120 return std::is_sorted(ArgLocs.begin(), ArgLocs.end(),
3121 [](const CCValAssign &A, const CCValAssign &B) -> bool {
3122 return A.getValNo() < B.getValNo();
3123 });
3124}
3125#endif
3126
3127SDValue X86TargetLowering::LowerFormalArguments(
3128 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3129 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3130 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3131 MachineFunction &MF = DAG.getMachineFunction();
3132 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
3133 const TargetFrameLowering &TFI = *Subtarget.getFrameLowering();
3134
3135 const Function &F = MF.getFunction();
3136 if (F.hasExternalLinkage() && Subtarget.isTargetCygMing() &&
3137 F.getName() == "main")
3138 FuncInfo->setForceFramePointer(true);
3139
3140 MachineFrameInfo &MFI = MF.getFrameInfo();
3141 bool Is64Bit = Subtarget.is64Bit();
3142 bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
3143
3144 assert(((!(isVarArg && canGuaranteeTCO(CallConv)) &&
"Var args not supported with calling conv' regcall, fastcc, ghc or hipe"
) ? static_cast<void> (0) : __assert_fail ("!(isVarArg && canGuaranteeTCO(CallConv)) && \"Var args not supported with calling conv' regcall, fastcc, ghc or hipe\""
, "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 3146, __PRETTY_FUNCTION__))
3145 !(isVarArg && canGuaranteeTCO(CallConv)) &&((!(isVarArg && canGuaranteeTCO(CallConv)) &&
"Var args not supported with calling conv' regcall, fastcc, ghc or hipe"
) ? static_cast<void> (0) : __assert_fail ("!(isVarArg && canGuaranteeTCO(CallConv)) && \"Var args not supported with calling conv' regcall, fastcc, ghc or hipe\""
, "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 3146, __PRETTY_FUNCTION__))
3146 "Var args not supported with calling conv' regcall, fastcc, ghc or hipe")((!(isVarArg && canGuaranteeTCO(CallConv)) &&
"Var args not supported with calling conv' regcall, fastcc, ghc or hipe"
) ? static_cast<void> (0) : __assert_fail ("!(isVarArg && canGuaranteeTCO(CallConv)) && \"Var args not supported with calling conv' regcall, fastcc, ghc or hipe\""
, "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 3146, __PRETTY_FUNCTION__))
;
3147
3148 if (CallConv == CallingConv::X86_INTR) {
3149 bool isLegal = Ins.size() == 1 ||
3150 (Ins.size() == 2 && ((Is64Bit && Ins[1].VT == MVT::i64) ||
3151 (!Is64Bit && Ins[1].VT == MVT::i32)));
3152 if (!isLegal)
3153 report_fatal_error("X86 interrupts may take one or two arguments");
3154 }
3155
3156 // Assign locations to all of the incoming arguments.
3157 SmallVector<CCValAssign, 16> ArgLocs;
3158 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
3159
3160 // Allocate shadow area for Win64.
3161 if (IsWin64)
3162 CCInfo.AllocateStack(32, 8);
3163
3164 CCInfo.AnalyzeArguments(Ins, CC_X86);
3165
3166 // In vectorcall calling convention a second pass is required for the HVA
3167 // types.
3168 if (CallingConv::X86_VectorCall == CallConv) {
3169 CCInfo.AnalyzeArgumentsSecondPass(Ins, CC_X86);
3170 }
3171
3172 // The next loop assumes that the locations are in the same order of the
3173 // input arguments.
3174 assert(isSortedByValueNo(ArgLocs) &&((isSortedByValueNo(ArgLocs) && "Argument Location list must be sorted before lowering"
) ? static_cast<void> (0) : __assert_fail ("isSortedByValueNo(ArgLocs) && \"Argument Location list must be sorted before lowering\""
, "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 3175, __PRETTY_FUNCTION__))
3175 "Argument Location list must be sorted before lowering")((isSortedByValueNo(ArgLocs) && "Argument Location list must be sorted before lowering"
) ? static_cast<void> (0) : __assert_fail ("isSortedByValueNo(ArgLocs) && \"Argument Location list must be sorted before lowering\""
, "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 3175, __PRETTY_FUNCTION__))
;
3176
3177 SDValue ArgValue;
3178 for (unsigned I = 0, InsIndex = 0, E = ArgLocs.size(); I != E;
3179 ++I, ++InsIndex) {
3180 assert(InsIndex < Ins.size() && "Invalid Ins index")((InsIndex < Ins.size() && "Invalid Ins index") ? static_cast
<void> (0) : __assert_fail ("InsIndex < Ins.size() && \"Invalid Ins index\""
, "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 3180, __PRETTY_FUNCTION__))
;
3181 CCValAssign &VA = ArgLocs[I];
3182
3183 if (VA.isRegLoc()) {
3184 EVT RegVT = VA.getLocVT();
3185 if (VA.needsCustom()) {
3186 assert(((VA.getValVT() == MVT::v64i1 && "Currently the only custom case is when we split v64i1 to 2 regs"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 3188, __PRETTY_FUNCTION__))
3187 VA.getValVT() == MVT::v64i1 &&((VA.getValVT() == MVT::v64i1 && "Currently the only custom case is when we split v64i1 to 2 regs"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 3188, __PRETTY_FUNCTION__))
3188 "Currently the only custom case is when we split v64i1 to 2 regs")((VA.getValVT() == MVT::v64i1 && "Currently the only custom case is when we split v64i1 to 2 regs"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 3188, __PRETTY_FUNCTION__))
;
3189
3190 // v64i1 values, in regcall calling convention, that are
3191 // compiled to 32 bit arch, are split up into two registers.
3192 ArgValue =
3193 getv64i1Argument(VA, ArgLocs[++I], Chain, DAG, dl, Subtarget);
3194 } else {
3195 const TargetRegisterClass *RC;
3196 if (RegVT == MVT::i8)
3197 RC = &X86::GR8RegClass;
3198 else if (RegVT == MVT::i16)
3199 RC = &X86::GR16RegClass;
3200 else if (RegVT == MVT::i32)
3201 RC = &X86::GR32RegClass;
3202 else if (Is64Bit && RegVT == MVT::i64)
3203 RC = &X86::GR64RegClass;
3204 else if (RegVT == MVT::f32)
3205 RC = Subtarget.hasAVX512() ? &X86::FR32XRegClass : &X86::FR32RegClass;
3206 else if (RegVT == MVT::f64)
3207 RC = Subtarget.hasAVX512() ? &X86::FR64XRegClass : &X86::FR64RegClass;
3208 else if (RegVT == MVT::f80)
3209 RC = &X86::RFP80RegClass;
3210 else if (RegVT == MVT::f128)
3211 RC = &X86::VR128RegClass;
3212 else if (RegVT.is512BitVector())
3213 RC = &X86::VR512RegClass;
3214 else if (RegVT.is256BitVector())
3215 RC = Subtarget.hasVLX() ? &X86::VR256XRegClass : &X86::VR256RegClass;
3216 else if (RegVT.is128BitVector())
3217 RC = Subtarget.hasVLX() ? &X86::VR128XRegClass : &X86::VR128RegClass;
3218 else if (RegVT == MVT::x86mmx)
3219 RC = &X86::VR64RegClass;
3220 else if (RegVT == MVT::v1i1)
3221 RC = &X86::VK1RegClass;
3222 else if (RegVT == MVT::v8i1)
3223 RC = &X86::VK8RegClass;
3224 else if (RegVT == MVT::v16i1)
3225 RC = &X86::VK16RegClass;
3226 else if (RegVT == MVT::v32i1)
3227 RC = &X86::VK32RegClass;
3228 else if (RegVT == MVT::v64i1)
3229 RC = &X86::VK64RegClass;
3230 else
3231 llvm_unreachable("Unknown argument type!")::llvm::llvm_unreachable_internal("Unknown argument type!", "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 3231)
;
3232
3233 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
3234 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
3235 }
3236
3237 // If this is an 8 or 16-bit value, it is really passed promoted to 32
3238 // bits. Insert an assert[sz]ext to capture this, then truncate to the
3239 // right size.
3240 if (VA.getLocInfo() == CCValAssign::SExt)
3241 ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
3242 DAG.getValueType(VA.getValVT()));
3243 else if (VA.getLocInfo() == CCValAssign::ZExt)
3244 ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
3245 DAG.getValueType(VA.getValVT()));
3246 else if (VA.getLocInfo() == CCValAssign::BCvt)
3247 ArgValue = DAG.getBitcast(VA.getValVT(), ArgValue);
3248
3249 if (VA.isExtInLoc()) {
3250 // Handle MMX values passed in XMM regs.
3251 if (RegVT.isVector() && VA.getValVT().getScalarType() != MVT::i1)
3252 ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(), ArgValue);
3253 else if (VA.getValVT().isVector() &&
3254 VA.getValVT().getScalarType() == MVT::i1 &&
3255 ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
3256 (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
3257 // Promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
3258 ArgValue = lowerRegToMasks(ArgValue, VA.getValVT(), RegVT, dl, DAG);
3259 } else
3260 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
3261 }
3262 } else {
3263 assert(VA.isMemLoc())((VA.isMemLoc()) ? static_cast<void> (0) : __assert_fail
("VA.isMemLoc()", "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 3263, __PRETTY_FUNCTION__))
;
3264 ArgValue =
3265 LowerMemArgument(Chain, CallConv, Ins, dl, DAG, VA, MFI, InsIndex);
3266 }
3267
3268 // If value is passed via pointer - do a load.
3269 if (VA.getLocInfo() == CCValAssign::Indirect && !Ins[I].Flags.isByVal())
3270 ArgValue =
3271 DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue, MachinePointerInfo());
3272
3273 InVals.push_back(ArgValue);
3274 }
3275
3276 for (unsigned I = 0, E = Ins.size(); I != E; ++I) {
3277 // Swift calling convention does not require we copy the sret argument
3278 // into %rax/%eax for the return. We don't set SRetReturnReg for Swift.
3279 if (CallConv == CallingConv::Swift)
3280 continue;
3281
3282 // All x86 ABIs require that for returning structs by value we copy the
3283 // sret argument into %rax/%eax (depending on ABI) for the return. Save
3284 // the argument into a virtual register so that we can access it from the
3285 // return points.
3286 if (Ins[I].Flags.isSRet()) {
3287 unsigned Reg = FuncInfo->getSRetReturnReg();
3288 if (!Reg) {
3289 MVT PtrTy = getPointerTy(DAG.getDataLayout());
3290 Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
3291 FuncInfo->setSRetReturnReg(Reg);
3292 }
3293 SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[I]);
3294 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
3295 break;
3296 }
3297 }
3298
3299 unsigned StackSize = CCInfo.getNextStackOffset();
3300 // Align stack specially for tail calls.
3301 if (shouldGuaranteeTCO(CallConv,
3302 MF.getTarget().Options.GuaranteedTailCallOpt))
3303 StackSize = GetAlignedArgumentStackSize(StackSize, DAG);
3304
3305 // If the function takes variable number of arguments, make a frame index for
3306 // the start of the first vararg value... for expansion of llvm.va_start. We
3307 // can skip this if there are no va_start calls.
3308 if (MFI.hasVAStart() &&
3309 (Is64Bit || (CallConv != CallingConv::X86_FastCall &&
3310 CallConv != CallingConv::X86_ThisCall))) {
3311 FuncInfo->setVarArgsFrameIndex(MFI.CreateFixedObject(1, StackSize, true));
3312 }
3313
3314 // Figure out if XMM registers are in use.
3315 assert(!(Subtarget.useSoftFloat() &&((!(Subtarget.useSoftFloat() && F.hasFnAttribute(Attribute
::NoImplicitFloat)) && "SSE register cannot be used when SSE is disabled!"
) ? static_cast<void> (0) : __assert_fail ("!(Subtarget.useSoftFloat() && F.hasFnAttribute(Attribute::NoImplicitFloat)) && \"SSE register cannot be used when SSE is disabled!\""
, "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 3317, __PRETTY_FUNCTION__))
3316 F.hasFnAttribute(Attribute::NoImplicitFloat)) &&((!(Subtarget.useSoftFloat() && F.hasFnAttribute(Attribute
::NoImplicitFloat)) && "SSE register cannot be used when SSE is disabled!"
) ? static_cast<void> (0) : __assert_fail ("!(Subtarget.useSoftFloat() && F.hasFnAttribute(Attribute::NoImplicitFloat)) && \"SSE register cannot be used when SSE is disabled!\""
, "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 3317, __PRETTY_FUNCTION__))
3317 "SSE register cannot be used when SSE is disabled!")((!(Subtarget.useSoftFloat() && F.hasFnAttribute(Attribute
::NoImplicitFloat)) && "SSE register cannot be used when SSE is disabled!"
) ? static_cast<void> (0) : __assert_fail ("!(Subtarget.useSoftFloat() && F.hasFnAttribute(Attribute::NoImplicitFloat)) && \"SSE register cannot be used when SSE is disabled!\""
, "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 3317, __PRETTY_FUNCTION__))
;
3318
3319 // 64-bit calling conventions support varargs and register parameters, so we
3320 // have to do extra work to spill them in the prologue.
3321 if (Is64Bit && isVarArg && MFI.hasVAStart()) {
3322 // Find the first unallocated argument registers.
3323 ArrayRef<MCPhysReg> ArgGPRs = get64BitArgumentGPRs(CallConv, Subtarget);
3324 ArrayRef<MCPhysReg> ArgXMMs = get64BitArgumentXMMs(MF, CallConv, Subtarget);
3325 unsigned NumIntRegs = CCInfo.getFirstUnallocated(ArgGPRs);
3326 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(ArgXMMs);
3327 assert(!(NumXMMRegs && !Subtarget.hasSSE1()) &&((!(NumXMMRegs && !Subtarget.hasSSE1()) && "SSE register cannot be used when SSE is disabled!"
) ? static_cast<void> (0) : __assert_fail ("!(NumXMMRegs && !Subtarget.hasSSE1()) && \"SSE register cannot be used when SSE is disabled!\""
, "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 3328, __PRETTY_FUNCTION__))
3328 "SSE register cannot be used when SSE is disabled!")((!(NumXMMRegs && !Subtarget.hasSSE1()) && "SSE register cannot be used when SSE is disabled!"
) ? static_cast<void> (0) : __assert_fail ("!(NumXMMRegs && !Subtarget.hasSSE1()) && \"SSE register cannot be used when SSE is disabled!\""
, "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 3328, __PRETTY_FUNCTION__))
;
3329
3330 // Gather all the live in physical registers.
3331 SmallVector<SDValue, 6> LiveGPRs;
3332 SmallVector<SDValue, 8> LiveXMMRegs;
3333 SDValue ALVal;
3334 for (MCPhysReg Reg : ArgGPRs.slice(NumIntRegs)) {
3335 unsigned GPR = MF.addLiveIn(Reg, &X86::GR64RegClass);
3336 LiveGPRs.push_back(
3337 DAG.getCopyFromReg(Chain, dl, GPR, MVT::i64));
3338 }
3339 if (!ArgXMMs.empty()) {
3340 unsigned AL = MF.addLiveIn(X86::AL, &X86::GR8RegClass);
3341 ALVal = DAG.getCopyFromReg(Chain, dl, AL, MVT::i8);
3342 for (MCPhysReg Reg : ArgXMMs.slice(NumXMMRegs)) {
3343 unsigned XMMReg = MF.addLiveIn(Reg, &X86::VR128RegClass);
3344 LiveXMMRegs.push_back(
3345 DAG.getCopyFromReg(Chain, dl, XMMReg, MVT::v4f32));
3346 }
3347 }
3348
3349 if (IsWin64) {
3350 // Get to the caller-allocated home save location. Add 8 to account
3351 // for the return address.
3352 int HomeOffset = TFI.getOffsetOfLocalArea() + 8;
3353 FuncInfo->setRegSaveFrameIndex(
3354 MFI.CreateFixedObject(1, NumIntRegs * 8 + HomeOffset, false));
3355 // Fixup to set vararg frame on shadow area (4 x i64).
3356 if (NumIntRegs < 4)
3357 FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex());
3358 } else {
3359 // For X86-64, if there are vararg parameters that are passed via
3360 // registers, then we must store them to their spots on the stack so
3361 // they may be loaded by dereferencing the result of va_next.
3362 FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);
3363 FuncInfo->setVarArgsFPOffset(ArgGPRs.size() * 8 + NumXMMRegs * 16);
3364 FuncInfo->setRegSaveFrameIndex(MFI.CreateStackObject(
3365 ArgGPRs.size() * 8 + ArgXMMs.size() * 16, 16, false));
3366 }
3367
3368 // Store the integer parameter registers.
3369 SmallVector<SDValue, 8> MemOps;
3370 SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
3371 getPointerTy(DAG.getDataLayout()));
3372 unsigned Offset = FuncInfo->getVarArgsGPOffset();
3373 for (SDValue Val : LiveGPRs) {
3374 SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
3375 RSFIN, DAG.getIntPtrConstant(Offset, dl));
3376 SDValue Store =
3377 DAG.getStore(Val.getValue(1), dl, Val, FIN,
3378 MachinePointerInfo::getFixedStack(
3379 DAG.getMachineFunction(),
3380 FuncInfo->getRegSaveFrameIndex(), Offset));
3381 MemOps.push_back(Store);
3382 Offset += 8;
3383 }
3384
3385 if (!ArgXMMs.empty() && NumXMMRegs != ArgXMMs.size()) {
3386 // Now store the XMM (fp + vector) parameter registers.
3387 SmallVector<SDValue, 12> SaveXMMOps;
3388 SaveXMMOps.push_back(Chain);
3389 SaveXMMOps.push_back(ALVal);
3390 SaveXMMOps.push_back(DAG.getIntPtrConstant(
3391 FuncInfo->getRegSaveFrameIndex(), dl));
3392 SaveXMMOps.push_back(DAG.getIntPtrConstant(
3393 FuncInfo->getVarArgsFPOffset(), dl));
3394 SaveXMMOps.insert(SaveXMMOps.end(), LiveXMMRegs.begin(),
3395 LiveXMMRegs.end());
3396 MemOps.push_back(DAG.getNode(X86ISD::VASTART_SAVE_XMM_REGS, dl,
3397 MVT::Other, SaveXMMOps));
3398 }
3399
3400 if (!MemOps.empty())
3401 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
3402 }
3403
3404 if (isVarArg && MFI.hasMustTailInVarArgFunc()) {
3405 // Find the largest legal vector type.
3406 MVT VecVT = MVT::Other;
3407 // FIXME: Only some x86_32 calling conventions support AVX512.
3408 if (Subtarget.hasAVX512() &&
3409 (Is64Bit || (CallConv == CallingConv::X86_VectorCall ||
3410 CallConv == CallingConv::Intel_OCL_BI)))
3411 VecVT = MVT::v16f32;
3412 else if (Subtarget.hasAVX())
3413 VecVT = MVT::v8f32;
3414 else if (Subtarget.hasSSE2())
3415 VecVT = MVT::v4f32;
3416
3417 // We forward some GPRs and some vector types.
3418 SmallVector<MVT, 2> RegParmTypes;
3419 MVT IntVT = Is64Bit ? MVT::i64 : MVT::i32;
3420 RegParmTypes.push_back(IntVT);
3421 if (VecVT != MVT::Other)
3422 RegParmTypes.push_back(VecVT);
3423
3424 // Compute the set of forwarded registers. The rest are scratch.
3425 SmallVectorImpl<ForwardedRegister> &Forwards =
3426 FuncInfo->getForwardedMustTailRegParms();
3427 CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, CC_X86);
3428
3429 // Conservatively forward AL on x86_64, since it might be used for varargs.
3430 if (Is64Bit && !CCInfo.isAllocated(X86::AL)) {
3431 unsigned ALVReg = MF.addLiveIn(X86::AL, &X86::GR8RegClass);
3432 Forwards.push_back(ForwardedRegister(ALVReg, X86::AL, MVT::i8));
3433 }
3434
3435 // Copy all forwards from physical to virtual registers.
3436 for (ForwardedRegister &F : Forwards) {
3437 // FIXME: Can we use a less constrained schedule?
3438 SDValue RegVal = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT);
3439 F.VReg = MF.getRegInfo().createVirtualRegister(getRegClassFor(F.VT));
3440 Chain = DAG.getCopyToReg(Chain, dl, F.VReg, RegVal);
3441 }
3442 }
3443
3444 // Some CCs need callee pop.
3445 if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
3446 MF.getTarget().Options.GuaranteedTailCallOpt)) {
3447 FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything.
3448 } else if (CallConv == CallingConv::X86_INTR && Ins.size() == 2) {
3449 // X86 interrupts must pop the error code (and the alignment padding) if
3450 // present.
3451 FuncInfo->setBytesToPopOnReturn(Is64Bit ? 16 : 4);
3452 } else {
3453 FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing.
3454 // If this is an sret function, the return should pop the hidden pointer.
3455 if (!Is64Bit && !canGuaranteeTCO(CallConv) &&
3456 !Subtarget.getTargetTriple().isOSMSVCRT() &&
3457 argsAreStructReturn(Ins, Subtarget.isTargetMCU()) == StackStructReturn)
3458 FuncInfo->setBytesToPopOnReturn(4);
3459 }
3460
3461 if (!Is64Bit) {
3462 // RegSaveFrameIndex is X86-64 only.
3463 FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
3464 if (CallConv == CallingConv::X86_FastCall ||
3465 CallConv == CallingConv::X86_ThisCall)
3466 // fastcc functions can't have varargs.
3467 FuncInfo->setVarArgsFrameIndex(0xAAAAAAA);
3468 }
3469
3470 FuncInfo->setArgumentStackSize(StackSize);
3471
3472 if (WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo()) {
3473 EHPersonality Personality = classifyEHPersonality(F.getPersonalityFn());
3474 if (Personality == EHPersonality::CoreCLR) {
3475 assert(Is64Bit)((Is64Bit) ? static_cast<void> (0) : __assert_fail ("Is64Bit"
, "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 3475, __PRETTY_FUNCTION__))
;
3476 // TODO: Add a mechanism to frame lowering that will allow us to indicate
3477 // that we'd prefer this slot be allocated towards the bottom of the frame
3478 // (i.e. near the stack pointer after allocating the frame). Every
3479 // funclet needs a copy of this slot in its (mostly empty) frame, and the
3480 // offset from the bottom of this and each funclet's frame must be the
3481 // same, so the size of funclets' (mostly empty) frames is dictated by
3482 // how far this slot is from the bottom (since they allocate just enough
3483 // space to accommodate holding this slot at the correct offset).
3484 int PSPSymFI = MFI.CreateStackObject(8, 8, /*isSS=*/false);
3485 EHInfo->PSPSymFrameIdx = PSPSymFI;
3486 }
3487 }
3488
3489 if (CallConv == CallingConv::X86_RegCall ||
3490 F.hasFnAttribute("no_caller_saved_registers")) {
3491 MachineRegisterInfo &MRI = MF.getRegInfo();
3492 for (std::pair<unsigned, unsigned> Pair : MRI.liveins())
3493 MRI.disableCalleeSavedRegister(Pair.first);
3494 }
3495
3496 return Chain;
3497}
3498
/// Lower one outgoing call argument whose assigned location \p VA is a memory
/// slot. The destination address is \p StackPtr plus VA.getLocMemOffset().
/// For a byval argument (\p Flags.isByVal()) the result of
/// CreateCopyOfByValArgument to that address is returned; otherwise the
/// result of a DAG store of \p Arg to that address, chained on \p Chain, is
/// returned.
3499SDValue X86TargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr,
3500 SDValue Arg, const SDLoc &dl,
3501 SelectionDAG &DAG,
3502 const CCValAssign &VA,
3503 ISD::ArgFlagsTy Flags) const {
// Destination address = StackPtr + LocMemOffset, in the target pointer type.
3504 unsigned LocMemOffset = VA.getLocMemOffset();
3505 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
3506 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
3507 StackPtr, PtrOff);
// byval arguments are copied to the destination rather than stored as a
// single value.
3508 if (Flags.isByVal())
3509 return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
3510
// Plain store; the pointer info describes the stack at the same offset that
// was used to form the address.
3511 return DAG.getStore(
3512 Chain, dl, Arg, PtrOff,
3513 MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset));
3514}
3515
3516/// Emit a load of return address if tail call
3517/// optimization is performed and it is required.
///
/// On return \p OutRetAddr holds the loaded value (result 0 of the load), and
/// the function returns result 1 of the same load node; the visible caller
/// assigns that result to its Chain.
///
/// Note: \p IsTailCall, \p Is64Bit and \p FPDiff are not read in this body;
/// the load is emitted unconditionally here, so the "if ... required" check
/// is left to the caller.
3518SDValue X86TargetLowering::EmitTailCallLoadRetAddr(
3519 SelectionDAG &DAG, SDValue &OutRetAddr, SDValue Chain, bool IsTailCall,
3520 bool Is64Bit, int FPDiff, const SDLoc &dl) const {
3521 // Adjust the Return address stack slot.
3522 EVT VT = getPointerTy(DAG.getDataLayout());
// OutRetAddr temporarily holds the address to load from (the return-address
// frame index); it is overwritten with the loaded value just below.
3523 OutRetAddr = getReturnAddressFrameIndex(DAG);
3524
3525 // Load the "old" Return address.
3526 OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, MachinePointerInfo());
// Result 1 of the load node (presumably its output chain -- confirm against
// the SelectionDAG load node definition).
3527 return SDValue(OutRetAddr.getNode(), 1);
3528}
3529
3530/// Emit a store of the return address if tail call
3531/// optimization is performed and it is required (FPDiff!=0).
///
/// When \p FPDiff is zero, \p Chain is returned unchanged and nothing is
/// created. Otherwise a fixed frame object of \p SlotSize bytes is created at
/// offset FPDiff - SlotSize, \p RetAddrFrIdx is stored to it, and the
/// resulting store is returned as the new chain.
3532static SDValue EmitTailCallStoreRetAddr(SelectionDAG &DAG, MachineFunction &MF,
3533 SDValue Chain, SDValue RetAddrFrIdx,
3534 EVT PtrVT, unsigned SlotSize,
3535 int FPDiff, const SDLoc &dl) {
3536 // Store the return address to the appropriate stack slot.
3537 if (!FPDiff) return Chain;
3538 // Calculate the new stack slot for the return address.
// FPDiff is widened to int64_t before SlotSize is subtracted, so the offset
// is computed in signed 64-bit arithmetic.
3539 int NewReturnAddrFI =
3540 MF.getFrameInfo().CreateFixedObject(SlotSize, (int64_t)FPDiff - SlotSize,
3541 false);
3542 SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, PtrVT);
// NOTE(review): the frame object is created through MF while the pointer
// info uses DAG.getMachineFunction(); presumably these are the same
// function -- confirm.
3543 Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx,
3544 MachinePointerInfo::getFixedStack(
3545 DAG.getMachineFunction(), NewReturnAddrFI));
3546 return Chain;
3547}
3548
3549/// Returns a vector_shuffle mask for an movs{s|d}, movd
3550/// operation of specified width.
///
/// More precisely, this returns the shuffle node built by
/// DAG.getVectorShuffle over \p V1 and \p V2 using the mask
/// {NumElems, 1, 2, ..., NumElems-1}, where NumElems is the element count of
/// \p VT.
3551static SDValue getMOVL(SelectionDAG &DAG, const SDLoc &dl, MVT VT, SDValue V1,
3552 SDValue V2) {
3553 unsigned NumElems = VT.getVectorNumElements();
3554 SmallVector<int, 8> Mask;
// Lane 0 uses index NumElems (presumably the first element of V2 under the
// usual two-operand shuffle mask numbering -- confirm).
3555 Mask.push_back(NumElems);
// Remaining lanes use their own index, i.e. lane i keeps index i.
3556 for (unsigned i = 1; i != NumElems; ++i)
3557 Mask.push_back(i);
3558 return DAG.getVectorShuffle(VT, dl, V1, V2, Mask);
3559}
3560
3561SDValue
3562X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
3563 SmallVectorImpl<SDValue> &InVals) const {
3564 SelectionDAG &DAG = CLI.DAG;
3565 SDLoc &dl = CLI.DL;
3566 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
3567 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
3568 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
3569 SDValue Chain = CLI.Chain;
3570 SDValue Callee = CLI.Callee;
3571 CallingConv::ID CallConv = CLI.CallConv;
3572 bool &isTailCall = CLI.IsTailCall;
3573 bool isVarArg = CLI.IsVarArg;
3574
3575 MachineFunction &MF = DAG.getMachineFunction();
3576 bool Is64Bit = Subtarget.is64Bit();
3577 bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
3578 StructReturnType SR = callIsStructReturn(Outs, Subtarget.isTargetMCU());
3579 bool IsSibcall = false;
3580 X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>();
3581 auto Attr = MF.getFunction().getFnAttribute("disable-tail-calls");
3582 const auto *CI = dyn_cast_or_null<CallInst>(CLI.CS.getInstruction());
3583 const Function *Fn = CI ? CI->getCalledFunction() : nullptr;
3584 bool HasNCSR = (CI && CI->hasFnAttr("no_caller_saved_registers")) ||
3585 (Fn && Fn->hasFnAttribute("no_caller_saved_registers"));
3586 const auto *II = dyn_cast_or_null<InvokeInst>(CLI.CS.getInstruction());
3587 bool HasNoCfCheck =
3588 (CI && CI->doesNoCfCheck()) || (II && II->doesNoCfCheck());
3589 const Module *M = MF.getMMI().getModule();
3590 Metadata *IsCFProtectionSupported = M->getModuleFlag("cf-protection-branch");
3591
3592 if (CallConv == CallingConv::X86_INTR)
3593 report_fatal_error("X86 interrupts may not be called directly");
3594
3595 if (Attr.getValueAsString() == "true")
3596 isTailCall = false;
3597
3598 if (Subtarget.isPICStyleGOT() &&
3599 !MF.getTarget().Options.GuaranteedTailCallOpt) {
3600 // If we are using a GOT, disable tail calls to external symbols with
3601 // default visibility. Tail calling such a symbol requires using a GOT
3602 // relocation, which forces early binding of the symbol. This breaks code
3603 // that require lazy function symbol resolution. Using musttail or
3604 // GuaranteedTailCallOpt will override this.
3605 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
3606 if (!G || (!G->getGlobal()->hasLocalLinkage() &&
3607 G->getGlobal()->hasDefaultVisibility()))
3608 isTailCall = false;
3609 }
3610
3611 bool IsMustTail = CLI.CS && CLI.CS.isMustTailCall();
3612 if (IsMustTail) {
3613 // Force this to be a tail call. The verifier rules are enough to ensure
3614 // that we can lower this successfully without moving the return address
3615 // around.
3616 isTailCall = true;
3617 } else if (isTailCall) {
3618 // Check if it's really possible to do a tail call.
3619 isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
3620 isVarArg, SR != NotStructReturn,
3621 MF.getFunction().hasStructRetAttr(), CLI.RetTy,
3622 Outs, OutVals, Ins, DAG);
3623
3624 // Sibcalls are automatically detected tailcalls which do not require
3625 // ABI changes.
3626 if (!MF.getTarget().Options.GuaranteedTailCallOpt && isTailCall)
3627 IsSibcall = true;
3628
3629 if (isTailCall)
3630 ++NumTailCalls;
3631 }
3632
3633 assert(!(isVarArg && canGuaranteeTCO(CallConv)) &&((!(isVarArg && canGuaranteeTCO(CallConv)) &&
"Var args not supported with calling convention fastcc, ghc or hipe"
) ? static_cast<void> (0) : __assert_fail ("!(isVarArg && canGuaranteeTCO(CallConv)) && \"Var args not supported with calling convention fastcc, ghc or hipe\""
, "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 3634, __PRETTY_FUNCTION__))
3634 "Var args not supported with calling convention fastcc, ghc or hipe")((!(isVarArg && canGuaranteeTCO(CallConv)) &&
"Var args not supported with calling convention fastcc, ghc or hipe"
) ? static_cast<void> (0) : __assert_fail ("!(isVarArg && canGuaranteeTCO(CallConv)) && \"Var args not supported with calling convention fastcc, ghc or hipe\""
, "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 3634, __PRETTY_FUNCTION__))
;
3635
3636 // Analyze operands of the call, assigning locations to each operand.
3637 SmallVector<CCValAssign, 16> ArgLocs;
3638 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
3639
3640 // Allocate shadow area for Win64.
3641 if (IsWin64)
3642 CCInfo.AllocateStack(32, 8);
3643
3644 CCInfo.AnalyzeArguments(Outs, CC_X86);
3645
3646 // In vectorcall calling convention a second pass is required for the HVA
3647 // types.
3648 if (CallingConv::X86_VectorCall == CallConv) {
3649 CCInfo.AnalyzeArgumentsSecondPass(Outs, CC_X86);
3650 }
3651
3652 // Get a count of how many bytes are to be pushed on the stack.
3653 unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
3654 if (IsSibcall)
3655 // This is a sibcall. The memory operands are available in caller's
3656 // own caller's stack.
3657 NumBytes = 0;
3658 else if (MF.getTarget().Options.GuaranteedTailCallOpt &&
3659 canGuaranteeTCO(CallConv))
3660 NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
3661
3662 int FPDiff = 0;
3663 if (isTailCall && !IsSibcall && !IsMustTail) {
3664 // Lower arguments at fp - stackoffset + fpdiff.
3665 unsigned NumBytesCallerPushed = X86Info->getBytesToPopOnReturn();
3666
3667 FPDiff = NumBytesCallerPushed - NumBytes;
3668
3669 // Set the delta of movement of the returnaddr stackslot.
3670 // But only set if delta is greater than previous delta.
3671 if (FPDiff < X86Info->getTCReturnAddrDelta())
3672 X86Info->setTCReturnAddrDelta(FPDiff);
3673 }
3674
3675 unsigned NumBytesToPush = NumBytes;
3676 unsigned NumBytesToPop = NumBytes;
3677
3678 // If we have an inalloca argument, all stack space has already been allocated
3679 // for us and be right at the top of the stack. We don't support multiple
3680 // arguments passed in memory when using inalloca.
3681 if (!Outs.empty() && Outs.back().Flags.isInAlloca()) {
3682 NumBytesToPush = 0;
3683 if (!ArgLocs.back().isMemLoc())
3684 report_fatal_error("cannot use inalloca attribute on a register "
3685 "parameter");
3686 if (ArgLocs.back().getLocMemOffset() != 0)
3687 report_fatal_error("any parameter with the inalloca attribute must be "
3688 "the only memory argument");
3689 }
3690
3691 if (!IsSibcall)
3692 Chain = DAG.getCALLSEQ_START(Chain, NumBytesToPush,
3693 NumBytes - NumBytesToPush, dl);
3694
3695 SDValue RetAddrFrIdx;
3696 // Load return address for tail calls.
3697 if (isTailCall && FPDiff)
3698 Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall,
3699 Is64Bit, FPDiff, dl);
3700
3701 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
3702 SmallVector<SDValue, 8> MemOpChains;
3703 SDValue StackPtr;
3704
3705 // The next loop assumes that the locations are in the same order of the
3706 // input arguments.
3707 assert(isSortedByValueNo(ArgLocs) &&((isSortedByValueNo(ArgLocs) && "Argument Location list must be sorted before lowering"
) ? static_cast<void> (0) : __assert_fail ("isSortedByValueNo(ArgLocs) && \"Argument Location list must be sorted before lowering\""
, "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 3708, __PRETTY_FUNCTION__))
3708 "Argument Location list must be sorted before lowering")((isSortedByValueNo(ArgLocs) && "Argument Location list must be sorted before lowering"
) ? static_cast<void> (0) : __assert_fail ("isSortedByValueNo(ArgLocs) && \"Argument Location list must be sorted before lowering\""
, "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 3708, __PRETTY_FUNCTION__))
;
3709
3710 // Walk the register/memloc assignments, inserting copies/loads. In the case
3711 // of tail call optimization arguments are handle later.
3712 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
3713 for (unsigned I = 0, OutIndex = 0, E = ArgLocs.size(); I != E;
3714 ++I, ++OutIndex) {
3715 assert(OutIndex < Outs.size() && "Invalid Out index")((OutIndex < Outs.size() && "Invalid Out index") ?
static_cast<void> (0) : __assert_fail ("OutIndex < Outs.size() && \"Invalid Out index\""
, "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 3715, __PRETTY_FUNCTION__))
;
3716 // Skip inalloca arguments, they have already been written.
3717 ISD::ArgFlagsTy Flags = Outs[OutIndex].Flags;
3718 if (Flags.isInAlloca())
3719 continue;
3720
3721 CCValAssign &VA = ArgLocs[I];
3722 EVT RegVT = VA.getLocVT();
3723 SDValue Arg = OutVals[OutIndex];
3724 bool isByVal = Flags.isByVal();
3725
3726 // Promote the value if needed.
3727 switch (VA.getLocInfo()) {
3728 default: llvm_unreachable("Unknown loc info!")::llvm::llvm_unreachable_internal("Unknown loc info!", "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 3728)
;
3729 case CCValAssign::Full: break;
3730 case CCValAssign::SExt:
3731 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);
3732 break;
3733 case CCValAssign::ZExt:
3734 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg);
3735 break;
3736 case CCValAssign::AExt:
3737 if (Arg.getValueType().isVector() &&
3738 Arg.getValueType().getVectorElementType() == MVT::i1)
3739 Arg = lowerMasksToReg(Arg, RegVT, dl, DAG);
3740 else if (RegVT.is128BitVector()) {
3741 // Special case: passing MMX values in XMM registers.
3742 Arg = DAG.getBitcast(MVT::i64, Arg);
3743 Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg);
3744 Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg);
3745 } else
3746 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg);
3747 break;
3748 case CCValAssign::BCvt:
3749 Arg = DAG.getBitcast(RegVT, Arg);
3750 break;
3751 case CCValAssign::Indirect: {
3752 if (isByVal) {
3753 // Memcpy the argument to a temporary stack slot to prevent
3754 // the caller from seeing any modifications the callee may make
3755 // as guaranteed by the `byval` attribute.
3756 int FrameIdx = MF.getFrameInfo().CreateStackObject(
3757 Flags.getByValSize(), std::max(16, (int)Flags.getByValAlign()),
3758 false);
3759 SDValue StackSlot =
3760 DAG.getFrameIndex(FrameIdx, getPointerTy(DAG.getDataLayout()));
3761 Chain =
3762 CreateCopyOfByValArgument(Arg, StackSlot, Chain, Flags, DAG, dl);
3763 // From now on treat this as a regular pointer
3764 Arg = StackSlot;
3765 isByVal = false;
3766 } else {
3767 // Store the argument.
3768 SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
3769 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
3770 Chain = DAG.getStore(
3771 Chain, dl, Arg, SpillSlot,
3772 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
3773 Arg = SpillSlot;
3774 }
3775 break;
3776 }
3777 }
3778
3779 if (VA.needsCustom()) {
3780 assert(VA.getValVT() == MVT::v64i1 &&((VA.getValVT() == MVT::v64i1 && "Currently the only custom case is when we split v64i1 to 2 regs"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 3781, __PRETTY_FUNCTION__))
3781 "Currently the only custom case is when we split v64i1 to 2 regs")((VA.getValVT() == MVT::v64i1 && "Currently the only custom case is when we split v64i1 to 2 regs"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 3781, __PRETTY_FUNCTION__))
;
3782 // Split v64i1 value into two registers
3783 Passv64i1ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++I],
3784 Subtarget);
3785 } else if (VA.isRegLoc()) {
3786 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
3787 if (isVarArg && IsWin64) {
3788 // Win64 ABI requires argument XMM reg to be copied to the corresponding
3789 // shadow reg if callee is a varargs function.
3790 unsigned ShadowReg = 0;
3791 switch (VA.getLocReg()) {
3792 case X86::XMM0: ShadowReg = X86::RCX; break;
3793 case X86::XMM1: ShadowReg = X86::RDX; break;
3794 case X86::XMM2: ShadowReg = X86::R8; break;
3795 case X86::XMM3: ShadowReg = X86::R9; break;
3796 }
3797 if (ShadowReg)
3798 RegsToPass.push_back(std::make_pair(ShadowReg, Arg));
3799 }
3800 } else if (!IsSibcall && (!isTailCall || isByVal)) {
3801 assert(VA.isMemLoc())((VA.isMemLoc()) ? static_cast<void> (0) : __assert_fail
("VA.isMemLoc()", "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 3801, __PRETTY_FUNCTION__))
;
3802 if (!StackPtr.getNode())
3803 StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
3804 getPointerTy(DAG.getDataLayout()));
3805 MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
3806 dl, DAG, VA, Flags));
3807 }
3808 }
3809
3810 if (!MemOpChains.empty())
3811 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
3812
3813 if (Subtarget.isPICStyleGOT()) {
3814 // ELF / PIC requires GOT in the EBX register before function calls via PLT
3815 // GOT pointer.
3816 if (!isTailCall) {
3817 RegsToPass.push_back(std::make_pair(
3818 unsigned(X86::EBX), DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
3819 getPointerTy(DAG.getDataLayout()))));
3820 } else {
3821 // If we are tail calling and generating PIC/GOT style code load the
3822 // address of the callee into ECX. The value in ecx is used as target of
3823 // the tail jump. This is done to circumvent the ebx/callee-saved problem
3824 // for tail calls on PIC/GOT architectures. Normally we would just put the
3825 // address of GOT into ebx and then call target@PLT. But for tail calls
3826 // ebx would be restored (since ebx is callee saved) before jumping to the
3827 // target@PLT.
3828
3829 // Note: The actual moving to ECX is done further down.
3830 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
3831 if (G && !G->getGlobal()->hasLocalLinkage() &&
3832 G->getGlobal()->hasDefaultVisibility())
3833 Callee = LowerGlobalAddress(Callee, DAG);
3834 else if (isa<ExternalSymbolSDNode>(Callee))
3835 Callee = LowerExternalSymbol(Callee, DAG);
3836 }
3837 }
3838
3839 if (Is64Bit && isVarArg && !IsWin64 && !IsMustTail) {
3840 // From AMD64 ABI document:
3841 // For calls that may call functions that use varargs or stdargs
3842 // (prototype-less calls or calls to functions containing ellipsis (...) in
3843 // the declaration) %al is used as hidden argument to specify the number
3844 // of SSE registers used. The contents of %al do not need to match exactly
3845 // the number of registers, but must be an ubound on the number of SSE
3846 // registers used and is in the range 0 - 8 inclusive.
3847
3848 // Count the number of XMM registers allocated.
3849 static const MCPhysReg XMMArgRegs[] = {
3850 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
3851 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
3852 };
3853 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs);
3854 assert((Subtarget.hasSSE1() || !NumXMMRegs)(((Subtarget.hasSSE1() || !NumXMMRegs) && "SSE registers cannot be used when SSE is disabled"
) ? static_cast<void> (0) : __assert_fail ("(Subtarget.hasSSE1() || !NumXMMRegs) && \"SSE registers cannot be used when SSE is disabled\""
, "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 3855, __PRETTY_FUNCTION__))
3855 && "SSE registers cannot be used when SSE is disabled")(((Subtarget.hasSSE1() || !NumXMMRegs) && "SSE registers cannot be used when SSE is disabled"
) ? static_cast<void> (0) : __assert_fail ("(Subtarget.hasSSE1() || !NumXMMRegs) && \"SSE registers cannot be used when SSE is disabled\""
, "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 3855, __PRETTY_FUNCTION__))
;
3856
3857 RegsToPass.push_back(std::make_pair(unsigned(X86::AL),
3858 DAG.getConstant(NumXMMRegs, dl,
3859 MVT::i8)));
3860 }
3861
3862 if (isVarArg && IsMustTail) {
3863 const auto &Forwards = X86Info->getForwardedMustTailRegParms();
3864 for (const auto &F : Forwards) {
3865 SDValue Val = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT);
3866 RegsToPass.push_back(std::make_pair(unsigned(F.PReg), Val));
3867 }
3868 }
3869
3870 // For tail calls lower the arguments to the 'real' stack slots. Sibcalls
3871 // don't need this because the eligibility check rejects calls that require
3872 // shuffling arguments passed in memory.
3873 if (!IsSibcall && isTailCall) {
3874 // Force all the incoming stack arguments to be loaded from the stack
3875 // before any new outgoing arguments are stored to the stack, because the
3876 // outgoing stack slots may alias the incoming argument stack slots, and
3877 // the alias isn't otherwise explicit. This is slightly more conservative
3878 // than necessary, because it means that each store effectively depends
3879 // on every argument instead of just those arguments it would clobber.
3880 SDValue ArgChain = DAG.getStackArgumentTokenFactor(Chain);
3881
3882 SmallVector<SDValue, 8> MemOpChains2;
3883 SDValue FIN;
3884 int FI = 0;
3885 for (unsigned I = 0, OutsIndex = 0, E = ArgLocs.size(); I != E;
3886 ++I, ++OutsIndex) {
3887 CCValAssign &VA = ArgLocs[I];
3888
3889 if (VA.isRegLoc()) {
3890 if (VA.needsCustom()) {
3891 assert((CallConv == CallingConv::X86_RegCall) &&(((CallConv == CallingConv::X86_RegCall) && "Expecting custom case only in regcall calling convention"
) ? static_cast<void> (0) : __assert_fail ("(CallConv == CallingConv::X86_RegCall) && \"Expecting custom case only in regcall calling convention\""
, "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 3892, __PRETTY_FUNCTION__))
3892 "Expecting custom case only in regcall calling convention")(((CallConv == CallingConv::X86_RegCall) && "Expecting custom case only in regcall calling convention"
) ? static_cast<void> (0) : __assert_fail ("(CallConv == CallingConv::X86_RegCall) && \"Expecting custom case only in regcall calling convention\""
, "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 3892, __PRETTY_FUNCTION__))
;
3893 // This means that we are in special case where one argument was
3894 // passed through two register locations - Skip the next location
3895 ++I;
3896 }
3897
3898 continue;
3899 }
3900
3901 assert(VA.isMemLoc())((VA.isMemLoc()) ? static_cast<void> (0) : __assert_fail
("VA.isMemLoc()", "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 3901, __PRETTY_FUNCTION__))
;
3902 SDValue Arg = OutVals[OutsIndex];
3903 ISD::ArgFlagsTy Flags = Outs[OutsIndex].Flags;
3904 // Skip inalloca arguments. They don't require any work.
3905 if (Flags.isInAlloca())
3906 continue;
3907 // Create frame index.
3908 int32_t Offset = VA.getLocMemOffset()+FPDiff;
3909 uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8;
3910 FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
3911 FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
3912
3913 if (Flags.isByVal()) {
3914 // Copy relative to framepointer.
3915 SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset(), dl);
3916 if (!StackPtr.getNode())
3917 StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
3918 getPointerTy(DAG.getDataLayout()));
3919 Source = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
3920 StackPtr, Source);
3921
3922 MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN,
3923 ArgChain,
3924 Flags, DAG, dl));
3925 } else {
3926 // Store relative to framepointer.
3927 MemOpChains2.push_back(DAG.getStore(
3928 ArgChain, dl, Arg, FIN,
3929 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
3930 }
3931 }
3932
3933 if (!MemOpChains2.empty())
3934 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
3935
3936 // Store the return address to the appropriate stack slot.
3937 Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx,
3938 getPointerTy(DAG.getDataLayout()),
3939 RegInfo->getSlotSize(), FPDiff, dl);
3940 }
3941
3942 // Build a sequence of copy-to-reg nodes chained together with token chain
3943 // and flag operands which copy the outgoing args into registers.
3944 SDValue InFlag;
3945 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
3946 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
3947 RegsToPass[i].second, InFlag);
3948 InFlag = Chain.getValue(1);
3949 }
3950
3951 if (DAG.getTarget().getCodeModel() == CodeModel::Large) {
3952 assert(Is64Bit && "Large code model is only legal in 64-bit mode.")((Is64Bit && "Large code model is only legal in 64-bit mode."
) ? static_cast<void> (0) : __assert_fail ("Is64Bit && \"Large code model is only legal in 64-bit mode.\""
, "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 3952, __PRETTY_FUNCTION__))
;
3953 // In the 64-bit large code model, we have to make all calls
3954 // through a register, since the call instruction's 32-bit
3955 // pc-relative offset may not be large enough to hold the whole
3956 // address.
3957 } else if (Callee->getOpcode() == ISD::GlobalAddress) {
3958 // If the callee is a GlobalAddress node (quite common, every direct call
3959 // is) turn it into a TargetGlobalAddress node so that legalize doesn't hack
3960 // it.
3961 GlobalAddressSDNode* G = cast<GlobalAddressSDNode>(Callee);
3962
3963 // We should use extra load for direct calls to dllimported functions in
3964 // non-JIT mode.
3965 const GlobalValue *GV = G->getGlobal();
3966 if (!GV->hasDLLImportStorageClass()) {
3967 unsigned char OpFlags = Subtarget.classifyGlobalFunctionReference(GV);
3968
3969 Callee = DAG.getTargetGlobalAddress(
3970 GV, dl, getPointerTy(DAG.getDataLayout()), G->getOffset(), OpFlags);
3971
3972 if (OpFlags == X86II::MO_GOTPCREL) {
3973 // Add a wrapper.
3974 Callee = DAG.getNode(X86ISD::WrapperRIP, dl,
3975 getPointerTy(DAG.getDataLayout()), Callee);
3976 // Add extra indirection
3977 Callee = DAG.getLoad(
3978 getPointerTy(DAG.getDataLayout()), dl, DAG.getEntryNode(), Callee,
3979 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3980 }
3981 }
3982 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
3983 const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
3984 unsigned char OpFlags =
3985 Subtarget.classifyGlobalFunctionReference(nullptr, *Mod);
3986
3987 Callee = DAG.getTargetExternalSymbol(
3988 S->getSymbol(), getPointerTy(DAG.getDataLayout()), OpFlags);
3989
3990 if (OpFlags == X86II::MO_GOTPCREL) {
3991 Callee = DAG.getNode(X86ISD::WrapperRIP, dl,
3992 getPointerTy(DAG.getDataLayout()), Callee);
3993 Callee = DAG.getLoad(
3994 getPointerTy(DAG.getDataLayout()), dl, DAG.getEntryNode(), Callee,
3995 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3996 }
3997 } else if (Subtarget.isTarget64BitILP32() &&
3998 Callee->getValueType(0) == MVT::i32) {
3999 // Zero-extend the 32-bit Callee address into a 64-bit according to x32 ABI
4000 Callee = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Callee);
4001 }
4002
4003 // Returns a chain & a flag for retval copy to use.
4004 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
4005 SmallVector<SDValue, 8> Ops;
4006
4007 if (!IsSibcall && isTailCall) {
4008 Chain = DAG.getCALLSEQ_END(Chain,
4009 DAG.getIntPtrConstant(NumBytesToPop, dl, true),
4010 DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
4011 InFlag = Chain.getValue(1);
4012 }
4013
4014 Ops.push_back(Chain);
4015 Ops.push_back(Callee);
4016
4017 if (isTailCall)
4018 Ops.push_back(DAG.getConstant(FPDiff, dl, MVT::i32));
4019
4020 // Add argument registers to the end of the list so that they are known live
4021 // into the call.
4022 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
4023 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
4024 RegsToPass[i].second.getValueType()));
4025
4026 // Add a register mask operand representing the call-preserved registers.
4027 // If HasNCSR is asserted (attribute NoCallerSavedRegisters exists) then we
4028 // set X86_INTR calling convention because it has the same CSR mask
4029 // (same preserved registers).
4030 const uint32_t *Mask = RegInfo->getCallPreservedMask(
4031 MF, HasNCSR ? (CallingConv::ID)CallingConv::X86_INTR : CallConv);
4032 assert(Mask && "Missing call preserved mask for calling convention")((Mask && "Missing call preserved mask for calling convention"
) ? static_cast<void> (0) : __assert_fail ("Mask && \"Missing call preserved mask for calling convention\""
, "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 4032, __PRETTY_FUNCTION__))
;
4033
4034 // If this is an invoke in a 32-bit function using a funclet-based
4035 // personality, assume the function clobbers all registers. If an exception
4036 // is thrown, the runtime will not restore CSRs.
4037 // FIXME: Model this more precisely so that we can register allocate across
4038 // the normal edge and spill and fill across the exceptional edge.
4039 if (!Is64Bit && CLI.CS && CLI.CS.isInvoke()) {
4040 const Function &CallerFn = MF.getFunction();
4041 EHPersonality Pers =
4042 CallerFn.hasPersonalityFn()
4043 ? classifyEHPersonality(CallerFn.getPersonalityFn())
4044 : EHPersonality::Unknown;
4045 if (isFuncletEHPersonality(Pers))
4046 Mask = RegInfo->getNoPreservedMask();
4047 }
4048
4049 // Define a new register mask from the existing mask.
4050 uint32_t *RegMask = nullptr;
4051
4052 // In some calling conventions we need to remove the used physical registers
4053 // from the reg mask.
4054 if (CallConv == CallingConv::X86_RegCall || HasNCSR) {
4055 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
4056
4057 // Allocate a new Reg Mask and copy Mask.
4058 RegMask = MF.allocateRegMask();
4059 unsigned RegMaskSize = MachineOperand::getRegMaskSize(TRI->getNumRegs());
4060 memcpy(RegMask, Mask, sizeof(RegMask[0]) * RegMaskSize);
4061
4062 // Make sure all sub registers of the argument registers are reset
4063 // in the RegMask.
4064 for (auto const &RegPair : RegsToPass)
4065 for (MCSubRegIterator SubRegs(RegPair.first, TRI, /*IncludeSelf=*/true);
4066 SubRegs.isValid(); ++SubRegs)
4067 RegMask[*SubRegs / 32] &= ~(1u << (*SubRegs % 32));
4068
4069 // Create the RegMask Operand according to our updated mask.
4070 Ops.push_back(DAG.getRegisterMask(RegMask));
4071 } else {
4072 // Create the RegMask Operand according to the static mask.
4073 Ops.push_back(DAG.getRegisterMask(Mask));
4074 }
4075
4076 if (InFlag.getNode())
4077 Ops.push_back(InFlag);
4078
4079 if (isTailCall) {
4080 // We used to do:
4081 //// If this is the first return lowered for this function, add the regs
4082 //// to the liveout set for the function.
4083 // This isn't right, although it's probably harmless on x86; liveouts
4084 // should be computed from returns not tail calls. Consider a void
4085 // function making a tail call to a function returning int.
4086 MF.getFrameInfo().setHasTailCall();
4087 return DAG.getNode(X86ISD::TC_RETURN, dl, NodeTys, Ops);
4088 }
4089
4090 if (HasNoCfCheck && IsCFProtectionSupported) {
4091 Chain = DAG.getNode(X86ISD::NT_CALL, dl, NodeTys, Ops);
4092 } else {
4093 Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, Ops);
4094 }
4095 InFlag = Chain.getValue(1);
4096
4097 // Create the CALLSEQ_END node.
4098 unsigned NumBytesForCalleeToPop;
4099 if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
4100 DAG.getTarget().Options.GuaranteedTailCallOpt))
4101 NumBytesForCalleeToPop = NumBytes; // Callee pops everything
4102 else if (!Is64Bit && !canGuaranteeTCO(CallConv) &&
4103 !Subtarget.getTargetTriple().isOSMSVCRT() &&
4104 SR == StackStructReturn)
4105 // If this is a call to a struct-return function, the callee
4106 // pops the hidden struct pointer, so we have to push it back.
4107 // This is common for Darwin/X86, Linux & Mingw32 targets.
4108 // For MSVC Win32 targets, the caller pops the hidden struct pointer.
4109 NumBytesForCalleeToPop = 4;
4110 else
4111 NumBytesForCalleeToPop = 0; // Callee pops nothing.
4112
4113 if (CLI.DoesNotReturn && !getTargetMachine().Options.TrapUnreachable) {
4114 // No need to reset the stack after the call if the call doesn't return. To
4115 // make the MI verify, we'll pretend the callee does it for us.
4116 NumBytesForCalleeToPop = NumBytes;
4117 }
4118
4119 // Returns a flag for retval copy to use.
4120 if (!IsSibcall) {
4121 Chain = DAG.getCALLSEQ_END(Chain,
4122 DAG.getIntPtrConstant(NumBytesToPop, dl, true),
4123 DAG.getIntPtrConstant(NumBytesForCalleeToPop, dl,
4124 true),
4125 InFlag, dl);
4126 InFlag = Chain.getValue(1);
4127 }
4128
4129 // Handle result values, copying them out of physregs into vregs that we
4130 // return.
4131 return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
4132 InVals, RegMask);
4133}
4134
4135//===----------------------------------------------------------------------===//
4136// Fast Calling Convention (tail call) implementation
4137//===----------------------------------------------------------------------===//
4138
4139// Like the stdcall convention, the callee cleans up the arguments, except that
4140// ECX is reserved for storing the tail called function address. Only 2 registers
4141// are free for argument passing (inreg). Tail call optimization is performed
4142// provided:
4143// * tailcallopt is enabled
4144// * caller/callee are fastcc
4145// On X86_64 architecture with GOT-style position independent code only local
4146// (within module) calls are supported at the moment.
4147// To keep the stack aligned according to platform abi the function
4148// GetAlignedArgumentStackSize ensures that argument delta is always multiples
4149// of stack alignment. (Dynamic linkers need this - darwin's dyld for example)
4150// If a tail called function callee has more arguments than the caller the
4151// caller needs to make sure that there is room to move the RETADDR to. This is
4152// achieved by reserving an area the size of the argument delta right after the
4153// original RETADDR, but before the saved framepointer or the spilled registers
4154// e.g. caller(arg1, arg2) calls callee(arg1, arg2,arg3,arg4)
4155// stack layout:
4156// arg1
4157// arg2
4158// RETADDR
4159// [ new RETADDR
4160// move area ]
4161// (possible EBP)
4162// ESI
4163// EDI
4164// local1 ..
4165
4166/// Make the stack size align e.g 16n + 12 aligned for a 16-byte align
4167/// requirement.
4168unsigned
4169X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize,
4170 SelectionDAG& DAG) const {
4171 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
4172 const TargetFrameLowering &TFI = *Subtarget.getFrameLowering();
4173 unsigned StackAlignment = TFI.getStackAlignment();
4174 uint64_t AlignMask = StackAlignment - 1;
4175 int64_t Offset = StackSize;
4176 unsigned SlotSize = RegInfo->getSlotSize();
4177 if ( (Offset & AlignMask) <= (StackAlignment - SlotSize) ) {
4178 // Number smaller than 12 so just add the difference.
4179 Offset += ((StackAlignment - SlotSize) - (Offset & AlignMask));
4180 } else {
4181 // Mask out lower bits, add stackalignment once plus the 12 bytes.
4182 Offset = ((~AlignMask) & Offset) + StackAlignment +
4183 (StackAlignment-SlotSize);
4184 }
4185 return Offset;
4186}
4187
4188/// Return true if the given stack call argument is already available in the
4189/// same position (relatively) of the caller's incoming argument stack.
///
/// \param Arg    Outgoing argument value; value-preserving wrapper nodes are
///               peeled off before it is inspected.
/// \param Offset Stack offset that is compared against the frame object's
///               offset (MFI.getObjectOffset).
/// \param Flags  Argument flags; the byval, zext and sext bits are consulted.
/// \param MFI    Frame info used to query the candidate frame object.
/// \param MRI    Used to find the defining instruction of a virtual register.
/// \param TII    Used to recognise a load from a stack slot.
/// \param VA     Assigned location; only its LocVT width is consulted.
/// \returns true only if a frame index was found for Arg and that object is
///          fixed, lives at Offset, is immutable (unless byval), has matching
///          extension flags when the location is wider than the value, and
///          has the expected size in bytes.
4190static
4191bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
4192 MachineFrameInfo &MFI, const MachineRegisterInfo *MRI,
4193 const X86InstrInfo *TII, const CCValAssign &VA) {
// Expected object size; computed from the original Arg before any peeling and
// overridden with the byval size in the byval branches below.
4194 unsigned Bytes = Arg.getValueSizeInBits() / 8;
4195
4196 for (;;) {
4197 // Look through nodes that don't alter the bits of the incoming value.
4198 unsigned Op = Arg.getOpcode();
4199 if (Op == ISD::ZERO_EXTEND || Op == ISD::ANY_EXTEND || Op == ISD::BITCAST) {
4200 Arg = Arg.getOperand(0);
4201 continue;
4202 }
// A TRUNCATE is only looked through when its input is an AssertZext whose
// asserted type equals the truncated type.
4203 if (Op == ISD::TRUNCATE) {
4204 const SDValue &TruncInput = Arg.getOperand(0);
4205 if (TruncInput.getOpcode() == ISD::AssertZext &&
4206 cast<VTSDNode>(TruncInput.getOperand(1))->getVT() ==
4207 Arg.getValueType()) {
4208 Arg = TruncInput.getOperand(0);
4209 continue;
4210 }
4211 }
4212 break;
4213 }
4214
// INT_MAX is a sentinel meaning "no frame index found yet"; every path that
// reaches the assert below is expected to have replaced it.
4215 int FI = INT_MAX2147483647;
4216 if (Arg.getOpcode() == ISD::CopyFromReg) {
4217 unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
4218 if (!TargetRegisterInfo::isVirtualRegister(VR))
4219 return false;
4220 MachineInstr *Def = MRI->getVRegDef(VR);
4221 if (!Def)
4222 return false;
4223 if (!Flags.isByVal()) {
// NOTE(review): isLoadFromStackSlot presumably writes the slot's frame index
// into FI on success - confirm against its declaration.
4224 if (!TII->isLoadFromStackSlot(*Def, FI))
4225 return false;
4226 } else {
// Byval: the register must be defined by an LEA whose operand 1 is a frame
// index; the size to match is then the byval size.
4227 unsigned Opcode = Def->getOpcode();
4228 if ((Opcode == X86::LEA32r || Opcode == X86::LEA64r ||
4229 Opcode == X86::LEA64_32r) &&
4230 Def->getOperand(1).isFI()) {
4231 FI = Def->getOperand(1).getIndex();
4232 Bytes = Flags.getByValSize();
4233 } else
4234 return false;
4235 }
4236 } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
4237 if (Flags.isByVal())
4238 // ByVal argument is passed in as a pointer but it's now being
4239 // dereferenced. e.g.
4240 // define @foo(%struct.X* %A) {
4241 // tail call @bar(%struct.X* byval %A)
4242 // }
4243 return false;
// Only loads whose base pointer is directly a frame index are accepted.
4244 SDValue Ptr = Ld->getBasePtr();
4245 FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
4246 if (!FINode)
4247 return false;
4248 FI = FINode->getIndex();
4249 } else if (Arg.getOpcode() == ISD::FrameIndex && Flags.isByVal()) {
4250 FrameIndexSDNode *FINode = cast<FrameIndexSDNode>(Arg);
4251 FI = FINode->getIndex();
4252 Bytes = Flags.getByValSize();
4253 } else
4254 return false;
4255
4256 assert(FI != INT_MAX)((FI != 2147483647) ? static_cast<void> (0) : __assert_fail
("FI != INT_MAX", "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 4256, __PRETTY_FUNCTION__))
;
4257 if (!MFI.isFixedObjectIndex(FI))
4258 return false;
4259
4260 if (Offset != MFI.getObjectOffset(FI))
4261 return false;
4262
4263 // If this is not byval, check that the argument stack object is immutable.
4264 // inalloca and argument copy elision can create mutable argument stack
4265 // objects. Byval objects can be mutated, but a byval call intends to pass the
4266 // mutated memory.
4267 if (!Flags.isByVal() && !MFI.isImmutableObjectIndex(FI))
4268 return false;
4269
4270 if (VA.getLocVT().getSizeInBits() > Arg.getValueSizeInBits()) {
4271 // If the argument location is wider than the argument type, check that any
4272 // extension flags match.
4273 if (Flags.isZExt() != MFI.isObjectZExt(FI) ||
4274 Flags.isSExt() != MFI.isObjectSExt(FI)) {
4275 return false;
4276 }
4277 }
4278
// Final check: the frame object must have exactly the expected size.
4279 return Bytes == MFI.getObjectSize(FI);
4280}
4281
4282/// Check whether the call is eligible for tail call optimization. Targets
4283/// that want to do tail call optimization should implement this function.
///
/// The function is a sequence of independent checks: it returns false as soon
/// as one of them fails and true only when all of them pass. When
/// GuaranteedTailCallOpt is set, the answer (after the calling-convention,
/// x86_fp80 and Win64 checks) depends only on canGuaranteeTCO(CalleeCC) and on
/// caller and callee using the same calling convention; the sibcall checks
/// that follow are skipped.
///
/// \param Callee            Callee address node; only its node kind is
///                          inspected (32-bit register-pressure check).
/// \param CalleeCC          Calling convention of the callee.
/// \param isVarArg          Whether the call is variadic.
/// \param isCalleeStructRet Callee uses struct-return semantics.
/// \param isCallerStructRet Caller uses struct-return semantics.
/// \param RetTy             Return type of the callee.
/// \param Outs              Outgoing argument descriptions.
/// \param OutVals           Outgoing argument values, indexed in parallel
///                          with the argument locations computed from Outs.
/// \param Ins               Descriptions of the call's results.
/// \param DAG               Selection DAG the call is being lowered in.
4284bool X86TargetLowering::IsEligibleForTailCallOptimization(
4285 SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
4286 bool isCalleeStructRet, bool isCallerStructRet, Type *RetTy,
4287 const SmallVectorImpl<ISD::OutputArg> &Outs,
4288 const SmallVectorImpl<SDValue> &OutVals,
4289 const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
4290 if (!mayTailCallThisCC(CalleeCC))
4291 return false;
4292
4293 // If -tailcallopt is specified, make fastcc functions tail-callable.
4294 MachineFunction &MF = DAG.getMachineFunction();
4295 const Function &CallerF = MF.getFunction();
4296
4297 // If the function return type is x86_fp80 and the callee return type is not,
4298 // then the FP_EXTEND of the call result is not a nop. It's not safe to
4299 // perform a tailcall optimization here.
4300 if (CallerF.getReturnType()->isX86_FP80Ty() && !RetTy->isX86_FP80Ty())
4301 return false;
4302
4303 CallingConv::ID CallerCC = CallerF.getCallingConv();
4304 bool CCMatch = CallerCC == CalleeCC;
4305 bool IsCalleeWin64 = Subtarget.isCallingConvWin64(CalleeCC);
4306 bool IsCallerWin64 = Subtarget.isCallingConvWin64(CallerCC);
4307
4308 // Win64 functions have extra shadow space for argument homing. Don't do the
4309 // sibcall if the caller and callee have mismatched expectations for this
4310 // space.
4311 if (IsCalleeWin64 != IsCallerWin64)
4312 return false;
4313
// Guaranteed-TCO mode: the decision is made here and nothing below runs.
4314 if (DAG.getTarget().Options.GuaranteedTailCallOpt) {
4315 if (canGuaranteeTCO(CalleeCC) && CCMatch)
4316 return true;
4317 return false;
4318 }
4319
4320 // Look for obvious safe cases to perform tail call optimization that do not
4321 // require ABI changes. This is what gcc calls sibcall.
4322
4323 // Can't do sibcall if stack needs to be dynamically re-aligned. PEI needs to
4324 // emit a special epilogue.
4325 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
4326 if (RegInfo->needsStackRealignment(MF))
4327 return false;
4328
4329 // Also avoid sibcall optimization if either caller or callee uses struct
4330 // return semantics.
4331 if (isCalleeStructRet || isCallerStructRet)
4332 return false;
4333
4334 // Do not sibcall optimize vararg calls unless all arguments are passed via
4335 // registers.
4336 LLVMContext &C = *DAG.getContext();
4337 if (isVarArg && !Outs.empty()) {
4338 // Optimizing for varargs on Win64 is unlikely to be safe without
4339 // additional testing.
4340 if (IsCalleeWin64 || IsCallerWin64)
4341 return false;
4342
4343 SmallVector<CCValAssign, 16> ArgLocs;
4344 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
4345
4346 CCInfo.AnalyzeCallOperands(Outs, CC_X86);
4347 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i)
4348 if (!ArgLocs[i].isRegLoc())
4349 return false;
4350 }
4351
4352 // If the call result is in ST0 / ST1, it needs to be popped off the x87
4353 // stack. Therefore, if it's not used by the call it is not safe to optimize
4354 // this into a sibcall.
4355 bool Unused = false;
4356 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
4357 if (!Ins[i].Used) {
4358 Unused = true;
4359 break;
4360 }
4361 }
// The result locations are only analysed when at least one result is unused.
4362 if (Unused) {
4363 SmallVector<CCValAssign, 16> RVLocs;
4364 CCState CCInfo(CalleeCC, false, MF, RVLocs, C);
4365 CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
4366 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
4367 CCValAssign &VA = RVLocs[i];
4368 if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1)
4369 return false;
4370 }
4371 }
4372
4373 // Check that the call results are passed in the same way.
4374 if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
4375 RetCC_X86, RetCC_X86))
4376 return false;
4377 // The callee has to preserve all registers the caller needs to preserve.
4378 const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
4379 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
// With identical conventions the two masks come from the same query, so the
// subset test is only needed when the conventions differ.
4380 if (!CCMatch) {
4381 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
4382 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
4383 return false;
4384 }
4385
// Stays 0 when there are no outgoing arguments; otherwise it is set from
// CCInfo.getNextStackOffset() below and used by the final pop check.
4386 unsigned StackArgsSize = 0;
4387
4388 // If the callee takes no arguments then go on to check the results of the
4389 // call.
4390 if (!Outs.empty()) {
4391 // Check if stack adjustment is needed. For now, do not do this if any
4392 // argument is passed on the stack.
4393 SmallVector<CCValAssign, 16> ArgLocs;
4394 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
4395
4396 // Allocate shadow area for Win64
4397 if (IsCalleeWin64)
4398 CCInfo.AllocateStack(32, 8);
4399
4400 CCInfo.AnalyzeCallOperands(Outs, CC_X86);
4401 StackArgsSize = CCInfo.getNextStackOffset();
4402
4403 if (CCInfo.getNextStackOffset()) {
4404 // Check if the arguments are already laid out in the right way as
4405 // the caller's fixed stack objects.
4406 MachineFrameInfo &MFI = MF.getFrameInfo();
4407 const MachineRegisterInfo *MRI = &MF.getRegInfo();
4408 const X86InstrInfo *TII = Subtarget.getInstrInfo();
4409 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4410 CCValAssign &VA = ArgLocs[i];
4411 SDValue Arg = OutVals[i];
4412 ISD::ArgFlagsTy Flags = Outs[i].Flags;
// Indirectly passed arguments are rejected outright.
4413 if (VA.getLocInfo() == CCValAssign::Indirect)
4414 return false;
4415 if (!VA.isRegLoc()) {
4416 if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
4417 MFI, MRI, TII, VA))
4418 return false;
4419 }
4420 }
4421 }
4422
4423 bool PositionIndependent = isPositionIndependent();
4424 // If the tailcall address may be in a register, then make sure it's
4425 // possible to register allocate for it. In 32-bit, the call address can
4426 // only target EAX, EDX, or ECX since the tail call must be scheduled after
4427 // callee-saved registers are restored. These happen to be the same
4428 // registers used to pass 'inreg' arguments so watch out for those.
4429 if (!Subtarget.is64Bit() && ((!isa<GlobalAddressSDNode>(Callee) &&
4430 !isa<ExternalSymbolSDNode>(Callee)) ||
4431 PositionIndependent)) {
4432 unsigned NumInRegs = 0;
4433 // In PIC we need an extra register to formulate the address computation
4434 // for the callee.
4435 unsigned MaxInRegs = PositionIndependent ? 2 : 3;
4436
4437 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4438 CCValAssign &VA = ArgLocs[i];
4439 if (!VA.isRegLoc())
4440 continue;
4441 unsigned Reg = VA.getLocReg();
4442 switch (Reg) {
4443 default: break;
4444 case X86::EAX: case X86::EDX: case X86::ECX:
// Reject as soon as MaxInRegs arguments are assigned to EAX/EDX/ECX.
4445 if (++NumInRegs == MaxInRegs)
4446 return false;
4447 break;
4448 }
4449 }
4450 }
4451
4452 const MachineRegisterInfo &MRI = MF.getRegInfo();
4453 if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
4454 return false;
4455 }
4456
4457 bool CalleeWillPop =
4458 X86::isCalleePop(CalleeCC, Subtarget.is64Bit(), isVarArg,
4459 MF.getTarget().Options.GuaranteedTailCallOpt);
4460
4461 if (unsigned BytesToPop =
4462 MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn()) {
4463 // If we have bytes to pop, the callee must pop them.
4464 bool CalleePopMatches = CalleeWillPop && BytesToPop == StackArgsSize;
4465 if (!CalleePopMatches)
4466 return false;
4467 } else if (CalleeWillPop && StackArgsSize > 0) {
4468 // If we don't have bytes to pop, make sure the callee doesn't pop any.
4469 return false;
4470 }
4471
4472 return true;
4473}
4474
4475FastISel *
4476X86TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
4477 const TargetLibraryInfo *libInfo) const {
4478 return X86::createFastISel(funcInfo, libInfo);
4479}
4480
4481//===----------------------------------------------------------------------===//
4482// Other Lowering Hooks
4483//===----------------------------------------------------------------------===//
4484
4485static bool MayFoldLoad(SDValue Op) {
4486 return Op.hasOneUse() && ISD::isNormalLoad(Op.getNode());
4487}
4488
4489static bool MayFoldIntoStore(SDValue Op) {
4490 return Op.hasOneUse() && ISD::isNormalStore(*Op.getNode()->use_begin());
4491}
4492
4493static bool MayFoldIntoZeroExtend(SDValue Op) {
4494 if (Op.hasOneUse()) {
4495 unsigned Opcode = Op.getNode()->use_begin()->getOpcode();
4496 return (ISD::ZERO_EXTEND == Opcode);
4497 }
4498 return false;
4499}
4500
4501static bool isTargetShuffle(unsigned Opcode) {
4502 switch(Opcode) {
4503 default: return false;
4504 case X86ISD::BLENDI:
4505 case X86ISD::PSHUFB:
4506 case X86ISD::PSHUFD:
4507 case X86ISD::PSHUFHW:
4508 case X86ISD::PSHUFLW:
4509 case X86ISD::SHUFP:
4510 case X86ISD::INSERTPS:
4511 case X86ISD::EXTRQI:
4512 case X86ISD::INSERTQI:
4513 case X86ISD::PALIGNR:
4514 case X86ISD::VSHLDQ:
4515 case X86ISD::VSRLDQ:
4516 case X86ISD::MOVLHPS:
4517 case X86ISD::MOVHLPS:
4518 case X86ISD::MOVSHDUP:
4519 case X86ISD::MOVSLDUP:
4520 case X86ISD::MOVDDUP:
4521 case X86ISD::MOVSS:
4522 case X86ISD::MOVSD:
4523 case X86ISD::UNPCKL:
4524 case X86ISD::UNPCKH:
4525 case X86ISD::VBROADCAST:
4526 case X86ISD::VPERMILPI:
4527 case X86ISD::VPERMILPV:
4528 case X86ISD::VPERM2X128:
4529 case X86ISD::SHUF128:
4530 case X86ISD::VPERMIL2:
4531 case X86ISD::VPERMI:
4532 case X86ISD::VPPERM:
4533 case X86ISD::VPERMV:
4534 case X86ISD::VPERMV3:
4535 case X86ISD::VZEXT_MOVL:
4536 return true;
4537 }
4538}
4539
4540static bool isTargetShuffleVariableMask(unsigned Opcode) {
4541 switch (Opcode) {
4542 default: return false;
4543 // Target Shuffles.
4544 case X86ISD::PSHUFB:
4545 case X86ISD::VPERMILPV:
4546 case X86ISD::VPERMIL2:
4547 case X86ISD::VPPERM:
4548 case X86ISD::VPERMV:
4549 case X86ISD::VPERMV3:
4550 return true;
4551 // 'Faux' Target Shuffles.
4552 case ISD::OR:
4553 case ISD::AND:
4554 case X86ISD::ANDNP:
4555 return true;
4556 }
4557}
4558
4559SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const {
4560 MachineFunction &MF = DAG.getMachineFunction();
4561 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
4562 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
4563 int ReturnAddrIndex = FuncInfo->getRAIndex();
4564
4565 if (ReturnAddrIndex == 0) {
4566 // Set up a frame object for the return address.
4567 unsigned SlotSize = RegInfo->getSlotSize();
4568 ReturnAddrIndex = MF.getFrameInfo().CreateFixedObject(SlotSize,
4569 -(int64_t)SlotSize,
4570 false);
4571 FuncInfo->setRAIndex(ReturnAddrIndex);
4572 }
4573
4574 return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy(DAG.getDataLayout()));
4575}
4576
4577bool X86::isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
4578 bool hasSymbolicDisplacement) {
4579 // Offset should fit into 32 bit immediate field.
4580 if (!isInt<32>(Offset))
4581 return false;
4582
4583 // If we don't have a symbolic displacement - we don't have any extra
4584 // restrictions.
4585 if (!hasSymbolicDisplacement)
4586 return true;
4587
4588 // FIXME: Some tweaks might be needed for medium code model.
4589 if (M != CodeModel::Small && M != CodeModel::Kernel)
4590 return false;
4591
4592 // For small code model we assume that latest object is 16MB before end of 31
4593 // bits boundary. We may also accept pretty large negative constants knowing
4594 // that all objects are in the positive half of address space.
4595 if (M == CodeModel::Small && Offset < 16*1024*1024)
4596 return true;
4597
4598 // For kernel code model we know that all object resist in the negative half
4599 // of 32bits address space. We may not accept negative offsets, since they may
4600 // be just off and we may accept pretty large positive ones.
4601 if (M == CodeModel::Kernel && Offset >= 0)
4602 return true;
4603
4604 return false;
4605}
4606
4607/// Determines whether the callee is required to pop its own arguments.
4608/// Callee pop is necessary to support tail calls.
4609bool X86::isCalleePop(CallingConv::ID CallingConv,
4610 bool is64Bit, bool IsVarArg, bool GuaranteeTCO) {
4611 // If GuaranteeTCO is true, we force some calls to be callee pop so that we
4612 // can guarantee TCO.
4613 if (!IsVarArg && shouldGuaranteeTCO(CallingConv, GuaranteeTCO))
4614 return true;
4615
4616 switch (CallingConv) {
4617 default:
4618 return false;
4619 case CallingConv::X86_StdCall:
4620 case CallingConv::X86_FastCall:
4621 case CallingConv::X86_ThisCall:
4622 case CallingConv::X86_VectorCall:
4623 return !is64Bit;
4624 }
4625}
4626
4627/// Return true if the condition is an unsigned comparison operation.
4628static bool isX86CCUnsigned(unsigned X86CC) {
4629 switch (X86CC) {
4630 default:
4631 llvm_unreachable("Invalid integer condition!")::llvm::llvm_unreachable_internal("Invalid integer condition!"
, "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 4631)
;
4632 case X86::COND_E:
4633 case X86::COND_NE:
4634 case X86::COND_B:
4635 case X86::COND_A:
4636 case X86::COND_BE:
4637 case X86::COND_AE:
4638 return true;
4639 case X86::COND_G:
4640 case X86::COND_GE:
4641 case X86::COND_L:
4642 case X86::COND_LE:
4643 return false;
4644 }
4645}
4646
4647static X86::CondCode TranslateIntegerX86CC(ISD::CondCode SetCCOpcode) {
4648 switch (SetCCOpcode) {
4649 default: llvm_unreachable("Invalid integer condition!")::llvm::llvm_unreachable_internal("Invalid integer condition!"
, "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 4649)
;
4650 case ISD::SETEQ: return X86::COND_E;
4651 case ISD::SETGT: return X86::COND_G;
4652 case ISD::SETGE: return X86::COND_GE;
4653 case ISD::SETLT: return X86::COND_L;
4654 case ISD::SETLE: return X86::COND_LE;
4655 case ISD::SETNE: return X86::COND_NE;
4656 case ISD::SETULT: return X86::COND_B;
4657 case ISD::SETUGT: return X86::COND_A;
4658 case ISD::SETULE: return X86::COND_BE;
4659 case ISD::SETUGE: return X86::COND_AE;
4660 }
4661}
4662
4663/// Do a one-to-one translation of a ISD::CondCode to the X86-specific
4664/// condition code, returning the condition code and the LHS/RHS of the
4665/// comparison to make.
///
/// \param SetCCOpcode Condition code to translate.
/// \param DL          Location used for any constant that is created.
/// \param isFP        Selects the floating-point path; otherwise the integer
///                    path is taken.
/// \param LHS,RHS     In/out operands. They may be swapped (floating point),
///                    and RHS may be replaced by the constant 0 (integer).
/// \param DAG         Used to create the replacement constant.
/// \returns The X86 condition code; X86::COND_INVALID for the floating-point
///          predicates SETOEQ and SETUNE.
4666static X86::CondCode TranslateX86CC(ISD::CondCode SetCCOpcode, const SDLoc &DL,
4667 bool isFP, SDValue &LHS, SDValue &RHS,
4668 SelectionDAG &DAG) {
4669 if (!isFP) {
// Integer path: a few comparisons against a constant RHS are special-cased;
// everything else goes through TranslateIntegerX86CC.
4670 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
4671 if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnesValue()) {
4672 // X > -1 is X >= 0: compare against 0 instead and test "not sign".
4673 RHS = DAG.getConstant(0, DL, RHS.getValueType());
4674 return X86::COND_NS;
4675 }
4676 if (SetCCOpcode == ISD::SETLT && RHSC->isNullValue()) {
4677 // X < 0: RHS is already 0, so only the sign needs to be tested.
4678 return X86::COND_S;
4679 }
4680 if (SetCCOpcode == ISD::SETLT && RHSC->getZExtValue() == 1) {
4681 // X < 1 -> X <= 0
4682 RHS = DAG.getConstant(0, DL, RHS.getValueType());
4683 return X86::COND_LE;
4684 }
4685 }
4686
4687 return TranslateIntegerX86CC(SetCCOpcode);
4688 }
4689
4690 // First determine if it is required or is profitable to flip the operands.
4691
4692 // If LHS is a foldable load, but RHS is not, flip the condition.
4693 if (ISD::isNON_EXTLoad(LHS.getNode()) &&
4694 !ISD::isNON_EXTLoad(RHS.getNode())) {
4695 SetCCOpcode = getSetCCSwappedOperands(SetCCOpcode);
4696 std::swap(LHS, RHS);
4697 }
4698
// For these four predicates only the operands are swapped; the predicate is
// left as is and the "flipped" cases of the switch below account for it.
4699 switch (SetCCOpcode) {
4700 default: break;
4701 case ISD::SETOLT:
4702 case ISD::SETOLE:
4703 case ISD::SETUGT:
4704 case ISD::SETUGE:
4705 std::swap(LHS, RHS);
4706 break;
4707 }
4708
4709 // On a floating point condition, the flags are set as follows:
4710 // ZF PF CF op
4711 // 0 | 0 | 0 | X > Y
4712 // 0 | 0 | 1 | X < Y
4713 // 1 | 0 | 0 | X == Y
4714 // 1 | 1 | 1 | unordered
4715 switch (SetCCOpcode) {
4716 default: llvm_unreachable("Condcode should be pre-legalized away")::llvm::llvm_unreachable_internal("Condcode should be pre-legalized away"
, "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 4716)
;
4717 case ISD::SETUEQ:
4718 case ISD::SETEQ: return X86::COND_E;
4719 case ISD::SETOLT: // flipped
4720 case ISD::SETOGT:
4721 case ISD::SETGT: return X86::COND_A;
4722 case ISD::SETOLE: // flipped
4723 case ISD::SETOGE:
4724 case ISD::SETGE: return X86::COND_AE;
4725 case ISD::SETUGT: // flipped
4726 case ISD::SETULT:
4727 case ISD::SETLT: return X86::COND_B;
4728 case ISD::SETUGE: // flipped
4729 case ISD::SETULE:
4730 case ISD::SETLE: return X86::COND_BE;
4731 case ISD::SETONE:
4732 case ISD::SETNE: return X86::COND_NE;
4733 case ISD::SETUO: return X86::COND_P;
4734 case ISD::SETO: return X86::COND_NP;
// No single condition code is returned for these two predicates.
4735 case ISD::SETOEQ:
4736 case ISD::SETUNE: return X86::COND_INVALID;
4737 }
4738}
4739
4740/// Is there a floating point cmov for the specific X86 condition code?
4741/// Current x86 isa includes the following FP cmov instructions:
4742/// fcmovb, fcomvbe, fcomve, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu.
4743static bool hasFPCMov(unsigned X86CC) {
4744 switch (X86CC) {
4745 default:
4746 return false;
4747 case X86::COND_B:
4748 case X86::COND_BE:
4749 case X86::COND_E:
4750 case X86::COND_P:
4751 case X86::COND_A:
4752 case X86::COND_AE:
4753 case X86::COND_NE:
4754 case X86::COND_NP:
4755 return true;
4756 }
4757}
4758
4759
4760bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
4761 const CallInst &I,
4762 MachineFunction &MF,
4763 unsigned Intrinsic) const {
4764
4765 const IntrinsicData* IntrData = getIntrinsicWithChain(Intrinsic);
4766 if (!IntrData)
4767 return false;
4768
4769 Info.opc = ISD::INTRINSIC_W_CHAIN;
4770 Info.flags = MachineMemOperand::MONone;
4771 Info.offset = 0;
4772
4773 switch (IntrData->Type) {
4774 case TRUNCATE_TO_MEM_VI8:
4775 case TRUNCATE_TO_MEM_VI16:
4776 case TRUNCATE_TO_MEM_VI32: {
4777 Info.ptrVal = I.getArgOperand(0);
4778 MVT VT = MVT::getVT(I.getArgOperand(1)->getType());
4779 MVT ScalarVT = MVT::INVALID_SIMPLE_VALUE_TYPE;
4780 if (IntrData->Type == TRUNCATE_TO_MEM_VI8)
4781 ScalarVT = MVT::i8;
4782 else if (IntrData->Type == TRUNCATE_TO_MEM_VI16)
4783 ScalarVT = MVT::i16;
4784 else if (IntrData->Type == TRUNCATE_TO_MEM_VI32)
4785 ScalarVT = MVT::i32;
4786
4787 Info.memVT = MVT::getVectorVT(ScalarVT, VT.getVectorNumElements());
4788 Info.align = 1;
4789 Info.flags |= MachineMemOperand::MOStore;
4790 break;
4791 }
4792 default:
4793 return false;
4794 }
4795
4796 return true;
4797}
4798
4799/// Returns true if the target can instruction select the
4800/// specified FP immediate natively. If false, the legalizer will
4801/// materialize the FP immediate as a load from a constant pool.
4802bool X86TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
4803 for (unsigned i = 0, e = LegalFPImmediates.size(); i != e; ++i) {
4804 if (Imm.bitwiseIsEqual(LegalFPImmediates[i]))
4805 return true;
4806 }
4807 return false;
4808}
4809
4810bool X86TargetLowering::shouldReduceLoadWidth(SDNode *Load,
4811 ISD::LoadExtType ExtTy,
4812 EVT NewVT) const {
4813 // "ELF Handling for Thread-Local Storage" specifies that R_X86_64_GOTTPOFF
4814 // relocation target a movq or addq instruction: don't let the load shrink.
4815 SDValue BasePtr = cast<LoadSDNode>(Load)->getBasePtr();
4816 if (BasePtr.getOpcode() == X86ISD::WrapperRIP)
4817 if (const auto *GA = dyn_cast<GlobalAddressSDNode>(BasePtr.getOperand(0)))
4818 return GA->getTargetFlags() != X86II::MO_GOTTPOFF;
4819 return true;
4820}
4821
4822/// Returns true if it is beneficial to convert a load of a constant
4823/// to just the constant itself.
4824bool X86TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
4825 Type *Ty) const {
4826 assert(Ty->isIntegerTy())((Ty->isIntegerTy()) ? static_cast<void> (0) : __assert_fail
("Ty->isIntegerTy()", "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 4826, __PRETTY_FUNCTION__))
;
4827
4828 unsigned BitSize = Ty->getPrimitiveSizeInBits();
4829 if (BitSize == 0 || BitSize > 64)
4830 return false;
4831 return true;
4832}
4833
4834bool X86TargetLowering::reduceSelectOfFPConstantLoads(bool IsFPSetCC) const {
4835 // If we are using XMM registers in the ABI and the condition of the select is
4836 // a floating-point compare and we have blendv or conditional move, then it is
4837 // cheaper to select instead of doing a cross-register move and creating a
4838 // load that depends on the compare result.
4839 return !IsFPSetCC || !Subtarget.isTarget64BitLP64() || !Subtarget.hasAVX();
4840}
4841
4842bool X86TargetLowering::convertSelectOfConstantsToMath(EVT VT) const {
4843 // TODO: It might be a win to ease or lift this restriction, but the generic
4844 // folds in DAGCombiner conflict with vector folds for an AVX512 target.
4845 if (VT.isVector() && Subtarget.hasAVX512())
4846 return false;
4847
4848 return true;
4849}
4850
4851bool X86TargetLowering::decomposeMulByConstant(EVT VT, SDValue C) const {
4852 // TODO: We handle scalars using custom code, but generic combining could make
4853 // that unnecessary.
4854 APInt MulC;
4855 if (!ISD::isConstantSplatVector(C.getNode(), MulC))
4856 return false;
4857
4858 // If vector multiply is legal, assume that's faster than shl + add/sub.
4859 // TODO: Multiply is a complex op with higher latency and lower througput in
4860 // most implementations, so this check could be loosened based on type
4861 // and/or a CPU attribute.
4862 if (isOperationLegal(ISD::MUL, VT))
4863 return false;
4864
4865 // shl+add, shl+sub, shl+add+neg
4866 return (MulC + 1).isPowerOf2() || (MulC - 1).isPowerOf2() ||
4867 (1 - MulC).isPowerOf2() || (-(MulC + 1)).isPowerOf2();
4868}
4869
4870bool X86TargetLowering::shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT,
4871 bool IsSigned) const {
4872 // f80 UINT_TO_FP is more efficient using Strict code if FCMOV is available.
4873 return !IsSigned && FpVT == MVT::f80 && Subtarget.hasCMov();
4874}
4875
4876bool X86TargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
4877 unsigned Index) const {
4878 if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
4879 return false;
4880
4881 // Mask vectors support all subregister combinations and operations that
4882 // extract half of vector.
4883 if (ResVT.getVectorElementType() == MVT::i1)
4884 return Index == 0 || ((ResVT.getSizeInBits() == SrcVT.getSizeInBits()*2) &&
4885 (Index == ResVT.getVectorNumElements()));
4886
4887 return (Index % ResVT.getVectorNumElements()) == 0;
4888}
4889
4890bool X86TargetLowering::isCheapToSpeculateCttz() const {
4891 // Speculate cttz only if we can directly use TZCNT.
4892 return Subtarget.hasBMI();
4893}
4894
4895bool X86TargetLowering::isCheapToSpeculateCtlz() const {
4896 // Speculate ctlz only if we can directly use LZCNT.
4897 return Subtarget.hasLZCNT();
4898}
4899
4900bool X86TargetLowering::isLoadBitCastBeneficial(EVT LoadVT,
4901 EVT BitcastVT) const {
4902 if (!Subtarget.hasAVX512() && !LoadVT.isVector() && BitcastVT.isVector() &&
4903 BitcastVT.getVectorElementType() == MVT::i1)
4904 return false;
4905
4906 if (!Subtarget.hasDQI() && BitcastVT == MVT::v8i1 && LoadVT == MVT::i8)
4907 return false;
4908
4909 return TargetLowering::isLoadBitCastBeneficial(LoadVT, BitcastVT);
4910}
4911
4912bool X86TargetLowering::canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
4913 const SelectionDAG &DAG) const {
4914 // Do not merge to float value size (128 bytes) if no implicit
4915 // float attribute is set.
4916 bool NoFloat = DAG.getMachineFunction().getFunction().hasFnAttribute(
4917 Attribute::NoImplicitFloat);
4918
4919 if (NoFloat) {
4920 unsigned MaxIntSize = Subtarget.is64Bit() ? 64 : 32;
4921 return (MemVT.getSizeInBits() <= MaxIntSize);
4922 }
4923 return true;
4924}
4925
4926bool X86TargetLowering::isCtlzFast() const {
4927 return Subtarget.hasFastLZCNT();
4928}
4929
4930bool X86TargetLowering::isMaskAndCmp0FoldingBeneficial(
4931 const Instruction &AndI) const {
4932 return true;
4933}
4934
4935bool X86TargetLowering::hasAndNotCompare(SDValue Y) const {
4936 EVT VT = Y.getValueType();
4937
4938 if (VT.isVector())
4939 return false;
4940
4941 if (!Subtarget.hasBMI())
4942 return false;
4943
4944 // There are only 32-bit and 64-bit forms for 'andn'.
4945 if (VT != MVT::i32 && VT != MVT::i64)
4946 return false;
4947
4948 return !isa<ConstantSDNode>(Y);
4949}
4950
4951bool X86TargetLowering::hasAndNot(SDValue Y) const {
4952 EVT VT = Y.getValueType();
4953
4954 if (!VT.isVector())
4955 return hasAndNotCompare(Y);
4956
4957 // Vector.
4958
4959 if (!Subtarget.hasSSE1() || VT.getSizeInBits() < 128)
4960 return false;
4961
4962 if (VT == MVT::v4i32)
4963 return true;
4964
4965 return Subtarget.hasSSE2();
4966}
4967
4968bool X86TargetLowering::preferShiftsToClearExtremeBits(SDValue Y) const {
4969 EVT VT = Y.getValueType();
4970
4971 // For vectors, we don't have a preference, but we probably want a mask.
4972 if (VT.isVector())
4973 return false;
4974
4975 // 64-bit shifts on 32-bit targets produce really bad bloated code.
4976 if (VT == MVT::i64 && !Subtarget.is64Bit())
4977 return false;
4978
4979 return true;
4980}
4981
4982bool X86TargetLowering::shouldSplatInsEltVarIndex(EVT VT) const {
4983 // Any legal vector type can be splatted more efficiently than
4984 // loading/spilling from memory.
4985 return isTypeLegal(VT);
4986}
4987
4988MVT X86TargetLowering::hasFastEqualityCompare(unsigned NumBits) const {
4989 MVT VT = MVT::getIntegerVT(NumBits);
4990 if (isTypeLegal(VT))
4991 return VT;
4992
4993 // PMOVMSKB can handle this.
4994 if (NumBits == 128 && isTypeLegal(MVT::v16i8))
4995 return MVT::v16i8;
4996
4997 // VPMOVMSKB can handle this.
4998 if (NumBits == 256 && isTypeLegal(MVT::v32i8))
4999 return MVT::v32i8;
5000
5001 // TODO: Allow 64-bit type for 32-bit target.
5002 // TODO: 512-bit types should be allowed, but make sure that those
5003 // cases are handled in combineVectorSizedSetCCEquality().
5004
5005 return MVT::INVALID_SIMPLE_VALUE_TYPE;
5006}
5007
5008/// Val is the undef sentinel value or equal to the specified value.
5009static bool isUndefOrEqual(int Val, int CmpVal) {
5010 return ((Val == SM_SentinelUndef) || (Val == CmpVal));
5011}
5012
5013/// Val is either the undef or zero sentinel value.
5014static bool isUndefOrZero(int Val) {
5015 return ((Val == SM_SentinelUndef) || (Val == SM_SentinelZero));
5016}
5017
5018/// Return true if every element in Mask, beginning
5019/// from position Pos and ending in Pos+Size is the undef sentinel value.
5020static bool isUndefInRange(ArrayRef<int> Mask, unsigned Pos, unsigned Size) {
5021 for (unsigned i = Pos, e = Pos + Size; i != e; ++i)
5022 if (Mask[i] != SM_SentinelUndef)
5023 return false;
5024 return true;
5025}
5026
/// Return true if Val falls within the half-open range [Low, Hi), i.e.
/// Low is included and Hi is excluded.
static bool isInRange(int Val, int Low, int Hi) {
  if (Val < Low)
    return false;
  return Val < Hi;
}
5031
5032/// Return true if the value of any element in Mask falls within the specified
5033/// range (L, H].
5034static bool isAnyInRange(ArrayRef<int> Mask, int Low, int Hi) {
5035 for (int M : Mask)
5036 if (isInRange(M, Low, Hi))
5037 return true;
5038 return false;
5039}
5040
5041/// Return true if Val is undef or if its value falls within the
5042/// specified range (L, H].
5043static bool isUndefOrInRange(int Val, int Low, int Hi) {
5044 return (Val == SM_SentinelUndef) || isInRange(Val, Low, Hi);
5045}
5046
5047/// Return true if every element in Mask is undef or if its value
5048/// falls within the specified range (L, H].
5049static bool isUndefOrInRange(ArrayRef<int> Mask,
5050 int Low, int Hi) {
5051 for (int M : Mask)
5052 if (!isUndefOrInRange(M, Low, Hi))
5053 return false;
5054 return true;
5055}
5056
5057/// Return true if Val is undef, zero or if its value falls within the
5058/// specified range (L, H].
5059static bool isUndefOrZeroOrInRange(int Val, int Low, int Hi) {
5060 return isUndefOrZero(Val) || isInRange(Val, Low, Hi);
5061}
5062
5063/// Return true if every element in Mask is undef, zero or if its value
5064/// falls within the specified range (L, H].
5065static bool isUndefOrZeroOrInRange(ArrayRef<int> Mask, int Low, int Hi) {
5066 for (int M : Mask)
5067 if (!isUndefOrZeroOrInRange(M, Low, Hi))
5068 return false;
5069 return true;
5070}
5071
5072/// Return true if every element in Mask, beginning
5073/// from position Pos and ending in Pos + Size, falls within the specified
5074/// sequence (Low, Low + Step, ..., Low + (Size - 1) * Step) or is undef.
5075static bool isSequentialOrUndefInRange(ArrayRef<int> Mask, unsigned Pos,
5076 unsigned Size, int Low, int Step = 1) {
5077 for (unsigned i = Pos, e = Pos + Size; i != e; ++i, Low += Step)
5078 if (!isUndefOrEqual(Mask[i], Low))
5079 return false;
5080 return true;
5081}
5082
5083/// Return true if every element in Mask, beginning
5084/// from position Pos and ending in Pos+Size, falls within the specified
5085/// sequential range (Low, Low+Size], or is undef or is zero.
5086static bool isSequentialOrUndefOrZeroInRange(ArrayRef<int> Mask, unsigned Pos,
5087 unsigned Size, int Low) {
5088 for (unsigned i = Pos, e = Pos + Size; i != e; ++i, ++Low)
5089 if (!isUndefOrZero(Mask[i]) && Mask[i] != Low)
5090 return false;
5091 return true;
5092}
5093
5094/// Return true if every element in Mask, beginning
5095/// from position Pos and ending in Pos+Size is undef or is zero.
5096static bool isUndefOrZeroInRange(ArrayRef<int> Mask, unsigned Pos,
5097 unsigned Size) {
5098 for (unsigned i = Pos, e = Pos + Size; i != e; ++i)
5099 if (!isUndefOrZero(Mask[i]))
5100 return false;
5101 return true;
5102}
5103
5104/// Helper function to test whether a shuffle mask could be
5105/// simplified by widening the elements being shuffled.
5106///
5107/// Appends the mask for wider elements in WidenedMask if valid. Otherwise
5108/// leaves it in an unspecified state.
5109///
5110/// NOTE: This must handle normal vector shuffle masks and *target* vector
5111/// shuffle masks. The latter have the special property of a '-2' representing
5112/// a zero-ed lane of a vector.
5113static bool canWidenShuffleElements(ArrayRef<int> Mask,
5114 SmallVectorImpl<int> &WidenedMask) {
5115 WidenedMask.assign(Mask.size() / 2, 0);
5116 for (int i = 0, Size = Mask.size(); i < Size; i += 2) {
5117 int M0 = Mask[i];
5118 int M1 = Mask[i + 1];
5119
5120 // If both elements are undef, its trivial.
5121 if (M0 == SM_SentinelUndef && M1 == SM_SentinelUndef) {
5122 WidenedMask[i / 2] = SM_SentinelUndef;
5123 continue;
5124 }
5125
5126 // Check for an undef mask and a mask value properly aligned to fit with
5127 // a pair of values. If we find such a case, use the non-undef mask's value.
5128 if (M0 == SM_SentinelUndef && M1 >= 0 && (M1 % 2) == 1) {
5129 WidenedMask[i / 2] = M1 / 2;
5130 continue;
5131 }
5132 if (M1 == SM_SentinelUndef && M0 >= 0 && (M0 % 2) == 0) {
5133 WidenedMask[i / 2] = M0 / 2;
5134 continue;
5135 }
5136
5137 // When zeroing, we need to spread the zeroing across both lanes to widen.
5138 if (M0 == SM_SentinelZero || M1 == SM_SentinelZero) {
5139 if ((M0 == SM_SentinelZero || M0 == SM_SentinelUndef) &&
5140 (M1 == SM_SentinelZero || M1 == SM_SentinelUndef)) {
5141 WidenedMask[i / 2] = SM_SentinelZero;
5142 continue;
5143 }
5144 return false;
5145 }
5146
5147 // Finally check if the two mask values are adjacent and aligned with
5148 // a pair.
5149 if (M0 != SM_SentinelUndef && (M0 % 2) == 0 && (M0 + 1) == M1) {
5150 WidenedMask[i / 2] = M0 / 2;
5151 continue;
5152 }
5153
5154 // Otherwise we can't safely widen the elements used in this shuffle.
5155 return false;
5156 }
5157 assert(WidenedMask.size() == Mask.size() / 2 &&((WidenedMask.size() == Mask.size() / 2 && "Incorrect size of mask after widening the elements!"
) ? static_cast<void> (0) : __assert_fail ("WidenedMask.size() == Mask.size() / 2 && \"Incorrect size of mask after widening the elements!\""
, "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 5158, __PRETTY_FUNCTION__))
5158 "Incorrect size of mask after widening the elements!")((WidenedMask.size() == Mask.size() / 2 && "Incorrect size of mask after widening the elements!"
) ? static_cast<void> (0) : __assert_fail ("WidenedMask.size() == Mask.size() / 2 && \"Incorrect size of mask after widening the elements!\""
, "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 5158, __PRETTY_FUNCTION__))
;
5159
5160 return true;
5161}
5162
5163static bool canWidenShuffleElements(ArrayRef<int> Mask,
5164 const APInt &Zeroable,
5165 SmallVectorImpl<int> &WidenedMask) {
5166 SmallVector<int, 32> TargetMask(Mask.begin(), Mask.end());
5167 for (int i = 0, Size = TargetMask.size(); i < Size; ++i) {
5168 if (TargetMask[i] == SM_SentinelUndef)
5169 continue;
5170 if (Zeroable[i])
5171 TargetMask[i] = SM_SentinelZero;
5172 }
5173 return canWidenShuffleElements(TargetMask, WidenedMask);
5174}
5175
5176static bool canWidenShuffleElements(ArrayRef<int> Mask) {
5177 SmallVector<int, 32> WidenedMask;
5178 return canWidenShuffleElements(Mask, WidenedMask);
5179}
5180
5181/// Returns true if Elt is a constant zero or a floating point constant +0.0.
5182bool X86::isZeroNode(SDValue Elt) {
5183 return isNullConstant(Elt) || isNullFPConstant(Elt);
5184}
5185
5186// Build a vector of constants.
5187// Use an UNDEF node if MaskElt == -1.
5188// Split 64-bit constants in the 32-bit mode.
5189static SDValue getConstVector(ArrayRef<int> Values, MVT VT, SelectionDAG &DAG,
5190 const SDLoc &dl, bool IsMask = false) {
5191
5192 SmallVector<SDValue, 32> Ops;
5193 bool Split = false;
5194
5195 MVT ConstVecVT = VT;
5196 unsigned NumElts = VT.getVectorNumElements();
5197 bool In64BitMode = DAG.getTargetLoweringInfo().isTypeLegal(MVT::i64);
5198 if (!In64BitMode && VT.getVectorElementType() == MVT::i64) {
5199 ConstVecVT = MVT::getVectorVT(MVT::i32, NumElts * 2);
5200 Split = true;
5201 }
5202
5203 MVT EltVT = ConstVecVT.getVectorElementType();
5204 for (unsigned i = 0; i < NumElts; ++i) {
5205 bool IsUndef = Values[i] < 0 && IsMask;
5206 SDValue OpNode = IsUndef ? DAG.getUNDEF(EltVT) :
5207 DAG.getConstant(Values[i], dl, EltVT);
5208 Ops.push_back(OpNode);
5209 if (Split)
5210 Ops.push_back(IsUndef ? DAG.getUNDEF(EltVT) :
5211 DAG.getConstant(0, dl, EltVT));
5212 }
5213 SDValue ConstsNode = DAG.getBuildVector(ConstVecVT, dl, Ops);
5214 if (Split)
5215 ConstsNode = DAG.getBitcast(VT, ConstsNode);
5216 return ConstsNode;
5217}
5218
5219static SDValue getConstVector(ArrayRef<APInt> Bits, APInt &Undefs,
5220 MVT VT, SelectionDAG &DAG, const SDLoc &dl) {
5221 assert(Bits.size() == Undefs.getBitWidth() &&((Bits.size() == Undefs.getBitWidth() && "Unequal constant and undef arrays"
) ? static_cast<void> (0) : __assert_fail ("Bits.size() == Undefs.getBitWidth() && \"Unequal constant and undef arrays\""
, "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 5222, __PRETTY_FUNCTION__))
5222 "Unequal constant and undef arrays")((Bits.size() == Undefs.getBitWidth() && "Unequal constant and undef arrays"
) ? static_cast<void> (0) : __assert_fail ("Bits.size() == Undefs.getBitWidth() && \"Unequal constant and undef arrays\""
, "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 5222, __PRETTY_FUNCTION__))
;
5223 SmallVector<SDValue, 32> Ops;
5224 bool Split = false;
5225
5226 MVT ConstVecVT = VT;
5227 unsigned NumElts = VT.getVectorNumElements();
5228 bool In64BitMode = DAG.getTargetLoweringInfo().isTypeLegal(MVT::i64);
5229 if (!In64BitMode && VT.getVectorElementType() == MVT::i64) {
5230 ConstVecVT = MVT::getVectorVT(MVT::i32, NumElts * 2);
5231 Split = true;
5232 }
5233
5234 MVT EltVT = ConstVecVT.getVectorElementType();
5235 for (unsigned i = 0, e = Bits.size(); i != e; ++i) {
5236 if (Undefs[i]) {
5237 Ops.append(Split ? 2 : 1, DAG.getUNDEF(EltVT));
5238 continue;
5239 }
5240 const APInt &V = Bits[i];
5241 assert(V.getBitWidth() == VT.getScalarSizeInBits() && "Unexpected sizes")((V.getBitWidth() == VT.getScalarSizeInBits() && "Unexpected sizes"
) ? static_cast<void> (0) : __assert_fail ("V.getBitWidth() == VT.getScalarSizeInBits() && \"Unexpected sizes\""
, "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 5241, __PRETTY_FUNCTION__))
;
5242 if (Split) {
5243 Ops.push_back(DAG.getConstant(V.trunc(32), dl, EltVT));
5244 Ops.push_back(DAG.getConstant(V.lshr(32).trunc(32), dl, EltVT));
5245 } else if (EltVT == MVT::f32) {
5246 APFloat FV(APFloat::IEEEsingle(), V);
5247 Ops.push_back(DAG.getConstantFP(FV, dl, EltVT));
5248 } else if (EltVT == MVT::f64) {
5249 APFloat FV(APFloat::IEEEdouble(), V);
5250 Ops.push_back(DAG.getConstantFP(FV, dl, EltVT));
5251 } else {
5252 Ops.push_back(DAG.getConstant(V, dl, EltVT));
5253 }
5254 }
5255
5256 SDValue ConstsNode = DAG.getBuildVector(ConstVecVT, dl, Ops);
5257 return DAG.getBitcast(VT, ConstsNode);
5258}
5259
5260/// Returns a vector of specified type with all zero elements.
5261static SDValue getZeroVector(MVT VT, const X86Subtarget &Subtarget,
5262 SelectionDAG &DAG, const SDLoc &dl) {
5263 assert((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector() ||(((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector
() || VT.getVectorElementType() == MVT::i1) && "Unexpected vector type"
) ? static_cast<void> (0) : __assert_fail ("(VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector() || VT.getVectorElementType() == MVT::i1) && \"Unexpected vector type\""
, "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 5265, __PRETTY_FUNCTION__))
5264 VT.getVectorElementType() == MVT::i1) &&(((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector
() || VT.getVectorElementType() == MVT::i1) && "Unexpected vector type"
) ? static_cast<void> (0) : __assert_fail ("(VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector() || VT.getVectorElementType() == MVT::i1) && \"Unexpected vector type\""
, "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 5265, __PRETTY_FUNCTION__))
5265 "Unexpected vector type")(((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector
() || VT.getVectorElementType() == MVT::i1) && "Unexpected vector type"
) ? static_cast<void> (0) : __assert_fail ("(VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector() || VT.getVectorElementType() == MVT::i1) && \"Unexpected vector type\""
, "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 5265, __PRETTY_FUNCTION__))
;
5266
5267 // Try to build SSE/AVX zero vectors as <N x i32> bitcasted to their dest
5268 // type. This ensures they get CSE'd. But if the integer type is not
5269 // available, use a floating-point +0.0 instead.
5270 SDValue Vec;
5271 if (!Subtarget.hasSSE2() && VT.is128BitVector()) {
5272 Vec = DAG.getConstantFP(+0.0, dl, MVT::v4f32);
5273 } else if (VT.getVectorElementType() == MVT::i1) {
5274 assert((Subtarget.hasBWI() || VT.getVectorNumElements() <= 16) &&(((Subtarget.hasBWI() || VT.getVectorNumElements() <= 16) &&
"Unexpected vector type") ? static_cast<void> (0) : __assert_fail
("(Subtarget.hasBWI() || VT.getVectorNumElements() <= 16) && \"Unexpected vector type\""
, "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 5275, __PRETTY_FUNCTION__))
5275 "Unexpected vector type")(((Subtarget.hasBWI() || VT.getVectorNumElements() <= 16) &&
"Unexpected vector type") ? static_cast<void> (0) : __assert_fail
("(Subtarget.hasBWI() || VT.getVectorNumElements() <= 16) && \"Unexpected vector type\""
, "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 5275, __PRETTY_FUNCTION__))
;
5276 Vec = DAG.getConstant(0, dl, VT);
5277 } else {
5278 unsigned Num32BitElts = VT.getSizeInBits() / 32;
5279 Vec = DAG.getConstant(0, dl, MVT::getVectorVT(MVT::i32, Num32BitElts));
5280 }
5281 return DAG.getBitcast(VT, Vec);
5282}
5283
5284static SDValue extractSubVector(SDValue Vec, unsigned IdxVal, SelectionDAG &DAG,
5285 const SDLoc &dl, unsigned vectorWidth) {
5286 EVT VT = Vec.getValueType();
5287 EVT ElVT = VT.getVectorElementType();
5288 unsigned Factor = VT.getSizeInBits()/vectorWidth;
5289 EVT ResultVT = EVT::getVectorVT(*DAG.getContext(), ElVT,
5290 VT.getVectorNumElements()/Factor);
5291
5292 // Extract the relevant vectorWidth bits. Generate an EXTRACT_SUBVECTOR
5293 unsigned ElemsPerChunk = vectorWidth / ElVT.getSizeInBits();
5294 assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2")((isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2"
) ? static_cast<void> (0) : __assert_fail ("isPowerOf2_32(ElemsPerChunk) && \"Elements per chunk not power of 2\""
, "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 5294, __PRETTY_FUNCTION__))
;
5295
5296 // This is the index of the first element of the vectorWidth-bit chunk
5297 // we want. Since ElemsPerChunk is a power of 2 just need to clear bits.
5298 IdxVal &= ~(ElemsPerChunk - 1);
5299
5300 // If the input is a buildvector just emit a smaller one.
5301 if (Vec.getOpcode() == ISD::BUILD_VECTOR)
5302 return DAG.getBuildVector(ResultVT, dl,
5303 Vec->ops().slice(IdxVal, ElemsPerChunk));
5304
5305 SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, dl);
5306 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResultVT, Vec, VecIdx);
5307}
5308
5309/// Generate a DAG to grab 128-bits from a vector > 128 bits. This
5310/// sets things up to match to an AVX VEXTRACTF128 / VEXTRACTI128
5311/// or AVX-512 VEXTRACTF32x4 / VEXTRACTI32x4
5312/// instructions or a simple subregister reference. Idx is an index in the
5313/// 128 bits we want. It need not be aligned to a 128-bit boundary. That makes
5314/// lowering EXTRACT_VECTOR_ELT operations easier.
5315static SDValue extract128BitVector(SDValue Vec, unsigned IdxVal,
5316 SelectionDAG &DAG, const SDLoc &dl) {
5317 assert((Vec.getValueType().is256BitVector() ||(((Vec.getValueType().is256BitVector() || Vec.getValueType().
is512BitVector()) && "Unexpected vector size!") ? static_cast
<void> (0) : __assert_fail ("(Vec.getValueType().is256BitVector() || Vec.getValueType().is512BitVector()) && \"Unexpected vector size!\""
, "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 5318, __PRETTY_FUNCTION__))
5318 Vec.getValueType().is512BitVector()) && "Unexpected vector size!")(((Vec.getValueType().is256BitVector() || Vec.getValueType().
is512BitVector()) && "Unexpected vector size!") ? static_cast
<void> (0) : __assert_fail ("(Vec.getValueType().is256BitVector() || Vec.getValueType().is512BitVector()) && \"Unexpected vector size!\""
, "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 5318, __PRETTY_FUNCTION__))
;
5319 return extractSubVector(Vec, IdxVal, DAG, dl, 128);
5320}
5321
5322/// Generate a DAG to grab 256-bits from a 512-bit vector.
5323static SDValue extract256BitVector(SDValue Vec, unsigned IdxVal,
5324 SelectionDAG &DAG, const SDLoc &dl) {
5325 assert(Vec.getValueType().is512BitVector() && "Unexpected vector size!")((Vec.getValueType().is512BitVector() && "Unexpected vector size!"
) ? static_cast<void> (0) : __assert_fail ("Vec.getValueType().is512BitVector() && \"Unexpected vector size!\""
, "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 5325, __PRETTY_FUNCTION__))
;
5326 return extractSubVector(Vec, IdxVal, DAG, dl, 256);
5327}
5328
5329static SDValue insertSubVector(SDValue Result, SDValue Vec, unsigned IdxVal,
5330 SelectionDAG &DAG, const SDLoc &dl,
5331 unsigned vectorWidth) {
5332 assert((vectorWidth == 128 || vectorWidth == 256) &&(((vectorWidth == 128 || vectorWidth == 256) && "Unsupported vector width"
) ? static_cast<void> (0) : __assert_fail ("(vectorWidth == 128 || vectorWidth == 256) && \"Unsupported vector width\""
, "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 5333, __PRETTY_FUNCTION__))
5333 "Unsupported vector width")(((vectorWidth == 128 || vectorWidth == 256) && "Unsupported vector width"
) ? static_cast<void> (0) : __assert_fail ("(vectorWidth == 128 || vectorWidth == 256) && \"Unsupported vector width\""
, "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 5333, __PRETTY_FUNCTION__))
;
5334 // Inserting UNDEF is Result
5335 if (Vec.isUndef())
5336 return Result;
5337 EVT VT = Vec.getValueType();
5338 EVT ElVT = VT.getVectorElementType();
5339 EVT ResultVT = Result.getValueType();
5340
5341 // Insert the relevant vectorWidth bits.
5342 unsigned ElemsPerChunk = vectorWidth/ElVT.getSizeInBits();
5343 assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2")((isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2"
) ? static_cast<void> (0) : __assert_fail ("isPowerOf2_32(ElemsPerChunk) && \"Elements per chunk not power of 2\""
, "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 5343, __PRETTY_FUNCTION__))
;
5344
5345 // This is the index of the first element of the vectorWidth-bit chunk
5346 // we want. Since ElemsPerChunk is a power of 2 just need to clear bits.
5347 IdxVal &= ~(ElemsPerChunk - 1);
5348
5349 SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, dl);
5350 return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Result, Vec, VecIdx);
5351}
5352
5353/// Generate a DAG to put 128-bits into a vector > 128 bits. This
5354/// sets things up to match to an AVX VINSERTF128/VINSERTI128 or
5355/// AVX-512 VINSERTF32x4/VINSERTI32x4 instructions or a
5356/// simple superregister reference. Idx is an index in the 128 bits
5357/// we want. It need not be aligned to a 128-bit boundary. That makes
5358/// lowering INSERT_VECTOR_ELT operations easier.
5359static SDValue insert128BitVector(SDValue Result, SDValue Vec, unsigned IdxVal,
5360 SelectionDAG &DAG, const SDLoc &dl) {
5361 assert(Vec.getValueType().is128BitVector() && "Unexpected vector size!")((Vec.getValueType().is128BitVector() && "Unexpected vector size!"
) ? static_cast<void> (0) : __assert_fail ("Vec.getValueType().is128BitVector() && \"Unexpected vector size!\""
, "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 5361, __PRETTY_FUNCTION__))
;
5362 return insertSubVector(Result, Vec, IdxVal, DAG, dl, 128);
5363}
5364
5365/// Widen a vector to a larger size with the same scalar type, with the new
5366/// elements either zero or undef.
5367static SDValue widenSubVector(MVT VT, SDValue Vec, bool ZeroNewElements,
5368 const X86Subtarget &Subtarget, SelectionDAG &DAG,
5369 const SDLoc &dl) {
5370 assert(Vec.getValueSizeInBits() < VT.getSizeInBits() &&((Vec.getValueSizeInBits() < VT.getSizeInBits() &&
Vec.getValueType().getScalarType() == VT.getScalarType() &&
"Unsupported vector widening type") ? static_cast<void>
(0) : __assert_fail ("Vec.getValueSizeInBits() < VT.getSizeInBits() && Vec.getValueType().getScalarType() == VT.getScalarType() && \"Unsupported vector widening type\""
, "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 5372, __PRETTY_FUNCTION__))
5371 Vec.getValueType().getScalarType() == VT.getScalarType() &&((Vec.getValueSizeInBits() < VT.getSizeInBits() &&
Vec.getValueType().getScalarType() == VT.getScalarType() &&
"Unsupported vector widening type") ? static_cast<void>
(0) : __assert_fail ("Vec.getValueSizeInBits() < VT.getSizeInBits() && Vec.getValueType().getScalarType() == VT.getScalarType() && \"Unsupported vector widening type\""
, "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 5372, __PRETTY_FUNCTION__))
5372 "Unsupported vector widening type")((Vec.getValueSizeInBits() < VT.getSizeInBits() &&
Vec.getValueType().getScalarType() == VT.getScalarType() &&
"Unsupported vector widening type") ? static_cast<void>
(0) : __assert_fail ("Vec.getValueSizeInBits() < VT.getSizeInBits() && Vec.getValueType().getScalarType() == VT.getScalarType() && \"Unsupported vector widening type\""
, "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 5372, __PRETTY_FUNCTION__))
;
5373 SDValue Res = ZeroNewElements ? getZeroVector(VT, Subtarget, DAG, dl)
5374 : DAG.getUNDEF(VT);
5375 return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VT, Res, Vec,
5376 DAG.getIntPtrConstant(0, dl));
5377}
5378
5379// Helper for splitting operands of an operation to legal target size and
5380// apply a function on each part.
5381// Useful for operations that are available on SSE2 in 128-bit, on AVX2 in
5382// 256-bit and on AVX512BW in 512-bit. The argument VT is the type used for
5383// deciding if/how to split Ops. Ops elements do *not* have to be of type VT.
5384// The argument Builder is a function that will be applied on each split part:
5385// SDValue Builder(SelectionDAG&G, SDLoc, ArrayRef<SDValue>)
5386template <typename F>
5387SDValue SplitOpsAndApply(SelectionDAG &DAG, const X86Subtarget &Subtarget,
5388 const SDLoc &DL, EVT VT, ArrayRef<SDValue> Ops,
5389 F Builder, bool CheckBWI = true) {
5390 assert(Subtarget.hasSSE2() && "Target assumed to support at least SSE2")((Subtarget.hasSSE2() && "Target assumed to support at least SSE2"
) ? static_cast<void> (0) : __assert_fail ("Subtarget.hasSSE2() && \"Target assumed to support at least SSE2\""
, "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 5390, __PRETTY_FUNCTION__))
;
5391 unsigned NumSubs = 1;
5392 if ((CheckBWI && Subtarget.useBWIRegs()) ||
5393 (!CheckBWI && Subtarget.useAVX512Regs())) {
5394 if (VT.getSizeInBits() > 512) {
5395 NumSubs = VT.getSizeInBits() / 512;
5396 assert((VT.getSizeInBits() % 512) == 0 && "Illegal vector size")(((VT.getSizeInBits() % 512) == 0 && "Illegal vector size"
) ? static_cast<void> (0) : __assert_fail ("(VT.getSizeInBits() % 512) == 0 && \"Illegal vector size\""
, "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 5396, __PRETTY_FUNCTION__))
;
5397 }
5398 } else if (Subtarget.hasAVX2()) {
5399 if (VT.getSizeInBits() > 256) {
5400 NumSubs = VT.getSizeInBits() / 256;
5401 assert((VT.getSizeInBits() % 256) == 0 && "Illegal vector size")(((VT.getSizeInBits() % 256) == 0 && "Illegal vector size"
) ? static_cast<void> (0) : __assert_fail ("(VT.getSizeInBits() % 256) == 0 && \"Illegal vector size\""
, "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 5401, __PRETTY_FUNCTION__))
;
5402 }
5403 } else {
5404 if (VT.getSizeInBits() > 128) {
5405 NumSubs = VT.getSizeInBits() / 128;
5406 assert((VT.getSizeInBits() % 128) == 0 && "Illegal vector size")(((VT.getSizeInBits() % 128) == 0 && "Illegal vector size"
) ? static_cast<void> (0) : __assert_fail ("(VT.getSizeInBits() % 128) == 0 && \"Illegal vector size\""
, "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 5406, __PRETTY_FUNCTION__))
;
5407 }
5408 }
5409
5410 if (NumSubs == 1)
5411 return Builder(DAG, DL, Ops);
5412
5413 SmallVector<SDValue, 4> Subs;
5414 for (unsigned i = 0; i != NumSubs; ++i) {
5415 SmallVector<SDValue, 2> SubOps;
5416 for (SDValue Op : Ops) {
5417 EVT OpVT = Op.getValueType();
5418 unsigned NumSubElts = OpVT.getVectorNumElements() / NumSubs;
5419 unsigned SizeSub = OpVT.getSizeInBits() / NumSubs;
5420 SubOps.push_back(extractSubVector(Op, i * NumSubElts, DAG, DL, SizeSub));
5421 }
5422 Subs.push_back(Builder(DAG, DL, SubOps));
5423 }
5424 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Subs);
5425}
5426
5427// Return true if the instruction zeroes the unused upper part of the
5428// destination and accepts mask.
5429static bool isMaskedZeroUpperBitsvXi1(unsigned int Opcode) {
5430 switch (Opcode) {
5431 default:
5432 return false;
5433 case X86ISD::CMPM:
5434 case X86ISD::CMPM_RND:
5435 case ISD::SETCC:
5436 return true;
5437 }
5438}
5439
5440/// Insert i1-subvector to i1-vector.
///
/// Op supplies three operands: the destination vector (0), the subvector to
/// insert (1) and the insertion index (2). An empty SDValue is returned when
/// the index is not a ConstantSDNode. The destination is returned unchanged
/// when the subvector is undef, and Op itself is returned when inserting at
/// index 0 into an undef destination. Every path that builds new nodes works
/// in WideOpVT and finishes with an EXTRACT_SUBVECTOR back to OpVT at index 0.
5441static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG,
5442 const X86Subtarget &Subtarget) {
5443
5444 SDLoc dl(Op);
5445 SDValue Vec = Op.getOperand(0);
5446 SDValue SubVec = Op.getOperand(1);
5447 SDValue Idx = Op.getOperand(2);
5448
5449 if (!isa<ConstantSDNode>(Idx))
5450 return SDValue();
5451
5452 // Inserting undef is a nop. We can just return the original vector.
5453 if (SubVec.isUndef())
5454 return Vec;
5455
5456 unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
5457 if (IdxVal == 0 && Vec.isUndef()) // the operation is legal
5458 return Op;
5459
5460 MVT OpVT = Op.getSimpleValueType();
5461 unsigned NumElems = OpVT.getVectorNumElements();
5462
5463 SDValue ZeroIdx = DAG.getIntPtrConstant(0, dl);
5464
5465 // Extend to natively supported kshift.
 // Vectors with fewer than 8 elements, or exactly 8 without DQI, are widened
 // to v8i1 (DQI available) or v16i1 (otherwise).
5466 MVT WideOpVT = OpVT;
5467 if ((!Subtarget.hasDQI() && NumElems == 8) || NumElems < 8)
5468 WideOpVT = Subtarget.hasDQI() ? MVT::v8i1 : MVT::v16i1;
5469
5470 // Inserting into the lsbs of a zero vector is legal. ISel will insert shifts
5471 // if necessary.
5472 if (IdxVal == 0 && ISD::isBuildVectorAllZeros(Vec.getNode())) {
5473 // May need to promote to a legal type.
5474 Op = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
5475 getZeroVector(WideOpVT, Subtarget, DAG, dl),
5476 SubVec, Idx);
5477 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, Op, ZeroIdx);
5478 }
5479
5480 MVT SubVecVT = SubVec.getSimpleValueType();
5481 unsigned SubVecNumElems = SubVecVT.getVectorNumElements();
5482
5483 assert(IdxVal + SubVecNumElems <= NumElems &&((IdxVal + SubVecNumElems <= NumElems && IdxVal % SubVecVT
.getSizeInBits() == 0 && "Unexpected index value in INSERT_SUBVECTOR"
) ? static_cast<void> (0) : __assert_fail ("IdxVal + SubVecNumElems <= NumElems && IdxVal % SubVecVT.getSizeInBits() == 0 && \"Unexpected index value in INSERT_SUBVECTOR\""
, "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 5485, __PRETTY_FUNCTION__))
5484 IdxVal % SubVecVT.getSizeInBits() == 0 &&((IdxVal + SubVecNumElems <= NumElems && IdxVal % SubVecVT
.getSizeInBits() == 0 && "Unexpected index value in INSERT_SUBVECTOR"
) ? static_cast<void> (0) : __assert_fail ("IdxVal + SubVecNumElems <= NumElems && IdxVal % SubVecVT.getSizeInBits() == 0 && \"Unexpected index value in INSERT_SUBVECTOR\""
, "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 5485, __PRETTY_FUNCTION__))
5485 "Unexpected index value in INSERT_SUBVECTOR")((IdxVal + SubVecNumElems <= NumElems && IdxVal % SubVecVT
.getSizeInBits() == 0 && "Unexpected index value in INSERT_SUBVECTOR"
) ? static_cast<void> (0) : __assert_fail ("IdxVal + SubVecNumElems <= NumElems && IdxVal % SubVecVT.getSizeInBits() == 0 && \"Unexpected index value in INSERT_SUBVECTOR\""
, "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 5485, __PRETTY_FUNCTION__))
;
5486
5487 SDValue Undef = DAG.getUNDEF(WideOpVT);
5488
5489 if (IdxVal == 0) {
5490 // Zero lower bits of the Vec
 // (shift right then left by SubVecNumElems; presumably both k-shifts fill
 // with zeros, which is what clears the low lanes - confirm).
5491 SDValue ShiftBits = DAG.getConstant(SubVecNumElems, dl, MVT::i8);
5492 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, Undef, Vec,
5493 ZeroIdx);
5494 Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec, ShiftBits);
5495 Vec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Vec, ShiftBits);
5496 // Merge them together, SubVec should be zero extended.
5497 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
5498 getZeroVector(WideOpVT, Subtarget, DAG, dl),
5499 SubVec, ZeroIdx);
5500 Op = DAG.getNode(ISD::OR, dl, WideOpVT, Vec, SubVec);
5501 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, Op, ZeroIdx);
5502 }
5503
 // From here on IdxVal != 0. SubVec is widened into an undef vector, so its
 // lanes above SubVecNumElems carry no defined value.
5504 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
5505 Undef, SubVec, ZeroIdx);
5506
5507 if (Vec.isUndef()) {
5508 assert(IdxVal != 0 && "Unexpected index")((IdxVal != 0 && "Unexpected index") ? static_cast<
void> (0) : __assert_fail ("IdxVal != 0 && \"Unexpected index\""
, "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 5508, __PRETTY_FUNCTION__))
;
5509 SubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, SubVec,
5510 DAG.getConstant(IdxVal, dl, MVT::i8));
5511 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, SubVec, ZeroIdx);
5512 }
5513
5514 if (ISD::isBuildVectorAllZeros(Vec.getNode())) {
5515 assert(IdxVal != 0 && "Unexpected index")((IdxVal != 0 && "Unexpected index") ? static_cast<
void> (0) : __assert_fail ("IdxVal != 0 && \"Unexpected index\""
, "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 5515, __PRETTY_FUNCTION__))
;
 // Shift the subvector up to the top lanes of WideOpVT, then back down so
 // that it starts at lane IdxVal.
5516 NumElems = WideOpVT.getVectorNumElements();
5517 unsigned ShiftLeft = NumElems - SubVecNumElems;
5518 unsigned ShiftRight = NumElems - SubVecNumElems - IdxVal;
5519 SubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, SubVec,
5520 DAG.getConstant(ShiftLeft, dl, MVT::i8));
5521 if (ShiftRight != 0)
5522 SubVec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, SubVec,
5523 DAG.getConstant(ShiftRight, dl, MVT::i8));
5524 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, SubVec, ZeroIdx);
5525 }
5526
5527 // Simple case when we put subvector in the upper part
5528 if (IdxVal + SubVecNumElems == NumElems) {
5529 SubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, SubVec,
5530 DAG.getConstant(IdxVal, dl, MVT::i8));
5531 if (SubVecNumElems * 2 == NumElems) {
5532 // Special case, use legal zero extending insert_subvector. This allows
5533 // isel to optimize when bits are known zero.
5534 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVecVT, Vec, ZeroIdx);
5535 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
5536 getZeroVector(WideOpVT, Subtarget, DAG, dl),
5537 Vec, ZeroIdx);
5538 } else {
5539 // Otherwise use explicit shifts to zero the bits.
5540 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
5541 Undef, Vec, ZeroIdx);
5542 NumElems = WideOpVT.getVectorNumElements();
5543 SDValue ShiftBits = DAG.getConstant(NumElems - IdxVal, dl, MVT::i8);
5544 Vec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Vec, ShiftBits);
5545 Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec, ShiftBits);
5546 }
5547 Op = DAG.getNode(ISD::OR, dl, WideOpVT, Vec, SubVec);
5548 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, Op, ZeroIdx);
5549 }
5550
5551 // Inserting into the middle is more complicated.
 // The sequence below builds ((Vec >> IdxVal) ^ SubVec), keeps only its low
 // SubVecNumElems lanes repositioned at IdxVal, and xors that with Vec; inside
 // the inserted range Vec's old bits cancel and SubVec's bits remain.
5552
5553 NumElems = WideOpVT.getVectorNumElements();
5554
5555 // Widen the vector if needed.
5556 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, Undef, Vec, ZeroIdx);
5557 // Move the current value of the bits to be replaced to the lsbs.
5558 Op = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec,
5559 DAG.getConstant(IdxVal, dl, MVT::i8));
5560 // Xor with the new bit.
5561 Op = DAG.getNode(ISD::XOR, dl, WideOpVT, Op, SubVec);
5562 // Shift to MSB, filling bottom bits with 0.
5563 unsigned ShiftLeft = NumElems - SubVecNumElems;
5564 Op = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Op,
5565 DAG.getConstant(ShiftLeft, dl, MVT::i8));
5566 // Shift to the final position, filling upper bits with 0.
5567 unsigned ShiftRight = NumElems - SubVecNumElems - IdxVal;
5568 Op = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Op,
5569 DAG.getConstant(ShiftRight, dl, MVT::i8));
5570 // Xor with original vector leaving the new value.
5571 Op = DAG.getNode(ISD::XOR, dl, WideOpVT, Vec, Op);
5572 // Reduce to original width if needed.
5573 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, Op, ZeroIdx);
5574}
5575
/// Builds a value of type VT from two subvectors: V1 is inserted at element 0
/// of an undef VT vector and V2 at element NumElems / 2, both through
/// insertSubVector with the given VectorWidth.
5576static SDValue concatSubVectors(SDValue V1, SDValue V2, EVT VT,
5577 unsigned NumElems, SelectionDAG &DAG,
5578 const SDLoc &dl, unsigned VectorWidth) {
5579 SDValue V = insertSubVector(DAG.getUNDEF(VT), V1, 0, DAG, dl, VectorWidth);
5580 return insertSubVector(V, V2, NumElems / 2, DAG, dl, VectorWidth);
5581}
5582
5583/// Returns a vector of specified type with all bits set.
5584/// Always build ones vectors as <4 x i32>, <8 x i32> or <16 x i32>.
5585/// Then bitcast to their original type, ensuring they get CSE'd.
/// VT must be a 128-, 256- or 512-bit vector type (asserted); the i32 element
/// count is VT.getSizeInBits() / 32.
5586static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, const SDLoc &dl) {
5587 assert((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) &&(((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector
()) && "Expected a 128/256/512-bit vector type") ? static_cast
<void> (0) : __assert_fail ("(VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) && \"Expected a 128/256/512-bit vector type\""
, "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 5588, __PRETTY_FUNCTION__))
5588 "Expected a 128/256/512-bit vector type")(((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector
()) && "Expected a 128/256/512-bit vector type") ? static_cast
<void> (0) : __assert_fail ("(VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) && \"Expected a 128/256/512-bit vector type\""
, "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 5588, __PRETTY_FUNCTION__))
;
5589
5590 APInt Ones = APInt::getAllOnesValue(32);
5591 unsigned NumElts = VT.getSizeInBits() / 32;
5592 SDValue Vec = DAG.getConstant(Ones, dl, MVT::getVectorVT(MVT::i32, NumElts));
5593 return DAG.getBitcast(VT, Vec);
5594}
5595
/// Builds a sign (Signed == true) or zero extension of the vector In to the
/// vector type VT. Both VT and In's type must be vector types (asserted).
/// When the possibly narrowed input has the same element count as VT a plain
/// SIGN_EXTEND / ZERO_EXTEND node is produced; otherwise the matching
/// *_EXTEND_VECTOR_INREG node is used.
5596static SDValue getExtendInVec(bool Signed, const SDLoc &DL, EVT VT, SDValue In,
5597 SelectionDAG &DAG) {
5598 EVT InVT = In.getValueType();
5599 assert(VT.isVector() && InVT.isVector() && "Expected vector VTs.")((VT.isVector() && InVT.isVector() && "Expected vector VTs."
) ? static_cast<void> (0) : __assert_fail ("VT.isVector() && InVT.isVector() && \"Expected vector VTs.\""
, "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 5599, __PRETTY_FUNCTION__))
;
5600
5601 // For 256-bit vectors, we only need the lower (128-bit) input half.
5602 // For 512-bit vectors, we only need the lower input half or quarter.
5603 if (InVT.getSizeInBits() > 128) {
5604 assert(VT.getSizeInBits() == InVT.getSizeInBits() &&((VT.getSizeInBits() == InVT.getSizeInBits() && "Expected VTs to be the same size!"
) ? static_cast<void> (0) : __assert_fail ("VT.getSizeInBits() == InVT.getSizeInBits() && \"Expected VTs to be the same size!\""
, "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 5605, __PRETTY_FUNCTION__))
5605 "Expected VTs to be the same size!")((VT.getSizeInBits() == InVT.getSizeInBits() && "Expected VTs to be the same size!"
) ? static_cast<void> (0) : __assert_fail ("VT.getSizeInBits() == InVT.getSizeInBits() && \"Expected VTs to be the same size!\""
, "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 5605, __PRETTY_FUNCTION__))
;
 // Scale is the widening factor per element; keep the low VT-size / Scale
 // bits of the input, but never less than 128 bits.
5606 unsigned Scale = VT.getScalarSizeInBits() / InVT.getScalarSizeInBits();
5607 In = extractSubVector(In, 0, DAG, DL,
5608 std::max(128U, VT.getSizeInBits() / Scale));
5609 InVT = In.getValueType();
5610 }
5611
5612 if (VT.getVectorNumElements() == InVT.getVectorNumElements())
5613 return DAG.getNode(Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
5614 DL, VT, In);
5615
5616 return DAG.getNode(Signed ? ISD::SIGN_EXTEND_VECTOR_INREG
5617 : ISD::ZERO_EXTEND_VECTOR_INREG,
5618 DL, VT, In);
5619}
5620
5621/// Returns a vector_shuffle node for an unpackl operation.
/// The mask comes from createUnpackShuffleMask with Lo = true and
/// Unary = false, and is applied to V1 and V2 in type VT.
5622static SDValue getUnpackl(SelectionDAG &DAG, const SDLoc &dl, MVT VT,
5623 SDValue V1, SDValue V2) {
5624 SmallVector<int, 8> Mask;
5625 createUnpackShuffleMask(VT, Mask, /* Lo = */ true, /* Unary = */ false);
5626 return DAG.getVectorShuffle(VT, dl, V1, V2, Mask);
5627}
5628
5629/// Returns a vector_shuffle node for an unpackh operation.
/// The mask comes from createUnpackShuffleMask with Lo = false and
/// Unary = false, and is applied to V1 and V2 in type VT.
5630static SDValue getUnpackh(SelectionDAG &DAG, const SDLoc &dl, MVT VT,
5631 SDValue V1, SDValue V2) {
5632 SmallVector<int, 8> Mask;
5633 createUnpackShuffleMask(VT, Mask, /* Lo = */ false, /* Unary = */ false);
5634 return DAG.getVectorShuffle(VT, dl, V1, V2, Mask);
5635}
5636
5637/// Return a vector_shuffle of the specified vector of zero or undef vector.
5638/// This produces a shuffle where the low element of V2 is swizzled into the
5639/// zero/undef vector, landing at element Idx.
5640/// This produces a shuffle mask like 4,1,2,3 (idx=0) or 0,1,2,4 (idx=3).
/// The first shuffle operand is a zero vector when IsZero is true and an undef
/// vector otherwise; its type is V2's simple value type.
5641static SDValue getShuffleVectorZeroOrUndef(SDValue V2, int Idx,
5642 bool IsZero,
5643 const X86Subtarget &Subtarget,
5644 SelectionDAG &DAG) {
5645 MVT VT = V2.getSimpleValueType();
5646 SDValue V1 = IsZero
5647 ? getZeroVector(VT, Subtarget, DAG, SDLoc(V2)) : DAG.getUNDEF(VT);
5648 int NumElems = VT.getVectorNumElements();
5649 SmallVector<int, 16> MaskVec(NumElems);
5650 for (int i = 0; i != NumElems; ++i)
5651 // If this is the insertion idx, put the low elt of V2 here.
 // Mask index NumElems selects element 0 of the second operand (V2); every
 // other lane keeps the same-numbered lane of V1.
5652 MaskVec[i] = (i == Idx) ? NumElems : i;
5653 return DAG.getVectorShuffle(VT, SDLoc(V2), V1, V2, MaskVec);
5654}
5655
5656// Peek through EXTRACT_SUBVECTORs - typically used for AVX1 256-bit intops.
// Follows operand 0 for as long as the node is an ISD::EXTRACT_SUBVECTOR and
// returns the first value that is not; V is returned as-is if it is not an
// EXTRACT_SUBVECTOR to begin with.
5657static SDValue peekThroughEXTRACT_SUBVECTORs(SDValue V) {
5658 while (V.getOpcode() == ISD::EXTRACT_SUBVECTOR)
5659 V = V.getOperand(0);
5660 return V;
5661}
5662
// Returns the Constant behind Op when Op (after looking through bitcasts) is a
// load whose base pointer is a constant-pool node, or nullptr otherwise.
// nullptr is returned when Op is not a LoadSDNode, when the pointer is not a
// ConstantPoolSDNode, when the pool entry is a machine constant pool entry, or
// when the pool node has a non-zero offset.
5663static const Constant *getTargetConstantFromNode(SDValue Op) {
5664 Op = peekThroughBitcasts(Op);
5665
5666 auto *Load = dyn_cast<LoadSDNode>(Op);
5667 if (!Load)
5668 return nullptr;
5669
 // Strip one X86ISD::Wrapper / X86ISD::WrapperRIP node around the address.
5670 SDValue Ptr = Load->getBasePtr();
5671 if (Ptr->getOpcode() == X86ISD::Wrapper ||
5672 Ptr->getOpcode() == X86ISD::WrapperRIP)
5673 Ptr = Ptr->getOperand(0);
5674
5675 auto *CNode = dyn_cast<ConstantPoolSDNode>(Ptr);
5676 if (!CNode || CNode->isMachineConstantPoolEntry() || CNode->getOffset() != 0)
5677 return nullptr;
5678
5679 return CNode->getConstVal();
5680}
5681
5682// Extract raw constant bits from constant pools.
5683static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
5684 APInt &UndefElts,
5685 SmallVectorImpl<APInt> &EltBits,
5686 bool AllowWholeUndefs = true,
5687 bool AllowPartialUndefs = true) {
5688 assert(EltBits.empty() && "Expected an empty EltBits vector")((EltBits.empty() && "Expected an empty EltBits vector"
) ? static_cast<void> (0) : __assert_fail ("EltBits.empty() && \"Expected an empty EltBits vector\""
, "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 5688, __PRETTY_FUNCTION__))
;
5689
5690 Op = peekThroughBitcasts(Op);
5691
5692 EVT VT = Op.getValueType();
5693 unsigned SizeInBits = VT.getSizeInBits();
5694 assert((SizeInBits % EltSizeInBits) == 0 && "Can't split constant!")(((SizeInBits % EltSizeInBits) == 0 && "Can't split constant!"
) ? static_cast<void> (0) : __assert_fail ("(SizeInBits % EltSizeInBits) == 0 && \"Can't split constant!\""
, "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 5694, __PRETTY_FUNCTION__))
;
5695 unsigned NumElts = SizeInBits / EltSizeInBits;
5696
5697 // Bitcast a source array of element bits to the target size.
5698 auto CastBitData = [&](APInt &UndefSrcElts, ArrayRef<APInt> SrcEltBits) {
5699 unsigned NumSrcElts = UndefSrcElts.getBitWidth();
5700 unsigned SrcEltSizeInBits = SrcEltBits[0].getBitWidth();
5701 assert((NumSrcElts * SrcEltSizeInBits) == SizeInBits &&(((NumSrcElts * SrcEltSizeInBits) == SizeInBits && "Constant bit sizes don't match"
) ? static_cast<void> (0) : __assert_fail ("(NumSrcElts * SrcEltSizeInBits) == SizeInBits && \"Constant bit sizes don't match\""
, "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 5702, __PRETTY_FUNCTION__))
5702 "Constant bit sizes don't match")(((NumSrcElts * SrcEltSizeInBits) == SizeInBits && "Constant bit sizes don't match"
) ? static_cast<void> (0) : __assert_fail ("(NumSrcElts * SrcEltSizeInBits) == SizeInBits && \"Constant bit sizes don't match\""
, "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/X86/X86ISelLowering.cpp"
, 5702, __PRETTY_FUNCTION__))
;
5703
5704 // Don't split if we don't allow undef bits.
5705 bool AllowUndefs = AllowWholeUndefs || AllowPartialUndefs;
5706 if (UndefSrcElts.getBoolValue() && !AllowUndefs)
5707 return false;
5708
5709 // If we're already the right size, don't bother bitcasting.
5710 if (NumSrcElts == NumElts) {
5711 UndefElts = UndefSrcElts;
5712 EltBits.assign(SrcEltBits.begin(), SrcEltBits.end());
5713 return true;
5714 }
5715
5716 // Extract all the undef/constant element data and pack into single bitsets.
5717 APInt UndefBits(SizeInBits, 0);
5718 APInt MaskBits(SizeInBits, 0);
5719
5720 for (unsigned i = 0; i != NumSrcElts; ++i) {
5721 unsigned BitOffset = i * SrcEltSizeInBits;
5722 if (UndefSrcElts[i])
5723 UndefBits.setBits(BitOffset, BitOffset + SrcEltSizeInBits);
5724 MaskBits.insertBits(SrcEltBits[i], BitOffset);
5725 }
5726
5727 // Split the undef/constant single bitset data into the target elements.
5728 UndefElts = APInt(NumElts, 0);
5729 EltBits.resize(NumElts, APInt(EltSizeInBits, 0));
5730
5731 for (unsigned i = 0; i != NumElts; ++i) {
5732 unsigned BitOffset = i * EltSizeInBits;
5733 APInt UndefEltBits = UndefBits.extractBits(EltSizeInBits, BitOffset);
5734
5735 // Only treat an element as UNDEF if all bits are UNDEF.
5736 if (UndefEltBits.isAllOnesValue()) {
5737 if (!AllowWholeUndefs)
5738 return false;
5739 UndefElts.setBit(i);
5740 continue;
5741 }
5742
5743 // If only some bits are UNDEF then treat them as zero (or bail if not
5744 // supported).
5745 if (UndefEltBits.getBoolValue() && !AllowPartialUndefs)
5746 return false;
5747
5748 APInt Bits = MaskBits.extractBits(EltSizeInBits, BitOffset);
5749 EltBits[i] = Bits.getZExtValue();
5750 }
5751 return true;
5752 };
5753
5754 // Collect constant bits and insert into mask/undef bit masks.
5755 auto CollectConstantBits = [](const Constant *Cst, APInt &Mask, APInt &Undefs,
5756 unsigned UndefBitIndex) {
5757 if (!Cst)
5758 return false;
5759 if (isa<UndefValue>(Cst)) {
5760 Undefs.setBit(UndefBitIndex);
5761 return true;
5762 }
5763 if (auto *CInt = dyn_cast<ConstantInt>(Cst)) {
5764 Mask = CInt->getValue();
5765 return true;
5766 }
5767 if (auto *CFP = dyn_cast<ConstantFP>(Cst)) {
5768 Mask = CFP->getValueAPF().bitcastToAPInt();
5769 return true;
5770 }
5771 return false;
5772 };
5773
5774 // Handle UNDEFs.
5775 if (Op.isUndef()) {
5776 APInt UndefSrcElts = APInt::getAllOnesValue(NumElts);
5777 SmallVector<APInt, 64> SrcEltBits(NumElts, APInt(EltSizeInBits, 0));
5778 return CastBitData(UndefSrcElts, SrcEltBits);
5779 }
5780
5781 // Extract scalar constant bits.
5782 if (auto *Cst = dyn_cast<ConstantSDNode>(Op)) {
5783 APInt UndefSrcElts = APInt::getNullValue(1);
5784 SmallVector<APInt, 64> SrcEltBits(1, Cst->getAPIntValue());
5785 return CastBitData(UndefSrcElts, SrcEltBits);
5786 }
5787 if (auto *Cst = dyn_cast<ConstantFPSDNode>(Op)) {
5788 APInt UndefSrcElts = APInt::getNullValue(1);
5789 APInt RawBits = Cst->getValueAPF().bitcastToAPInt();
5790 SmallVector<APInt, 64> SrcEltBits(1, RawBits);
5791 return CastBitData(UndefSrcElts, SrcEltBits);
5792 }
5793
5794 // Extract constant bits from build vector.
5795 if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
5796 unsigned SrcEltSizeInBits = VT.getScalarSizeInBits();
5797 unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits;
5798
5799 APInt UndefSrcElts(NumSrcElts, 0);
5800 SmallVector<APInt, 64> SrcEltBits(NumSrcElts, APInt(SrcEltSizeInBits, 0));
5801 for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
5802 const SDValue &Src = Op.getOperand(i);
5803 if (Src.isUndef()) {
5804 UndefSrcElts.setBit(i);
5805 continue;
5806 }
5807 auto *Cst = cast<ConstantSDNode>(Src);
5808 SrcEltBits[i] = Cst->getAPIntValue().zextOrTrunc(SrcEltSizeInBits);
5809 }
5810 return CastBitData(UndefSrcElts, SrcEltBits);
5811 }
5812 if (ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode())) {
5813 unsigned SrcEltSizeInBits = VT.getScalarSizeInBits();
5814 unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits;
5815
5816 APInt UndefSrcElts(NumSrcElts, 0);
5817 SmallVector<APInt, 64> SrcEltBits(NumSrcElts, APInt(SrcEltSizeInBits, 0));
5818 for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
5819 const SDValue &Src = Op.getOperand(i);
5820 if (Src.isUndef()) {
5821 UndefSrcElts.setBit(i);
5822 continue;
5823 }
5824 auto *Cst = cast<ConstantFPSDNode>(Src);
5825 APInt RawBits = Cst->getValueAPF().bitcastToAPInt();
5826 SrcEltBits[i] = RawBits.zextOrTrunc(SrcEltSizeInBits);
5827 }
5828 return CastBitData(UndefSrcElts, SrcEltBits);
5829 }
5830
5831 // Extract constant bits from constant pool vector.
5832 if (auto *Cst = getTargetConstantFromNode(Op)) {
5833 Type *CstTy = Cst->getType();
5834 unsigned CstSizeInBits = CstTy->getPrimitiveSizeInBits();
5835 if (!CstTy->isVectorTy() || (CstSizeInBits % SizeInBits) != 0)
5836 return false;
5837
5838 unsigned SrcEltSizeInBits = CstTy->getScalarSizeInBits();
5839 unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits;
5840
5841 APInt UndefSrcElts(NumSrcElts, 0);
5842 SmallVector<APInt, 64> SrcEltBits(NumSrcElts, APInt(SrcEltSizeInBits, 0));
5843 for (unsigned i = 0; i != NumSrcElts; ++i)
5844 if (!CollectConstantBits(Cst->getAggregateElement(i), SrcEltBits[i],
5845 UndefSrcElts, i))
5846 return false;
5847
5848 return CastBitData(UndefSrcElts, SrcEltBits);
5849 }
5850
5851 // Extract constant bits from a broadcasted constant pool scalar.
5852 if (Op.getOpcode() == X86ISD::VBROADCAST &&
5853 EltSizeInBits <= VT.getScalarSizeInBits()) {
5854 if (auto *Broadcast = getTargetConstantFromNode(Op.getOperand(0))) {
5855 unsigned SrcEltSizeInBits = Broadcast->getType()->getScalarSizeInBits();
5856 unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits;
5857
5858 APInt UndefSrcElts(NumSrcElts, 0);
5859 SmallVector<APInt, 64> SrcEltBits(1, APInt(SrcEltSizeInBits, 0));
5860 if (CollectConstantBits(Broadcast, SrcEltBits[0], UndefSrcElts, 0)) {
5861 if (UndefSrcElts[0])
5862 UndefSrcElts.setBits(0, NumSrcElts);
5863 SrcEltBits.append(NumSrcElts - 1, SrcEltBits[0]);
5864 return CastBitData(UndefSrcElts, SrcEltBits);
5865 }
5866 }
5867 }
5868
5869 // Extract a rematerialized scalar constant insertion.
5870 if (Op.getOpcode() == X86ISD::VZEXT_MOVL &&
5871 Op.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR &&
5872 isa<ConstantSDNode>(Op.getOperand(0).getOperand(0))) {
5873 unsigned SrcEltSizeInBits = VT.getScalarSizeInBits();
5874 unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits;
5875
5876 APInt UndefSrcElts(NumSrcElts, 0);
5877 SmallVector<APInt, 64> SrcEltBits;
5878 auto *CN = cast<ConstantSDNode>(Op.getOperand(0).getOperand(0));
5879 SrcEltBits.push_back(CN->getAPIntValue().zextOrTrunc(SrcEltSizeInBits));
5880 SrcEltBits.append(NumSrcElts - 1, APInt(SrcEltSizeInBits, 0));
5881 return CastBitData(UndefSrcElts, SrcEltBits);
5882 }
5883
5884 // Extract constant bits from a subvector's source.
5885 if (Op.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
5886 isa<ConstantSDNode>(Op.getOperand(1))) {
5887 // TODO - support extract_subvector through bitcasts.
5888 if (EltSizeInBits != VT.getScalarSizeInBits())
5889 return false;
5890
5891 if (getTargetConstantBitsFromNode(Op.getOperand(0), EltSizeInBits,
5892 UndefElts, EltBits, AllowWholeUndefs,
5893 AllowPartialUndefs)) {
5894 EVT SrcVT = Op.getOperand(0).getValueType();
5895 unsigned NumSrcElts = SrcVT.getVectorNumElements();
5896 unsigned NumSubElts = VT.getVectorNumElements();
5897 unsigned BaseIdx = Op.getConstantOperandVal(1);
5898 UndefElts = UndefElts.extractBits(NumSubElts, BaseIdx);
5899 if ((BaseIdx + NumSubElts) != NumSrcElts)
5900 EltBits.erase(EltBits.begin() + BaseIdx + NumSubElts, EltBits.end());
5901 if (BaseIdx != 0)
5902 EltBits.erase(EltBits.begin(), EltBits.begin() + BaseIdx);
5903 return true;
5904 }
5905 }
5906
5907 // Extract constant bits from shuffle node sources.
5908 if (auto *SVN = dyn_cast<ShuffleVectorSDNode>(Op)) {
5909 // TODO - support shuffle through bitcasts.
5910 if (EltSizeInBits != VT.getScalarSizeInBits())
5911 return false;
5912
5913 ArrayRef<int> Mask = SVN->getMask();
5914 if ((!AllowWholeUndefs || !AllowPartialUndefs) &&
5915 llvm::any_of(Mask, [](int M) { return M < 0; }))
5916 return false;
5917
5918 APInt UndefElts0, UndefElts1;
5919 SmallVector<APInt, 32> EltBits0, EltBits1;