Bug Summary

File: lib/Target/X86/X86ISelLowering.cpp
Warning: line 14384, column 47
The result of the '/' expression is undefined
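
This diagnostic comes from the analyzer's core checkers, which report a binary operator whose operand may hold a garbage (uninitialized) value on some path. As a minimal, hedged illustration of that warning class only — this is not the LLVM code at line 14384, and the function and variable names below are invented for the example:

#include <cstdio>

// Sketch: the analyzer emits "The result of the '/' expression is undefined"
// when an operand of the division can be an uninitialized value on some path.
static int scalePerLane(int TotalBits, bool HaveLaneSize) {
  int LaneBits;              // only assigned on one branch
  if (HaveLaneSize)
    LaneBits = 128;
  // If HaveLaneSize is false, LaneBits is still uninitialized here, so the
  // division below uses a garbage operand and its result is undefined.
  return TotalBits / LaneBits;
}

int main() {
  std::printf("%d\n", scalePerLane(512, /*HaveLaneSize=*/true));
  return 0;
}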

Annotated Source Code

clang -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name X86ISelLowering.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mthread-model posix -fmath-errno -masm-verbose -mconstructor-aliases -munwind-tables -fuse-init-array -target-cpu x86-64 -dwarf-column-info -debugger-tuning=gdb -momit-leaf-frame-pointer -ffunction-sections -fdata-sections -resource-dir /usr/lib/llvm-9/lib/clang/9.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-9~svn362543/build-llvm/lib/Target/X86 -I /build/llvm-toolchain-snapshot-9~svn362543/lib/Target/X86 -I /build/llvm-toolchain-snapshot-9~svn362543/build-llvm/include -I /build/llvm-toolchain-snapshot-9~svn362543/include -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0/backward -internal-isystem /usr/include/clang/9.0.0/include/ -internal-isystem /usr/local/include -internal-isystem /usr/lib/llvm-9/lib/clang/9.0.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-comment -std=c++11 -fdeprecated-macro -fdebug-compilation-dir /build/llvm-toolchain-snapshot-9~svn362543/build-llvm/lib/Target/X86 -fdebug-prefix-map=/build/llvm-toolchain-snapshot-9~svn362543=. -ferror-limit 19 -fmessage-length 0 -fvisibility-inlines-hidden -stack-protector 2 -fobjc-runtime=gcc -fdiagnostics-show-option -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -o /tmp/scan-build-2019-06-05-060531-1271-1 -x c++ /build/llvm-toolchain-snapshot-9~svn362543/lib/Target/X86/X86ISelLowering.cpp -faddrsig
1//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that X86 uses to lower LLVM code into a
10// selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
14#include "X86ISelLowering.h"
15#include "Utils/X86ShuffleDecode.h"
16#include "X86CallingConv.h"
17#include "X86FrameLowering.h"
18#include "X86InstrBuilder.h"
19#include "X86IntrinsicsInfo.h"
20#include "X86MachineFunctionInfo.h"
21#include "X86TargetMachine.h"
22#include "X86TargetObjectFile.h"
23#include "llvm/ADT/SmallBitVector.h"
24#include "llvm/ADT/SmallSet.h"
25#include "llvm/ADT/Statistic.h"
26#include "llvm/ADT/StringExtras.h"
27#include "llvm/ADT/StringSwitch.h"
28#include "llvm/Analysis/EHPersonalities.h"
29#include "llvm/CodeGen/IntrinsicLowering.h"
30#include "llvm/CodeGen/MachineFrameInfo.h"
31#include "llvm/CodeGen/MachineFunction.h"
32#include "llvm/CodeGen/MachineInstrBuilder.h"
33#include "llvm/CodeGen/MachineJumpTableInfo.h"
34#include "llvm/CodeGen/MachineModuleInfo.h"
35#include "llvm/CodeGen/MachineRegisterInfo.h"
36#include "llvm/CodeGen/TargetLowering.h"
37#include "llvm/CodeGen/WinEHFuncInfo.h"
38#include "llvm/IR/CallSite.h"
39#include "llvm/IR/CallingConv.h"
40#include "llvm/IR/Constants.h"
41#include "llvm/IR/DerivedTypes.h"
42#include "llvm/IR/DiagnosticInfo.h"
43#include "llvm/IR/Function.h"
44#include "llvm/IR/GlobalAlias.h"
45#include "llvm/IR/GlobalVariable.h"
46#include "llvm/IR/Instructions.h"
47#include "llvm/IR/Intrinsics.h"
48#include "llvm/MC/MCAsmInfo.h"
49#include "llvm/MC/MCContext.h"
50#include "llvm/MC/MCExpr.h"
51#include "llvm/MC/MCSymbol.h"
52#include "llvm/Support/CommandLine.h"
53#include "llvm/Support/Debug.h"
54#include "llvm/Support/ErrorHandling.h"
55#include "llvm/Support/KnownBits.h"
56#include "llvm/Support/MathExtras.h"
57#include "llvm/Target/TargetOptions.h"
58#include <algorithm>
59#include <bitset>
60#include <cctype>
61#include <numeric>
62using namespace llvm;
63
64#define DEBUG_TYPE "x86-isel"
65
66STATISTIC(NumTailCalls, "Number of tail calls");
67
68static cl::opt<bool> ExperimentalVectorWideningLegalization(
69 "x86-experimental-vector-widening-legalization", cl::init(false),
70 cl::desc("Enable an experimental vector type legalization through widening "
71 "rather than promotion."),
72 cl::Hidden);
73
74static cl::opt<int> ExperimentalPrefLoopAlignment(
75 "x86-experimental-pref-loop-alignment", cl::init(4),
76 cl::desc("Sets the preferable loop alignment for experiments "
77 "(the last x86-experimental-pref-loop-alignment bits"
78 " of the loop header PC will be 0)."),
79 cl::Hidden);
80
81static cl::opt<bool> MulConstantOptimization(
82 "mul-constant-optimization", cl::init(true),
83 cl::desc("Replace 'mul x, Const' with more effective instructions like "
84 "SHIFT, LEA, etc."),
85 cl::Hidden);
86
87/// Call this when the user attempts to do something unsupported, like
88/// returning a double without SSE2 enabled on x86_64. This is not fatal, unlike
89/// report_fatal_error, so calling code should attempt to recover without
90/// crashing.
91static void errorUnsupported(SelectionDAG &DAG, const SDLoc &dl,
92 const char *Msg) {
93 MachineFunction &MF = DAG.getMachineFunction();
94 DAG.getContext()->diagnose(
95 DiagnosticInfoUnsupported(MF.getFunction(), Msg, dl.getDebugLoc()));
96}
97
98X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
99 const X86Subtarget &STI)
100 : TargetLowering(TM), Subtarget(STI) {
101 bool UseX87 = !Subtarget.useSoftFloat() && Subtarget.hasX87();
102 X86ScalarSSEf64 = Subtarget.hasSSE2();
103 X86ScalarSSEf32 = Subtarget.hasSSE1();
104 MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0));
105
106 // Set up the TargetLowering object.
107
108 // X86 is weird. It always uses i8 for shift amounts and setcc results.
109 setBooleanContents(ZeroOrOneBooleanContent);
110 // X86-SSE is even stranger. It uses -1 or 0 for vector masks.
111 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
112
113 // For 64-bit, since we have so many registers, use the ILP scheduler.
114 // For 32-bit, use the register pressure specific scheduling.
115 // For Atom, always use ILP scheduling.
116 if (Subtarget.isAtom())
117 setSchedulingPreference(Sched::ILP);
118 else if (Subtarget.is64Bit())
119 setSchedulingPreference(Sched::ILP);
120 else
121 setSchedulingPreference(Sched::RegPressure);
122 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
123 setStackPointerRegisterToSaveRestore(RegInfo->getStackRegister());
124
125 // Bypass expensive divides and use cheaper ones.
126 if (TM.getOptLevel() >= CodeGenOpt::Default) {
127 if (Subtarget.hasSlowDivide32())
128 addBypassSlowDiv(32, 8);
129 if (Subtarget.hasSlowDivide64() && Subtarget.is64Bit())
130 addBypassSlowDiv(64, 32);
131 }
132
133 if (Subtarget.isTargetKnownWindowsMSVC() ||
134 Subtarget.isTargetWindowsItanium()) {
135 // Setup Windows compiler runtime calls.
136 setLibcallName(RTLIB::SDIV_I64, "_alldiv");
137 setLibcallName(RTLIB::UDIV_I64, "_aulldiv");
138 setLibcallName(RTLIB::SREM_I64, "_allrem");
139 setLibcallName(RTLIB::UREM_I64, "_aullrem");
140 setLibcallName(RTLIB::MUL_I64, "_allmul");
141 setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::X86_StdCall);
142 setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::X86_StdCall);
143 setLibcallCallingConv(RTLIB::SREM_I64, CallingConv::X86_StdCall);
144 setLibcallCallingConv(RTLIB::UREM_I64, CallingConv::X86_StdCall);
145 setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::X86_StdCall);
146 }
147
148 if (Subtarget.isTargetDarwin()) {
149 // Darwin should use _setjmp/_longjmp instead of setjmp/longjmp.
150 setUseUnderscoreSetJmp(false);
151 setUseUnderscoreLongJmp(false);
152 } else if (Subtarget.isTargetWindowsGNU()) {
153 // MS runtime is weird: it exports _setjmp, but longjmp!
154 setUseUnderscoreSetJmp(true);
155 setUseUnderscoreLongJmp(false);
156 } else {
157 setUseUnderscoreSetJmp(true);
158 setUseUnderscoreLongJmp(true);
159 }
160
161 // If we don't have cmpxchg8b (meaning this is a 386/486), limit atomic size to
162 // 32 bits so the AtomicExpandPass will expand it so we don't need cmpxchg8b.
163 // FIXME: Should we be limiting the atomic size on other configs? Default is
164 // 1024.
165 if (!Subtarget.hasCmpxchg8b())
166 setMaxAtomicSizeInBitsSupported(32);
167
168 // Set up the register classes.
169 addRegisterClass(MVT::i8, &X86::GR8RegClass);
170 addRegisterClass(MVT::i16, &X86::GR16RegClass);
171 addRegisterClass(MVT::i32, &X86::GR32RegClass);
172 if (Subtarget.is64Bit())
173 addRegisterClass(MVT::i64, &X86::GR64RegClass);
174
175 for (MVT VT : MVT::integer_valuetypes())
176 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
177
178 // We don't accept any truncstore of integer registers.
179 setTruncStoreAction(MVT::i64, MVT::i32, Expand);
180 setTruncStoreAction(MVT::i64, MVT::i16, Expand);
181 setTruncStoreAction(MVT::i64, MVT::i8 , Expand);
182 setTruncStoreAction(MVT::i32, MVT::i16, Expand);
183 setTruncStoreAction(MVT::i32, MVT::i8 , Expand);
184 setTruncStoreAction(MVT::i16, MVT::i8, Expand);
185
186 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
187
188 // SETOEQ and SETUNE require checking two conditions.
189 setCondCodeAction(ISD::SETOEQ, MVT::f32, Expand);
190 setCondCodeAction(ISD::SETOEQ, MVT::f64, Expand);
191 setCondCodeAction(ISD::SETOEQ, MVT::f80, Expand);
192 setCondCodeAction(ISD::SETUNE, MVT::f32, Expand);
193 setCondCodeAction(ISD::SETUNE, MVT::f64, Expand);
194 setCondCodeAction(ISD::SETUNE, MVT::f80, Expand);
195
196 // Integer absolute.
197 if (Subtarget.hasCMov()) {
198 setOperationAction(ISD::ABS , MVT::i16 , Custom);
199 setOperationAction(ISD::ABS , MVT::i32 , Custom);
200 }
201 setOperationAction(ISD::ABS , MVT::i64 , Custom);
202
203 // Funnel shifts.
204 for (auto ShiftOp : {ISD::FSHL, ISD::FSHR}) {
205 setOperationAction(ShiftOp , MVT::i16 , Custom);
206 setOperationAction(ShiftOp , MVT::i32 , Custom);
207 if (Subtarget.is64Bit())
208 setOperationAction(ShiftOp , MVT::i64 , Custom);
209 }
210
211 // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
212 // operation.
213 setOperationAction(ISD::UINT_TO_FP , MVT::i1 , Promote);
214 setOperationAction(ISD::UINT_TO_FP , MVT::i8 , Promote);
215 setOperationAction(ISD::UINT_TO_FP , MVT::i16 , Promote);
216
217 if (Subtarget.is64Bit()) {
218 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512())
219 // f32/f64 are legal, f80 is custom.
220 setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Custom);
221 else
222 setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote);
223 setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom);
224 } else if (!Subtarget.useSoftFloat()) {
225 // We have an algorithm for SSE2->double, and we turn this into a
226 // 64-bit FILD followed by conditional FADD for other targets.
227 setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom);
228 // We have an algorithm for SSE2, and we turn this into a 64-bit
229 // FILD or VCVTUSI2SS/SD for other targets.
230 setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Custom);
231 } else {
232 setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Expand);
233 }
234
235 // Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
236 // this operation.
237 setOperationAction(ISD::SINT_TO_FP , MVT::i1 , Promote);
238 setOperationAction(ISD::SINT_TO_FP , MVT::i8 , Promote);
239
240 if (!Subtarget.useSoftFloat()) {
241 // SSE has no i16 to fp conversion, only i32.
242 if (X86ScalarSSEf32) {
243 setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote);
244 // f32 and f64 cases are Legal, f80 case is not
245 setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom);
246 } else {
247 setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Custom);
248 setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom);
249 }
250 } else {
251 setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote);
252 setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Expand);
253 }
254
255 // Promote i1/i8 FP_TO_SINT to larger FP_TO_SINTS's, as X86 doesn't have
256 // this operation.
257 setOperationAction(ISD::FP_TO_SINT , MVT::i1 , Promote);
258 setOperationAction(ISD::FP_TO_SINT , MVT::i8 , Promote);
259
260 if (!Subtarget.useSoftFloat()) {
261 // In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64
262 // are Legal, f80 is custom lowered.
263 setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Custom);
264 setOperationAction(ISD::SINT_TO_FP , MVT::i64 , Custom);
265
266 setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Custom);
267 setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom);
268 } else {
269 setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Promote);
270 setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Expand);
271 setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Expand);
272 }
273
274 // Handle FP_TO_UINT by promoting the destination to a larger signed
275 // conversion.
276 setOperationAction(ISD::FP_TO_UINT , MVT::i1 , Promote);
277 setOperationAction(ISD::FP_TO_UINT , MVT::i8 , Promote);
278 setOperationAction(ISD::FP_TO_UINT , MVT::i16 , Promote);
279
280 if (Subtarget.is64Bit()) {
281 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
282 // FP_TO_UINT-i32/i64 is legal for f32/f64, but custom for f80.
283 setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Custom);
284 setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Custom);
285 } else {
286 setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote);
287 setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Expand);
288 }
289 } else if (!Subtarget.useSoftFloat()) {
290 // Since AVX is a superset of SSE3, only check for SSE here.
291 if (Subtarget.hasSSE1() && !Subtarget.hasSSE3())
292 // Expand FP_TO_UINT into a select.
293 // FIXME: We would like to use a Custom expander here eventually to do
294 // the optimal thing for SSE vs. the default expansion in the legalizer.
295 setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Expand);
296 else
297 // With AVX512 we can use vcvts[ds]2usi for f32/f64->i32, f80 is custom.
298 // With SSE3 we can use fisttpll to convert to a signed i64; without
299 // SSE, we're stuck with a fistpll.
300 setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Custom);
301
302 setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Custom);
303 }
304
305 // TODO: when we have SSE, these could be more efficient, by using movd/movq.
306 if (!X86ScalarSSEf64) {
307 setOperationAction(ISD::BITCAST , MVT::f32 , Expand);
308 setOperationAction(ISD::BITCAST , MVT::i32 , Expand);
309 if (Subtarget.is64Bit()) {
310 setOperationAction(ISD::BITCAST , MVT::f64 , Expand);
311 // Without SSE, i64->f64 goes through memory.
312 setOperationAction(ISD::BITCAST , MVT::i64 , Expand);
313 }
314 } else if (!Subtarget.is64Bit())
315 setOperationAction(ISD::BITCAST , MVT::i64 , Custom);
316
317 // Scalar integer divide and remainder are lowered to use operations that
318 // produce two results, to match the available instructions. This exposes
319 // the two-result form to trivial CSE, which is able to combine x/y and x%y
320 // into a single instruction.
321 //
322 // Scalar integer multiply-high is also lowered to use two-result
323 // operations, to match the available instructions. However, plain multiply
324 // (low) operations are left as Legal, as there are single-result
325 // instructions for this in x86. Using the two-result multiply instructions
326 // when both high and low results are needed must be arranged by dagcombine.
327 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
328 setOperationAction(ISD::MULHS, VT, Expand);
329 setOperationAction(ISD::MULHU, VT, Expand);
330 setOperationAction(ISD::SDIV, VT, Expand);
331 setOperationAction(ISD::UDIV, VT, Expand);
332 setOperationAction(ISD::SREM, VT, Expand);
333 setOperationAction(ISD::UREM, VT, Expand);
334 }
335
336 setOperationAction(ISD::BR_JT , MVT::Other, Expand);
337 setOperationAction(ISD::BRCOND , MVT::Other, Custom);
338 for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128,
339 MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
340 setOperationAction(ISD::BR_CC, VT, Expand);
341 setOperationAction(ISD::SELECT_CC, VT, Expand);
342 }
343 if (Subtarget.is64Bit())
344 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
345 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16 , Legal);
346 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal);
347 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);
348 setOperationAction(ISD::FP_ROUND_INREG , MVT::f32 , Expand);
349
350 setOperationAction(ISD::FREM , MVT::f32 , Expand);
351 setOperationAction(ISD::FREM , MVT::f64 , Expand);
352 setOperationAction(ISD::FREM , MVT::f80 , Expand);
353 setOperationAction(ISD::FLT_ROUNDS_ , MVT::i32 , Custom);
354
355 // Promote the i8 variants and force them on up to i32 which has a shorter
356 // encoding.
357 setOperationPromotedToType(ISD::CTTZ , MVT::i8 , MVT::i32);
358 setOperationPromotedToType(ISD::CTTZ_ZERO_UNDEF, MVT::i8 , MVT::i32);
359 if (!Subtarget.hasBMI()) {
360 setOperationAction(ISD::CTTZ , MVT::i16 , Custom);
361 setOperationAction(ISD::CTTZ , MVT::i32 , Custom);
362 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16 , Legal);
363 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32 , Legal);
364 if (Subtarget.is64Bit()) {
365 setOperationAction(ISD::CTTZ , MVT::i64 , Custom);
366 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Legal);
367 }
368 }
369
370 if (Subtarget.hasLZCNT()) {
371 // When promoting the i8 variants, force them to i32 for a shorter
372 // encoding.
373 setOperationPromotedToType(ISD::CTLZ , MVT::i8 , MVT::i32);
374 setOperationPromotedToType(ISD::CTLZ_ZERO_UNDEF, MVT::i8 , MVT::i32);
375 } else {
376 setOperationAction(ISD::CTLZ , MVT::i8 , Custom);
377 setOperationAction(ISD::CTLZ , MVT::i16 , Custom);
378 setOperationAction(ISD::CTLZ , MVT::i32 , Custom);
379 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8 , Custom);
380 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16 , Custom);
381 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32 , Custom);
382 if (Subtarget.is64Bit()) {
383 setOperationAction(ISD::CTLZ , MVT::i64 , Custom);
384 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Custom);
385 }
386 }
387
388 // Special handling for half-precision floating point conversions.
389 // If we don't have F16C support, then lower half float conversions
390 // into library calls.
391 if (Subtarget.useSoftFloat() || !Subtarget.hasF16C()) {
392 setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
393 setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
394 }
395
396 // There's never any support for operations beyond MVT::f32.
397 setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
398 setOperationAction(ISD::FP16_TO_FP, MVT::f80, Expand);
399 setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
400 setOperationAction(ISD::FP_TO_FP16, MVT::f80, Expand);
401
402 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
403 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
404 setLoadExtAction(ISD::EXTLOAD, MVT::f80, MVT::f16, Expand);
405 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
406 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
407 setTruncStoreAction(MVT::f80, MVT::f16, Expand);
408
409 if (Subtarget.hasPOPCNT()) {
410 setOperationPromotedToType(ISD::CTPOP, MVT::i8, MVT::i32);
411 } else {
412 setOperationAction(ISD::CTPOP , MVT::i8 , Expand);
413 setOperationAction(ISD::CTPOP , MVT::i16 , Expand);
414 setOperationAction(ISD::CTPOP , MVT::i32 , Expand);
415 if (Subtarget.is64Bit())
416 setOperationAction(ISD::CTPOP , MVT::i64 , Expand);
417 else
418 setOperationAction(ISD::CTPOP , MVT::i64 , Custom);
419 }
420
421 setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom);
422
423 if (!Subtarget.hasMOVBE())
424 setOperationAction(ISD::BSWAP , MVT::i16 , Expand);
425
426 // These should be promoted to a larger select which is supported.
427 setOperationAction(ISD::SELECT , MVT::i1 , Promote);
428 // X86 wants to expand cmov itself.
429 for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128 }) {
430 setOperationAction(ISD::SELECT, VT, Custom);
431 setOperationAction(ISD::SETCC, VT, Custom);
432 }
433 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
434 if (VT == MVT::i64 && !Subtarget.is64Bit())
435 continue;
436 setOperationAction(ISD::SELECT, VT, Custom);
437 setOperationAction(ISD::SETCC, VT, Custom);
438 }
439
440 // Custom action for SELECT MMX and expand action for SELECT_CC MMX
441 setOperationAction(ISD::SELECT, MVT::x86mmx, Custom);
442 setOperationAction(ISD::SELECT_CC, MVT::x86mmx, Expand);
443
444 setOperationAction(ISD::EH_RETURN , MVT::Other, Custom);
445 // NOTE: EH_SJLJ_SETJMP/_LONGJMP are not recommended, since
446 // LLVM/Clang supports zero-cost DWARF and SEH exception handling.
447 setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
448 setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
449 setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom);
450 if (TM.Options.ExceptionModel == ExceptionHandling::SjLj)
451 setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
452
453 // Darwin ABI issue.
454 for (auto VT : { MVT::i32, MVT::i64 }) {
455 if (VT == MVT::i64 && !Subtarget.is64Bit())
456 continue;
457 setOperationAction(ISD::ConstantPool , VT, Custom);
458 setOperationAction(ISD::JumpTable , VT, Custom);
459 setOperationAction(ISD::GlobalAddress , VT, Custom);
460 setOperationAction(ISD::GlobalTLSAddress, VT, Custom);
461 setOperationAction(ISD::ExternalSymbol , VT, Custom);
462 setOperationAction(ISD::BlockAddress , VT, Custom);
463 }
464
465 // 64-bit shl, sra, srl (iff 32-bit x86)
466 for (auto VT : { MVT::i32, MVT::i64 }) {
467 if (VT == MVT::i64 && !Subtarget.is64Bit())
468 continue;
469 setOperationAction(ISD::SHL_PARTS, VT, Custom);
470 setOperationAction(ISD::SRA_PARTS, VT, Custom);
471 setOperationAction(ISD::SRL_PARTS, VT, Custom);
472 }
473
474 if (Subtarget.hasSSEPrefetch() || Subtarget.has3DNow())
475 setOperationAction(ISD::PREFETCH , MVT::Other, Legal);
476
477 setOperationAction(ISD::ATOMIC_FENCE , MVT::Other, Custom);
478
479 // Expand certain atomics
480 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
481 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Custom);
482 setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);
483 setOperationAction(ISD::ATOMIC_LOAD_ADD, VT, Custom);
484 setOperationAction(ISD::ATOMIC_LOAD_OR, VT, Custom);
485 setOperationAction(ISD::ATOMIC_LOAD_XOR, VT, Custom);
486 setOperationAction(ISD::ATOMIC_LOAD_AND, VT, Custom);
487 setOperationAction(ISD::ATOMIC_STORE, VT, Custom);
488 }
489
490 if (!Subtarget.is64Bit())
491 setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Custom);
492
493 if (Subtarget.hasCmpxchg16b()) {
494 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom);
495 }
496
497 // FIXME - use subtarget debug flags
498 if (!Subtarget.isTargetDarwin() && !Subtarget.isTargetELF() &&
499 !Subtarget.isTargetCygMing() && !Subtarget.isTargetWin64() &&
500 TM.Options.ExceptionModel != ExceptionHandling::SjLj) {
501 setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
502 }
503
504 setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom);
505 setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i64, Custom);
506
507 setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
508 setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
509
510 setOperationAction(ISD::TRAP, MVT::Other, Legal);
511 setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
512
513 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
514 setOperationAction(ISD::VASTART , MVT::Other, Custom);
515 setOperationAction(ISD::VAEND , MVT::Other, Expand);
516 bool Is64Bit = Subtarget.is64Bit();
517 setOperationAction(ISD::VAARG, MVT::Other, Is64Bit ? Custom : Expand);
518 setOperationAction(ISD::VACOPY, MVT::Other, Is64Bit ? Custom : Expand);
519
520 setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
521 setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
522
523 setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom);
524
525 // GC_TRANSITION_START and GC_TRANSITION_END need custom lowering.
526 setOperationAction(ISD::GC_TRANSITION_START, MVT::Other, Custom);
527 setOperationAction(ISD::GC_TRANSITION_END, MVT::Other, Custom);
528
529 if (!Subtarget.useSoftFloat() && X86ScalarSSEf64) {
530 // f32 and f64 use SSE.
531 // Set up the FP register classes.
532 addRegisterClass(MVT::f32, Subtarget.hasAVX512() ? &X86::FR32XRegClass
533 : &X86::FR32RegClass);
534 addRegisterClass(MVT::f64, Subtarget.hasAVX512() ? &X86::FR64XRegClass
535 : &X86::FR64RegClass);
536
537 for (auto VT : { MVT::f32, MVT::f64 }) {
538 // Use ANDPD to simulate FABS.
539 setOperationAction(ISD::FABS, VT, Custom);
540
541 // Use XORP to simulate FNEG.
542 setOperationAction(ISD::FNEG, VT, Custom);
543
544 // Use ANDPD and ORPD to simulate FCOPYSIGN.
545 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
546
547 // These might be better off as horizontal vector ops.
548 setOperationAction(ISD::FADD, VT, Custom);
549 setOperationAction(ISD::FSUB, VT, Custom);
550
551 // We don't support sin/cos/fmod
552 setOperationAction(ISD::FSIN , VT, Expand);
553 setOperationAction(ISD::FCOS , VT, Expand);
554 setOperationAction(ISD::FSINCOS, VT, Expand);
555 }
556
557 // Lower this to MOVMSK plus an AND.
558 setOperationAction(ISD::FGETSIGN, MVT::i64, Custom);
559 setOperationAction(ISD::FGETSIGN, MVT::i32, Custom);
560
561 } else if (!useSoftFloat() && X86ScalarSSEf32 && (UseX87 || Is64Bit)) {
562 // Use SSE for f32, x87 for f64.
563 // Set up the FP register classes.
564 addRegisterClass(MVT::f32, &X86::FR32RegClass);
565 if (UseX87)
566 addRegisterClass(MVT::f64, &X86::RFP64RegClass);
567
568 // Use ANDPS to simulate FABS.
569 setOperationAction(ISD::FABS , MVT::f32, Custom);
570
571 // Use XORP to simulate FNEG.
572 setOperationAction(ISD::FNEG , MVT::f32, Custom);
573
574 if (UseX87)
575 setOperationAction(ISD::UNDEF, MVT::f64, Expand);
576
577 // Use ANDPS and ORPS to simulate FCOPYSIGN.
578 if (UseX87)
579 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
580 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
581
582 // We don't support sin/cos/fmod
583 setOperationAction(ISD::FSIN , MVT::f32, Expand);
584 setOperationAction(ISD::FCOS , MVT::f32, Expand);
585 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
586
587 if (UseX87) {
588 // Always expand sin/cos functions even though x87 has an instruction.
589 setOperationAction(ISD::FSIN, MVT::f64, Expand);
590 setOperationAction(ISD::FCOS, MVT::f64, Expand);
591 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
592 }
593 } else if (UseX87) {
594 // f32 and f64 in x87.
595 // Set up the FP register classes.
596 addRegisterClass(MVT::f64, &X86::RFP64RegClass);
597 addRegisterClass(MVT::f32, &X86::RFP32RegClass);
598
599 for (auto VT : { MVT::f32, MVT::f64 }) {
600 setOperationAction(ISD::UNDEF, VT, Expand);
601 setOperationAction(ISD::FCOPYSIGN, VT, Expand);
602
603 // Always expand sin/cos functions even though x87 has an instruction.
604 setOperationAction(ISD::FSIN , VT, Expand);
605 setOperationAction(ISD::FCOS , VT, Expand);
606 setOperationAction(ISD::FSINCOS, VT, Expand);
607 }
608 }
609
610 // Expand FP32 immediates into loads from the stack, save special cases.
611 if (isTypeLegal(MVT::f32)) {
612 if (UseX87 && (getRegClassFor(MVT::f32) == &X86::RFP32RegClass)) {
613 addLegalFPImmediate(APFloat(+0.0f)); // FLD0
614 addLegalFPImmediate(APFloat(+1.0f)); // FLD1
615 addLegalFPImmediate(APFloat(-0.0f)); // FLD0/FCHS
616 addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS
617 } else // SSE immediates.
618 addLegalFPImmediate(APFloat(+0.0f)); // xorps
619 }
620 // Expand FP64 immediates into loads from the stack, save special cases.
621 if (isTypeLegal(MVT::f64)) {
622 if (UseX87 && getRegClassFor(MVT::f64) == &X86::RFP64RegClass) {
623 addLegalFPImmediate(APFloat(+0.0)); // FLD0
624 addLegalFPImmediate(APFloat(+1.0)); // FLD1
625 addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
626 addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
627 } else // SSE immediates.
628 addLegalFPImmediate(APFloat(+0.0)); // xorpd
629 }
630
631 // We don't support FMA.
632 setOperationAction(ISD::FMA, MVT::f64, Expand);
633 setOperationAction(ISD::FMA, MVT::f32, Expand);
634
635 // Long double always uses X87, except f128 in MMX.
636 if (UseX87) {
637 if (Subtarget.is64Bit() && Subtarget.hasMMX()) {
638 addRegisterClass(MVT::f128, Subtarget.hasVLX() ? &X86::VR128XRegClass
639 : &X86::VR128RegClass);
640 ValueTypeActions.setTypeAction(MVT::f128, TypeSoftenFloat);
641 setOperationAction(ISD::FABS , MVT::f128, Custom);
642 setOperationAction(ISD::FNEG , MVT::f128, Custom);
643 setOperationAction(ISD::FCOPYSIGN, MVT::f128, Custom);
644 }
645
646 addRegisterClass(MVT::f80, &X86::RFP80RegClass);
647 setOperationAction(ISD::UNDEF, MVT::f80, Expand);
648 setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand);
649 {
650 APFloat TmpFlt = APFloat::getZero(APFloat::x87DoubleExtended());
651 addLegalFPImmediate(TmpFlt); // FLD0
652 TmpFlt.changeSign();
653 addLegalFPImmediate(TmpFlt); // FLD0/FCHS
654
655 bool ignored;
656 APFloat TmpFlt2(+1.0);
657 TmpFlt2.convert(APFloat::x87DoubleExtended(), APFloat::rmNearestTiesToEven,
658 &ignored);
659 addLegalFPImmediate(TmpFlt2); // FLD1
660 TmpFlt2.changeSign();
661 addLegalFPImmediate(TmpFlt2); // FLD1/FCHS
662 }
663
664 // Always expand sin/cos functions even though x87 has an instruction.
665 setOperationAction(ISD::FSIN , MVT::f80, Expand);
666 setOperationAction(ISD::FCOS , MVT::f80, Expand);
667 setOperationAction(ISD::FSINCOS, MVT::f80, Expand);
668
669 setOperationAction(ISD::FFLOOR, MVT::f80, Expand);
670 setOperationAction(ISD::FCEIL, MVT::f80, Expand);
671 setOperationAction(ISD::FTRUNC, MVT::f80, Expand);
672 setOperationAction(ISD::FRINT, MVT::f80, Expand);
673 setOperationAction(ISD::FNEARBYINT, MVT::f80, Expand);
674 setOperationAction(ISD::FMA, MVT::f80, Expand);
675 setOperationAction(ISD::LROUND, MVT::f80, Expand);
676 setOperationAction(ISD::LLROUND, MVT::f80, Expand);
677 setOperationAction(ISD::LRINT, MVT::f80, Expand);
678 setOperationAction(ISD::LLRINT, MVT::f80, Expand);
679 }
680
681 // Always use a library call for pow.
682 setOperationAction(ISD::FPOW , MVT::f32 , Expand);
683 setOperationAction(ISD::FPOW , MVT::f64 , Expand);
684 setOperationAction(ISD::FPOW , MVT::f80 , Expand);
685
686 setOperationAction(ISD::FLOG, MVT::f80, Expand);
687 setOperationAction(ISD::FLOG2, MVT::f80, Expand);
688 setOperationAction(ISD::FLOG10, MVT::f80, Expand);
689 setOperationAction(ISD::FEXP, MVT::f80, Expand);
690 setOperationAction(ISD::FEXP2, MVT::f80, Expand);
691 setOperationAction(ISD::FMINNUM, MVT::f80, Expand);
692 setOperationAction(ISD::FMAXNUM, MVT::f80, Expand);
693
694 // Some FP actions are always expanded for vector types.
695 for (auto VT : { MVT::v4f32, MVT::v8f32, MVT::v16f32,
696 MVT::v2f64, MVT::v4f64, MVT::v8f64 }) {
697 setOperationAction(ISD::FSIN, VT, Expand);
698 setOperationAction(ISD::FSINCOS, VT, Expand);
699 setOperationAction(ISD::FCOS, VT, Expand);
700 setOperationAction(ISD::FREM, VT, Expand);
701 setOperationAction(ISD::FCOPYSIGN, VT, Expand);
702 setOperationAction(ISD::FPOW, VT, Expand);
703 setOperationAction(ISD::FLOG, VT, Expand);
704 setOperationAction(ISD::FLOG2, VT, Expand);
705 setOperationAction(ISD::FLOG10, VT, Expand);
706 setOperationAction(ISD::FEXP, VT, Expand);
707 setOperationAction(ISD::FEXP2, VT, Expand);
708 }
709
710 // First set operation action for all vector types to either promote
711 // (for widening) or expand (for scalarization). Then we will selectively
712 // turn on ones that can be effectively codegen'd.
713 for (MVT VT : MVT::vector_valuetypes()) {
714 setOperationAction(ISD::SDIV, VT, Expand);
715 setOperationAction(ISD::UDIV, VT, Expand);
716 setOperationAction(ISD::SREM, VT, Expand);
717 setOperationAction(ISD::UREM, VT, Expand);
718 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT,Expand);
719 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
720 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT,Expand);
721 setOperationAction(ISD::INSERT_SUBVECTOR, VT,Expand);
722 setOperationAction(ISD::FMA, VT, Expand);
723 setOperationAction(ISD::FFLOOR, VT, Expand);
724 setOperationAction(ISD::FCEIL, VT, Expand);
725 setOperationAction(ISD::FTRUNC, VT, Expand);
726 setOperationAction(ISD::FRINT, VT, Expand);
727 setOperationAction(ISD::FNEARBYINT, VT, Expand);
728 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
729 setOperationAction(ISD::MULHS, VT, Expand);
730 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
731 setOperationAction(ISD::MULHU, VT, Expand);
732 setOperationAction(ISD::SDIVREM, VT, Expand);
733 setOperationAction(ISD::UDIVREM, VT, Expand);
734 setOperationAction(ISD::CTPOP, VT, Expand);
735 setOperationAction(ISD::CTTZ, VT, Expand);
736 setOperationAction(ISD::CTLZ, VT, Expand);
737 setOperationAction(ISD::ROTL, VT, Expand);
738 setOperationAction(ISD::ROTR, VT, Expand);
739 setOperationAction(ISD::BSWAP, VT, Expand);
740 setOperationAction(ISD::SETCC, VT, Expand);
741 setOperationAction(ISD::FP_TO_UINT, VT, Expand);
742 setOperationAction(ISD::FP_TO_SINT, VT, Expand);
743 setOperationAction(ISD::UINT_TO_FP, VT, Expand);
744 setOperationAction(ISD::SINT_TO_FP, VT, Expand);
745 setOperationAction(ISD::SIGN_EXTEND_INREG, VT,Expand);
746 setOperationAction(ISD::TRUNCATE, VT, Expand);
747 setOperationAction(ISD::SIGN_EXTEND, VT, Expand);
748 setOperationAction(ISD::ZERO_EXTEND, VT, Expand);
749 setOperationAction(ISD::ANY_EXTEND, VT, Expand);
750 setOperationAction(ISD::SELECT_CC, VT, Expand);
751 for (MVT InnerVT : MVT::vector_valuetypes()) {
752 setTruncStoreAction(InnerVT, VT, Expand);
753
754 setLoadExtAction(ISD::SEXTLOAD, InnerVT, VT, Expand);
755 setLoadExtAction(ISD::ZEXTLOAD, InnerVT, VT, Expand);
756
757 // N.b. ISD::EXTLOAD legality is basically ignored except for i1-like
758 // types, we have to deal with them whether we ask for Expansion or not.
759 // Setting Expand causes its own optimisation problems though, so leave
760 // them legal.
761 if (VT.getVectorElementType() == MVT::i1)
762 setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
763
764 // EXTLOAD for MVT::f16 vectors is not legal because f16 vectors are
765 // split/scalarized right now.
766 if (VT.getVectorElementType() == MVT::f16)
767 setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
768 }
769 }
770
771 // FIXME: In order to prevent SSE instructions being expanded to MMX ones
772 // with -msoft-float, disable use of MMX as well.
773 if (!Subtarget.useSoftFloat() && Subtarget.hasMMX()) {
774 addRegisterClass(MVT::x86mmx, &X86::VR64RegClass);
775 // No operations on x86mmx supported, everything uses intrinsics.
776 }
777
778 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE1()) {
779 addRegisterClass(MVT::v4f32, Subtarget.hasVLX() ? &X86::VR128XRegClass
780 : &X86::VR128RegClass);
781
782 setOperationAction(ISD::FNEG, MVT::v4f32, Custom);
783 setOperationAction(ISD::FABS, MVT::v4f32, Custom);
784 setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Custom);
785 setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
786 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
787 setOperationAction(ISD::VSELECT, MVT::v4f32, Custom);
788 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
789 setOperationAction(ISD::SELECT, MVT::v4f32, Custom);
790 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom);
791 }
792
793 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) {
794 addRegisterClass(MVT::v2f64, Subtarget.hasVLX() ? &X86::VR128XRegClass
795 : &X86::VR128RegClass);
796
797 // FIXME: Unfortunately, -soft-float and -no-implicit-float mean XMM
798 // registers cannot be used even for integer operations.
799 addRegisterClass(MVT::v16i8, Subtarget.hasVLX() ? &X86::VR128XRegClass
800 : &X86::VR128RegClass);
801 addRegisterClass(MVT::v8i16, Subtarget.hasVLX() ? &X86::VR128XRegClass
802 : &X86::VR128RegClass);
803 addRegisterClass(MVT::v4i32, Subtarget.hasVLX() ? &X86::VR128XRegClass
804 : &X86::VR128RegClass);
805 addRegisterClass(MVT::v2i64, Subtarget.hasVLX() ? &X86::VR128XRegClass
806 : &X86::VR128RegClass);
807
808 for (auto VT : { MVT::v2i8, MVT::v4i8, MVT::v8i8,
809 MVT::v2i16, MVT::v4i16, MVT::v2i32 }) {
810 setOperationAction(ISD::SDIV, VT, Custom);
811 setOperationAction(ISD::SREM, VT, Custom);
812 setOperationAction(ISD::UDIV, VT, Custom);
813 setOperationAction(ISD::UREM, VT, Custom);
814 }
815
816 setOperationAction(ISD::MUL, MVT::v2i8, Custom);
817 setOperationAction(ISD::MUL, MVT::v2i16, Custom);
818 setOperationAction(ISD::MUL, MVT::v2i32, Custom);
819 setOperationAction(ISD::MUL, MVT::v4i8, Custom);
820 setOperationAction(ISD::MUL, MVT::v4i16, Custom);
821 setOperationAction(ISD::MUL, MVT::v8i8, Custom);
822
823 setOperationAction(ISD::MUL, MVT::v16i8, Custom);
824 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
825 setOperationAction(ISD::MUL, MVT::v2i64, Custom);
826 setOperationAction(ISD::MULHU, MVT::v4i32, Custom);
827 setOperationAction(ISD::MULHS, MVT::v4i32, Custom);
828 setOperationAction(ISD::MULHU, MVT::v16i8, Custom);
829 setOperationAction(ISD::MULHS, MVT::v16i8, Custom);
830 setOperationAction(ISD::MULHU, MVT::v8i16, Legal);
831 setOperationAction(ISD::MULHS, MVT::v8i16, Legal);
832 setOperationAction(ISD::MUL, MVT::v8i16, Legal);
833 setOperationAction(ISD::FNEG, MVT::v2f64, Custom);
834 setOperationAction(ISD::FABS, MVT::v2f64, Custom);
835 setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Custom);
836
837 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
838 setOperationAction(ISD::SMAX, VT, VT == MVT::v8i16 ? Legal : Custom);
839 setOperationAction(ISD::SMIN, VT, VT == MVT::v8i16 ? Legal : Custom);
840 setOperationAction(ISD::UMAX, VT, VT == MVT::v16i8 ? Legal : Custom);
841 setOperationAction(ISD::UMIN, VT, VT == MVT::v16i8 ? Legal : Custom);
842 }
843
844 setOperationAction(ISD::UADDSAT, MVT::v16i8, Legal);
845 setOperationAction(ISD::SADDSAT, MVT::v16i8, Legal);
846 setOperationAction(ISD::USUBSAT, MVT::v16i8, Legal);
847 setOperationAction(ISD::SSUBSAT, MVT::v16i8, Legal);
848 setOperationAction(ISD::UADDSAT, MVT::v8i16, Legal);
849 setOperationAction(ISD::SADDSAT, MVT::v8i16, Legal);
850 setOperationAction(ISD::USUBSAT, MVT::v8i16, Legal);
851 setOperationAction(ISD::SSUBSAT, MVT::v8i16, Legal);
852 setOperationAction(ISD::UADDSAT, MVT::v4i32, Custom);
853 setOperationAction(ISD::USUBSAT, MVT::v4i32, Custom);
854 setOperationAction(ISD::UADDSAT, MVT::v2i64, Custom);
855 setOperationAction(ISD::USUBSAT, MVT::v2i64, Custom);
856
857 if (!ExperimentalVectorWideningLegalization) {
858 // Use widening instead of promotion.
859 for (auto VT : { MVT::v8i8, MVT::v4i8, MVT::v2i8,
860 MVT::v4i16, MVT::v2i16 }) {
861 setOperationAction(ISD::UADDSAT, VT, Custom);
862 setOperationAction(ISD::SADDSAT, VT, Custom);
863 setOperationAction(ISD::USUBSAT, VT, Custom);
864 setOperationAction(ISD::SSUBSAT, VT, Custom);
865 }
866 }
867
868 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
869 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
870 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
871
872 // Provide custom widening for v2f32 setcc. This is really for VLX when
873 // setcc result type returns v2i1/v4i1 vector for v2f32/v4f32 leading to
874 // type legalization changing the result type to v4i1 during widening.
875 // It works fine for SSE2 and is probably faster so no need to qualify with
876 // VLX support.
877 setOperationAction(ISD::SETCC, MVT::v2i32, Custom);
878
879 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
880 setOperationAction(ISD::SETCC, VT, Custom);
881 setOperationAction(ISD::CTPOP, VT, Custom);
882 setOperationAction(ISD::ABS, VT, Custom);
883
884 // The condition codes aren't legal in SSE/AVX and under AVX512 we use
885 // setcc all the way to isel and prefer SETGT in some isel patterns.
886 setCondCodeAction(ISD::SETLT, VT, Custom);
887 setCondCodeAction(ISD::SETLE, VT, Custom);
888 }
889
890 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
891 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
892 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
893 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
894 setOperationAction(ISD::VSELECT, VT, Custom);
895 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
896 }
897
898 // We support custom legalizing of sext and anyext loads for specific
899 // memory vector types which we can load as a scalar (or sequence of
900 // scalars) and extend in-register to a legal 128-bit vector type. For sext
901 // loads these must work with a single scalar load.
902 for (MVT VT : MVT::integer_vector_valuetypes()) {
903 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i8, Custom);
904 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i16, Custom);
905 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i32, Custom);
906 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i8, Custom);
907 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i16, Custom);
908 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v8i8, Custom);
909 }
910
911 for (auto VT : { MVT::v2f64, MVT::v2i64 }) {
912 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
913 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
914 setOperationAction(ISD::VSELECT, VT, Custom);
915
916 if (VT == MVT::v2i64 && !Subtarget.is64Bit())
917 continue;
918
919 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
920 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
921 }
922
923 // Custom lower v2i64 and v2f64 selects.
924 setOperationAction(ISD::SELECT, MVT::v2f64, Custom);
925 setOperationAction(ISD::SELECT, MVT::v2i64, Custom);
926 setOperationAction(ISD::SELECT, MVT::v4i32, Custom);
927 setOperationAction(ISD::SELECT, MVT::v8i16, Custom);
928 setOperationAction(ISD::SELECT, MVT::v16i8, Custom);
929
930 setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
931 setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Custom);
932 setOperationAction(ISD::FP_TO_SINT, MVT::v2i16, Custom);
933
934 // Custom legalize these to avoid over promotion or custom promotion.
935 setOperationAction(ISD::FP_TO_SINT, MVT::v2i8, Custom);
936 setOperationAction(ISD::FP_TO_SINT, MVT::v4i8, Custom);
937 setOperationAction(ISD::FP_TO_SINT, MVT::v8i8, Custom);
938 setOperationAction(ISD::FP_TO_SINT, MVT::v2i16, Custom);
939 setOperationAction(ISD::FP_TO_SINT, MVT::v4i16, Custom);
940 setOperationAction(ISD::FP_TO_UINT, MVT::v2i8, Custom);
941 setOperationAction(ISD::FP_TO_UINT, MVT::v4i8, Custom);
942 setOperationAction(ISD::FP_TO_UINT, MVT::v8i8, Custom);
943 setOperationAction(ISD::FP_TO_UINT, MVT::v2i16, Custom);
944 setOperationAction(ISD::FP_TO_UINT, MVT::v4i16, Custom);
945
946 // By marking FP_TO_SINT v8i16 as Custom, will trick type legalization into
947 // promoting v8i8 FP_TO_UINT into FP_TO_SINT. When the v8i16 FP_TO_SINT is
948 // split again based on the input type, this will cause an AssertSExt i16 to
949 // be emitted instead of an AssertZExt. This will allow packssdw followed by
950 // packuswb to be used to truncate to v8i8. This is necessary since packusdw
951 // isn't available until sse4.1.
952 setOperationAction(ISD::FP_TO_SINT, MVT::v8i16, Custom);
953
954 setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
955 setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
956
957 setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom);
958
959 // Fast v2f32 UINT_TO_FP( v2i32 ) custom conversion.
960 setOperationAction(ISD::UINT_TO_FP, MVT::v2f32, Custom);
961
962 setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
963 setOperationAction(ISD::FP_ROUND, MVT::v2f32, Custom);
964
965 // We want to legalize this to an f64 load rather than an i64 load on
966 // 64-bit targets and two 32-bit loads on a 32-bit target. Similar for
967 // store.
968 setOperationAction(ISD::LOAD, MVT::v2f32, Custom);
969 setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
970 setOperationAction(ISD::LOAD, MVT::v4i16, Custom);
971 setOperationAction(ISD::LOAD, MVT::v8i8, Custom);
972 setOperationAction(ISD::STORE, MVT::v2f32, Custom);
973 setOperationAction(ISD::STORE, MVT::v2i32, Custom);
974 setOperationAction(ISD::STORE, MVT::v4i16, Custom);
975 setOperationAction(ISD::STORE, MVT::v8i8, Custom);
976
977 setOperationAction(ISD::BITCAST, MVT::v2i32, Custom);
978 setOperationAction(ISD::BITCAST, MVT::v4i16, Custom);
979 setOperationAction(ISD::BITCAST, MVT::v8i8, Custom);
980 if (!Subtarget.hasAVX512())
981 setOperationAction(ISD::BITCAST, MVT::v16i1, Custom);
982
983 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v2i64, Custom);
984 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i32, Custom);
985 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i16, Custom);
986
987 if (ExperimentalVectorWideningLegalization) {
988 setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom);
989
990 setOperationAction(ISD::TRUNCATE, MVT::v2i8, Custom);
991 setOperationAction(ISD::TRUNCATE, MVT::v2i16, Custom);
992 setOperationAction(ISD::TRUNCATE, MVT::v2i32, Custom);
993 setOperationAction(ISD::TRUNCATE, MVT::v4i8, Custom);
994 setOperationAction(ISD::TRUNCATE, MVT::v4i16, Custom);
995 setOperationAction(ISD::TRUNCATE, MVT::v8i8, Custom);
996 } else {
997 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i64, Custom);
998 }
999
1000 // In the customized shift lowering, the legal v4i32/v2i64 cases
1001 // in AVX2 will be recognized.
1002 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1003 setOperationAction(ISD::SRL, VT, Custom);
1004 setOperationAction(ISD::SHL, VT, Custom);
1005 setOperationAction(ISD::SRA, VT, Custom);
1006 }
1007
1008 setOperationAction(ISD::ROTL, MVT::v4i32, Custom);
1009 setOperationAction(ISD::ROTL, MVT::v8i16, Custom);
1010
1011 // With AVX512, expanding (and promoting the shifts) is better.
1012 if (!Subtarget.hasAVX512())
1013 setOperationAction(ISD::ROTL, MVT::v16i8, Custom);
1014 }
1015
1016 if (!Subtarget.useSoftFloat() && Subtarget.hasSSSE3()) {
1017 setOperationAction(ISD::ABS, MVT::v16i8, Legal);
1018 setOperationAction(ISD::ABS, MVT::v8i16, Legal);
1019 setOperationAction(ISD::ABS, MVT::v4i32, Legal);
1020 setOperationAction(ISD::BITREVERSE, MVT::v16i8, Custom);
1021 setOperationAction(ISD::CTLZ, MVT::v16i8, Custom);
1022 setOperationAction(ISD::CTLZ, MVT::v8i16, Custom);
1023 setOperationAction(ISD::CTLZ, MVT::v4i32, Custom);
1024 setOperationAction(ISD::CTLZ, MVT::v2i64, Custom);
1025
1026 // These might be better off as horizontal vector ops.
1027 setOperationAction(ISD::ADD, MVT::i16, Custom);
1028 setOperationAction(ISD::ADD, MVT::i32, Custom);
1029 setOperationAction(ISD::SUB, MVT::i16, Custom);
1030 setOperationAction(ISD::SUB, MVT::i32, Custom);
1031 }
1032
1033 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE41()) {
1034 for (MVT RoundedTy : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64}) {
1035 setOperationAction(ISD::FFLOOR, RoundedTy, Legal);
1036 setOperationAction(ISD::FCEIL, RoundedTy, Legal);
1037 setOperationAction(ISD::FTRUNC, RoundedTy, Legal);
1038 setOperationAction(ISD::FRINT, RoundedTy, Legal);
1039 setOperationAction(ISD::FNEARBYINT, RoundedTy, Legal);
1040 }
1041
1042 setOperationAction(ISD::SMAX, MVT::v16i8, Legal);
1043 setOperationAction(ISD::SMAX, MVT::v4i32, Legal);
1044 setOperationAction(ISD::UMAX, MVT::v8i16, Legal);
1045 setOperationAction(ISD::UMAX, MVT::v4i32, Legal);
1046 setOperationAction(ISD::SMIN, MVT::v16i8, Legal);
1047 setOperationAction(ISD::SMIN, MVT::v4i32, Legal);
1048 setOperationAction(ISD::UMIN, MVT::v8i16, Legal);
1049 setOperationAction(ISD::UMIN, MVT::v4i32, Legal);
1050
1051 // FIXME: Do we need to handle scalar-to-vector here?
1052 setOperationAction(ISD::MUL, MVT::v4i32, Legal);
1053
1054 // We directly match byte blends in the backend as they match the VSELECT
1055 // condition form.
1056 setOperationAction(ISD::VSELECT, MVT::v16i8, Legal);
1057
1058 // SSE41 brings specific instructions for doing vector sign extend even in
1059 // cases where we don't have SRA.
1060 for (auto VT : { MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1061 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Legal);
1062 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Legal);
1063 }
1064
1065 if (!ExperimentalVectorWideningLegalization) {
1066 // Avoid narrow result types when widening. The legal types are listed
1067 // in the next loop.
1068 for (MVT VT : MVT::integer_vector_valuetypes()) {
1069 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i8, Custom);
1070 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i16, Custom);
1071 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i32, Custom);
1072 }
1073 }
1074
1075 // SSE41 also has vector sign/zero extending loads, PMOV[SZ]X
1076 for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) {
1077 setLoadExtAction(LoadExtOp, MVT::v8i16, MVT::v8i8, Legal);
1078 setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i8, Legal);
1079 if (!ExperimentalVectorWideningLegalization)
1080 setLoadExtAction(LoadExtOp, MVT::v2i32, MVT::v2i8, Legal);
1081 setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i8, Legal);
1082 setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i16, Legal);
1083 setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i16, Legal);
1084 setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i32, Legal);
1085 }
1086
1087 // i8 vectors are custom because the source register and source
1088 // memory operand types are not the same width.
1089 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
1090 }
1091
1092 if (!Subtarget.useSoftFloat() && Subtarget.hasXOP()) {
1093 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
1094 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 })
1095 setOperationAction(ISD::ROTL, VT, Custom);
1096
1097 // XOP can efficiently perform BITREVERSE with VPPERM.
1098 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 })
1099 setOperationAction(ISD::BITREVERSE, VT, Custom);
1100
1101 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
1102 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 })
1103 setOperationAction(ISD::BITREVERSE, VT, Custom);
1104 }
1105
1106 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX()) {
1107 bool HasInt256 = Subtarget.hasInt256();
1108
1109 addRegisterClass(MVT::v32i8, Subtarget.hasVLX() ? &X86::VR256XRegClass
1110 : &X86::VR256RegClass);
1111 addRegisterClass(MVT::v16i16, Subtarget.hasVLX() ? &X86::VR256XRegClass
1112 : &X86::VR256RegClass);
1113 addRegisterClass(MVT::v8i32, Subtarget.hasVLX() ? &X86::VR256XRegClass
1114 : &X86::VR256RegClass);
1115 addRegisterClass(MVT::v8f32, Subtarget.hasVLX() ? &X86::VR256XRegClass
1116 : &X86::VR256RegClass);
1117 addRegisterClass(MVT::v4i64, Subtarget.hasVLX() ? &X86::VR256XRegClass
1118 : &X86::VR256RegClass);
1119 addRegisterClass(MVT::v4f64, Subtarget.hasVLX() ? &X86::VR256XRegClass
1120 : &X86::VR256RegClass);
1121
1122 for (auto VT : { MVT::v8f32, MVT::v4f64 }) {
1123 setOperationAction(ISD::FFLOOR, VT, Legal);
1124 setOperationAction(ISD::FCEIL, VT, Legal);
1125 setOperationAction(ISD::FTRUNC, VT, Legal);
1126 setOperationAction(ISD::FRINT, VT, Legal);
1127 setOperationAction(ISD::FNEARBYINT, VT, Legal);
1128 setOperationAction(ISD::FNEG, VT, Custom);
1129 setOperationAction(ISD::FABS, VT, Custom);
1130 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
1131 }
1132
1133 // (fp_to_int:v8i16 (v8f32 ..)) requires the result type to be promoted
1134 // even though v8i16 is a legal type.
1135 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i16, MVT::v8i32);
1136 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i16, MVT::v8i32);
1137 setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal);
1138
1139 setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
1140 setOperationAction(ISD::FP_ROUND, MVT::v4f32, Legal);
1141
1142 if (!Subtarget.hasAVX512())
1143 setOperationAction(ISD::BITCAST, MVT::v32i1, Custom);
1144
1145 // In the customized shift lowering, the legal v8i32/v4i64 cases
1146 // in AVX2 will be recognized.
1147 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1148 setOperationAction(ISD::SRL, VT, Custom);
1149 setOperationAction(ISD::SHL, VT, Custom);
1150 setOperationAction(ISD::SRA, VT, Custom);
1151 }
1152
1153 // These types need custom splitting if their input is a 128-bit vector.
1154 setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
1155 setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
1156 setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom);
1157 setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
1158
1159 setOperationAction(ISD::ROTL, MVT::v8i32, Custom);
1160 setOperationAction(ISD::ROTL, MVT::v16i16, Custom);
1161
1162 // With BWI, expanding (and promoting the shifts) is better.
1163 if (!Subtarget.hasBWI())
1164 setOperationAction(ISD::ROTL, MVT::v32i8, Custom);
1165
1166 setOperationAction(ISD::SELECT, MVT::v4f64, Custom);
1167 setOperationAction(ISD::SELECT, MVT::v4i64, Custom);
1168 setOperationAction(ISD::SELECT, MVT::v8i32, Custom);
1169 setOperationAction(ISD::SELECT, MVT::v16i16, Custom);
1170 setOperationAction(ISD::SELECT, MVT::v32i8, Custom);
1171 setOperationAction(ISD::SELECT, MVT::v8f32, Custom);
1172
1173 for (auto VT : { MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1174 setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
1175 setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
1176 setOperationAction(ISD::ANY_EXTEND, VT, Custom);
1177 }
1178
1179 setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom);
1180 setOperationAction(ISD::TRUNCATE, MVT::v8i16, Custom);
1181 setOperationAction(ISD::TRUNCATE, MVT::v4i32, Custom);
1182 setOperationAction(ISD::BITREVERSE, MVT::v32i8, Custom);
1183
1184 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1185 setOperationAction(ISD::SETCC, VT, Custom);
1186 setOperationAction(ISD::CTPOP, VT, Custom);
1187 setOperationAction(ISD::CTLZ, VT, Custom);
1188
1189 // The condition codes aren't legal in SSE/AVX and under AVX512 we use
1190 // setcc all the way to isel and prefer SETGT in some isel patterns.
1191 setCondCodeAction(ISD::SETLT, VT, Custom);
1192 setCondCodeAction(ISD::SETLE, VT, Custom);
1193 }
1194
1195 if (Subtarget.hasAnyFMA()) {
1196 for (auto VT : { MVT::f32, MVT::f64, MVT::v4f32, MVT::v8f32,
1197 MVT::v2f64, MVT::v4f64 })
1198 setOperationAction(ISD::FMA, VT, Legal);
1199 }
1200
1201 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1202 setOperationAction(ISD::ADD, VT, HasInt256 ? Legal : Custom);
1203 setOperationAction(ISD::SUB, VT, HasInt256 ? Legal : Custom);
1204 }
1205
1206 setOperationAction(ISD::MUL, MVT::v4i64, Custom);
1207 setOperationAction(ISD::MUL, MVT::v8i32, HasInt256 ? Legal : Custom);
1208 setOperationAction(ISD::MUL, MVT::v16i16, HasInt256 ? Legal : Custom);
1209 setOperationAction(ISD::MUL, MVT::v32i8, Custom);
1210
1211 setOperationAction(ISD::MULHU, MVT::v8i32, Custom);
1212 setOperationAction(ISD::MULHS, MVT::v8i32, Custom);
1213 setOperationAction(ISD::MULHU, MVT::v16i16, HasInt256 ? Legal : Custom);
1214 setOperationAction(ISD::MULHS, MVT::v16i16, HasInt256 ? Legal : Custom);
1215 setOperationAction(ISD::MULHU, MVT::v32i8, Custom);
1216 setOperationAction(ISD::MULHS, MVT::v32i8, Custom);
1217
1218 setOperationAction(ISD::ABS, MVT::v4i64, Custom);
1219 setOperationAction(ISD::SMAX, MVT::v4i64, Custom);
1220 setOperationAction(ISD::UMAX, MVT::v4i64, Custom);
1221 setOperationAction(ISD::SMIN, MVT::v4i64, Custom);
1222 setOperationAction(ISD::UMIN, MVT::v4i64, Custom);
1223
1224 setOperationAction(ISD::UADDSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
1225 setOperationAction(ISD::SADDSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
1226 setOperationAction(ISD::USUBSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
1227 setOperationAction(ISD::SSUBSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
1228 setOperationAction(ISD::UADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
1229 setOperationAction(ISD::SADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
1230 setOperationAction(ISD::USUBSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
1231 setOperationAction(ISD::SSUBSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
1232
1233 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32 }) {
1234 setOperationAction(ISD::ABS, VT, HasInt256 ? Legal : Custom);
1235 setOperationAction(ISD::SMAX, VT, HasInt256 ? Legal : Custom);
1236 setOperationAction(ISD::UMAX, VT, HasInt256 ? Legal : Custom);
1237 setOperationAction(ISD::SMIN, VT, HasInt256 ? Legal : Custom);
1238 setOperationAction(ISD::UMIN, VT, HasInt256 ? Legal : Custom);
1239 }
1240
1241 for (auto VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64}) {
1242 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom);
1243 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom);
1244 }
1245
1246 if (HasInt256) {
1247 // The custom lowering for UINT_TO_FP for v8i32 becomes interesting
1249 // when we have a 256-bit-wide blend with immediate.
1249 setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Custom);
1250
1251 // AVX2 also has wider vector sign/zero extending loads, VPMOV[SZ]X
1252 for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) {
1253 setLoadExtAction(LoadExtOp, MVT::v16i16, MVT::v16i8, Legal);
1254 setLoadExtAction(LoadExtOp, MVT::v8i32, MVT::v8i8, Legal);
1255 setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i8, Legal);
1256 setLoadExtAction(LoadExtOp, MVT::v8i32, MVT::v8i16, Legal);
1257 setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i16, Legal);
1258 setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i32, Legal);
1259 }
1260 }
1261
1262 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1263 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 }) {
1264 setOperationAction(ISD::MLOAD, VT, Legal);
1265 setOperationAction(ISD::MSTORE, VT, Legal);
1266 }
1267
1268 // Extract subvector is special because the value type
1269 // (result) is 128-bit but the source is 256-bit wide.
1270 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
1271 MVT::v4f32, MVT::v2f64 }) {
1272 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
1273 }
1274
1275 // Custom lower several nodes for 256-bit types.
1276 for (MVT VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,
1277 MVT::v8f32, MVT::v4f64 }) {
1278 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1279 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1280 setOperationAction(ISD::VSELECT, VT, Custom);
1281 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1282 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1283 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
1284 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal);
1285 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1286 setOperationAction(ISD::STORE, VT, Custom);
1287 }
1288
1289 if (HasInt256)
1290 setOperationAction(ISD::VSELECT, MVT::v32i8, Legal);
1291
1292 if (HasInt256) {
1293 // Custom legalize 2x32 to get a little better code.
1294 setOperationAction(ISD::MGATHER, MVT::v2f32, Custom);
1295 setOperationAction(ISD::MGATHER, MVT::v2i32, Custom);
1296
1297 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1298 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 })
1299 setOperationAction(ISD::MGATHER, VT, Custom);
1300 }
1301 }
1302
1303 // This block controls legalization of the mask vector sizes that are
1304 // available with AVX512. 512-bit vectors are in a separate block controlled
1305 // by useAVX512Regs.
1306 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
1307 addRegisterClass(MVT::v1i1, &X86::VK1RegClass);
1308 addRegisterClass(MVT::v2i1, &X86::VK2RegClass);
1309 addRegisterClass(MVT::v4i1, &X86::VK4RegClass);
1310 addRegisterClass(MVT::v8i1, &X86::VK8RegClass);
1311 addRegisterClass(MVT::v16i1, &X86::VK16RegClass);
1312
1313 setOperationAction(ISD::SELECT, MVT::v1i1, Custom);
1314 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v1i1, Custom);
1315 setOperationAction(ISD::BUILD_VECTOR, MVT::v1i1, Custom);
1316
1317 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i1, MVT::v8i32);
1318 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i1, MVT::v8i32);
1319 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v4i1, MVT::v4i32);
1320 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v4i1, MVT::v4i32);
1321 setOperationAction(ISD::FP_TO_SINT, MVT::v2i1, Custom);
1322 setOperationAction(ISD::FP_TO_UINT, MVT::v2i1, Custom);
1323
1324 // There is no byte sized k-register load or store without AVX512DQ.
1325 if (!Subtarget.hasDQI()) {
1326 setOperationAction(ISD::LOAD, MVT::v1i1, Custom);
1327 setOperationAction(ISD::LOAD, MVT::v2i1, Custom);
1328 setOperationAction(ISD::LOAD, MVT::v4i1, Custom);
1329 setOperationAction(ISD::LOAD, MVT::v8i1, Custom);
1330
1331 setOperationAction(ISD::STORE, MVT::v1i1, Custom);
1332 setOperationAction(ISD::STORE, MVT::v2i1, Custom);
1333 setOperationAction(ISD::STORE, MVT::v4i1, Custom);
1334 setOperationAction(ISD::STORE, MVT::v8i1, Custom);
1335 }
1336
1337 // Extends of v16i1/v8i1/v4i1/v2i1 to 128-bit vectors.
1338 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1339 setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
1340 setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
1341 setOperationAction(ISD::ANY_EXTEND, VT, Custom);
1342 }
1343
1344 for (auto VT : { MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v16i1 }) {
1345 setOperationAction(ISD::ADD, VT, Custom);
1346 setOperationAction(ISD::SUB, VT, Custom);
1347 setOperationAction(ISD::MUL, VT, Custom);
1348 setOperationAction(ISD::SETCC, VT, Custom);
1349 setOperationAction(ISD::SELECT, VT, Custom);
1350 setOperationAction(ISD::TRUNCATE, VT, Custom);
1351 setOperationAction(ISD::UADDSAT, VT, Custom);
1352 setOperationAction(ISD::SADDSAT, VT, Custom);
1353 setOperationAction(ISD::USUBSAT, VT, Custom);
1354 setOperationAction(ISD::SSUBSAT, VT, Custom);
1355
1356 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1357 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1358 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1359 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
1360 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1361 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1362 setOperationAction(ISD::VSELECT, VT, Expand);
1363 }
1364
1365 for (auto VT : { MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1 })
1366 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
1367 }
1368
1369 // This block controls legalization for 512-bit operations with 32/64 bit
1370 // elements. 512-bits can be disabled based on prefer-vector-width and
1371 // required-vector-width function attributes.
1372 if (!Subtarget.useSoftFloat() && Subtarget.useAVX512Regs()) {
1373 addRegisterClass(MVT::v16i32, &X86::VR512RegClass);
1374 addRegisterClass(MVT::v16f32, &X86::VR512RegClass);
1375 addRegisterClass(MVT::v8i64, &X86::VR512RegClass);
1376 addRegisterClass(MVT::v8f64, &X86::VR512RegClass);
1377
1378 for (auto ExtType : {ISD::ZEXTLOAD, ISD::SEXTLOAD}) {
1379 setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i8, Legal);
1380 setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i16, Legal);
1381 setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i8, Legal);
1382 setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i16, Legal);
1383 setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i32, Legal);
1384 }
1385
1386 for (MVT VT : { MVT::v16f32, MVT::v8f64 }) {
1387 setOperationAction(ISD::FNEG, VT, Custom);
1388 setOperationAction(ISD::FABS, VT, Custom);
1389 setOperationAction(ISD::FMA, VT, Legal);
1390 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
1391 }
1392
1393 setOperationAction(ISD::FP_TO_SINT, MVT::v16i32, Legal);
1394 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v16i16, MVT::v16i32);
1395 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v16i8, MVT::v16i32);
1396 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v16i1, MVT::v16i32);
1397 setOperationAction(ISD::FP_TO_UINT, MVT::v16i32, Legal);
1398 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v16i1, MVT::v16i32);
1399 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v16i8, MVT::v16i32);
1400 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v16i16, MVT::v16i32);
1401 setOperationAction(ISD::SINT_TO_FP, MVT::v16i32, Legal);
1402 setOperationAction(ISD::UINT_TO_FP, MVT::v16i32, Legal);
1403
1404 setTruncStoreAction(MVT::v8i64, MVT::v8i8, Legal);
1405 setTruncStoreAction(MVT::v8i64, MVT::v8i16, Legal);
1406 setTruncStoreAction(MVT::v8i64, MVT::v8i32, Legal);
1407 setTruncStoreAction(MVT::v16i32, MVT::v16i8, Legal);
1408 setTruncStoreAction(MVT::v16i32, MVT::v16i16, Legal);
1409
1410 if (!Subtarget.hasVLX()) {
1411 // With 512-bit vectors and no VLX, we prefer to widen MLOAD/MSTORE
1412 // to 512-bit rather than use the AVX2 instructions so that we can use
1413 // k-masks.
1414 for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1415 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64}) {
1416 setOperationAction(ISD::MLOAD, VT, Custom);
1417 setOperationAction(ISD::MSTORE, VT, Custom);
1418 }
1419 }
1420
1421 setOperationAction(ISD::TRUNCATE, MVT::v8i32, Custom);
1422 setOperationAction(ISD::TRUNCATE, MVT::v16i16, Custom);
1423 setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
1424 setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom);
1425 setOperationAction(ISD::ANY_EXTEND, MVT::v16i32, Custom);
1426 setOperationAction(ISD::ANY_EXTEND, MVT::v8i64, Custom);
1427 setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
1428 setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
1429
1430 if (ExperimentalVectorWideningLegalization) {
1431 // Need to custom widen this if we don't have AVX512BW.
1432 setOperationAction(ISD::ANY_EXTEND, MVT::v8i8, Custom);
1433 setOperationAction(ISD::ZERO_EXTEND, MVT::v8i8, Custom);
1434 setOperationAction(ISD::SIGN_EXTEND, MVT::v8i8, Custom);
1435 }
1436
1437 for (auto VT : { MVT::v16f32, MVT::v8f64 }) {
1438 setOperationAction(ISD::FFLOOR, VT, Legal);
1439 setOperationAction(ISD::FCEIL, VT, Legal);
1440 setOperationAction(ISD::FTRUNC, VT, Legal);
1441 setOperationAction(ISD::FRINT, VT, Legal);
1442 setOperationAction(ISD::FNEARBYINT, VT, Legal);
1443 }
1444
1445 // Without BWI we need to use custom lowering to handle MVT::v64i8 input.
1446 for (auto VT : {MVT::v16i32, MVT::v8i64, MVT::v64i8}) {
1447 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom);
1448 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom);
1449 }
1450
1451 setOperationAction(ISD::CONCAT_VECTORS, MVT::v8f64, Custom);
1452 setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i64, Custom);
1453 setOperationAction(ISD::CONCAT_VECTORS, MVT::v16f32, Custom);
1454 setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i32, Custom);
1455
1456 setOperationAction(ISD::MUL, MVT::v8i64, Custom);
1457 setOperationAction(ISD::MUL, MVT::v16i32, Legal);
1458
1459 setOperationAction(ISD::MULHU, MVT::v16i32, Custom);
1460 setOperationAction(ISD::MULHS, MVT::v16i32, Custom);
1461
1462 setOperationAction(ISD::SELECT, MVT::v8f64, Custom);
1463 setOperationAction(ISD::SELECT, MVT::v8i64, Custom);
1464 setOperationAction(ISD::SELECT, MVT::v16i32, Custom);
1465 setOperationAction(ISD::SELECT, MVT::v32i16, Custom);
1466 setOperationAction(ISD::SELECT, MVT::v64i8, Custom);
1467 setOperationAction(ISD::SELECT, MVT::v16f32, Custom);
1468
1469 for (auto VT : { MVT::v16i32, MVT::v8i64 }) {
1470 setOperationAction(ISD::SMAX, VT, Legal);
1471 setOperationAction(ISD::UMAX, VT, Legal);
1472 setOperationAction(ISD::SMIN, VT, Legal);
1473 setOperationAction(ISD::UMIN, VT, Legal);
1474 setOperationAction(ISD::ABS, VT, Legal);
1475 setOperationAction(ISD::SRL, VT, Custom);
1476 setOperationAction(ISD::SHL, VT, Custom);
1477 setOperationAction(ISD::SRA, VT, Custom);
1478 setOperationAction(ISD::CTPOP, VT, Custom);
1479 setOperationAction(ISD::ROTL, VT, Custom);
1480 setOperationAction(ISD::ROTR, VT, Custom);
1481 setOperationAction(ISD::SETCC, VT, Custom);
1482
1483 // The condition codes aren't legal in SSE/AVX and under AVX512 we use
1484 // setcc all the way to isel and prefer SETGT in some isel patterns.
1485 setCondCodeAction(ISD::SETLT, VT, Custom);
1486 setCondCodeAction(ISD::SETLE, VT, Custom);
1487 }
1488
1489 if (Subtarget.hasDQI()) {
1490 setOperationAction(ISD::SINT_TO_FP, MVT::v8i64, Legal);
1491 setOperationAction(ISD::UINT_TO_FP, MVT::v8i64, Legal);
1492 setOperationAction(ISD::FP_TO_SINT, MVT::v8i64, Legal);
1493 setOperationAction(ISD::FP_TO_UINT, MVT::v8i64, Legal);
1494
1495 setOperationAction(ISD::MUL, MVT::v8i64, Legal);
1496 }
1497
1498 if (Subtarget.hasCDI()) {
1499 // Non-VLX subtargets extend 128/256-bit vectors to use the 512-bit version.
1500 for (auto VT : { MVT::v16i32, MVT::v8i64} ) {
1501 setOperationAction(ISD::CTLZ, VT, Legal);
1502 }
1503 } // Subtarget.hasCDI()
1504
1505 if (Subtarget.hasVPOPCNTDQ()) {
1506 for (auto VT : { MVT::v16i32, MVT::v8i64 })
1507 setOperationAction(ISD::CTPOP, VT, Legal);
1508 }
1509
1510 // Extract subvector is special because the value type
1511 // (result) is 256-bit but the source is 512-bit wide.
1512 // 128-bit was made Legal under AVX1.
1513 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,
1514 MVT::v8f32, MVT::v4f64 })
1515 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
1516
1517 for (auto VT : { MVT::v16i32, MVT::v8i64, MVT::v16f32, MVT::v8f64 }) {
1518 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1519 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1520 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1521 setOperationAction(ISD::VSELECT, VT, Custom);
1522 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1523 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
1524 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal);
1525 setOperationAction(ISD::MLOAD, VT, Legal);
1526 setOperationAction(ISD::MSTORE, VT, Legal);
1527 setOperationAction(ISD::MGATHER, VT, Custom);
1528 setOperationAction(ISD::MSCATTER, VT, Custom);
1529 }
1530 // Need to custom split v32i16/v64i8 bitcasts.
1531 if (!Subtarget.hasBWI()) {
1532 setOperationAction(ISD::BITCAST, MVT::v32i16, Custom);
1533 setOperationAction(ISD::BITCAST, MVT::v64i8, Custom);
1534 }
1535
1536 if (Subtarget.hasVBMI2()) {
1537 for (auto VT : { MVT::v16i32, MVT::v8i64 }) {
1538 setOperationAction(ISD::FSHL, VT, Custom);
1539 setOperationAction(ISD::FSHR, VT, Custom);
1540 }
1541 }
1542 } // has AVX-512
1543
1544 // This block controls legalization for operations that don't have
1545 // pre-AVX512 equivalents. Without VLX we use 512-bit operations for
1546 // narrower widths.
1547 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
1548 // These operations are handled on non-VLX by artificially widening in
1549 // isel patterns.
1550 // TODO: Custom widen in lowering on non-VLX and drop the isel patterns?
1551
1552 setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Legal);
1553 setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
1554 setOperationAction(ISD::FP_TO_UINT, MVT::v2i32, Custom);
1555 setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Legal);
1556 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
1557
1558 for (auto VT : { MVT::v2i64, MVT::v4i64 }) {
1559 setOperationAction(ISD::SMAX, VT, Legal);
1560 setOperationAction(ISD::UMAX, VT, Legal);
1561 setOperationAction(ISD::SMIN, VT, Legal);
1562 setOperationAction(ISD::UMIN, VT, Legal);
1563 setOperationAction(ISD::ABS, VT, Legal);
1564 }
1565
1566 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) {
1567 setOperationAction(ISD::ROTL, VT, Custom);
1568 setOperationAction(ISD::ROTR, VT, Custom);
1569 }
1570
1571 // Custom legalize 2x32 to get a little better code.
1572 setOperationAction(ISD::MSCATTER, MVT::v2f32, Custom);
1573 setOperationAction(ISD::MSCATTER, MVT::v2i32, Custom);
1574
1575 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1576 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 })
1577 setOperationAction(ISD::MSCATTER, VT, Custom);
1578
1579 if (Subtarget.hasDQI()) {
1580 for (auto VT : { MVT::v2i64, MVT::v4i64 }) {
1581 setOperationAction(ISD::SINT_TO_FP, VT, Legal);
1582 setOperationAction(ISD::UINT_TO_FP, VT, Legal);
1583 setOperationAction(ISD::FP_TO_SINT, VT, Legal);
1584 setOperationAction(ISD::FP_TO_UINT, VT, Legal);
1585
1586 setOperationAction(ISD::MUL, VT, Legal);
1587 }
1588 }
1589
1590 if (Subtarget.hasCDI()) {
1591 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) {
1592 setOperationAction(ISD::CTLZ, VT, Legal);
1593 }
1594 } // Subtarget.hasCDI()
1595
1596 if (Subtarget.hasVPOPCNTDQ()) {
1597 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 })
1598 setOperationAction(ISD::CTPOP, VT, Legal);
1599 }
1600 }
1601
1602 // This block controls legalization of v32i1/v64i1, which are available with
1603 // AVX512BW. 512-bit v32i16 and v64i8 vector legalization is controlled with
1604 // useBWIRegs.
1605 if (!Subtarget.useSoftFloat() && Subtarget.hasBWI()) {
1606 addRegisterClass(MVT::v32i1, &X86::VK32RegClass);
1607 addRegisterClass(MVT::v64i1, &X86::VK64RegClass);
1608
1609 for (auto VT : { MVT::v32i1, MVT::v64i1 }) {
1610 setOperationAction(ISD::ADD, VT, Custom);
1611 setOperationAction(ISD::SUB, VT, Custom);
1612 setOperationAction(ISD::MUL, VT, Custom);
1613 setOperationAction(ISD::VSELECT, VT, Expand);
1614 setOperationAction(ISD::UADDSAT, VT, Custom);
1615 setOperationAction(ISD::SADDSAT, VT, Custom);
1616 setOperationAction(ISD::USUBSAT, VT, Custom);
1617 setOperationAction(ISD::SSUBSAT, VT, Custom);
1618
1619 setOperationAction(ISD::TRUNCATE, VT, Custom);
1620 setOperationAction(ISD::SETCC, VT, Custom);
1621 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1622 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1623 setOperationAction(ISD::SELECT, VT, Custom);
1624 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1625 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1626 }
1627
1628 setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i1, Custom);
1629 setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i1, Custom);
1630 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32i1, Custom);
1631 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64i1, Custom);
1632 for (auto VT : { MVT::v16i1, MVT::v32i1 })
1633 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
1634
1635 // Extends from v32i1 masks to 256-bit vectors.
1636 setOperationAction(ISD::SIGN_EXTEND, MVT::v32i8, Custom);
1637 setOperationAction(ISD::ZERO_EXTEND, MVT::v32i8, Custom);
1638 setOperationAction(ISD::ANY_EXTEND, MVT::v32i8, Custom);
1639 }
1640
1641 // This block controls legalization for v32i16 and v64i8. 512-bits can be
1642 // disabled based on prefer-vector-width and required-vector-width function
1643 // attributes.
1644 if (!Subtarget.useSoftFloat() && Subtarget.useBWIRegs()) {
1645 addRegisterClass(MVT::v32i16, &X86::VR512RegClass);
1646 addRegisterClass(MVT::v64i8, &X86::VR512RegClass);
1647
1648 // Extends from v64i1 masks to 512-bit vectors.
1649 setOperationAction(ISD::SIGN_EXTEND, MVT::v64i8, Custom);
1650 setOperationAction(ISD::ZERO_EXTEND, MVT::v64i8, Custom);
1651 setOperationAction(ISD::ANY_EXTEND, MVT::v64i8, Custom);
1652
1653 setOperationAction(ISD::MUL, MVT::v32i16, Legal);
1654 setOperationAction(ISD::MUL, MVT::v64i8, Custom);
1655 setOperationAction(ISD::MULHS, MVT::v32i16, Legal);
1656 setOperationAction(ISD::MULHU, MVT::v32i16, Legal);
1657 setOperationAction(ISD::MULHS, MVT::v64i8, Custom);
1658 setOperationAction(ISD::MULHU, MVT::v64i8, Custom);
1659 setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i16, Custom);
1660 setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i8, Custom);
1661 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32i16, Legal);
1662 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64i8, Legal);
1663 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v32i16, Custom);
1664 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v64i8, Custom);
1665 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v32i16, Custom);
1666 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v64i8, Custom);
1667 setOperationAction(ISD::SIGN_EXTEND, MVT::v32i16, Custom);
1668 setOperationAction(ISD::ZERO_EXTEND, MVT::v32i16, Custom);
1669 setOperationAction(ISD::ANY_EXTEND, MVT::v32i16, Custom);
1670 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v32i16, Custom);
1671 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v64i8, Custom);
1672 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v32i16, Custom);
1673 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v64i8, Custom);
1674 setOperationAction(ISD::TRUNCATE, MVT::v32i8, Custom);
1675 setOperationAction(ISD::BITREVERSE, MVT::v64i8, Custom);
1676
1677 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v32i16, Custom);
1678 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, MVT::v32i16, Custom);
1679
1680 setTruncStoreAction(MVT::v32i16, MVT::v32i8, Legal);
1681
1682 for (auto VT : { MVT::v64i8, MVT::v32i16 }) {
1683 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1684 setOperationAction(ISD::VSELECT, VT, Custom);
1685 setOperationAction(ISD::ABS, VT, Legal);
1686 setOperationAction(ISD::SRL, VT, Custom);
1687 setOperationAction(ISD::SHL, VT, Custom);
1688 setOperationAction(ISD::SRA, VT, Custom);
1689 setOperationAction(ISD::MLOAD, VT, Legal);
1690 setOperationAction(ISD::MSTORE, VT, Legal);
1691 setOperationAction(ISD::CTPOP, VT, Custom);
1692 setOperationAction(ISD::CTLZ, VT, Custom);
1693 setOperationAction(ISD::SMAX, VT, Legal);
1694 setOperationAction(ISD::UMAX, VT, Legal);
1695 setOperationAction(ISD::SMIN, VT, Legal);
1696 setOperationAction(ISD::UMIN, VT, Legal);
1697 setOperationAction(ISD::SETCC, VT, Custom);
1698 setOperationAction(ISD::UADDSAT, VT, Legal);
1699 setOperationAction(ISD::SADDSAT, VT, Legal);
1700 setOperationAction(ISD::USUBSAT, VT, Legal);
1701 setOperationAction(ISD::SSUBSAT, VT, Legal);
1702
1703 // The condition codes aren't legal in SSE/AVX and under AVX512 we use
1704 // setcc all the way to isel and prefer SETGT in some isel patterns.
1705 setCondCodeAction(ISD::SETLT, VT, Custom);
1706 setCondCodeAction(ISD::SETLE, VT, Custom);
1707 }
1708
1709 for (auto ExtType : {ISD::ZEXTLOAD, ISD::SEXTLOAD}) {
1710 setLoadExtAction(ExtType, MVT::v32i16, MVT::v32i8, Legal);
1711 }
1712
1713 if (Subtarget.hasBITALG()) {
1714 for (auto VT : { MVT::v64i8, MVT::v32i16 })
1715 setOperationAction(ISD::CTPOP, VT, Legal);
1716 }
1717
1718 if (Subtarget.hasVBMI2()) {
1719 setOperationAction(ISD::FSHL, MVT::v32i16, Custom);
1720 setOperationAction(ISD::FSHR, MVT::v32i16, Custom);
1721 }
1722 }
1723
1724 if (!Subtarget.useSoftFloat() && Subtarget.hasBWI()) {
1725 for (auto VT : { MVT::v32i8, MVT::v16i8, MVT::v16i16, MVT::v8i16 }) {
1726 setOperationAction(ISD::MLOAD, VT, Subtarget.hasVLX() ? Legal : Custom);
1727 setOperationAction(ISD::MSTORE, VT, Subtarget.hasVLX() ? Legal : Custom);
1728 }
1729
1730 // These operations are handled on non-VLX by artificially widening in
1731 // isel patterns.
1732 // TODO: Custom widen in lowering on non-VLX and drop the isel patterns?
1733
1734 if (Subtarget.hasBITALG()) {
1735 for (auto VT : { MVT::v16i8, MVT::v32i8, MVT::v8i16, MVT::v16i16 })
1736 setOperationAction(ISD::CTPOP, VT, Legal);
1737 }
1738 }
1739
1740 if (!Subtarget.useSoftFloat() && Subtarget.hasVLX()) {
1741 setTruncStoreAction(MVT::v4i64, MVT::v4i8, Legal);
1742 setTruncStoreAction(MVT::v4i64, MVT::v4i16, Legal);
1743 setTruncStoreAction(MVT::v4i64, MVT::v4i32, Legal);
1744 setTruncStoreAction(MVT::v8i32, MVT::v8i8, Legal);
1745 setTruncStoreAction(MVT::v8i32, MVT::v8i16, Legal);
1746
1747 setTruncStoreAction(MVT::v2i64, MVT::v2i8, Legal);
1748 setTruncStoreAction(MVT::v2i64, MVT::v2i16, Legal);
1749 setTruncStoreAction(MVT::v2i64, MVT::v2i32, Legal);
1750 setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal);
1751 setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
1752
1753 if (Subtarget.hasDQI()) {
1754 // Fast v2f32 SINT_TO_FP( v2i64 ) custom conversion.
1755 // v2f32 UINT_TO_FP is already custom under SSE2.
1756 setOperationAction(ISD::SINT_TO_FP, MVT::v2f32, Custom);
1757 assert(isOperationCustom(ISD::UINT_TO_FP, MVT::v2f32) &&
1758        "Unexpected operation action!");
1759 // v2i64 FP_TO_S/UINT(v2f32) custom conversion.
1760 setOperationAction(ISD::FP_TO_SINT, MVT::v2f32, Custom);
1761 setOperationAction(ISD::FP_TO_UINT, MVT::v2f32, Custom);
1762 }
1763
1764 if (Subtarget.hasBWI()) {
1765 setTruncStoreAction(MVT::v16i16, MVT::v16i8, Legal);
1766 setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
1767 }
1768
1769 if (Subtarget.hasVBMI2()) {
1770 // TODO: Make these legal even without VLX?
1771 for (auto VT : { MVT::v8i16, MVT::v4i32, MVT::v2i64,
1772 MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1773 setOperationAction(ISD::FSHL, VT, Custom);
1774 setOperationAction(ISD::FSHR, VT, Custom);
1775 }
1776 }
1777 }
1778
1779 // We want to custom lower some of our intrinsics.
1780 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
1781 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
1782 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
1783 if (!Subtarget.is64Bit()) {
1784 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
1785 }
1786
1787 // Only custom-lower 64-bit SADDO and friends on 64-bit because we don't
1788 // handle type legalization for these operations here.
1789 //
1790 // FIXME: We really should do custom legalization for addition and
1791 // subtraction on x86-32 once PR3203 is fixed. We really can't do much better
1792 // than generic legalization for 64-bit multiplication-with-overflow, though.
1793 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
1794 if (VT == MVT::i64 && !Subtarget.is64Bit())
1795 continue;
1796 // Add/Sub/Mul with overflow operations are custom lowered.
1797 setOperationAction(ISD::SADDO, VT, Custom);
1798 setOperationAction(ISD::UADDO, VT, Custom);
1799 setOperationAction(ISD::SSUBO, VT, Custom);
1800 setOperationAction(ISD::USUBO, VT, Custom);
1801 setOperationAction(ISD::SMULO, VT, Custom);
1802 setOperationAction(ISD::UMULO, VT, Custom);
1803
1804 // Support carry in as value rather than glue.
1805 setOperationAction(ISD::ADDCARRY, VT, Custom);
1806 setOperationAction(ISD::SUBCARRY, VT, Custom);
1807 setOperationAction(ISD::SETCCCARRY, VT, Custom);
1808 }
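
The SADDO/UADDO family set up above usually enters the DAG from the llvm.*.with.overflow intrinsics. A minimal sketch of source that produces such a node, assuming Clang's __builtin_add_overflow builtin; it illustrates the input pattern only, not the custom lowering itself:

#include <climits>
#include <cstdio>

// Sketch: Clang typically lowers __builtin_add_overflow on signed ints to
// llvm.sadd.with.overflow, which reaches the DAG as ISD::SADDO and is then
// custom-lowered by the block above.
int main() {
  int Sum = 0;
  if (__builtin_add_overflow(INT_MAX, 1, &Sum))
    std::puts("signed 32-bit addition overflowed");
  return 0;
}
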
1809
1810 if (!Subtarget.is64Bit()) {
1811 // These libcalls are not available in 32-bit.
1812 setLibcallName(RTLIB::SHL_I128, nullptr);
1813 setLibcallName(RTLIB::SRL_I128, nullptr);
1814 setLibcallName(RTLIB::SRA_I128, nullptr);
1815 setLibcallName(RTLIB::MUL_I128, nullptr);
1816 }
1817
1818 // Combine sin / cos into _sincos_stret if it is available.
1819 if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
1820 getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) {
1821 setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
1822 setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
1823 }
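
For illustration, the source pattern this combine targets is simply a sin and a cos of the same argument; where the runtime provides the combined entry point, the two calls below can be fused into one sincos-style libcall. This is a sketch only, and sinAndCos is an invented name:

#include <cmath>

// Illustration: two libm calls on the same argument are the shape that the
// FSINCOS custom lowering can fuse into a single combined call.
static void sinAndCos(double X, double &S, double &C) {
  S = std::sin(X);
  C = std::cos(X);
}
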
1824
1825 if (Subtarget.isTargetWin64()) {
1826 setOperationAction(ISD::SDIV, MVT::i128, Custom);
1827 setOperationAction(ISD::UDIV, MVT::i128, Custom);
1828 setOperationAction(ISD::SREM, MVT::i128, Custom);
1829 setOperationAction(ISD::UREM, MVT::i128, Custom);
1830 setOperationAction(ISD::SDIVREM, MVT::i128, Custom);
1831 setOperationAction(ISD::UDIVREM, MVT::i128, Custom);
1832 }
1833
1834 // On 32 bit MSVC, `fmodf(f32)` is not defined - only `fmod(f64)`
1835 // is. We should promote the value to 64-bits to solve this.
1836 // This is what the CRT headers do - `fmodf` is an inline header
1837 // function casting to f64 and calling `fmod`.
1838 if (Subtarget.is32Bit() && (Subtarget.isTargetKnownWindowsMSVC() ||
1839 Subtarget.isTargetWindowsItanium()))
1840 for (ISD::NodeType Op :
1841 {ISD::FCEIL, ISD::FCOS, ISD::FEXP, ISD::FFLOOR, ISD::FREM, ISD::FLOG,
1842 ISD::FLOG10, ISD::FPOW, ISD::FSIN})
1843 if (isOperationExpand(Op, MVT::f32))
1844 setOperationAction(Op, MVT::f32, Promote);
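
A minimal sketch of what the Promote action amounts to for f32 FREM here, mirroring the CRT-header behaviour the comment describes; fmodfViaPromote is an illustrative name, not part of the source:

#include <cmath>

// Sketch: compute the float remainder by widening to double, calling fmod,
// and truncating back -- the same shape the Promote action produces.
static float fmodfViaPromote(float X, float Y) {
  return static_cast<float>(std::fmod(static_cast<double>(X),
                                      static_cast<double>(Y)));
}
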
1845
1846 // We have target-specific dag combine patterns for the following nodes:
1847 setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
1848 setTargetDAGCombine(ISD::SCALAR_TO_VECTOR);
1849 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
1850 setTargetDAGCombine(ISD::CONCAT_VECTORS);
1851 setTargetDAGCombine(ISD::INSERT_SUBVECTOR);
1852 setTargetDAGCombine(ISD::EXTRACT_SUBVECTOR);
1853 setTargetDAGCombine(ISD::BITCAST);
1854 setTargetDAGCombine(ISD::VSELECT);
1855 setTargetDAGCombine(ISD::SELECT);
1856 setTargetDAGCombine(ISD::SHL);
1857 setTargetDAGCombine(ISD::SRA);
1858 setTargetDAGCombine(ISD::SRL);
1859 setTargetDAGCombine(ISD::OR);
1860 setTargetDAGCombine(ISD::AND);
1861 setTargetDAGCombine(ISD::ADD);
1862 setTargetDAGCombine(ISD::FADD);
1863 setTargetDAGCombine(ISD::FSUB);
1864 setTargetDAGCombine(ISD::FNEG);
1865 setTargetDAGCombine(ISD::FMA);
1866 setTargetDAGCombine(ISD::FMINNUM);
1867 setTargetDAGCombine(ISD::FMAXNUM);
1868 setTargetDAGCombine(ISD::SUB);
1869 setTargetDAGCombine(ISD::LOAD);
1870 setTargetDAGCombine(ISD::MLOAD);
1871 setTargetDAGCombine(ISD::STORE);
1872 setTargetDAGCombine(ISD::MSTORE);
1873 setTargetDAGCombine(ISD::TRUNCATE);
1874 setTargetDAGCombine(ISD::ZERO_EXTEND);
1875 setTargetDAGCombine(ISD::ANY_EXTEND);
1876 setTargetDAGCombine(ISD::SIGN_EXTEND);
1877 setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
1878 setTargetDAGCombine(ISD::ANY_EXTEND_VECTOR_INREG);
1879 setTargetDAGCombine(ISD::ZERO_EXTEND_VECTOR_INREG);
1880 setTargetDAGCombine(ISD::SINT_TO_FP);
1881 setTargetDAGCombine(ISD::UINT_TO_FP);
1882 setTargetDAGCombine(ISD::SETCC);
1883 setTargetDAGCombine(ISD::MUL);
1884 setTargetDAGCombine(ISD::XOR);
1885 setTargetDAGCombine(ISD::MSCATTER);
1886 setTargetDAGCombine(ISD::MGATHER);
1887
1888 computeRegisterProperties(Subtarget.getRegisterInfo());
1889
1890 MaxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
1891 MaxStoresPerMemsetOptSize = 8;
1892 MaxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores
1893 MaxStoresPerMemcpyOptSize = 4;
1894 MaxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores
1895 MaxStoresPerMemmoveOptSize = 4;
1896
1897 // TODO: These control memcmp expansion in CGP and could be raised higher, but
1898 // that needs to be benchmarked and balanced with the potential use of vector
1899 // load/store types (PR33329, PR33914).
1900 MaxLoadsPerMemcmp = 2;
1901 MaxLoadsPerMemcmpOptSize = 2;
1902
1903 // Set loop alignment to 2^ExperimentalPrefLoopAlignment bytes (default: 2^4).
1904 setPrefLoopAlignment(ExperimentalPrefLoopAlignment);
1905
1906 // An out-of-order CPU can speculatively execute past a predictable branch,
1907 // but a conditional move could be stalled by an expensive earlier operation.
1908 PredictableSelectIsExpensive = Subtarget.getSchedModel().isOutOfOrder();
1909 EnableExtLdPromotion = true;
1910 setPrefFunctionAlignment(4); // 2^4 bytes.
1911
1912 verifyIntrinsicTables();
1913}
1914
1915// This has so far only been implemented for 64-bit MachO.
1916bool X86TargetLowering::useLoadStackGuardNode() const {
1917 return Subtarget.isTargetMachO() && Subtarget.is64Bit();
1918}
1919
1920bool X86TargetLowering::useStackGuardXorFP() const {
1921 // Currently only MSVC CRTs XOR the frame pointer into the stack guard value.
1922 return Subtarget.getTargetTriple().isOSMSVCRT();
1923}
1924
1925SDValue X86TargetLowering::emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
1926 const SDLoc &DL) const {
1927 EVT PtrTy = getPointerTy(DAG.getDataLayout());
1928 unsigned XorOp = Subtarget.is64Bit() ? X86::XOR64_FP : X86::XOR32_FP;
1929 MachineSDNode *Node = DAG.getMachineNode(XorOp, DL, PtrTy, Val);
1930 return SDValue(Node, 0);
1931}
1932
1933TargetLoweringBase::LegalizeTypeAction
1934X86TargetLowering::getPreferredVectorAction(MVT VT) const {
1935 if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI())
1936 return TypeSplitVector;
1937
1938 if (ExperimentalVectorWideningLegalization &&
1939 VT.getVectorNumElements() != 1 &&
1940 VT.getVectorElementType() != MVT::i1)
1941 return TypeWidenVector;
1942
1943 return TargetLoweringBase::getPreferredVectorAction(VT);
1944}
1945
1946MVT X86TargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
1947 CallingConv::ID CC,
1948 EVT VT) const {
1949 if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI())
1950 return MVT::v32i8;
1951 return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
1952}
1953
1954unsigned X86TargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
1955 CallingConv::ID CC,
1956 EVT VT) const {
1957 if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI())
1958 return 1;
1959 return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
1960}
1961
1962EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL,
1963 LLVMContext& Context,
1964 EVT VT) const {
1965 if (!VT.isVector())
1966 return MVT::i8;
1967
1968 if (Subtarget.hasAVX512()) {
1969 const unsigned NumElts = VT.getVectorNumElements();
1970
1971 // Figure out what this type will be legalized to.
1972 EVT LegalVT = VT;
1973 while (getTypeAction(Context, LegalVT) != TypeLegal)
1974 LegalVT = getTypeToTransformTo(Context, LegalVT);
1975
1976 // If we got a 512-bit vector then we'll definitely have a vXi1 compare.
1977 if (LegalVT.getSimpleVT().is512BitVector())
1978 return EVT::getVectorVT(Context, MVT::i1, NumElts);
1979
1980 if (LegalVT.getSimpleVT().isVector() && Subtarget.hasVLX()) {
1981 // If we legalized to less than a 512-bit vector, then we will use a vXi1
1982 // compare for vXi32/vXi64 for sure. If we have BWI we will also support
1983 // vXi16/vXi8.
1984 MVT EltVT = LegalVT.getSimpleVT().getVectorElementType();
1985 if (Subtarget.hasBWI() || EltVT.getSizeInBits() >= 32)
1986 return EVT::getVectorVT(Context, MVT::i1, NumElts);
1987 }
1988 }
1989
1990 return VT.changeVectorElementTypeToInteger();
1991}
1992
1993/// Helper for getByValTypeAlignment to determine
1994/// the desired ByVal argument alignment.
1995static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign) {
1996 if (MaxAlign == 16)
1997 return;
1998 if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
1999 if (VTy->getBitWidth() == 128)
2000 MaxAlign = 16;
2001 } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
2002 unsigned EltAlign = 0;
2003 getMaxByValAlign(ATy->getElementType(), EltAlign);
2004 if (EltAlign > MaxAlign)
2005 MaxAlign = EltAlign;
2006 } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
2007 for (auto *EltTy : STy->elements()) {
2008 unsigned EltAlign = 0;
2009 getMaxByValAlign(EltTy, EltAlign);
2010 if (EltAlign > MaxAlign)
2011 MaxAlign = EltAlign;
2012 if (MaxAlign == 16)
2013 break;
2014 }
2015 }
2016}
2017
2018/// Return the desired alignment for ByVal aggregate
2019/// function arguments in the caller parameter area. For X86, aggregates
2020/// that contain SSE vectors are placed at 16-byte boundaries while the rest
2021/// are at 4-byte boundaries.
2022unsigned X86TargetLowering::getByValTypeAlignment(Type *Ty,
2023 const DataLayout &DL) const {
2024 if (Subtarget.is64Bit()) {
2025 // Max of 8 and alignment of type.
2026 unsigned TyAlign = DL.getABITypeAlignment(Ty);
2027 if (TyAlign > 8)
2028 return TyAlign;
2029 return 8;
2030 }
2031
2032 unsigned Align = 4;
2033 if (Subtarget.hasSSE1())
2034 getMaxByValAlign(Ty, Align);
2035 return Align;
2036}
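
A hedged illustration of these rules on 32-bit x86 (the struct names are invented for the example): an aggregate containing a 128-bit SSE vector is placed at a 16-byte boundary, while a plain integer aggregate stays at 4 bytes.

#include <xmmintrin.h>

// Example aggregates, named only for this sketch.
struct WithSSEVector { __m128 V; };    // byval alignment 16: contains a 128-bit vector
struct PlainIntegers { int A, B, C; }; // byval alignment 4 on 32-bit x86

static_assert(alignof(WithSSEVector) == 16,
              "the SSE vector member forces 16-byte alignment");
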
2037
2038 /// Returns the target-specific optimal type for load
2039 /// and store operations as a result of memset, memcpy, and memmove
2040 /// lowering. If DstAlign is zero, it is safe to assume the destination
2041 /// alignment can satisfy any constraint. Similarly, if SrcAlign is zero there
2042 /// is no need to check it against an alignment requirement,
2043 /// probably because the source does not need to be loaded. If 'IsMemset' is
2044/// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
2045/// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
2046/// source is constant so it does not need to be loaded.
2047/// It returns EVT::Other if the type should be determined using generic
2048/// target-independent logic.
2049/// For vector ops we check that the overall size isn't larger than our
2050/// preferred vector width.
2051EVT X86TargetLowering::getOptimalMemOpType(
2052 uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool IsMemset,
2053 bool ZeroMemset, bool MemcpyStrSrc,
2054 const AttributeList &FuncAttributes) const {
2055 if (!FuncAttributes.hasFnAttribute(Attribute::NoImplicitFloat)) {
2056 if (Size >= 16 && (!Subtarget.isUnalignedMem16Slow() ||
2057 ((DstAlign == 0 || DstAlign >= 16) &&
2058 (SrcAlign == 0 || SrcAlign >= 16)))) {
2059 // FIXME: Check if unaligned 32-byte accesses are slow.
2060 if (Size >= 32 && Subtarget.hasAVX() &&
2061 (Subtarget.getPreferVectorWidth() >= 256)) {
2062 // Although this isn't a well-supported type for AVX1, we'll let
2063 // legalization and shuffle lowering produce the optimal codegen. If we
2064 // choose an optimal type with a vector element larger than a byte,
2065 // getMemsetStores() may create an intermediate splat (using an integer
2066 // multiply) before we splat as a vector.
2067 return MVT::v32i8;
2068 }
2069 if (Subtarget.hasSSE2() && (Subtarget.getPreferVectorWidth() >= 128))
2070 return MVT::v16i8;
2071 // TODO: Can SSE1 handle a byte vector?
2072 // If we have SSE1 registers we should be able to use them.
2073 if (Subtarget.hasSSE1() && (Subtarget.is64Bit() || Subtarget.hasX87()) &&
2074 (Subtarget.getPreferVectorWidth() >= 128))
2075 return MVT::v4f32;
2076 } else if ((!IsMemset || ZeroMemset) && !MemcpyStrSrc && Size >= 8 &&
2077 !Subtarget.is64Bit() && Subtarget.hasSSE2()) {
2078 // Do not use f64 to lower memcpy if source is string constant. It's
2079 // better to use i32 to avoid the loads.
2080 // Also, do not use f64 to lower memset unless this is a memset of zeros.
2081 // The gymnastics of splatting a byte value into an XMM register and then
2082 // only using 8-byte stores (because this is a CPU with slow unaligned
2083 // 16-byte accesses) makes that a loser.
2084 return MVT::f64;
2085 }
2086 }
2087 // This is a compromise. If we reach here, unaligned accesses may be slow on
2088 // this target. However, creating smaller, aligned accesses could be even
2089 // slower and would certainly be a lot more code.
2090 if (Subtarget.is64Bit() && Size >= 8)
2091 return MVT::i64;
2092 return MVT::i32;
2093}
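
The decision order above can be restated as a stripped-down standalone function. pickMemOpType, MemOpTy and the boolean flags are stand-ins for the real Subtarget and alignment queries, and the memset/string-source refinements are omitted, so treat this only as a sketch:

#include <cstdint>

enum class MemOpTy { V32I8, V16I8, V4F32, F64, I64, I32 };

// Simplified restatement of the selection order in getOptimalMemOpType.
static MemOpTy pickMemOpType(uint64_t Size, bool FastUnaligned16, bool HasAVX,
                             bool HasSSE2, bool HasSSE1, bool Is64Bit) {
  if (Size >= 16 && FastUnaligned16) {
    if (Size >= 32 && HasAVX)
      return MemOpTy::V32I8;   // 256-bit vector ops
    if (HasSSE2)
      return MemOpTy::V16I8;   // 128-bit integer vector
    if (HasSSE1)
      return MemOpTy::V4F32;   // 128-bit FP vector fallback
  } else if (Size >= 8 && !Is64Bit && HasSSE2) {
    return MemOpTy::F64;       // 8-byte FP stores on 32-bit targets
  }
  if (Is64Bit && Size >= 8)
    return MemOpTy::I64;
  return MemOpTy::I32;
}
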
2094
2095bool X86TargetLowering::isSafeMemOpType(MVT VT) const {
2096 if (VT == MVT::f32)
2097 return X86ScalarSSEf32;
2098 else if (VT == MVT::f64)
2099 return X86ScalarSSEf64;
2100 return true;
2101}
2102
2103bool
2104X86TargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
2105 unsigned,
2106 unsigned,
2107 bool *Fast) const {
2108 if (Fast) {
2109 switch (VT.getSizeInBits()) {
2110 default:
2111 // 8-byte and under are always assumed to be fast.
2112 *Fast = true;
2113 break;
2114 case 128:
2115 *Fast = !Subtarget.isUnalignedMem16Slow();
2116 break;
2117 case 256:
2118 *Fast = !Subtarget.isUnalignedMem32Slow();
2119 break;
2120 // TODO: What about AVX-512 (512-bit) accesses?
2121 }
2122 }
2123 // Misaligned accesses of any size are always allowed.
2124 return true;
2125}
2126
2127/// Return the entry encoding for a jump table in the
2128/// current function. The returned value is a member of the
2129/// MachineJumpTableInfo::JTEntryKind enum.
2130unsigned X86TargetLowering::getJumpTableEncoding() const {
2131 // In GOT pic mode, each entry in the jump table is emitted as a @GOTOFF
2132 // symbol.
2133 if (isPositionIndependent() && Subtarget.isPICStyleGOT())
2134 return MachineJumpTableInfo::EK_Custom32;
2135
2136 // Otherwise, use the normal jump table encoding heuristics.
2137 return TargetLowering::getJumpTableEncoding();
2138}
2139
2140bool X86TargetLowering::useSoftFloat() const {
2141 return Subtarget.useSoftFloat();
2142}
2143
2144void X86TargetLowering::markLibCallAttributes(MachineFunction *MF, unsigned CC,
2145 ArgListTy &Args) const {
2146
2147 // Only relabel X86-32 for C / Stdcall CCs.
2148 if (Subtarget.is64Bit())
2149 return;
2150 if (CC != CallingConv::C && CC != CallingConv::X86_StdCall)
2151 return;
2152 unsigned ParamRegs = 0;
2153 if (auto *M = MF->getFunction().getParent())
2154 ParamRegs = M->getNumberRegisterParameters();
2155
2156 // Mark the first N integer arguments as being passed in registers.
2157 for (unsigned Idx = 0; Idx < Args.size(); Idx++) {
2158 Type *T = Args[Idx].Ty;
2159 if (T->isIntOrPtrTy())
2160 if (MF->getDataLayout().getTypeAllocSize(T) <= 8) {
2161 unsigned numRegs = 1;
2162 if (MF->getDataLayout().getTypeAllocSize(T) > 4)
2163 numRegs = 2;
2164 if (ParamRegs < numRegs)
2165 return;
2166 ParamRegs -= numRegs;
2167 Args[Idx].IsInReg = true;
2168 }
2169 }
2170}
2171
2172const MCExpr *
2173X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
2174 const MachineBasicBlock *MBB,
2175 unsigned uid, MCContext &Ctx) const {
2176 assert(isPositionIndependent() && Subtarget.isPICStyleGOT());
2177 // In 32-bit ELF systems, our jump table entries are formed with @GOTOFF
2178 // entries.
2179 return MCSymbolRefExpr::create(MBB->getSymbol(),
2180 MCSymbolRefExpr::VK_GOTOFF, Ctx);
2181}
2182
2183/// Returns relocation base for the given PIC jumptable.
2184SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table,
2185 SelectionDAG &DAG) const {
2186 if (!Subtarget.is64Bit())
2187 // This doesn't have SDLoc associated with it, but is not really the
2188 // same as a Register.
2189 return DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
2190 getPointerTy(DAG.getDataLayout()));
2191 return Table;
2192}
2193
2194/// This returns the relocation base for the given PIC jumptable,
2195/// the same as getPICJumpTableRelocBase, but as an MCExpr.
2196const MCExpr *X86TargetLowering::
2197getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI,
2198 MCContext &Ctx) const {
2199 // X86-64 uses RIP relative addressing based on the jump table label.
2200 if (Subtarget.isPICStyleRIPRel())
2201 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
2202
2203 // Otherwise, the reference is relative to the PIC base.
2204 return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
2205}
2206
2207std::pair<const TargetRegisterClass *, uint8_t>
2208X86TargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
2209 MVT VT) const {
2210 const TargetRegisterClass *RRC = nullptr;
2211 uint8_t Cost = 1;
2212 switch (VT.SimpleTy) {
2213 default:
2214 return TargetLowering::findRepresentativeClass(TRI, VT);
2215 case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64:
2216 RRC = Subtarget.is64Bit() ? &X86::GR64RegClass : &X86::GR32RegClass;
2217 break;
2218 case MVT::x86mmx:
2219 RRC = &X86::VR64RegClass;
2220 break;
2221 case MVT::f32: case MVT::f64:
2222 case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
2223 case MVT::v4f32: case MVT::v2f64:
2224 case MVT::v32i8: case MVT::v16i16: case MVT::v8i32: case MVT::v4i64:
2225 case MVT::v8f32: case MVT::v4f64:
2226 case MVT::v64i8: case MVT::v32i16: case MVT::v16i32: case MVT::v8i64:
2227 case MVT::v16f32: case MVT::v8f64:
2228 RRC = &X86::VR128XRegClass;
2229 break;
2230 }
2231 return std::make_pair(RRC, Cost);
2232}
2233
2234unsigned X86TargetLowering::getAddressSpace() const {
2235 if (Subtarget.is64Bit())
2236 return (getTargetMachine().getCodeModel() == CodeModel::Kernel) ? 256 : 257;
2237 return 256;
2238}
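
For context, address spaces 256 and 257 correspond to the %gs and %fs segments in the X86 backend. A hedged, Clang-only illustration using the address_space extension (FsPtr and loadThroughFS are invented names):

// Clang extension, x86 only: a pointer into address space 257 is dereferenced
// through the %fs segment (256 would be %gs). Illustration only.
typedef unsigned long __attribute__((address_space(257))) *FsPtr;

static unsigned long loadThroughFS(FsPtr P) { return *P; }
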
2239
2240static bool hasStackGuardSlotTLS(const Triple &TargetTriple) {
2241 return TargetTriple.isOSGlibc() || TargetTriple.isOSFuchsia() ||
2242 (TargetTriple.isAndroid() && !TargetTriple.isAndroidVersionLT(17));
2243}
2244
2245static Constant* SegmentOffset(IRBuilder<> &IRB,
2246 unsigned Offset, unsigned AddressSpace) {
2247 return ConstantExpr::getIntToPtr(
2248 ConstantInt::get(Type::getInt32Ty(IRB.getContext()), Offset),
2249 Type::getInt8PtrTy(IRB.getContext())->getPointerTo(AddressSpace));
2250}
2251
2252Value *X86TargetLowering::getIRStackGuard(IRBuilder<> &IRB) const {
2253 // glibc, bionic, and Fuchsia have a special slot for the stack guard in
2254 // tcbhead_t; use it instead of the usual global variable (see
2255 // sysdeps/{i386,x86_64}/nptl/tls.h)
2256 if (hasStackGuardSlotTLS(Subtarget.getTargetTriple())) {
2257 if (Subtarget.isTargetFuchsia()) {
2258 // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
2259 return SegmentOffset(IRB, 0x10, getAddressSpace());
2260 } else {
2261 // %fs:0x28, unless we're using a Kernel code model, in which case
2262 // it's %gs:0x28. It's %gs:0x14 on i386.
2263 unsigned Offset = (Subtarget.is64Bit()) ? 0x28 : 0x14;
2264 return SegmentOffset(IRB, Offset, getAddressSpace());
2265 }
2266 }
2267
2268 return TargetLowering::getIRStackGuard(IRB);
2269}
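
A hedged illustration of the TLS slot described above, using GNU inline asm on x86-64 (on i386 the slot is %gs:0x14). readStackGuardX8664 is an invented name and the snippet is for demonstration only:

#include <cstdint>

// Read the glibc stack guard from its tcbhead_t slot at %fs:0x28.
static uintptr_t readStackGuardX8664() {
  uintptr_t Guard;
  asm("movq %%fs:0x28, %0" : "=r"(Guard));
  return Guard;
}
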
2270
2271void X86TargetLowering::insertSSPDeclarations(Module &M) const {
2272 // The MSVC CRT provides functionality for stack protection.
2273 if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
2274 Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
2275 // MSVC CRT has a global variable holding security cookie.
2276 M.getOrInsertGlobal("__security_cookie",
2277 Type::getInt8PtrTy(M.getContext()));
2278
2279 // MSVC CRT has a function to validate security cookie.
2280 FunctionCallee SecurityCheckCookie = M.getOrInsertFunction(
2281 "__security_check_cookie", Type::getVoidTy(M.getContext()),
2282 Type::getInt8PtrTy(M.getContext()));
2283 if (Function *F = dyn_cast<Function>(SecurityCheckCookie.getCallee())) {
2284 F->setCallingConv(CallingConv::X86_FastCall);
2285 F->addAttribute(1, Attribute::AttrKind::InReg);
2286 }
2287 return;
2288 }
2289 // glibc, bionic, and Fuchsia have a special slot for the stack guard.
2290 if (hasStackGuardSlotTLS(Subtarget.getTargetTriple()))
2291 return;
2292 TargetLowering::insertSSPDeclarations(M);
2293}
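
For reference, a hedged sketch of how the CRT side declares the two symbols this function materializes, assuming an MSVC-compatible x86 compiler; the exact CRT prototypes may differ:

#include <cstdint>

// Sketch of the CRT-side declarations (MSVC-style __fastcall, matching the
// X86_FastCall calling convention and InReg attribute set above).
extern "C" uintptr_t __security_cookie;
extern "C" void __fastcall __security_check_cookie(uintptr_t StackCookie);
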
2294
2295Value *X86TargetLowering::getSDagStackGuard(const Module &M) const {
2296 // MSVC CRT has a global variable holding security cookie.
2297 if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
2298 Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
2299 return M.getGlobalVariable("__security_cookie");
2300 }
2301 return TargetLowering::getSDagStackGuard(M);
2302}
2303
2304Function *X86TargetLowering::getSSPStackGuardCheck(const Module &M) const {
2305 // MSVC CRT has a function to validate security cookie.
2306 if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
2307 Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
2308 return M.getFunction("__security_check_cookie");
2309 }
2310 return TargetLowering::getSSPStackGuardCheck(M);
2311}
2312
2313Value *X86TargetLowering::getSafeStackPointerLocation(IRBuilder<> &IRB) const {
2314 if (Subtarget.getTargetTriple().isOSContiki())
2315 return getDefaultSafeStackPointerLocation(IRB, false);
2316
2317 // Android provides a fixed TLS slot for the SafeStack pointer. See the
2318 // definition of TLS_SLOT_SAFESTACK in
2319 // https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
2320 if (Subtarget.isTargetAndroid()) {
2321 // %fs:0x48, unless we're using a Kernel code model, in which case it's
2322 // %gs:0x48. It's %gs:0x24 on i386.
2323 unsigned Offset = (Subtarget.is64Bit()) ? 0x48 : 0x24;
2324 return SegmentOffset(IRB, Offset, getAddressSpace());
2325 }
2326
2327 // Fuchsia is similar.
2328 if (Subtarget.isTargetFuchsia()) {
2329 // <zircon/tls.h> defines ZX_TLS_UNSAFE_SP_OFFSET with this value.
2330 return SegmentOffset(IRB, 0x18, getAddressSpace());
2331 }
2332
2333 return TargetLowering::getSafeStackPointerLocation(IRB);
2334}
2335
2336bool X86TargetLowering::isNoopAddrSpaceCast(unsigned SrcAS,
2337 unsigned DestAS) const {
2338 assert(SrcAS != DestAS && "Expected different address spaces!");
2339
2340 return SrcAS < 256 && DestAS < 256;
2341}
2342
2343//===----------------------------------------------------------------------===//
2344// Return Value Calling Convention Implementation
2345//===----------------------------------------------------------------------===//
2346
2347bool X86TargetLowering::CanLowerReturn(
2348 CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
2349 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
2350 SmallVector<CCValAssign, 16> RVLocs;
2351 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
2352 return CCInfo.CheckReturn(Outs, RetCC_X86);
2353}
2354
2355const MCPhysReg *X86TargetLowering::getScratchRegisters(CallingConv::ID) const {
2356 static const MCPhysReg ScratchRegs[] = { X86::R11, 0 };
2357 return ScratchRegs;
2358}
2359
2360 /// Lowers mask values (v*i1) to the local register values.
2361/// \returns DAG node after lowering to register type
2362static SDValue lowerMasksToReg(const SDValue &ValArg, const EVT &ValLoc,
2363 const SDLoc &Dl, SelectionDAG &DAG) {
2364 EVT ValVT = ValArg.getValueType();
2365
2366 if (ValVT == MVT::v1i1)
2367 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, Dl, ValLoc, ValArg,
2368 DAG.getIntPtrConstant(0, Dl));
2369
2370 if ((ValVT == MVT::v8i1 && (ValLoc == MVT::i8 || ValLoc == MVT::i32)) ||
2371 (ValVT == MVT::v16i1 && (ValLoc == MVT::i16 || ValLoc == MVT::i32))) {
2372 // Two stage lowering might be required
2373 // bitcast: v8i1 -> i8 / v16i1 -> i16
2374 // anyextend: i8 -> i32 / i16 -> i32
2375 EVT TempValLoc = ValVT == MVT::v8i1 ? MVT::i8 : MVT::i16;
2376 SDValue ValToCopy = DAG.getBitcast(TempValLoc, ValArg);
2377 if (ValLoc == MVT::i32)
2378 ValToCopy = DAG.getNode(ISD::ANY_EXTEND, Dl, ValLoc, ValToCopy);
2379 return ValToCopy;
2380 }
2381
2382 if ((ValVT == MVT::v32i1 && ValLoc == MVT::i32) ||
2383 (ValVT == MVT::v64i1 && ValLoc == MVT::i64)) {
2384 // One stage lowering is required
2385 // bitcast: v32i1 -> i32 / v64i1 -> i64
2386 return DAG.getBitcast(ValLoc, ValArg);
2387 }
2388
2389 return DAG.getNode(ISD::ANY_EXTEND, Dl, ValLoc, ValArg);
2390}
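
The two-stage v8i1 path above has a simple scalar analogue, sketched here (lowerV8i1ToI32 is an invented name): pack the mask bits into a byte, then widen the byte.

#include <cstdint>

// Illustration: "bitcast" v8i1 -> i8 by packing the bits, then "any-extend"
// i8 -> i32 (zero-filling the high bits is one valid choice for an any-extend).
static uint32_t lowerV8i1ToI32(const bool Mask[8]) {
  uint8_t Packed = 0;
  for (int I = 0; I < 8; ++I)
    Packed = uint8_t(Packed | (uint8_t(Mask[I]) << I));
  return Packed;
}
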
2391
2392/// Breaks v64i1 value into two registers and adds the new node to the DAG
2393static void Passv64i1ArgInRegs(
2394 const SDLoc &Dl, SelectionDAG &DAG, SDValue Chain, SDValue &Arg,
2395 SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, CCValAssign &VA,
2396 CCValAssign &NextVA, const X86Subtarget &Subtarget) {
2397 assert(Subtarget.hasBWI() && "Expected AVX512BW target!");
2398 assert(Subtarget.is32Bit() && "Expecting 32 bit target");
2399 assert(Arg.getValueType() == MVT::i64 && "Expecting 64 bit value");
2400 assert(VA.isRegLoc() && NextVA.isRegLoc() &&
2401        "The value should reside in two registers");
2402
2403 // Before splitting the value we cast it to i64
2404 Arg = DAG.getBitcast(MVT::i64, Arg);
2405
2406 // Splitting the value into two i32 types
2407 SDValue Lo, Hi;
2408 Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32, Arg,
2409 DAG.getConstant(0, Dl, MVT::i32));
2410 Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32, Arg,
2411 DAG.getConstant(1, Dl, MVT::i32));
2412
2413 // Attach the two i32 types into corresponding registers
2414 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Lo));
2415 RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), Hi));
2416}
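
The split built above has a direct scalar analogue, sketched below (splitI64 is an invented name): the v64i1 mask is bitcast to i64 and handed to the 32-bit calling convention as a low/high register pair.

#include <cstdint>
#include <utility>

// Illustration of the EXTRACT_ELEMENT pair: low half in the first register,
// high half in the second.
static std::pair<uint32_t, uint32_t> splitI64(uint64_t Mask64) {
  uint32_t Lo = static_cast<uint32_t>(Mask64);
  uint32_t Hi = static_cast<uint32_t>(Mask64 >> 32);
  return {Lo, Hi};
}
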
2417
2418SDValue
2419X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
2420 bool isVarArg,
2421 const SmallVectorImpl<ISD::OutputArg> &Outs,
2422 const SmallVectorImpl<SDValue> &OutVals,
2423 const SDLoc &dl, SelectionDAG &DAG) const {
2424 MachineFunction &MF = DAG.getMachineFunction();
2425 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
2426
2427 // In some cases we need to disable registers from the default CSR list.
2428 // For example, when they are used for argument passing.
2429 bool ShouldDisableCalleeSavedRegister =
2430 CallConv == CallingConv::X86_RegCall ||
2431 MF.getFunction().hasFnAttribute("no_caller_saved_registers");
2432
2433 if (CallConv == CallingConv::X86_INTR && !Outs.empty())
2434 report_fatal_error("X86 interrupts may not return any value");
2435
2436 SmallVector<CCValAssign, 16> RVLocs;
2437 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.getContext());
2438 CCInfo.AnalyzeReturn(Outs, RetCC_X86);
2439
2440 SDValue Flag;
2441 SmallVector<SDValue, 6> RetOps;
2442 RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
2443 // Operand #1 = Bytes To Pop
2444 RetOps.push_back(DAG.getTargetConstant(FuncInfo->getBytesToPopOnReturn(), dl,
2445 MVT::i32));
2446
2447 // Copy the result values into the output registers.
2448 for (unsigned I = 0, OutsIndex = 0, E = RVLocs.size(); I != E;
2449 ++I, ++OutsIndex) {
2450 CCValAssign &VA = RVLocs[I];
2451 assert(VA.isRegLoc() && "Can only return in registers!");
2452
2453 // Add the register to the CalleeSaveDisableRegs list.
2454 if (ShouldDisableCalleeSavedRegister)
2455 MF.getRegInfo().disableCalleeSavedRegister(VA.getLocReg());
2456
2457 SDValue ValToCopy = OutVals[OutsIndex];
2458 EVT ValVT = ValToCopy.getValueType();
2459
2460 // Promote values to the appropriate types.
2461 if (VA.getLocInfo() == CCValAssign::SExt)
2462 ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy);
2463 else if (VA.getLocInfo() == CCValAssign::ZExt)
2464 ValToCopy = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), ValToCopy);
2465 else if (VA.getLocInfo() == CCValAssign::AExt) {
2466 if (ValVT.isVector() && ValVT.getVectorElementType() == MVT::i1)
2467 ValToCopy = lowerMasksToReg(ValToCopy, VA.getLocVT(), dl, DAG);
2468 else
2469 ValToCopy = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ValToCopy);
2470 }
2471 else if (VA.getLocInfo() == CCValAssign::BCvt)
2472 ValToCopy = DAG.getBitcast(VA.getLocVT(), ValToCopy);
2473
2474 assert(VA.getLocInfo() != CCValAssign::FPExt &&
2475        "Unexpected FP-extend for return value.");
2476
2477 // If this is x86-64, and we disabled SSE, we can't return FP values,
2478 // or SSE or MMX vectors.
2479 if ((ValVT == MVT::f32 || ValVT == MVT::f64 ||
2480 VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) &&
2481 (Subtarget.is64Bit() && !Subtarget.hasSSE1())) {
2482 errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
2483 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
2484 } else if (ValVT == MVT::f64 &&
2485 (Subtarget.is64Bit() && !Subtarget.hasSSE2())) {
2486 // Likewise we can't return F64 values with SSE1 only. gcc does so, but
2487 // llvm-gcc has never done it right and no one has noticed, so this
2488 // should be OK for now.
2489 errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");
2490 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
2491 }
2492
2493 // Returns in ST0/ST1 are handled specially: these are pushed as operands to
2494 // the RET instruction and handled by the FP Stackifier.
2495 if (VA.getLocReg() == X86::FP0 ||
2496 VA.getLocReg() == X86::FP1) {
2497 // If this is a copy from an xmm register to ST(0), use an FPExtend to
2498 // change the value to the FP stack register class.
2499 if (isScalarFPTypeInSSEReg(VA.getValVT()))
2500 ValToCopy = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f80, ValToCopy);
2501 RetOps.push_back(ValToCopy);
2502 // Don't emit a copytoreg.
2503 continue;
2504 }
2505
2506 // 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64
2507 // which is returned in RAX / RDX.
2508 if (Subtarget.is64Bit()) {
2509 if (ValVT == MVT::x86mmx) {
2510 if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
2511 ValToCopy = DAG.getBitcast(MVT::i64, ValToCopy);
2512 ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
2513 ValToCopy);
2514 // If we don't have SSE2 available, convert to v4f32 so the generated
2515 // register is legal.
2516 if (!Subtarget.hasSSE2())
2517 ValToCopy = DAG.getBitcast(MVT::v4f32, ValToCopy);
2518 }
2519 }
2520 }
2521
2522 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
2523
2524 if (VA.needsCustom()) {
2525      assert(VA.getValVT() == MVT::v64i1 &&
2526             "Currently the only custom case is when we split v64i1 to 2 regs");
2527
2528 Passv64i1ArgInRegs(dl, DAG, Chain, ValToCopy, RegsToPass, VA, RVLocs[++I],
2529 Subtarget);
2530
2531      assert(2 == RegsToPass.size() &&
2532             "Expecting two registers after Pass64BitArgInRegs");
2533
2534 // Add the second register to the CalleeSaveDisableRegs list.
2535 if (ShouldDisableCalleeSavedRegister)
2536 MF.getRegInfo().disableCalleeSavedRegister(RVLocs[I].getLocReg());
2537 } else {
2538 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
2539 }
2540
2541 // Add nodes to the DAG and add the values into the RetOps list
2542 for (auto &Reg : RegsToPass) {
2543 Chain = DAG.getCopyToReg(Chain, dl, Reg.first, Reg.second, Flag);
2544 Flag = Chain.getValue(1);
2545 RetOps.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
2546 }
2547 }
2548
2549  // The Swift calling convention does not require that we copy the sret
2550  // argument into %rax/%eax for the return, and SRetReturnReg is not set for Swift.
2551
2552 // All x86 ABIs require that for returning structs by value we copy
2553 // the sret argument into %rax/%eax (depending on ABI) for the return.
2554 // We saved the argument into a virtual register in the entry block,
2555 // so now we copy the value out and into %rax/%eax.
2556 //
2557 // Checking Function.hasStructRetAttr() here is insufficient because the IR
2558 // may not have an explicit sret argument. If FuncInfo.CanLowerReturn is
2559 // false, then an sret argument may be implicitly inserted in the SelDAG. In
2560 // either case FuncInfo->setSRetReturnReg() will have been called.
2561 if (unsigned SRetReg = FuncInfo->getSRetReturnReg()) {
2562 // When we have both sret and another return value, we should use the
2563 // original Chain stored in RetOps[0], instead of the current Chain updated
2564 // in the above loop. If we only have sret, RetOps[0] equals to Chain.
2565
2566 // For the case of sret and another return value, we have
2567 // Chain_0 at the function entry
2568 // Chain_1 = getCopyToReg(Chain_0) in the above loop
2569 // If we use Chain_1 in getCopyFromReg, we will have
2570 // Val = getCopyFromReg(Chain_1)
2571 // Chain_2 = getCopyToReg(Chain_1, Val) from below
2572
2573 // getCopyToReg(Chain_0) will be glued together with
2574 // getCopyToReg(Chain_1, Val) into Unit A, getCopyFromReg(Chain_1) will be
2575 // in Unit B, and we will have cyclic dependency between Unit A and Unit B:
2576 // Data dependency from Unit B to Unit A due to usage of Val in
2577 // getCopyToReg(Chain_1, Val)
2578 // Chain dependency from Unit A to Unit B
2579
2580 // So here, we use RetOps[0] (i.e Chain_0) for getCopyFromReg.
2581 SDValue Val = DAG.getCopyFromReg(RetOps[0], dl, SRetReg,
2582 getPointerTy(MF.getDataLayout()));
2583
2584 unsigned RetValReg
2585 = (Subtarget.is64Bit() && !Subtarget.isTarget64BitILP32()) ?
2586 X86::RAX : X86::EAX;
2587 Chain = DAG.getCopyToReg(Chain, dl, RetValReg, Val, Flag);
2588 Flag = Chain.getValue(1);
2589
2590 // RAX/EAX now acts like a return value.
2591 RetOps.push_back(
2592 DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout())));
2593
2594 // Add the returned register to the CalleeSaveDisableRegs list.
2595 if (ShouldDisableCalleeSavedRegister)
2596 MF.getRegInfo().disableCalleeSavedRegister(RetValReg);
2597 }
2598
2599 const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
2600 const MCPhysReg *I =
2601 TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
2602 if (I) {
2603 for (; *I; ++I) {
2604 if (X86::GR64RegClass.contains(*I))
2605 RetOps.push_back(DAG.getRegister(*I, MVT::i64));
2606 else
2607        llvm_unreachable("Unexpected register class in CSRsViaCopy!");
2608 }
2609 }
2610
2611 RetOps[0] = Chain; // Update chain.
2612
2613 // Add the flag if we have it.
2614 if (Flag.getNode())
2615 RetOps.push_back(Flag);
2616
2617 X86ISD::NodeType opcode = X86ISD::RET_FLAG;
2618 if (CallConv == CallingConv::X86_INTR)
2619 opcode = X86ISD::IRET;
2620 return DAG.getNode(opcode, dl, MVT::Other, RetOps);
2621}
2622
2623bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
2624 if (N->getNumValues() != 1 || !N->hasNUsesOfValue(1, 0))
2625 return false;
2626
2627 SDValue TCChain = Chain;
2628 SDNode *Copy = *N->use_begin();
2629 if (Copy->getOpcode() == ISD::CopyToReg) {
2630 // If the copy has a glue operand, we conservatively assume it isn't safe to
2631 // perform a tail call.
2632 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
2633 return false;
2634 TCChain = Copy->getOperand(0);
2635 } else if (Copy->getOpcode() != ISD::FP_EXTEND)
2636 return false;
2637
2638 bool HasRet = false;
2639 for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
2640 UI != UE; ++UI) {
2641 if (UI->getOpcode() != X86ISD::RET_FLAG)
2642 return false;
2643 // If we are returning more than one value, we can definitely
2644    // not make a tail call; see PR19530.
2645 if (UI->getNumOperands() > 4)
2646 return false;
2647 if (UI->getNumOperands() == 4 &&
2648 UI->getOperand(UI->getNumOperands()-1).getValueType() != MVT::Glue)
2649 return false;
2650 HasRet = true;
2651 }
2652
2653 if (!HasRet)
2654 return false;
2655
2656 Chain = TCChain;
2657 return true;
2658}
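// Note (not in the original source): the operand-count checks above rely on
// the RET_FLAG operand layout built in LowerReturn: (chain, bytes-to-pop,
// returned registers..., optional glue). A return of a single register thus
// has at most 4 operands; anything larger implies multiple return values,
// which rules out the tail call (see PR19530).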
2659
2660EVT X86TargetLowering::getTypeForExtReturn(LLVMContext &Context, EVT VT,
2661 ISD::NodeType ExtendKind) const {
2662 MVT ReturnMVT = MVT::i32;
2663
2664 bool Darwin = Subtarget.getTargetTriple().isOSDarwin();
2665 if (VT == MVT::i1 || (!Darwin && (VT == MVT::i8 || VT == MVT::i16))) {
2666 // The ABI does not require i1, i8 or i16 to be extended.
2667 //
2668 // On Darwin, there is code in the wild relying on Clang's old behaviour of
2669 // always extending i8/i16 return values, so keep doing that for now.
2670 // (PR26665).
2671 ReturnMVT = MVT::i8;
2672 }
2673
2674 EVT MinVT = getRegisterType(Context, ReturnMVT);
2675 return VT.bitsLT(MinVT) ? MinVT : VT;
2676}
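// Illustrative example (not in the original source): for an i8 return value
// on a non-Darwin target, ReturnMVT becomes i8 and the value is returned
// unextended; on Darwin, ReturnMVT stays i32, so the same i8 value is widened
// to i32 to preserve the historical behaviour mentioned above.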
2677
2678/// Reads two 32 bit registers and creates a 64 bit mask value.
2679 /// \param VA The current 32 bit value that needs to be assigned.
2680 /// \param NextVA The next 32 bit value that needs to be assigned.
2681 /// \param Root The parent DAG node.
2682 /// \param [in,out] InFlag Represents the SDValue in the parent DAG node used
2683 ///                        for glue purposes. In case the DAG already uses a
2684 ///                        physical register instead of a virtual one, we
2685 ///                        should glue our new SDValue to the InFlag SDValue.
2686 /// \return a new SDValue of 64 bit size.
2687static SDValue getv64i1Argument(CCValAssign &VA, CCValAssign &NextVA,
2688 SDValue &Root, SelectionDAG &DAG,
2689 const SDLoc &Dl, const X86Subtarget &Subtarget,
2690 SDValue *InFlag = nullptr) {
2691  assert((Subtarget.hasBWI()) && "Expected AVX512BW target!");
2692  assert(Subtarget.is32Bit() && "Expecting 32 bit target");
2693  assert(VA.getValVT() == MVT::v64i1 &&
2694         "Expecting first location of 64 bit width type");
2695  assert(NextVA.getValVT() == VA.getValVT() &&
2696         "The locations should have the same type");
2697  assert(VA.isRegLoc() && NextVA.isRegLoc() &&
2698         "The values should reside in two registers");
2699
2700 SDValue Lo, Hi;
2701 SDValue ArgValueLo, ArgValueHi;
2702
2703 MachineFunction &MF = DAG.getMachineFunction();
2704 const TargetRegisterClass *RC = &X86::GR32RegClass;
2705
2706 // Read a 32 bit value from the registers.
2707 if (nullptr == InFlag) {
2708 // When no physical register is present,
2709 // create an intermediate virtual register.
2710 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
2711 ArgValueLo = DAG.getCopyFromReg(Root, Dl, Reg, MVT::i32);
2712 Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
2713 ArgValueHi = DAG.getCopyFromReg(Root, Dl, Reg, MVT::i32);
2714 } else {
2715 // When a physical register is available read the value from it and glue
2716 // the reads together.
2717 ArgValueLo =
2718 DAG.getCopyFromReg(Root, Dl, VA.getLocReg(), MVT::i32, *InFlag);
2719 *InFlag = ArgValueLo.getValue(2);
2720 ArgValueHi =
2721 DAG.getCopyFromReg(Root, Dl, NextVA.getLocReg(), MVT::i32, *InFlag);
2722 *InFlag = ArgValueHi.getValue(2);
2723 }
2724
2725 // Convert the i32 type into v32i1 type.
2726 Lo = DAG.getBitcast(MVT::v32i1, ArgValueLo);
2727
2728 // Convert the i32 type into v32i1 type.
2729 Hi = DAG.getBitcast(MVT::v32i1, ArgValueHi);
2730
2731 // Concatenate the two values together.
2732 return DAG.getNode(ISD::CONCAT_VECTORS, Dl, MVT::v64i1, Lo, Hi);
2733}
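// Note (not in the original source): the recombination above is the inverse
// of Passv64i1ArgInRegs. Treating the two halves as integers, the result is
// conceptually
//   uint64_t Mask = (uint64_t(Hi) << 32) | Lo;
// expressed in the DAG as two i32 -> v32i1 bitcasts joined by CONCAT_VECTORS.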
2734
2735 /// Lower a register of various sizes (8/16/32/64)
2736 /// to a mask value of the expected size (v8i1/v16i1/v32i1/v64i1).
2737 /// \returns a DAG node containing the operand after lowering to mask type.
2738static SDValue lowerRegToMasks(const SDValue &ValArg, const EVT &ValVT,
2739 const EVT &ValLoc, const SDLoc &Dl,
2740 SelectionDAG &DAG) {
2741 SDValue ValReturned = ValArg;
2742
2743 if (ValVT == MVT::v1i1)
2744 return DAG.getNode(ISD::SCALAR_TO_VECTOR, Dl, MVT::v1i1, ValReturned);
2745
2746 if (ValVT == MVT::v64i1) {
2747    // On a 32 bit machine, this case is handled by getv64i1Argument.
2748    assert(ValLoc == MVT::i64 && "Expecting only i64 locations");
2749    // On a 64 bit machine, there is no need to truncate the value, only bitcast it.
2750 } else {
2751 MVT maskLen;
2752 switch (ValVT.getSimpleVT().SimpleTy) {
2753 case MVT::v8i1:
2754 maskLen = MVT::i8;
2755 break;
2756 case MVT::v16i1:
2757 maskLen = MVT::i16;
2758 break;
2759 case MVT::v32i1:
2760 maskLen = MVT::i32;
2761 break;
2762 default:
2763      llvm_unreachable("Expecting a vector of i1 types");
2764 }
2765
2766 ValReturned = DAG.getNode(ISD::TRUNCATE, Dl, maskLen, ValReturned);
2767 }
2768 return DAG.getBitcast(ValVT, ValReturned);
2769}
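// Illustrative example (not in the original source): a v16i1 mask returned in
// an i32 location is lowered as
//   TRUNCATE i32 -> i16, then BITCAST i16 -> v16i1
// while a v64i1 value arriving in an i64 location only needs the final bitcast.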
2770
2771/// Lower the result values of a call into the
2772/// appropriate copies out of appropriate physical registers.
2773///
2774SDValue X86TargetLowering::LowerCallResult(
2775 SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
2776 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
2777 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
2778 uint32_t *RegMask) const {
2779
2780 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
2781 // Assign locations to each value returned by this call.
2782 SmallVector<CCValAssign, 16> RVLocs;
2783 bool Is64Bit = Subtarget.is64Bit();
2784 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
2785 *DAG.getContext());
2786 CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
2787
2788 // Copy all of the result registers out of their specified physreg.
2789 for (unsigned I = 0, InsIndex = 0, E = RVLocs.size(); I != E;
2790 ++I, ++InsIndex) {
2791 CCValAssign &VA = RVLocs[I];
2792 EVT CopyVT = VA.getLocVT();
2793
2794 // In some calling conventions we need to remove the used registers
2795 // from the register mask.
2796 if (RegMask) {
2797 for (MCSubRegIterator SubRegs(VA.getLocReg(), TRI, /*IncludeSelf=*/true);
2798 SubRegs.isValid(); ++SubRegs)
2799 RegMask[*SubRegs / 32] &= ~(1u << (*SubRegs % 32));
2800 }
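      // Note (not in the original source): RegMask is an array of 32-bit
      // words holding one "preserved" bit per physical register, so register
      // R corresponds to bit (R % 32) of word (R / 32); clearing that bit
      // marks R and each of its sub-registers as clobbered by this call.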
2801
2802 // If this is x86-64, and we disabled SSE, we can't return FP values
2803 if ((CopyVT == MVT::f32 || CopyVT == MVT::f64 || CopyVT == MVT::f128) &&
2804 ((Is64Bit || Ins[InsIndex].Flags.isInReg()) && !Subtarget.hasSSE1())) {
2805 errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
2806 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
2807 }
2808
2809 // If we prefer to use the value in xmm registers, copy it out as f80 and
2810 // use a truncate to move it from fp stack reg to xmm reg.
2811 bool RoundAfterCopy = false;
2812 if ((VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) &&
2813 isScalarFPTypeInSSEReg(VA.getValVT())) {
2814 if (!Subtarget.hasX87())
2815 report_fatal_error("X87 register return with X87 disabled");
2816 CopyVT = MVT::f80;
2817 RoundAfterCopy = (CopyVT != VA.getLocVT());
2818 }
2819
2820 SDValue Val;
2821 if (VA.needsCustom()) {
2822      assert(VA.getValVT() == MVT::v64i1 &&
2823             "Currently the only custom case is when we split v64i1 to 2 regs");
2824 Val =
2825 getv64i1Argument(VA, RVLocs[++I], Chain, DAG, dl, Subtarget, &InFlag);
2826 } else {
2827 Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), CopyVT, InFlag)
2828 .getValue(1);
2829 Val = Chain.getValue(0);
2830 InFlag = Chain.getValue(2);
2831 }
2832
2833 if (RoundAfterCopy)
2834 Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val,
2835 // This truncation won't change the value.
2836 DAG.getIntPtrConstant(1, dl));
2837
2838 if (VA.isExtInLoc() && (VA.getValVT().getScalarType() == MVT::i1)) {
2839 if (VA.getValVT().isVector() &&
2840 ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
2841 (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
2842 // promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
2843 Val = lowerRegToMasks(Val, VA.getValVT(), VA.getLocVT(), dl, DAG);
2844 } else
2845 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
2846 }
2847
2848 InVals.push_back(Val);
2849 }
2850
2851 return Chain;
2852}
2853
2854//===----------------------------------------------------------------------===//
2855// C & StdCall & Fast Calling Convention implementation
2856//===----------------------------------------------------------------------===//
2857 // The StdCall calling convention is the standard for many Windows API
2858 // routines. It differs from the C calling convention only slightly: the
2859 // callee cleans up the stack instead of the caller, and symbols are
2860 // decorated in a particular way. It does not support any vector arguments.
2861 // For info on the fast calling convention, see the Fast Calling Convention
2862 // (tail call) implementation, LowerX86_32FastCCCallTo.
2863
2864/// CallIsStructReturn - Determines whether a call uses struct return
2865/// semantics.
2866enum StructReturnType {
2867 NotStructReturn,
2868 RegStructReturn,
2869 StackStructReturn
2870};
2871static StructReturnType
2872callIsStructReturn(ArrayRef<ISD::OutputArg> Outs, bool IsMCU) {
2873 if (Outs.empty())
2874 return NotStructReturn;
2875
2876 const ISD::ArgFlagsTy &Flags = Outs[0].Flags;
2877 if (!Flags.isSRet())
2878 return NotStructReturn;
2879 if (Flags.isInReg() || IsMCU)
2880 return RegStructReturn;
2881 return StackStructReturn;
2882}
2883
2884/// Determines whether a function uses struct return semantics.
2885static StructReturnType
2886argsAreStructReturn(ArrayRef<ISD::InputArg> Ins, bool IsMCU) {
2887 if (Ins.empty())
2888 return NotStructReturn;
2889
2890 const ISD::ArgFlagsTy &Flags = Ins[0].Flags;
2891 if (!Flags.isSRet())
2892 return NotStructReturn;
2893 if (Flags.isInReg() || IsMCU)
2894 return RegStructReturn;
2895 return StackStructReturn;
2896}
2897
2898/// Make a copy of an aggregate at address specified by "Src" to address
2899/// "Dst" with size and alignment information specified by the specific
2900/// parameter attribute. The copy will be passed as a byval function parameter.
2901static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
2902 SDValue Chain, ISD::ArgFlagsTy Flags,
2903 SelectionDAG &DAG, const SDLoc &dl) {
2904 SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
2905
2906 return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
2907 /*isVolatile*/false, /*AlwaysInline=*/true,
2908 /*isTailCall*/false,
2909 MachinePointerInfo(), MachinePointerInfo());
2910}
2911
2912/// Return true if the calling convention is one that we can guarantee TCO for.
2913static bool canGuaranteeTCO(CallingConv::ID CC) {
2914 return (CC == CallingConv::Fast || CC == CallingConv::GHC ||
2915 CC == CallingConv::X86_RegCall || CC == CallingConv::HiPE ||
2916 CC == CallingConv::HHVM);
2917}
2918
2919/// Return true if we might ever do TCO for calls with this calling convention.
2920static bool mayTailCallThisCC(CallingConv::ID CC) {
2921 switch (CC) {
2922 // C calling conventions:
2923 case CallingConv::C:
2924 case CallingConv::Win64:
2925 case CallingConv::X86_64_SysV:
2926 // Callee pop conventions:
2927 case CallingConv::X86_ThisCall:
2928 case CallingConv::X86_StdCall:
2929 case CallingConv::X86_VectorCall:
2930 case CallingConv::X86_FastCall:
2931 // Swift:
2932 case CallingConv::Swift:
2933 return true;
2934 default:
2935 return canGuaranteeTCO(CC);
2936 }
2937}
2938
2939/// Return true if the function is being made into a tailcall target by
2940/// changing its ABI.
2941static bool shouldGuaranteeTCO(CallingConv::ID CC, bool GuaranteedTailCallOpt) {
2942 return GuaranteedTailCallOpt && canGuaranteeTCO(CC);
2943}
2944
2945bool X86TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
2946 auto Attr =
2947 CI->getParent()->getParent()->getFnAttribute("disable-tail-calls");
2948 if (!CI->isTailCall() || Attr.getValueAsString() == "true")
2949 return false;
2950
2951 ImmutableCallSite CS(CI);
2952 CallingConv::ID CalleeCC = CS.getCallingConv();
2953 if (!mayTailCallThisCC(CalleeCC))
2954 return false;
2955
2956 return true;
2957}
2958
2959SDValue
2960X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
2961 const SmallVectorImpl<ISD::InputArg> &Ins,
2962 const SDLoc &dl, SelectionDAG &DAG,
2963 const CCValAssign &VA,
2964 MachineFrameInfo &MFI, unsigned i) const {
2965 // Create the nodes corresponding to a load from this parameter slot.
2966 ISD::ArgFlagsTy Flags = Ins[i].Flags;
2967 bool AlwaysUseMutable = shouldGuaranteeTCO(
2968 CallConv, DAG.getTarget().Options.GuaranteedTailCallOpt);
2969 bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
2970 EVT ValVT;
2971 MVT PtrVT = getPointerTy(DAG.getDataLayout());
2972
2973  // If the value is passed by pointer, we receive its address instead of the
2974  // value itself. There is no need to extend if the mask value and its
2975  // location share the same absolute size.
2976 bool ExtendedInMem =
2977 VA.isExtInLoc() && VA.getValVT().getScalarType() == MVT::i1 &&
2978 VA.getValVT().getSizeInBits() != VA.getLocVT().getSizeInBits();
2979
2980 if (VA.getLocInfo() == CCValAssign::Indirect || ExtendedInMem)
2981 ValVT = VA.getLocVT();
2982 else
2983 ValVT = VA.getValVT();
2984
2985 // FIXME: For now, all byval parameter objects are marked mutable. This can be
2986 // changed with more analysis.
2987  // In the case of tail call optimization, mark all arguments mutable, since
2988  // they could be overwritten when lowering the arguments of a tail call.
2989 if (Flags.isByVal()) {
2990 unsigned Bytes = Flags.getByValSize();
2991 if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects.
2992
2993 // FIXME: For now, all byval parameter objects are marked as aliasing. This
2994 // can be improved with deeper analysis.
2995 int FI = MFI.CreateFixedObject(Bytes, VA.getLocMemOffset(), isImmutable,
2996 /*isAliased=*/true);
2997 return DAG.getFrameIndex(FI, PtrVT);
2998 }
2999
3000 // This is an argument in memory. We might be able to perform copy elision.
3001 // If the argument is passed directly in memory without any extension, then we
3002 // can perform copy elision. Large vector types, for example, may be passed
3003 // indirectly by pointer.
3004 if (Flags.isCopyElisionCandidate() &&
3005 VA.getLocInfo() != CCValAssign::Indirect && !ExtendedInMem) {
3006 EVT ArgVT = Ins[i].ArgVT;
3007 SDValue PartAddr;
3008 if (Ins[i].PartOffset == 0) {
3009 // If this is a one-part value or the first part of a multi-part value,
3010 // create a stack object for the entire argument value type and return a
3011 // load from our portion of it. This assumes that if the first part of an
3012 // argument is in memory, the rest will also be in memory.
3013 int FI = MFI.CreateFixedObject(ArgVT.getStoreSize(), VA.getLocMemOffset(),
3014 /*Immutable=*/false);
3015 PartAddr = DAG.getFrameIndex(FI, PtrVT);
3016 return DAG.getLoad(
3017 ValVT, dl, Chain, PartAddr,
3018 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
3019 } else {
3020 // This is not the first piece of an argument in memory. See if there is
3021 // already a fixed stack object including this offset. If so, assume it
3022 // was created by the PartOffset == 0 branch above and create a load from
3023 // the appropriate offset into it.
3024 int64_t PartBegin = VA.getLocMemOffset();
3025 int64_t PartEnd = PartBegin + ValVT.getSizeInBits() / 8;
3026 int FI = MFI.getObjectIndexBegin();
3027 for (; MFI.isFixedObjectIndex(FI); ++FI) {
3028 int64_t ObjBegin = MFI.getObjectOffset(FI);
3029 int64_t ObjEnd = ObjBegin + MFI.getObjectSize(FI);
3030 if (ObjBegin <= PartBegin && PartEnd <= ObjEnd)
3031 break;
3032 }
3033 if (MFI.isFixedObjectIndex(FI)) {
3034 SDValue Addr =
3035 DAG.getNode(ISD::ADD, dl, PtrVT, DAG.getFrameIndex(FI, PtrVT),
3036 DAG.getIntPtrConstant(Ins[i].PartOffset, dl));
3037 return DAG.getLoad(
3038 ValVT, dl, Chain, Addr,
3039 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI,
3040 Ins[i].PartOffset));
3041 }
3042 }
3043 }
3044
3045 int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8,
3046 VA.getLocMemOffset(), isImmutable);
3047
3048 // Set SExt or ZExt flag.
3049 if (VA.getLocInfo() == CCValAssign::ZExt) {
3050 MFI.setObjectZExt(FI, true);
3051 } else if (VA.getLocInfo() == CCValAssign::SExt) {
3052 MFI.setObjectSExt(FI, true);
3053 }
3054
3055 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3056 SDValue Val = DAG.getLoad(
3057 ValVT, dl, Chain, FIN,
3058 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
3059 return ExtendedInMem
3060 ? (VA.getValVT().isVector()
3061 ? DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VA.getValVT(), Val)
3062 : DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val))
3063 : Val;
3064}
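// Illustrative example (not in the original source): with ExtendedInMem, an
// i1 argument that was widened to i8 in its stack slot is loaded with the
// wider type and then narrowed back, i.e.
//   Val = load i8 from FI;  Arg = TRUNCATE i8 -> i1
// (or wrapped with SCALAR_TO_VECTOR when the value type is a vector of i1).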
3065
3066// FIXME: Get this from tablegen.
3067static ArrayRef<MCPhysReg> get64BitArgumentGPRs(CallingConv::ID CallConv,
3068 const X86Subtarget &Subtarget) {
3069  assert(Subtarget.is64Bit());
3070
3071 if (Subtarget.isCallingConvWin64(CallConv)) {
3072 static const MCPhysReg GPR64ArgRegsWin64[] = {
3073 X86::RCX, X86::RDX, X86::R8, X86::R9
3074 };
3075 return makeArrayRef(std::begin(GPR64ArgRegsWin64), std::end(GPR64ArgRegsWin64));
3076 }
3077
3078 static const MCPhysReg GPR64ArgRegs64Bit[] = {
3079 X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
3080 };
3081 return makeArrayRef(std::begin(GPR64ArgRegs64Bit), std::end(GPR64ArgRegs64Bit));
3082}
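// Note (not in the original source): this mirrors the two 64-bit ABIs -- the
// SysV AMD64 convention passes the first six integer arguments in RDI, RSI,
// RDX, RCX, R8, R9, while Win64 passes the first four in RCX, RDX, R8, R9 and
// reserves shadow stack slots for them.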
3083
3084// FIXME: Get this from tablegen.
3085static ArrayRef<MCPhysReg> get64BitArgumentXMMs(MachineFunction &MF,
3086 CallingConv::ID CallConv,
3087 const X86Subtarget &Subtarget) {
3088  assert(Subtarget.is64Bit());
3089 if (Subtarget.isCallingConvWin64(CallConv)) {
3090    // The XMM registers which might contain var arg parameters are shadowed
3091    // in their paired GPRs, so we only need to save the GPRs to their home
3092    // slots.
3093 // TODO: __vectorcall will change this.
3094 return None;
3095 }
3096
3097 const Function &F = MF.getFunction();
3098 bool NoImplicitFloatOps = F.hasFnAttribute(Attribute::NoImplicitFloat);
3099 bool isSoftFloat = Subtarget.useSoftFloat();
3100  assert(!(isSoftFloat && NoImplicitFloatOps) &&
3101         "SSE register cannot be used when SSE is disabled!");
3102 if (isSoftFloat || NoImplicitFloatOps || !Subtarget.hasSSE1())
3103 // Kernel mode asks for SSE to be disabled, so there are no XMM argument
3104 // registers.
3105 return None;
3106
3107 static const MCPhysReg XMMArgRegs64Bit[] = {
3108 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
3109 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
3110 };
3111 return makeArrayRef(std::begin(XMMArgRegs64Bit), std::end(XMMArgRegs64Bit));
3112}
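// Note (not in the original source): on the SysV AMD64 ABI, varargs
// floating-point/vector arguments use XMM0-XMM7, and the caller passes in AL
// an upper bound on how many of those registers were actually used; that is
// why AL is added as a live-in further below.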
3113
3114#ifndef NDEBUG
3115static bool isSortedByValueNo(ArrayRef<CCValAssign> ArgLocs) {
3116 return std::is_sorted(ArgLocs.begin(), ArgLocs.end(),
3117 [](const CCValAssign &A, const CCValAssign &B) -> bool {
3118 return A.getValNo() < B.getValNo();
3119 });
3120}
3121#endif
3122
3123SDValue X86TargetLowering::LowerFormalArguments(
3124 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3125 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3126 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3127 MachineFunction &MF = DAG.getMachineFunction();
3128 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
3129 const TargetFrameLowering &TFI = *Subtarget.getFrameLowering();
3130
3131 const Function &F = MF.getFunction();
3132 if (F.hasExternalLinkage() && Subtarget.isTargetCygMing() &&
3133 F.getName() == "main")
3134 FuncInfo->setForceFramePointer(true);
3135
3136 MachineFrameInfo &MFI = MF.getFrameInfo();
3137 bool Is64Bit = Subtarget.is64Bit();
3138 bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
3139
3140  assert(
3141      !(isVarArg && canGuaranteeTCO(CallConv)) &&
3142      "Var args not supported with calling conv' regcall, fastcc, ghc or hipe");
3143
3144 // Assign locations to all of the incoming arguments.
3145 SmallVector<CCValAssign, 16> ArgLocs;
3146 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
3147
3148 // Allocate shadow area for Win64.
3149 if (IsWin64)
3150 CCInfo.AllocateStack(32, 8);
3151
3152 CCInfo.AnalyzeArguments(Ins, CC_X86);
3153
3154 // In vectorcall calling convention a second pass is required for the HVA
3155 // types.
3156 if (CallingConv::X86_VectorCall == CallConv) {
3157 CCInfo.AnalyzeArgumentsSecondPass(Ins, CC_X86);
3158 }
3159
3160  // The next loop assumes that the locations are in the same order as the
3161  // input arguments.
3162  assert(isSortedByValueNo(ArgLocs) &&
3163         "Argument Location list must be sorted before lowering");
3164
3165 SDValue ArgValue;
3166 for (unsigned I = 0, InsIndex = 0, E = ArgLocs.size(); I != E;
3167 ++I, ++InsIndex) {
3168    assert(InsIndex < Ins.size() && "Invalid Ins index");
3169 CCValAssign &VA = ArgLocs[I];
3170
3171 if (VA.isRegLoc()) {
3172 EVT RegVT = VA.getLocVT();
3173 if (VA.needsCustom()) {
3174        assert(
3175            VA.getValVT() == MVT::v64i1 &&
3176            "Currently the only custom case is when we split v64i1 to 2 regs");
3177
3178        // In the regcall calling convention, v64i1 values compiled for a
3179        // 32 bit arch are split up into two registers.
3180 ArgValue =
3181 getv64i1Argument(VA, ArgLocs[++I], Chain, DAG, dl, Subtarget);
3182 } else {
3183 const TargetRegisterClass *RC;
3184 if (RegVT == MVT::i8)
3185 RC = &X86::GR8RegClass;
3186 else if (RegVT == MVT::i16)
3187 RC = &X86::GR16RegClass;
3188 else if (RegVT == MVT::i32)
3189 RC = &X86::GR32RegClass;
3190 else if (Is64Bit && RegVT == MVT::i64)
3191 RC = &X86::GR64RegClass;
3192 else if (RegVT == MVT::f32)
3193 RC = Subtarget.hasAVX512() ? &X86::FR32XRegClass : &X86::FR32RegClass;
3194 else if (RegVT == MVT::f64)
3195 RC = Subtarget.hasAVX512() ? &X86::FR64XRegClass : &X86::FR64RegClass;
3196 else if (RegVT == MVT::f80)
3197 RC = &X86::RFP80RegClass;
3198 else if (RegVT == MVT::f128)
3199 RC = &X86::VR128RegClass;
3200 else if (RegVT.is512BitVector())
3201 RC = &X86::VR512RegClass;
3202 else if (RegVT.is256BitVector())
3203 RC = Subtarget.hasVLX() ? &X86::VR256XRegClass : &X86::VR256RegClass;
3204 else if (RegVT.is128BitVector())
3205 RC = Subtarget.hasVLX() ? &X86::VR128XRegClass : &X86::VR128RegClass;
3206 else if (RegVT == MVT::x86mmx)
3207 RC = &X86::VR64RegClass;
3208 else if (RegVT == MVT::v1i1)
3209 RC = &X86::VK1RegClass;
3210 else if (RegVT == MVT::v8i1)
3211 RC = &X86::VK8RegClass;
3212 else if (RegVT == MVT::v16i1)
3213 RC = &X86::VK16RegClass;
3214 else if (RegVT == MVT::v32i1)
3215 RC = &X86::VK32RegClass;
3216 else if (RegVT == MVT::v64i1)
3217 RC = &X86::VK64RegClass;
3218 else
3219          llvm_unreachable("Unknown argument type!");
3220
3221 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
3222 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
3223 }
3224
3225 // If this is an 8 or 16-bit value, it is really passed promoted to 32
3226 // bits. Insert an assert[sz]ext to capture this, then truncate to the
3227 // right size.
3228 if (VA.getLocInfo() == CCValAssign::SExt)
3229 ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
3230 DAG.getValueType(VA.getValVT()));
3231 else if (VA.getLocInfo() == CCValAssign::ZExt)
3232 ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
3233 DAG.getValueType(VA.getValVT()));
3234 else if (VA.getLocInfo() == CCValAssign::BCvt)
3235 ArgValue = DAG.getBitcast(VA.getValVT(), ArgValue);
3236
3237 if (VA.isExtInLoc()) {
3238 // Handle MMX values passed in XMM regs.
3239 if (RegVT.isVector() && VA.getValVT().getScalarType() != MVT::i1)
3240 ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(), ArgValue);
3241 else if (VA.getValVT().isVector() &&
3242 VA.getValVT().getScalarType() == MVT::i1 &&
3243 ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
3244 (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
3245 // Promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
3246 ArgValue = lowerRegToMasks(ArgValue, VA.getValVT(), RegVT, dl, DAG);
3247 } else
3248 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
3249 }
3250 } else {
3251      assert(VA.isMemLoc());
3252 ArgValue =
3253 LowerMemArgument(Chain, CallConv, Ins, dl, DAG, VA, MFI, InsIndex);
3254 }
3255
3256    // If the value is passed via a pointer, do a load.
3257 if (VA.getLocInfo() == CCValAssign::Indirect && !Ins[I].Flags.isByVal())
3258 ArgValue =
3259 DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue, MachinePointerInfo());
3260
3261 InVals.push_back(ArgValue);
3262 }
3263
3264 for (unsigned I = 0, E = Ins.size(); I != E; ++I) {
3265    // The Swift calling convention does not require that we copy the sret
3266    // argument into %rax/%eax for the return. We don't set SRetReturnReg for Swift.
3267 if (CallConv == CallingConv::Swift)
3268 continue;
3269
3270 // All x86 ABIs require that for returning structs by value we copy the
3271 // sret argument into %rax/%eax (depending on ABI) for the return. Save
3272 // the argument into a virtual register so that we can access it from the
3273 // return points.
3274 if (Ins[I].Flags.isSRet()) {
3275 unsigned Reg = FuncInfo->getSRetReturnReg();
3276 if (!Reg) {
3277 MVT PtrTy = getPointerTy(DAG.getDataLayout());
3278 Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
3279 FuncInfo->setSRetReturnReg(Reg);
3280 }
3281 SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[I]);
3282 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
3283 break;
3284 }
3285 }
3286
3287 unsigned StackSize = CCInfo.getNextStackOffset();
3288 // Align stack specially for tail calls.
3289 if (shouldGuaranteeTCO(CallConv,
3290 MF.getTarget().Options.GuaranteedTailCallOpt))
3291 StackSize = GetAlignedArgumentStackSize(StackSize, DAG);
3292
3293 // If the function takes variable number of arguments, make a frame index for
3294 // the start of the first vararg value... for expansion of llvm.va_start. We
3295 // can skip this if there are no va_start calls.
3296 if (MFI.hasVAStart() &&
3297 (Is64Bit || (CallConv != CallingConv::X86_FastCall &&
3298 CallConv != CallingConv::X86_ThisCall))) {
3299 FuncInfo->setVarArgsFrameIndex(MFI.CreateFixedObject(1, StackSize, true));
3300 }
3301
3302 // Figure out if XMM registers are in use.
3303  assert(!(Subtarget.useSoftFloat() &&
3304           F.hasFnAttribute(Attribute::NoImplicitFloat)) &&
3305         "SSE register cannot be used when SSE is disabled!");
3306
3307 // 64-bit calling conventions support varargs and register parameters, so we
3308 // have to do extra work to spill them in the prologue.
3309 if (Is64Bit && isVarArg && MFI.hasVAStart()) {
3310 // Find the first unallocated argument registers.
3311 ArrayRef<MCPhysReg> ArgGPRs = get64BitArgumentGPRs(CallConv, Subtarget);
3312 ArrayRef<MCPhysReg> ArgXMMs = get64BitArgumentXMMs(MF, CallConv, Subtarget);
3313 unsigned NumIntRegs = CCInfo.getFirstUnallocated(ArgGPRs);
3314 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(ArgXMMs);
3315    assert(!(NumXMMRegs && !Subtarget.hasSSE1()) &&
3316           "SSE register cannot be used when SSE is disabled!");
3317
3318 // Gather all the live in physical registers.
3319 SmallVector<SDValue, 6> LiveGPRs;
3320 SmallVector<SDValue, 8> LiveXMMRegs;
3321 SDValue ALVal;
3322 for (MCPhysReg Reg : ArgGPRs.slice(NumIntRegs)) {
3323 unsigned GPR = MF.addLiveIn(Reg, &X86::GR64RegClass);
3324 LiveGPRs.push_back(
3325 DAG.getCopyFromReg(Chain, dl, GPR, MVT::i64));
3326 }
3327 if (!ArgXMMs.empty()) {
3328 unsigned AL = MF.addLiveIn(X86::AL, &X86::GR8RegClass);
3329 ALVal = DAG.getCopyFromReg(Chain, dl, AL, MVT::i8);
3330 for (MCPhysReg Reg : ArgXMMs.slice(NumXMMRegs)) {
3331 unsigned XMMReg = MF.addLiveIn(Reg, &X86::VR128RegClass);
3332 LiveXMMRegs.push_back(
3333 DAG.getCopyFromReg(Chain, dl, XMMReg, MVT::v4f32));
3334 }
3335 }
3336
3337 if (IsWin64) {
3338 // Get to the caller-allocated home save location. Add 8 to account
3339 // for the return address.
3340 int HomeOffset = TFI.getOffsetOfLocalArea() + 8;
3341 FuncInfo->setRegSaveFrameIndex(
3342 MFI.CreateFixedObject(1, NumIntRegs * 8 + HomeOffset, false));
3343 // Fixup to set vararg frame on shadow area (4 x i64).
3344 if (NumIntRegs < 4)
3345 FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex());
3346 } else {
3347 // For X86-64, if there are vararg parameters that are passed via
3348 // registers, then we must store them to their spots on the stack so
3349 // they may be loaded by dereferencing the result of va_next.
3350 FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);
3351 FuncInfo->setVarArgsFPOffset(ArgGPRs.size() * 8 + NumXMMRegs * 16);
3352 FuncInfo->setRegSaveFrameIndex(MFI.CreateStackObject(
3353 ArgGPRs.size() * 8 + ArgXMMs.size() * 16, 16, false));
3354 }
3355
3356 // Store the integer parameter registers.
3357 SmallVector<SDValue, 8> MemOps;
3358 SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
3359 getPointerTy(DAG.getDataLayout()));
3360 unsigned Offset = FuncInfo->getVarArgsGPOffset();
3361 for (SDValue Val : LiveGPRs) {
3362 SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
3363 RSFIN, DAG.getIntPtrConstant(Offset, dl));
3364 SDValue Store =
3365 DAG.getStore(Val.getValue(1), dl, Val, FIN,
3366 MachinePointerInfo::getFixedStack(
3367 DAG.getMachineFunction(),
3368 FuncInfo->getRegSaveFrameIndex(), Offset));
3369 MemOps.push_back(Store);
3370 Offset += 8;
3371 }
3372
3373 if (!ArgXMMs.empty() && NumXMMRegs != ArgXMMs.size()) {
3374 // Now store the XMM (fp + vector) parameter registers.
3375 SmallVector<SDValue, 12> SaveXMMOps;
3376 SaveXMMOps.push_back(Chain);
3377 SaveXMMOps.push_back(ALVal);
3378 SaveXMMOps.push_back(DAG.getIntPtrConstant(
3379 FuncInfo->getRegSaveFrameIndex(), dl));
3380 SaveXMMOps.push_back(DAG.getIntPtrConstant(
3381 FuncInfo->getVarArgsFPOffset(), dl));
3382 SaveXMMOps.insert(SaveXMMOps.end(), LiveXMMRegs.begin(),
3383 LiveXMMRegs.end());
3384 MemOps.push_back(DAG.getNode(X86ISD::VASTART_SAVE_XMM_REGS, dl,
3385 MVT::Other, SaveXMMOps));
3386 }
3387
3388 if (!MemOps.empty())
3389 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
3390 }
3391
3392 if (isVarArg && MFI.hasMustTailInVarArgFunc()) {
3393 // Find the largest legal vector type.
3394 MVT VecVT = MVT::Other;
3395 // FIXME: Only some x86_32 calling conventions support AVX512.
3396 if (Subtarget.hasAVX512() &&
3397 (Is64Bit || (CallConv == CallingConv::X86_VectorCall ||
3398 CallConv == CallingConv::Intel_OCL_BI)))
3399 VecVT = MVT::v16f32;
3400 else if (Subtarget.hasAVX())
3401 VecVT = MVT::v8f32;
3402 else if (Subtarget.hasSSE2())
3403 VecVT = MVT::v4f32;
3404
3405 // We forward some GPRs and some vector types.
3406 SmallVector<MVT, 2> RegParmTypes;
3407 MVT IntVT = Is64Bit ? MVT::i64 : MVT::i32;
3408 RegParmTypes.push_back(IntVT);
3409 if (VecVT != MVT::Other)
3410 RegParmTypes.push_back(VecVT);
3411
3412 // Compute the set of forwarded registers. The rest are scratch.
3413 SmallVectorImpl<ForwardedRegister> &Forwards =
3414 FuncInfo->getForwardedMustTailRegParms();
3415 CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, CC_X86);
3416
3417 // Conservatively forward AL on x86_64, since it might be used for varargs.
3418 if (Is64Bit && !CCInfo.isAllocated(X86::AL)) {
3419 unsigned ALVReg = MF.addLiveIn(X86::AL, &X86::GR8RegClass);
3420 Forwards.push_back(ForwardedRegister(ALVReg, X86::AL, MVT::i8));
3421 }
3422
3423 // Copy all forwards from physical to virtual registers.
3424 for (ForwardedRegister &FR : Forwards) {
3425 // FIXME: Can we use a less constrained schedule?
3426 SDValue RegVal = DAG.getCopyFromReg(Chain, dl, FR.VReg, FR.VT);
3427 FR.VReg = MF.getRegInfo().createVirtualRegister(getRegClassFor(FR.VT));
3428 Chain = DAG.getCopyToReg(Chain, dl, FR.VReg, RegVal);
3429 }
3430 }
3431
3432 // Some CCs need callee pop.
3433 if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
3434 MF.getTarget().Options.GuaranteedTailCallOpt)) {
3435 FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything.
3436 } else if (CallConv == CallingConv::X86_INTR && Ins.size() == 2) {
3437 // X86 interrupts must pop the error code (and the alignment padding) if
3438 // present.
3439 FuncInfo->setBytesToPopOnReturn(Is64Bit ? 16 : 4);
3440 } else {
3441 FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing.
3442 // If this is an sret function, the return should pop the hidden pointer.
3443 if (!Is64Bit && !canGuaranteeTCO(CallConv) &&
3444 !Subtarget.getTargetTriple().isOSMSVCRT() &&
3445 argsAreStructReturn(Ins, Subtarget.isTargetMCU()) == StackStructReturn)
3446 FuncInfo->setBytesToPopOnReturn(4);
3447 }
3448
3449 if (!Is64Bit) {
3450 // RegSaveFrameIndex is X86-64 only.
3451 FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
3452 if (CallConv == CallingConv::X86_FastCall ||
3453 CallConv == CallingConv::X86_ThisCall)
3454 // fastcc functions can't have varargs.
3455 FuncInfo->setVarArgsFrameIndex(0xAAAAAAA);
3456 }
3457
3458 FuncInfo->setArgumentStackSize(StackSize);
3459
3460 if (WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo()) {
3461 EHPersonality Personality = classifyEHPersonality(F.getPersonalityFn());
3462 if (Personality == EHPersonality::CoreCLR) {
3463      assert(Is64Bit);
3464 // TODO: Add a mechanism to frame lowering that will allow us to indicate
3465 // that we'd prefer this slot be allocated towards the bottom of the frame
3466 // (i.e. near the stack pointer after allocating the frame). Every
3467 // funclet needs a copy of this slot in its (mostly empty) frame, and the
3468 // offset from the bottom of this and each funclet's frame must be the
3469 // same, so the size of funclets' (mostly empty) frames is dictated by
3470 // how far this slot is from the bottom (since they allocate just enough
3471 // space to accommodate holding this slot at the correct offset).
3472 int PSPSymFI = MFI.CreateStackObject(8, 8, /*isSS=*/false);
3473 EHInfo->PSPSymFrameIdx = PSPSymFI;
3474 }
3475 }
3476
3477 if (CallConv == CallingConv::X86_RegCall ||
3478 F.hasFnAttribute("no_caller_saved_registers")) {
3479 MachineRegisterInfo &MRI = MF.getRegInfo();
3480 for (std::pair<unsigned, unsigned> Pair : MRI.liveins())
3481 MRI.disableCalleeSavedRegister(Pair.first);
3482 }
3483
3484 return Chain;
3485}
3486
3487SDValue X86TargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr,
3488 SDValue Arg, const SDLoc &dl,
3489 SelectionDAG &DAG,
3490 const CCValAssign &VA,
3491 ISD::ArgFlagsTy Flags) const {
3492 unsigned LocMemOffset = VA.getLocMemOffset();
3493 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
3494 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
3495 StackPtr, PtrOff);
3496 if (Flags.isByVal())
3497 return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
3498
3499 return DAG.getStore(
3500 Chain, dl, Arg, PtrOff,
3501 MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset));
3502}
3503
3504 /// Emit a load of the return address if tail call
3505 /// optimization is performed and it is required.
3506SDValue X86TargetLowering::EmitTailCallLoadRetAddr(
3507 SelectionDAG &DAG, SDValue &OutRetAddr, SDValue Chain, bool IsTailCall,
3508 bool Is64Bit, int FPDiff, const SDLoc &dl) const {
3509 // Adjust the Return address stack slot.
3510 EVT VT = getPointerTy(DAG.getDataLayout());
3511 OutRetAddr = getReturnAddressFrameIndex(DAG);
3512
3513 // Load the "old" Return address.
3514 OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, MachinePointerInfo());
3515 return SDValue(OutRetAddr.getNode(), 1);
3516}
3517
3518/// Emit a store of the return address if tail call
3519/// optimization is performed and it is required (FPDiff!=0).
3520static SDValue EmitTailCallStoreRetAddr(SelectionDAG &DAG, MachineFunction &MF,
3521 SDValue Chain, SDValue RetAddrFrIdx,
3522 EVT PtrVT, unsigned SlotSize,
3523 int FPDiff, const SDLoc &dl) {
3524 // Store the return address to the appropriate stack slot.
3525 if (!FPDiff) return Chain;
3526 // Calculate the new stack slot for the return address.
3527 int NewReturnAddrFI =
3528 MF.getFrameInfo().CreateFixedObject(SlotSize, (int64_t)FPDiff - SlotSize,
3529 false);
3530 SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, PtrVT);
3531 Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx,
3532 MachinePointerInfo::getFixedStack(
3533 DAG.getMachineFunction(), NewReturnAddrFI));
3534 return Chain;
3535}
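// Note (not in the original source): FPDiff is the byte difference between
// the caller's and the callee's outgoing argument areas for a guaranteed tail
// call; when it is non-zero the return address cannot stay in place and is
// re-stored into a new fixed slot (at FPDiff - SlotSize) so it ends up at the
// right offset once the stack is adjusted for the tail call.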
3536
3537 /// Returns a vector_shuffle mask for a movs{s|d} or movd
3538 /// operation of the specified width.
3539static SDValue getMOVL(SelectionDAG &DAG, const SDLoc &dl, MVT VT, SDValue V1,
3540 SDValue V2) {
3541 unsigned NumElems = VT.getVectorNumElements();
3542 SmallVector<int, 8> Mask;
3543 Mask.push_back(NumElems);
3544 for (unsigned i = 1; i != NumElems; ++i)
3545 Mask.push_back(i);
3546 return DAG.getVectorShuffle(VT, dl, V1, V2, Mask);
3547}
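// Illustrative example (not in the original source): for a 4-element type the
// mask built above is <4, 1, 2, 3>, i.e. take element 0 from V2 and elements
// 1-3 from V1 -- the classic movss/movsd "insert low element" pattern.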
3548
3549SDValue
3550X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
3551 SmallVectorImpl<SDValue> &InVals) const {
3552 SelectionDAG &DAG = CLI.DAG;
3553 SDLoc &dl = CLI.DL;
3554 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
3555 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
3556 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
3557 SDValue Chain = CLI.Chain;
3558 SDValue Callee = CLI.Callee;
3559 CallingConv::ID CallConv = CLI.CallConv;
3560 bool &isTailCall = CLI.IsTailCall;
3561 bool isVarArg = CLI.IsVarArg;
3562
3563 MachineFunction &MF = DAG.getMachineFunction();
3564 bool Is64Bit = Subtarget.is64Bit();
3565 bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
3566 StructReturnType SR = callIsStructReturn(Outs, Subtarget.isTargetMCU());
3567 bool IsSibcall = false;
3568 X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>();
3569 auto Attr = MF.getFunction().getFnAttribute("disable-tail-calls");
3570 const auto *CI = dyn_cast_or_null<CallInst>(CLI.CS.getInstruction());
3571 const Function *Fn = CI ? CI->getCalledFunction() : nullptr;
3572 bool HasNCSR = (CI && CI->hasFnAttr("no_caller_saved_registers")) ||
3573 (Fn && Fn->hasFnAttribute("no_caller_saved_registers"));
3574 const auto *II = dyn_cast_or_null<InvokeInst>(CLI.CS.getInstruction());
3575 bool HasNoCfCheck =
3576 (CI && CI->doesNoCfCheck()) || (II && II->doesNoCfCheck());
3577 const Module *M = MF.getMMI().getModule();
3578 Metadata *IsCFProtectionSupported = M->getModuleFlag("cf-protection-branch");
3579
3580 if (CallConv == CallingConv::X86_INTR)
3581 report_fatal_error("X86 interrupts may not be called directly");
3582
3583 if (Attr.getValueAsString() == "true")
3584 isTailCall = false;
3585
3586 if (Subtarget.isPICStyleGOT() &&
3587 !MF.getTarget().Options.GuaranteedTailCallOpt) {
3588 // If we are using a GOT, disable tail calls to external symbols with
3589 // default visibility. Tail calling such a symbol requires using a GOT
3590 // relocation, which forces early binding of the symbol. This breaks code
3591    // that requires lazy function symbol resolution. Using musttail or
3592 // GuaranteedTailCallOpt will override this.
3593 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
3594 if (!G || (!G->getGlobal()->hasLocalLinkage() &&
3595 G->getGlobal()->hasDefaultVisibility()))
3596 isTailCall = false;
3597 }
3598
3599 bool IsMustTail = CLI.CS && CLI.CS.isMustTailCall();
3600 if (IsMustTail) {
3601 // Force this to be a tail call. The verifier rules are enough to ensure
3602 // that we can lower this successfully without moving the return address
3603 // around.
3604 isTailCall = true;
3605 } else if (isTailCall) {
3606 // Check if it's really possible to do a tail call.
3607 isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
3608 isVarArg, SR != NotStructReturn,
3609 MF.getFunction().hasStructRetAttr(), CLI.RetTy,
3610 Outs, OutVals, Ins, DAG);
3611
3612 // Sibcalls are automatically detected tailcalls which do not require
3613 // ABI changes.
3614 if (!MF.getTarget().Options.GuaranteedTailCallOpt && isTailCall)
3615 IsSibcall = true;
3616
3617 if (isTailCall)
3618 ++NumTailCalls;
3619 }
3620
3621  assert(!(isVarArg && canGuaranteeTCO(CallConv)) &&
3622         "Var args not supported with calling convention fastcc, ghc or hipe");
3623
3624 // Analyze operands of the call, assigning locations to each operand.
3625 SmallVector<CCValAssign, 16> ArgLocs;
3626 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
3627
3628 // Allocate shadow area for Win64.
3629 if (IsWin64)
3630 CCInfo.AllocateStack(32, 8);
3631
3632 CCInfo.AnalyzeArguments(Outs, CC_X86);
3633
3634 // In vectorcall calling convention a second pass is required for the HVA
3635 // types.
3636 if (CallingConv::X86_VectorCall == CallConv) {
3637 CCInfo.AnalyzeArgumentsSecondPass(Outs, CC_X86);
3638 }
3639
3640 // Get a count of how many bytes are to be pushed on the stack.
3641 unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
3642 if (IsSibcall)
3643    // This is a sibcall. The memory operands are already available in the
3644    // caller's incoming argument area (its own caller's stack).
3645 NumBytes = 0;
3646 else if (MF.getTarget().Options.GuaranteedTailCallOpt &&
3647 canGuaranteeTCO(CallConv))
3648 NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
3649
3650 int FPDiff = 0;
3651 if (isTailCall && !IsSibcall && !IsMustTail) {
3652 // Lower arguments at fp - stackoffset + fpdiff.
3653 unsigned NumBytesCallerPushed = X86Info->getBytesToPopOnReturn();
3654
3655 FPDiff = NumBytesCallerPushed - NumBytes;
3656
3657    // Set the delta of movement of the returnaddr stackslot.
3658    // But only update it if the new delta is more negative than the previous one.
3659 if (FPDiff < X86Info->getTCReturnAddrDelta())
3660 X86Info->setTCReturnAddrDelta(FPDiff);
3661 }
3662
3663 unsigned NumBytesToPush = NumBytes;
3664 unsigned NumBytesToPop = NumBytes;
3665
3666 // If we have an inalloca argument, all stack space has already been allocated
3667  // for us and will be right at the top of the stack. We don't support multiple
3668 // arguments passed in memory when using inalloca.
3669 if (!Outs.empty() && Outs.back().Flags.isInAlloca()) {
3670 NumBytesToPush = 0;
3671 if (!ArgLocs.back().isMemLoc())
3672 report_fatal_error("cannot use inalloca attribute on a register "
3673 "parameter");
3674 if (ArgLocs.back().getLocMemOffset() != 0)
3675 report_fatal_error("any parameter with the inalloca attribute must be "
3676 "the only memory argument");
3677 }
3678
3679 if (!IsSibcall)
3680 Chain = DAG.getCALLSEQ_START(Chain, NumBytesToPush,
3681 NumBytes - NumBytesToPush, dl);
3682
3683 SDValue RetAddrFrIdx;
3684 // Load return address for tail calls.
3685 if (isTailCall && FPDiff)
3686 Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall,
3687 Is64Bit, FPDiff, dl);
3688
3689 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
3690 SmallVector<SDValue, 8> MemOpChains;
3691 SDValue StackPtr;
3692
3693 // The next loop assumes that the locations are in the same order of the
3694 // input arguments.
3695  assert(isSortedByValueNo(ArgLocs) &&
3696         "Argument Location list must be sorted before lowering");
3697
3698 // Walk the register/memloc assignments, inserting copies/loads. In the case
3699  // of tail call optimization, arguments are handled later.
3700 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
3701 for (unsigned I = 0, OutIndex = 0, E = ArgLocs.size(); I != E;
3702 ++I, ++OutIndex) {
3703    assert(OutIndex < Outs.size() && "Invalid Out index");
3704 // Skip inalloca arguments, they have already been written.
3705 ISD::ArgFlagsTy Flags = Outs[OutIndex].Flags;
3706 if (Flags.isInAlloca())
3707 continue;
3708
3709 CCValAssign &VA = ArgLocs[I];
3710 EVT RegVT = VA.getLocVT();
3711 SDValue Arg = OutVals[OutIndex];
3712 bool isByVal = Flags.isByVal();
3713
3714 // Promote the value if needed.
3715 switch (VA.getLocInfo()) {
3716    default: llvm_unreachable("Unknown loc info!");
3717 case CCValAssign::Full: break;
3718 case CCValAssign::SExt:
3719 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);
3720 break;
3721 case CCValAssign::ZExt:
3722 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg);
3723 break;
3724 case CCValAssign::AExt:
3725 if (Arg.getValueType().isVector() &&
3726 Arg.getValueType().getVectorElementType() == MVT::i1)
3727 Arg = lowerMasksToReg(Arg, RegVT, dl, DAG);
3728 else if (RegVT.is128BitVector()) {
3729 // Special case: passing MMX values in XMM registers.
3730 Arg = DAG.getBitcast(MVT::i64, Arg);
3731 Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg);
3732 Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg);
3733 } else
3734 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg);
3735 break;
3736 case CCValAssign::BCvt:
3737 Arg = DAG.getBitcast(RegVT, Arg);
3738 break;
3739 case CCValAssign::Indirect: {
3740 if (isByVal) {
3741 // Memcpy the argument to a temporary stack slot to prevent
3742 // the caller from seeing any modifications the callee may make
3743 // as guaranteed by the `byval` attribute.
3744 int FrameIdx = MF.getFrameInfo().CreateStackObject(
3745 Flags.getByValSize(), std::max(16, (int)Flags.getByValAlign()),
3746 false);
3747 SDValue StackSlot =
3748 DAG.getFrameIndex(FrameIdx, getPointerTy(DAG.getDataLayout()));
3749 Chain =
3750 CreateCopyOfByValArgument(Arg, StackSlot, Chain, Flags, DAG, dl);
3751 // From now on treat this as a regular pointer
3752 Arg = StackSlot;
3753 isByVal = false;
3754 } else {
3755 // Store the argument.
3756 SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
3757 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
3758 Chain = DAG.getStore(
3759 Chain, dl, Arg, SpillSlot,
3760 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
3761 Arg = SpillSlot;
3762 }
3763 break;
3764 }
3765 }
3766
3767 if (VA.needsCustom()) {
3768      assert(VA.getValVT() == MVT::v64i1 &&
3769             "Currently the only custom case is when we split v64i1 to 2 regs");
3770 // Split v64i1 value into two registers
3771 Passv64i1ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++I],
3772 Subtarget);
3773 } else if (VA.isRegLoc()) {
3774 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
3775 if (isVarArg && IsWin64) {
3776 // Win64 ABI requires argument XMM reg to be copied to the corresponding
3777 // shadow reg if callee is a varargs function.
3778 unsigned ShadowReg = 0;
3779 switch (VA.getLocReg()) {
3780 case X86::XMM0: ShadowReg = X86::RCX; break;
3781 case X86::XMM1: ShadowReg = X86::RDX; break;
3782 case X86::XMM2: ShadowReg = X86::R8; break;
3783 case X86::XMM3: ShadowReg = X86::R9; break;
3784 }
3785 if (ShadowReg)
3786 RegsToPass.push_back(std::make_pair(ShadowReg, Arg));
3787 }
3788 } else if (!IsSibcall && (!isTailCall || isByVal)) {
3789      assert(VA.isMemLoc());
3790 if (!StackPtr.getNode())
3791 StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
3792 getPointerTy(DAG.getDataLayout()));
3793 MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
3794 dl, DAG, VA, Flags));
3795 }
3796 }
3797
3798 if (!MemOpChains.empty())
3799 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
3800
3801 if (Subtarget.isPICStyleGOT()) {
3802    // ELF / PIC requires the GOT pointer to be in the EBX register before
3803    // function calls made via the PLT.
3804 if (!isTailCall) {
3805 RegsToPass.push_back(std::make_pair(
3806 unsigned(X86::EBX), DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
3807 getPointerTy(DAG.getDataLayout()))));
3808 } else {
3809 // If we are tail calling and generating PIC/GOT style code load the
3810 // address of the callee into ECX. The value in ecx is used as target of
3811 // the tail jump. This is done to circumvent the ebx/callee-saved problem
3812 // for tail calls on PIC/GOT architectures. Normally we would just put the
3813 // address of GOT into ebx and then call target@PLT. But for tail calls
3814 // ebx would be restored (since ebx is callee saved) before jumping to the
3815 // target@PLT.
3816
3817 // Note: The actual moving to ECX is done further down.
3818 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
3819 if (G && !G->getGlobal()->hasLocalLinkage() &&
3820 G->getGlobal()->hasDefaultVisibility())
3821 Callee = LowerGlobalAddress(Callee, DAG);
3822 else if (isa<ExternalSymbolSDNode>(Callee))
3823 Callee = LowerExternalSymbol(Callee, DAG);
3824 }
3825 }
3826
3827 if (Is64Bit && isVarArg && !IsWin64 && !IsMustTail) {
3828 // From AMD64 ABI document:
3829 // For calls that may call functions that use varargs or stdargs
3830 // (prototype-less calls or calls to functions containing ellipsis (...) in
3831 // the declaration) %al is used as hidden argument to specify the number
3832 // of SSE registers used. The contents of %al do not need to match exactly
3833    // the number of registers, but must be an upper bound on the number of SSE
3834    // registers used and must be in the range 0 - 8 inclusive.
3835
3836 // Count the number of XMM registers allocated.
3837 static const MCPhysReg XMMArgRegs[] = {
3838 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
3839 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
3840 };
3841 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs);
3842    assert((Subtarget.hasSSE1() || !NumXMMRegs)
3843           && "SSE registers cannot be used when SSE is disabled");
3844
3845 RegsToPass.push_back(std::make_pair(unsigned(X86::AL),
3846 DAG.getConstant(NumXMMRegs, dl,
3847 MVT::i8)));
3848 }
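  // [Editor's note -- illustrative addition, not part of the original source]
  // Example: a prototype-less call passing two doubles allocates XMM0 and
  // XMM1, so getFirstUnallocated(XMMArgRegs) returns 2 and AL is loaded with
  // 2. Any larger value up to 8 would also satisfy the ABI, since %al only
  // needs to be an upper bound on the number of XMM registers used.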
3849
3850 if (isVarArg && IsMustTail) {
3851 const auto &Forwards = X86Info->getForwardedMustTailRegParms();
3852 for (const auto &F : Forwards) {
3853 SDValue Val = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT);
3854 RegsToPass.push_back(std::make_pair(unsigned(F.PReg), Val));
3855 }
3856 }
3857
3858 // For tail calls lower the arguments to the 'real' stack slots. Sibcalls
3859 // don't need this because the eligibility check rejects calls that require
3860 // shuffling arguments passed in memory.
3861 if (!IsSibcall && isTailCall) {
3862 // Force all the incoming stack arguments to be loaded from the stack
3863 // before any new outgoing arguments are stored to the stack, because the
3864 // outgoing stack slots may alias the incoming argument stack slots, and
3865 // the alias isn't otherwise explicit. This is slightly more conservative
3866 // than necessary, because it means that each store effectively depends
3867 // on every argument instead of just those arguments it would clobber.
3868 SDValue ArgChain = DAG.getStackArgumentTokenFactor(Chain);
3869
3870 SmallVector<SDValue, 8> MemOpChains2;
3871 SDValue FIN;
3872 int FI = 0;
3873 for (unsigned I = 0, OutsIndex = 0, E = ArgLocs.size(); I != E;
3874 ++I, ++OutsIndex) {
3875 CCValAssign &VA = ArgLocs[I];
3876
3877 if (VA.isRegLoc()) {
3878 if (VA.needsCustom()) {
3879 assert((CallConv == CallingConv::X86_RegCall) &&(((CallConv == CallingConv::X86_RegCall) && "Expecting custom case only in regcall calling convention"
) ? static_cast<void> (0) : __assert_fail ("(CallConv == CallingConv::X86_RegCall) && \"Expecting custom case only in regcall calling convention\""
, "/build/llvm-toolchain-snapshot-9~svn362543/lib/Target/X86/X86ISelLowering.cpp"
, 3880, __PRETTY_FUNCTION__))
3880 "Expecting custom case only in regcall calling convention")(((CallConv == CallingConv::X86_RegCall) && "Expecting custom case only in regcall calling convention"
) ? static_cast<void> (0) : __assert_fail ("(CallConv == CallingConv::X86_RegCall) && \"Expecting custom case only in regcall calling convention\""
, "/build/llvm-toolchain-snapshot-9~svn362543/lib/Target/X86/X86ISelLowering.cpp"
, 3880, __PRETTY_FUNCTION__))
;
3881          // This means that we are in a special case where one argument was
3882          // passed through two register locations - skip the next location.
3883 ++I;
3884 }
3885
3886 continue;
3887 }
3888
3889      assert(VA.isMemLoc());
3890 SDValue Arg = OutVals[OutsIndex];
3891 ISD::ArgFlagsTy Flags = Outs[OutsIndex].Flags;
3892 // Skip inalloca arguments. They don't require any work.
3893 if (Flags.isInAlloca())
3894 continue;
3895 // Create frame index.
3896 int32_t Offset = VA.getLocMemOffset()+FPDiff;
3897 uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8;
3898 FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
3899 FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
3900
3901 if (Flags.isByVal()) {
3902 // Copy relative to framepointer.
3903 SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset(), dl);
3904 if (!StackPtr.getNode())
3905 StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
3906 getPointerTy(DAG.getDataLayout()));
3907 Source = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
3908 StackPtr, Source);
3909
3910 MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN,
3911 ArgChain,
3912 Flags, DAG, dl));
3913 } else {
3914 // Store relative to framepointer.
3915 MemOpChains2.push_back(DAG.getStore(
3916 ArgChain, dl, Arg, FIN,
3917 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
3918 }
3919 }
3920
3921 if (!MemOpChains2.empty())
3922 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
3923
3924 // Store the return address to the appropriate stack slot.
3925 Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx,
3926 getPointerTy(DAG.getDataLayout()),
3927 RegInfo->getSlotSize(), FPDiff, dl);
3928 }
3929
3930 // Build a sequence of copy-to-reg nodes chained together with token chain
3931 // and flag operands which copy the outgoing args into registers.
3932 SDValue InFlag;
3933 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
3934 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
3935 RegsToPass[i].second, InFlag);
3936 InFlag = Chain.getValue(1);
3937 }
3938
3939 if (DAG.getTarget().getCodeModel() == CodeModel::Large) {
3940    assert(Is64Bit && "Large code model is only legal in 64-bit mode.");
3941 // In the 64-bit large code model, we have to make all calls
3942 // through a register, since the call instruction's 32-bit
3943 // pc-relative offset may not be large enough to hold the whole
3944 // address.
3945 } else if (Callee->getOpcode() == ISD::GlobalAddress ||
3946 Callee->getOpcode() == ISD::ExternalSymbol) {
3947 // Lower direct calls to global addresses and external symbols. Setting
3948 // ForCall to true here has the effect of removing WrapperRIP when possible
3949 // to allow direct calls to be selected without first materializing the
3950 // address into a register.
3951 Callee = LowerGlobalOrExternal(Callee, DAG, /*ForCall=*/true);
3952 } else if (Subtarget.isTarget64BitILP32() &&
3953 Callee->getValueType(0) == MVT::i32) {
3954    // Zero-extend the 32-bit Callee address into a 64-bit one according to the x32 ABI.
3955 Callee = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Callee);
3956 }
3957
3958 // Returns a chain & a flag for retval copy to use.
3959 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
3960 SmallVector<SDValue, 8> Ops;
3961
3962 if (!IsSibcall && isTailCall) {
3963 Chain = DAG.getCALLSEQ_END(Chain,
3964 DAG.getIntPtrConstant(NumBytesToPop, dl, true),
3965 DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
3966 InFlag = Chain.getValue(1);
3967 }
3968
3969 Ops.push_back(Chain);
3970 Ops.push_back(Callee);
3971
3972 if (isTailCall)
3973 Ops.push_back(DAG.getConstant(FPDiff, dl, MVT::i32));
3974
3975 // Add argument registers to the end of the list so that they are known live
3976 // into the call.
3977 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
3978 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
3979 RegsToPass[i].second.getValueType()));
3980
3981 // Add a register mask operand representing the call-preserved registers.
3982  // If HasNCSR is asserted (the NoCallerSavedRegisters attribute exists), we
3983  // use the X86_INTR calling convention because it has the same CSR mask
3984  // (same preserved registers).
3985 const uint32_t *Mask = RegInfo->getCallPreservedMask(
3986 MF, HasNCSR ? (CallingConv::ID)CallingConv::X86_INTR : CallConv);
3987  assert(Mask && "Missing call preserved mask for calling convention");
3988
3989 // If this is an invoke in a 32-bit function using a funclet-based
3990 // personality, assume the function clobbers all registers. If an exception
3991 // is thrown, the runtime will not restore CSRs.
3992 // FIXME: Model this more precisely so that we can register allocate across
3993 // the normal edge and spill and fill across the exceptional edge.
3994 if (!Is64Bit && CLI.CS && CLI.CS.isInvoke()) {
3995 const Function &CallerFn = MF.getFunction();
3996 EHPersonality Pers =
3997 CallerFn.hasPersonalityFn()
3998 ? classifyEHPersonality(CallerFn.getPersonalityFn())
3999 : EHPersonality::Unknown;
4000 if (isFuncletEHPersonality(Pers))
4001 Mask = RegInfo->getNoPreservedMask();
4002 }
4003
4004 // Define a new register mask from the existing mask.
4005 uint32_t *RegMask = nullptr;
4006
4007 // In some calling conventions we need to remove the used physical registers
4008 // from the reg mask.
4009 if (CallConv == CallingConv::X86_RegCall || HasNCSR) {
4010 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
4011
4012 // Allocate a new Reg Mask and copy Mask.
4013 RegMask = MF.allocateRegMask();
4014 unsigned RegMaskSize = MachineOperand::getRegMaskSize(TRI->getNumRegs());
4015 memcpy(RegMask, Mask, sizeof(RegMask[0]) * RegMaskSize);
4016
4017 // Make sure all sub registers of the argument registers are reset
4018 // in the RegMask.
4019 for (auto const &RegPair : RegsToPass)
4020 for (MCSubRegIterator SubRegs(RegPair.first, TRI, /*IncludeSelf=*/true);
4021 SubRegs.isValid(); ++SubRegs)
4022 RegMask[*SubRegs / 32] &= ~(1u << (*SubRegs % 32));
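    // [Editor's note -- illustrative addition, not part of the original source]
    // The register mask packs one bit per physical register, 32 registers per
    // 32-bit word, so register R lives in word R / 32 at bit position R % 32.
    // A set bit means "preserved across the call"; clearing it marks the
    // argument register (and its sub-registers) as clobbered.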
4023
4024 // Create the RegMask Operand according to our updated mask.
4025 Ops.push_back(DAG.getRegisterMask(RegMask));
4026 } else {
4027 // Create the RegMask Operand according to the static mask.
4028 Ops.push_back(DAG.getRegisterMask(Mask));
4029 }
4030
4031 if (InFlag.getNode())
4032 Ops.push_back(InFlag);
4033
4034 if (isTailCall) {
4035 // We used to do:
4036 //// If this is the first return lowered for this function, add the regs
4037 //// to the liveout set for the function.
4038 // This isn't right, although it's probably harmless on x86; liveouts
4039 // should be computed from returns not tail calls. Consider a void
4040 // function making a tail call to a function returning int.
4041 MF.getFrameInfo().setHasTailCall();
4042 return DAG.getNode(X86ISD::TC_RETURN, dl, NodeTys, Ops);
4043 }
4044
4045 if (HasNoCfCheck && IsCFProtectionSupported) {
4046 Chain = DAG.getNode(X86ISD::NT_CALL, dl, NodeTys, Ops);
4047 } else {
4048 Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, Ops);
4049 }
4050 InFlag = Chain.getValue(1);
4051
4052 // Create the CALLSEQ_END node.
4053 unsigned NumBytesForCalleeToPop;
4054 if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
4055 DAG.getTarget().Options.GuaranteedTailCallOpt))
4056 NumBytesForCalleeToPop = NumBytes; // Callee pops everything
4057 else if (!Is64Bit && !canGuaranteeTCO(CallConv) &&
4058 !Subtarget.getTargetTriple().isOSMSVCRT() &&
4059 SR == StackStructReturn)
4060 // If this is a call to a struct-return function, the callee
4061 // pops the hidden struct pointer, so we have to push it back.
4062 // This is common for Darwin/X86, Linux & Mingw32 targets.
4063 // For MSVC Win32 targets, the caller pops the hidden struct pointer.
4064 NumBytesForCalleeToPop = 4;
4065 else
4066 NumBytesForCalleeToPop = 0; // Callee pops nothing.
4067
4068 if (CLI.DoesNotReturn && !getTargetMachine().Options.TrapUnreachable) {
4069 // No need to reset the stack after the call if the call doesn't return. To
4070    // make the MI verifier happy, we'll pretend the callee does it for us.
4071 NumBytesForCalleeToPop = NumBytes;
4072 }
4073
4074 // Returns a flag for retval copy to use.
4075 if (!IsSibcall) {
4076 Chain = DAG.getCALLSEQ_END(Chain,
4077 DAG.getIntPtrConstant(NumBytesToPop, dl, true),
4078 DAG.getIntPtrConstant(NumBytesForCalleeToPop, dl,
4079 true),
4080 InFlag, dl);
4081 InFlag = Chain.getValue(1);
4082 }
4083
4084 // Handle result values, copying them out of physregs into vregs that we
4085 // return.
4086 return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
4087 InVals, RegMask);
4088}
4089
4090//===----------------------------------------------------------------------===//
4091// Fast Calling Convention (tail call) implementation
4092//===----------------------------------------------------------------------===//
4093
4094// Like stdcall, the callee cleans up the arguments, except that ECX is
4095// reserved for storing the address of the tail-called function. Only 2 registers
4096// are free for argument passing (inreg). Tail call optimization is performed
4097// provided:
4098// * tailcallopt is enabled
4099// * caller/callee are fastcc
4100// On X86_64 architecture with GOT-style position independent code only local
4101// (within module) calls are supported at the moment.
4102// To keep the stack aligned according to the platform ABI, the function
4103// GetAlignedArgumentStackSize ensures that the argument delta is always a
4104// multiple of the stack alignment. (Dynamic linkers need this - darwin's dyld for example.)
4105// If a tail-called callee has more arguments than the caller, the caller needs
4106// to make sure that there is room to move the RETADDR to. This is achieved by
4107// reserving an area the size of the argument delta right after the original
4108// RETADDR, but before the saved frame pointer or the spilled registers,
4109// e.g. caller(arg1, arg2) calls callee(arg1, arg2, arg3, arg4)
4110// stack layout:
4111// arg1
4112// arg2
4113// RETADDR
4114// [ new RETADDR
4115// move area ]
4116// (possible EBP)
4117// ESI
4118// EDI
4119// local1 ..
4120
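// [Editor's illustrative sketch -- not part of X86ISelLowering.cpp. A minimal,
// standalone example of the arithmetic behind the "move area" above, assuming
// a 64-bit target with 8-byte argument slots under GuaranteedTailCallOpt: the
// caller pushed two arguments, the callee needs four, so FPDiff is negative
// and the return address has to move down by |FPDiff| bytes.]
#include <cstdio>

int main() {
  int BytesCallerPushed = 16; // caller(arg1, arg2): 2 slots * 8 bytes
  int BytesCalleeNeeds  = 32; // callee(arg1, arg2, arg3, arg4): 4 slots * 8 bytes
  int FPDiff = BytesCallerPushed - BytesCalleeNeeds;
  std::printf("FPDiff = %d, RETADDR moves %d bytes\n", FPDiff, -FPDiff);
  return 0;
}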
4121/// Align the stack size so that, e.g., it becomes 16n + 12 for a 16-byte
4122/// alignment requirement with 4-byte stack slots.
4123unsigned
4124X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize,
4125 SelectionDAG& DAG) const {
4126 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
4127 const TargetFrameLowering &TFI = *Subtarget.getFrameLowering();
4128 unsigned StackAlignment = TFI.getStackAlignment();
4129 uint64_t AlignMask = StackAlignment - 1;
4130 int64_t Offset = StackSize;
4131 unsigned SlotSize = RegInfo->getSlotSize();
4132 if ( (Offset & AlignMask) <= (StackAlignment - SlotSize) ) {
4133 // Number smaller than 12 so just add the difference.
4134 Offset += ((StackAlignment - SlotSize) - (Offset & AlignMask));
4135 } else {
4136 // Mask out lower bits, add stackalignment once plus the 12 bytes.
4137 Offset = ((~AlignMask) & Offset) + StackAlignment +
4138 (StackAlignment-SlotSize);
4139 }
4140 return Offset;
4141}
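// [Editor's illustrative sketch -- not part of X86ISelLowering.cpp. A
// standalone rendering of the rounding performed above, using a hypothetical
// helper name and assuming a 16-byte stack alignment with an 8-byte slot: the
// result is always of the form 16n + 8 (with 4-byte slots it would be the
// 16n + 12 mentioned in the doc comment), so the stack is aligned again once
// the return address slot is accounted for.]
#include <cassert>
#include <cstdint>

uint64_t alignedArgSize(uint64_t StackSize, uint64_t StackAlignment = 16,
                        uint64_t SlotSize = 8) {
  uint64_t AlignMask = StackAlignment - 1;
  uint64_t Offset = StackSize;
  if ((Offset & AlignMask) <= (StackAlignment - SlotSize))
    Offset += (StackAlignment - SlotSize) - (Offset & AlignMask);
  else
    Offset = ((~AlignMask) & Offset) + StackAlignment + (StackAlignment - SlotSize);
  return Offset;
}

int main() {
  assert(alignedArgSize(0) == 8);
  assert(alignedArgSize(8) == 8);
  assert(alignedArgSize(9) == 24);
  assert(alignedArgSize(24) == 24);
  return 0;
}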
4142
4143/// Return true if the given stack call argument is already available in the
4144/// same position (relatively) of the caller's incoming argument stack.
4145static
4146bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
4147 MachineFrameInfo &MFI, const MachineRegisterInfo *MRI,
4148 const X86InstrInfo *TII, const CCValAssign &VA) {
4149 unsigned Bytes = Arg.getValueSizeInBits() / 8;
4150
4151 for (;;) {
4152 // Look through nodes that don't alter the bits of the incoming value.
4153 unsigned Op = Arg.getOpcode();
4154 if (Op == ISD::ZERO_EXTEND || Op == ISD::ANY_EXTEND || Op == ISD::BITCAST) {
4155 Arg = Arg.getOperand(0);
4156 continue;
4157 }
4158 if (Op == ISD::TRUNCATE) {
4159 const SDValue &TruncInput = Arg.getOperand(0);
4160 if (TruncInput.getOpcode() == ISD::AssertZext &&
4161 cast<VTSDNode>(TruncInput.getOperand(1))->getVT() ==
4162 Arg.getValueType()) {
4163 Arg = TruncInput.getOperand(0);
4164 continue;
4165 }
4166 }
4167 break;
4168 }
4169
4170  int FI = INT_MAX;
4171 if (Arg.getOpcode() == ISD::CopyFromReg) {
4172 unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
4173 if (!TargetRegisterInfo::isVirtualRegister(VR))
4174 return false;
4175 MachineInstr *Def = MRI->getVRegDef(VR);
4176 if (!Def)
4177 return false;
4178 if (!Flags.isByVal()) {
4179 if (!TII->isLoadFromStackSlot(*Def, FI))
4180 return false;
4181 } else {
4182 unsigned Opcode = Def->getOpcode();
4183 if ((Opcode == X86::LEA32r || Opcode == X86::LEA64r ||
4184 Opcode == X86::LEA64_32r) &&
4185 Def->getOperand(1).isFI()) {
4186 FI = Def->getOperand(1).getIndex();
4187 Bytes = Flags.getByValSize();
4188 } else
4189 return false;
4190 }
4191 } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
4192 if (Flags.isByVal())
4193 // ByVal argument is passed in as a pointer but it's now being
4194 // dereferenced. e.g.
4195 // define @foo(%struct.X* %A) {
4196 // tail call @bar(%struct.X* byval %A)
4197 // }
4198 return false;
4199 SDValue Ptr = Ld->getBasePtr();
4200 FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
4201 if (!FINode)
4202 return false;
4203 FI = FINode->getIndex();
4204 } else if (Arg.getOpcode() == ISD::FrameIndex && Flags.isByVal()) {
4205 FrameIndexSDNode *FINode = cast<FrameIndexSDNode>(Arg);
4206 FI = FINode->getIndex();
4207 Bytes = Flags.getByValSize();
4208 } else
4209 return false;
4210
4211  assert(FI != INT_MAX);
4212 if (!MFI.isFixedObjectIndex(FI))
4213 return false;
4214
4215 if (Offset != MFI.getObjectOffset(FI))
4216 return false;
4217
4218 // If this is not byval, check that the argument stack object is immutable.
4219 // inalloca and argument copy elision can create mutable argument stack
4220 // objects. Byval objects can be mutated, but a byval call intends to pass the
4221 // mutated memory.
4222 if (!Flags.isByVal() && !MFI.isImmutableObjectIndex(FI))
4223 return false;
4224
4225 if (VA.getLocVT().getSizeInBits() > Arg.getValueSizeInBits()) {
4226 // If the argument location is wider than the argument type, check that any
4227 // extension flags match.
4228 if (Flags.isZExt() != MFI.isObjectZExt(FI) ||
4229 Flags.isSExt() != MFI.isObjectSExt(FI)) {
4230 return false;
4231 }
4232 }
4233
4234 return Bytes == MFI.getObjectSize(FI);
4235}
4236
4237/// Check whether the call is eligible for tail call optimization. Targets
4238/// that want to do tail call optimization should implement this function.
4239bool X86TargetLowering::IsEligibleForTailCallOptimization(
4240 SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
4241 bool isCalleeStructRet, bool isCallerStructRet, Type *RetTy,
4242 const SmallVectorImpl<ISD::OutputArg> &Outs,
4243 const SmallVectorImpl<SDValue> &OutVals,
4244 const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
4245 if (!mayTailCallThisCC(CalleeCC))
4246 return false;
4247
4248 // If -tailcallopt is specified, make fastcc functions tail-callable.
4249 MachineFunction &MF = DAG.getMachineFunction();
4250 const Function &CallerF = MF.getFunction();
4251
4252 // If the function return type is x86_fp80 and the callee return type is not,
4253 // then the FP_EXTEND of the call result is not a nop. It's not safe to
4254 // perform a tailcall optimization here.
4255 if (CallerF.getReturnType()->isX86_FP80Ty() && !RetTy->isX86_FP80Ty())
4256 return false;
4257
4258 CallingConv::ID CallerCC = CallerF.getCallingConv();
4259 bool CCMatch = CallerCC == CalleeCC;
4260 bool IsCalleeWin64 = Subtarget.isCallingConvWin64(CalleeCC);
4261 bool IsCallerWin64 = Subtarget.isCallingConvWin64(CallerCC);
4262
4263 // Win64 functions have extra shadow space for argument homing. Don't do the
4264 // sibcall if the caller and callee have mismatched expectations for this
4265 // space.
4266 if (IsCalleeWin64 != IsCallerWin64)
4267 return false;
4268
4269 if (DAG.getTarget().Options.GuaranteedTailCallOpt) {
4270 if (canGuaranteeTCO(CalleeCC) && CCMatch)
4271 return true;
4272 return false;
4273 }
4274
4275 // Look for obvious safe cases to perform tail call optimization that do not
4276 // require ABI changes. This is what gcc calls sibcall.
4277
4278 // Can't do sibcall if stack needs to be dynamically re-aligned. PEI needs to
4279 // emit a special epilogue.
4280 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
4281 if (RegInfo->needsStackRealignment(MF))
4282 return false;
4283
4284 // Also avoid sibcall optimization if either caller or callee uses struct
4285 // return semantics.
4286 if (isCalleeStructRet || isCallerStructRet)
4287 return false;
4288
4289 // Do not sibcall optimize vararg calls unless all arguments are passed via
4290 // registers.
4291 LLVMContext &C = *DAG.getContext();
4292 if (isVarArg && !Outs.empty()) {
4293 // Optimizing for varargs on Win64 is unlikely to be safe without
4294 // additional testing.
4295 if (IsCalleeWin64 || IsCallerWin64)
4296 return false;
4297
4298 SmallVector<CCValAssign, 16> ArgLocs;
4299 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
4300
4301 CCInfo.AnalyzeCallOperands(Outs, CC_X86);
4302 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i)
4303 if (!ArgLocs[i].isRegLoc())
4304 return false;
4305 }
4306
4307 // If the call result is in ST0 / ST1, it needs to be popped off the x87
4308 // stack. Therefore, if it's not used by the call it is not safe to optimize
4309 // this into a sibcall.
4310 bool Unused = false;
4311 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
4312 if (!Ins[i].Used) {
4313 Unused = true;
4314 break;
4315 }
4316 }
4317 if (Unused) {
4318 SmallVector<CCValAssign, 16> RVLocs;
4319 CCState CCInfo(CalleeCC, false, MF, RVLocs, C);
4320 CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
4321 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
4322 CCValAssign &VA = RVLocs[i];
4323 if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1)
4324 return false;
4325 }
4326 }
4327
4328 // Check that the call results are passed in the same way.
4329 if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
4330 RetCC_X86, RetCC_X86))
4331 return false;
4332 // The callee has to preserve all registers the caller needs to preserve.
4333 const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
4334 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
4335 if (!CCMatch) {
4336 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
4337 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
4338 return false;
4339 }
4340
4341 unsigned StackArgsSize = 0;
4342
4343 // If the callee takes no arguments then go on to check the results of the
4344 // call.
4345 if (!Outs.empty()) {
4346 // Check if stack adjustment is needed. For now, do not do this if any
4347 // argument is passed on the stack.
4348 SmallVector<CCValAssign, 16> ArgLocs;
4349 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
4350
4351 // Allocate shadow area for Win64
4352 if (IsCalleeWin64)
4353 CCInfo.AllocateStack(32, 8);
4354
4355 CCInfo.AnalyzeCallOperands(Outs, CC_X86);
4356 StackArgsSize = CCInfo.getNextStackOffset();
4357
4358 if (CCInfo.getNextStackOffset()) {
4359 // Check if the arguments are already laid out in the right way as
4360 // the caller's fixed stack objects.
4361 MachineFrameInfo &MFI = MF.getFrameInfo();
4362 const MachineRegisterInfo *MRI = &MF.getRegInfo();
4363 const X86InstrInfo *TII = Subtarget.getInstrInfo();
4364 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4365 CCValAssign &VA = ArgLocs[i];
4366 SDValue Arg = OutVals[i];
4367 ISD::ArgFlagsTy Flags = Outs[i].Flags;
4368 if (VA.getLocInfo() == CCValAssign::Indirect)
4369 return false;
4370 if (!VA.isRegLoc()) {
4371 if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
4372 MFI, MRI, TII, VA))
4373 return false;
4374 }
4375 }
4376 }
4377
4378 bool PositionIndependent = isPositionIndependent();
4379 // If the tailcall address may be in a register, then make sure it's
4380 // possible to register allocate for it. In 32-bit, the call address can
4381 // only target EAX, EDX, or ECX since the tail call must be scheduled after
4382 // callee-saved registers are restored. These happen to be the same
4383 // registers used to pass 'inreg' arguments so watch out for those.
4384 if (!Subtarget.is64Bit() && ((!isa<GlobalAddressSDNode>(Callee) &&
4385 !isa<ExternalSymbolSDNode>(Callee)) ||
4386 PositionIndependent)) {
4387 unsigned NumInRegs = 0;
4388 // In PIC we need an extra register to formulate the address computation
4389 // for the callee.
4390 unsigned MaxInRegs = PositionIndependent ? 2 : 3;
4391
4392 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4393 CCValAssign &VA = ArgLocs[i];
4394 if (!VA.isRegLoc())
4395 continue;
4396 unsigned Reg = VA.getLocReg();
4397 switch (Reg) {
4398 default: break;
4399 case X86::EAX: case X86::EDX: case X86::ECX:
4400 if (++NumInRegs == MaxInRegs)
4401 return false;
4402 break;
4403 }
4404 }
4405 }
4406
4407 const MachineRegisterInfo &MRI = MF.getRegInfo();
4408 if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
4409 return false;
4410 }
4411
4412 bool CalleeWillPop =
4413 X86::isCalleePop(CalleeCC, Subtarget.is64Bit(), isVarArg,
4414 MF.getTarget().Options.GuaranteedTailCallOpt);
4415
4416 if (unsigned BytesToPop =
4417 MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn()) {
4418 // If we have bytes to pop, the callee must pop them.
4419 bool CalleePopMatches = CalleeWillPop && BytesToPop == StackArgsSize;
4420 if (!CalleePopMatches)
4421 return false;
4422 } else if (CalleeWillPop && StackArgsSize > 0) {
4423 // If we don't have bytes to pop, make sure the callee doesn't pop any.
4424 return false;
4425 }
4426
4427 return true;
4428}
4429
4430FastISel *
4431X86TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
4432 const TargetLibraryInfo *libInfo) const {
4433 return X86::createFastISel(funcInfo, libInfo);
4434}
4435
4436//===----------------------------------------------------------------------===//
4437// Other Lowering Hooks
4438//===----------------------------------------------------------------------===//
4439
4440static bool MayFoldLoad(SDValue Op) {
4441 return Op.hasOneUse() && ISD::isNormalLoad(Op.getNode());
4442}
4443
4444static bool MayFoldIntoStore(SDValue Op) {
4445 return Op.hasOneUse() && ISD::isNormalStore(*Op.getNode()->use_begin());
4446}
4447
4448static bool MayFoldIntoZeroExtend(SDValue Op) {
4449 if (Op.hasOneUse()) {
4450 unsigned Opcode = Op.getNode()->use_begin()->getOpcode();
4451 return (ISD::ZERO_EXTEND == Opcode);
4452 }
4453 return false;
4454}
4455
4456static bool isTargetShuffle(unsigned Opcode) {
4457 switch(Opcode) {
4458 default: return false;
4459 case X86ISD::BLENDI:
4460 case X86ISD::PSHUFB:
4461 case X86ISD::PSHUFD:
4462 case X86ISD::PSHUFHW:
4463 case X86ISD::PSHUFLW:
4464 case X86ISD::SHUFP:
4465 case X86ISD::INSERTPS:
4466 case X86ISD::EXTRQI:
4467 case X86ISD::INSERTQI:
4468 case X86ISD::PALIGNR:
4469 case X86ISD::VSHLDQ:
4470 case X86ISD::VSRLDQ:
4471 case X86ISD::MOVLHPS:
4472 case X86ISD::MOVHLPS:
4473 case X86ISD::MOVSHDUP:
4474 case X86ISD::MOVSLDUP:
4475 case X86ISD::MOVDDUP:
4476 case X86ISD::MOVSS:
4477 case X86ISD::MOVSD:
4478 case X86ISD::UNPCKL:
4479 case X86ISD::UNPCKH:
4480 case X86ISD::VBROADCAST:
4481 case X86ISD::VPERMILPI:
4482 case X86ISD::VPERMILPV:
4483 case X86ISD::VPERM2X128:
4484 case X86ISD::SHUF128:
4485 case X86ISD::VPERMIL2:
4486 case X86ISD::VPERMI:
4487 case X86ISD::VPPERM:
4488 case X86ISD::VPERMV:
4489 case X86ISD::VPERMV3:
4490 case X86ISD::VZEXT_MOVL:
4491 return true;
4492 }
4493}
4494
4495static bool isTargetShuffleVariableMask(unsigned Opcode) {
4496 switch (Opcode) {
4497 default: return false;
4498 // Target Shuffles.
4499 case X86ISD::PSHUFB:
4500 case X86ISD::VPERMILPV:
4501 case X86ISD::VPERMIL2:
4502 case X86ISD::VPPERM:
4503 case X86ISD::VPERMV:
4504 case X86ISD::VPERMV3:
4505 return true;
4506 // 'Faux' Target Shuffles.
4507 case ISD::OR:
4508 case ISD::AND:
4509 case X86ISD::ANDNP:
4510 return true;
4511 }
4512}
4513
4514SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const {
4515 MachineFunction &MF = DAG.getMachineFunction();
4516 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
4517 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
4518 int ReturnAddrIndex = FuncInfo->getRAIndex();
4519
4520 if (ReturnAddrIndex == 0) {
4521 // Set up a frame object for the return address.
4522 unsigned SlotSize = RegInfo->getSlotSize();
4523 ReturnAddrIndex = MF.getFrameInfo().CreateFixedObject(SlotSize,
4524 -(int64_t)SlotSize,
4525 false);
4526 FuncInfo->setRAIndex(ReturnAddrIndex);
4527 }
4528
4529 return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy(DAG.getDataLayout()));
4530}
4531
4532bool X86::isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
4533 bool hasSymbolicDisplacement) {
4534 // Offset should fit into 32 bit immediate field.
4535 if (!isInt<32>(Offset))
4536 return false;
4537
4538 // If we don't have a symbolic displacement - we don't have any extra
4539 // restrictions.
4540 if (!hasSymbolicDisplacement)
4541 return true;
4542
4543 // FIXME: Some tweaks might be needed for medium code model.
4544 if (M != CodeModel::Small && M != CodeModel::Kernel)
4545 return false;
4546
4547  // For the small code model we assume that the last object ends at least 16MB
4548  // before the 31-bit address boundary. We may also accept pretty large negative
4549  // constants, knowing that all objects are in the positive half of the address space.
4550 if (M == CodeModel::Small && Offset < 16*1024*1024)
4551 return true;
4552
4553  // For the kernel code model we know that all objects reside in the negative
4554  // half of the 32-bit address space. We cannot accept negative offsets, since
4555  // they may be just out of range, but we may accept pretty large positive ones.
4556 if (M == CodeModel::Kernel && Offset >= 0)
4557 return true;
4558
4559 return false;
4560}
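// [Editor's illustrative sketch -- not part of X86ISelLowering.cpp. A
// hypothetical standalone restatement of the rules above: with a symbolic
// displacement, the small code model accepts offsets below 16MB (including
// negatives) and the kernel code model accepts only non-negative offsets;
// everything must fit a 32-bit immediate.]
#include <cassert>
#include <cstdint>
#include <limits>

enum class Model { Small, Kernel, Large };

bool offsetSuitable(int64_t Offset, Model M, bool HasSymbolicDisplacement) {
  if (Offset < std::numeric_limits<int32_t>::min() ||
      Offset > std::numeric_limits<int32_t>::max())
    return false;                       // must fit a 32-bit immediate field
  if (!HasSymbolicDisplacement)
    return true;
  if (M != Model::Small && M != Model::Kernel)
    return false;
  if (M == Model::Small && Offset < 16 * 1024 * 1024)
    return true;
  return M == Model::Kernel && Offset >= 0;
}

int main() {
  assert(offsetSuitable(1 << 20, Model::Small, true));            // small, well under 16MB
  assert(!offsetSuitable(32 * 1024 * 1024, Model::Small, true));  // small, too large
  assert(!offsetSuitable(-8, Model::Kernel, true));               // kernel, negative
  assert(!offsetSuitable(1LL << 40, Model::Large, false));        // does not fit 32 bits
  return 0;
}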
4561
4562/// Determines whether the callee is required to pop its own arguments.
4563/// Callee pop is necessary to support tail calls.
4564bool X86::isCalleePop(CallingConv::ID CallingConv,
4565 bool is64Bit, bool IsVarArg, bool GuaranteeTCO) {
4566 // If GuaranteeTCO is true, we force some calls to be callee pop so that we
4567 // can guarantee TCO.
4568 if (!IsVarArg && shouldGuaranteeTCO(CallingConv, GuaranteeTCO))
4569 return true;
4570
4571 switch (CallingConv) {
4572 default:
4573 return false;
4574 case CallingConv::X86_StdCall:
4575 case CallingConv::X86_FastCall:
4576 case CallingConv::X86_ThisCall:
4577 case CallingConv::X86_VectorCall:
4578 return !is64Bit;
4579 }
4580}
4581
4582/// Return true if the condition is an unsigned comparison operation.
4583static bool isX86CCUnsigned(unsigned X86CC) {
4584 switch (X86CC) {
4585 default:
4586    llvm_unreachable("Invalid integer condition!");
4587 case X86::COND_E:
4588 case X86::COND_NE:
4589 case X86::COND_B:
4590 case X86::COND_A:
4591 case X86::COND_BE:
4592 case X86::COND_AE:
4593 return true;
4594 case X86::COND_G:
4595 case X86::COND_GE:
4596 case X86::COND_L:
4597 case X86::COND_LE:
4598 return false;
4599 }
4600}
4601
4602static X86::CondCode TranslateIntegerX86CC(ISD::CondCode SetCCOpcode) {
4603 switch (SetCCOpcode) {
4604  default: llvm_unreachable("Invalid integer condition!");
4605 case ISD::SETEQ: return X86::COND_E;
4606 case ISD::SETGT: return X86::COND_G;
4607 case ISD::SETGE: return X86::COND_GE;
4608 case ISD::SETLT: return X86::COND_L;
4609 case ISD::SETLE: return X86::COND_LE;
4610 case ISD::SETNE: return X86::COND_NE;
4611 case ISD::SETULT: return X86::COND_B;
4612 case ISD::SETUGT: return X86::COND_A;
4613 case ISD::SETULE: return X86::COND_BE;
4614 case ISD::SETUGE: return X86::COND_AE;
4615 }
4616}
4617
4618/// Do a one-to-one translation of a ISD::CondCode to the X86-specific
4619/// condition code, returning the condition code and the LHS/RHS of the
4620/// comparison to make.
4621static X86::CondCode TranslateX86CC(ISD::CondCode SetCCOpcode, const SDLoc &DL,
4622 bool isFP, SDValue &LHS, SDValue &RHS,
4623 SelectionDAG &DAG) {
4624 if (!isFP) {
4625 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
4626 if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnesValue()) {
4627 // X > -1 -> X == 0, jump !sign.
4628 RHS = DAG.getConstant(0, DL, RHS.getValueType());
4629 return X86::COND_NS;
4630 }
4631 if (SetCCOpcode == ISD::SETLT && RHSC->isNullValue()) {
4632 // X < 0 -> X == 0, jump on sign.
4633 return X86::COND_S;
4634 }
4635 if (SetCCOpcode == ISD::SETLT && RHSC->getZExtValue() == 1) {
4636 // X < 1 -> X <= 0
4637 RHS = DAG.getConstant(0, DL, RHS.getValueType());
4638 return X86::COND_LE;
4639 }
4640 }
4641
4642 return TranslateIntegerX86CC(SetCCOpcode);
4643 }
4644
4645 // First determine if it is required or is profitable to flip the operands.
4646
4647 // If LHS is a foldable load, but RHS is not, flip the condition.
4648 if (ISD::isNON_EXTLoad(LHS.getNode()) &&
4649 !ISD::isNON_EXTLoad(RHS.getNode())) {
4650 SetCCOpcode = getSetCCSwappedOperands(SetCCOpcode);
4651 std::swap(LHS, RHS);
4652 }
4653
4654 switch (SetCCOpcode) {
4655 default: break;
4656 case ISD::SETOLT:
4657 case ISD::SETOLE:
4658 case ISD::SETUGT:
4659 case ISD::SETUGE:
4660 std::swap(LHS, RHS);
4661 break;
4662 }
4663
4664 // On a floating point condition, the flags are set as follows:
4665 // ZF PF CF op
4666 // 0 | 0 | 0 | X > Y
4667 // 0 | 0 | 1 | X < Y
4668 // 1 | 0 | 0 | X == Y
4669 // 1 | 1 | 1 | unordered
4670 switch (SetCCOpcode) {
4671  default: llvm_unreachable("Condcode should be pre-legalized away");
4672 case ISD::SETUEQ:
4673 case ISD::SETEQ: return X86::COND_E;
4674 case ISD::SETOLT: // flipped
4675 case ISD::SETOGT:
4676 case ISD::SETGT: return X86::COND_A;
4677 case ISD::SETOLE: // flipped
4678 case ISD::SETOGE:
4679 case ISD::SETGE: return X86::COND_AE;
4680 case ISD::SETUGT: // flipped
4681 case ISD::SETULT:
4682 case ISD::SETLT: return X86::COND_B;
4683 case ISD::SETUGE: // flipped
4684 case ISD::SETULE:
4685 case ISD::SETLE: return X86::COND_BE;
4686 case ISD::SETONE:
4687 case ISD::SETNE: return X86::COND_NE;
4688 case ISD::SETUO: return X86::COND_P;
4689 case ISD::SETO: return X86::COND_NP;
4690 case ISD::SETOEQ:
4691 case ISD::SETUNE: return X86::COND_INVALID;
4692 }
4693}
4694
4695/// Is there a floating point cmov for the specific X86 condition code?
4696/// The current x86 ISA includes the following FP cmov instructions:
4697/// fcmovb, fcmovbe, fcmove, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu.
4698static bool hasFPCMov(unsigned X86CC) {
4699 switch (X86CC) {
4700 default:
4701 return false;
4702 case X86::COND_B:
4703 case X86::COND_BE:
4704 case X86::COND_E:
4705 case X86::COND_P:
4706 case X86::COND_A:
4707 case X86::COND_AE:
4708 case X86::COND_NE:
4709 case X86::COND_NP:
4710 return true;
4711 }
4712}
4713
4714
4715bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
4716 const CallInst &I,
4717 MachineFunction &MF,
4718 unsigned Intrinsic) const {
4719
4720 const IntrinsicData* IntrData = getIntrinsicWithChain(Intrinsic);
4721 if (!IntrData)
4722 return false;
4723
4724 Info.flags = MachineMemOperand::MONone;
4725 Info.offset = 0;
4726
4727 switch (IntrData->Type) {
4728 case TRUNCATE_TO_MEM_VI8:
4729 case TRUNCATE_TO_MEM_VI16:
4730 case TRUNCATE_TO_MEM_VI32: {
4731 Info.opc = ISD::INTRINSIC_VOID;
4732 Info.ptrVal = I.getArgOperand(0);
4733 MVT VT = MVT::getVT(I.getArgOperand(1)->getType());
4734 MVT ScalarVT = MVT::INVALID_SIMPLE_VALUE_TYPE;
4735 if (IntrData->Type == TRUNCATE_TO_MEM_VI8)
4736 ScalarVT = MVT::i8;
4737 else if (IntrData->Type == TRUNCATE_TO_MEM_VI16)
4738 ScalarVT = MVT::i16;
4739 else if (IntrData->Type == TRUNCATE_TO_MEM_VI32)
4740 ScalarVT = MVT::i32;
4741
4742 Info.memVT = MVT::getVectorVT(ScalarVT, VT.getVectorNumElements());
4743 Info.align = 1;
4744 Info.flags |= MachineMemOperand::MOStore;
4745 break;
4746 }
4747 case GATHER:
4748 case GATHER_AVX2: {
4749 Info.opc = ISD::INTRINSIC_W_CHAIN;
4750 Info.ptrVal = nullptr;
4751 MVT DataVT = MVT::getVT(I.getType());
4752 MVT IndexVT = MVT::getVT(I.getArgOperand(2)->getType());
4753 unsigned NumElts = std::min(DataVT.getVectorNumElements(),
4754 IndexVT.getVectorNumElements());
4755 Info.memVT = MVT::getVectorVT(DataVT.getVectorElementType(), NumElts);
4756 Info.align = 1;
4757 Info.flags |= MachineMemOperand::MOLoad;
4758 break;
4759 }
4760 case SCATTER: {
4761 Info.opc = ISD::INTRINSIC_VOID;
4762 Info.ptrVal = nullptr;
4763 MVT DataVT = MVT::getVT(I.getArgOperand(3)->getType());
4764 MVT IndexVT = MVT::getVT(I.getArgOperand(2)->getType());
4765 unsigned NumElts = std::min(DataVT.getVectorNumElements(),
4766 IndexVT.getVectorNumElements());
4767 Info.memVT = MVT::getVectorVT(DataVT.getVectorElementType(), NumElts);
4768 Info.align = 1;
4769 Info.flags |= MachineMemOperand::MOStore;
4770 break;
4771 }
4772 default:
4773 return false;
4774 }
4775
4776 return true;
4777}
4778
4779/// Returns true if the target can instruction select the
4780/// specified FP immediate natively. If false, the legalizer will
4781/// materialize the FP immediate as a load from a constant pool.
4782bool X86TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
4783 bool ForCodeSize) const {
4784 for (unsigned i = 0, e = LegalFPImmediates.size(); i != e; ++i) {
4785 if (Imm.bitwiseIsEqual(LegalFPImmediates[i]))
4786 return true;
4787 }
4788 return false;
4789}
4790
4791bool X86TargetLowering::shouldReduceLoadWidth(SDNode *Load,
4792 ISD::LoadExtType ExtTy,
4793 EVT NewVT) const {
4794 // "ELF Handling for Thread-Local Storage" specifies that R_X86_64_GOTTPOFF
4795  // relocations must target a movq or addq instruction: don't let the load shrink.
4796 SDValue BasePtr = cast<LoadSDNode>(Load)->getBasePtr();
4797 if (BasePtr.getOpcode() == X86ISD::WrapperRIP)
4798 if (const auto *GA = dyn_cast<GlobalAddressSDNode>(BasePtr.getOperand(0)))
4799 return GA->getTargetFlags() != X86II::MO_GOTTPOFF;
4800 return true;
4801}
4802
4803/// Returns true if it is beneficial to convert a load of a constant
4804/// to just the constant itself.
4805bool X86TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
4806 Type *Ty) const {
4807  assert(Ty->isIntegerTy());
4808
4809 unsigned BitSize = Ty->getPrimitiveSizeInBits();
4810 if (BitSize == 0 || BitSize > 64)
4811 return false;
4812 return true;
4813}
4814
4815bool X86TargetLowering::reduceSelectOfFPConstantLoads(bool IsFPSetCC) const {
4816 // If we are using XMM registers in the ABI and the condition of the select is
4817 // a floating-point compare and we have blendv or conditional move, then it is
4818 // cheaper to select instead of doing a cross-register move and creating a
4819 // load that depends on the compare result.
4820 return !IsFPSetCC || !Subtarget.isTarget64BitLP64() || !Subtarget.hasAVX();
4821}
4822
4823bool X86TargetLowering::convertSelectOfConstantsToMath(EVT VT) const {
4824 // TODO: It might be a win to ease or lift this restriction, but the generic
4825 // folds in DAGCombiner conflict with vector folds for an AVX512 target.
4826 if (VT.isVector() && Subtarget.hasAVX512())
4827 return false;
4828
4829 return true;
4830}
4831
4832bool X86TargetLowering::decomposeMulByConstant(EVT VT, SDValue C) const {
4833 // TODO: We handle scalars using custom code, but generic combining could make
4834 // that unnecessary.
4835 APInt MulC;
4836 if (!ISD::isConstantSplatVector(C.getNode(), MulC))
4837 return false;
4838
4839 // If vector multiply is legal, assume that's faster than shl + add/sub.
4840  // TODO: Multiply is a complex op with higher latency and lower throughput in
4841 // most implementations, so this check could be loosened based on type
4842 // and/or a CPU attribute.
4843 if (isOperationLegal(ISD::MUL, VT))
4844 return false;
4845
4846 // shl+add, shl+sub, shl+add+neg
4847 return (MulC + 1).isPowerOf2() || (MulC - 1).isPowerOf2() ||
4848 (1 - MulC).isPowerOf2() || (-(MulC + 1)).isPowerOf2();
4849}
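// [Editor's illustrative sketch -- not part of X86ISelLowering.cpp. The
// constants accepted above are exactly those one step away from a power of
// two, so the multiply can be rewritten as a shift plus an add/sub (and
// possibly a negate). A quick, standalone check of those identities:]
#include <cassert>
#include <cstdint>

int main() {
  int64_t x = 12345;
  assert(x * 9 == (x << 3) + x);     // MulC - 1 is a power of two
  assert(x * 7 == (x << 3) - x);     // MulC + 1 is a power of two
  assert(x * -3 == x - (x << 2));    // 1 - MulC is a power of two
  assert(x * -9 == -((x << 3) + x)); // -(MulC + 1) is a power of two
  return 0;
}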
4850
4851bool X86TargetLowering::shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT,
4852 bool IsSigned) const {
4853 // f80 UINT_TO_FP is more efficient using Strict code if FCMOV is available.
4854 return !IsSigned && FpVT == MVT::f80 && Subtarget.hasCMov();
4855}
4856
4857bool X86TargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
4858 unsigned Index) const {
4859 if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
4860 return false;
4861
4862 // Mask vectors support all subregister combinations and operations that
4863  // extract half of a vector.
4864 if (ResVT.getVectorElementType() == MVT::i1)
4865 return Index == 0 || ((ResVT.getSizeInBits() == SrcVT.getSizeInBits()*2) &&
4866 (Index == ResVT.getVectorNumElements()));
4867
4868 return (Index % ResVT.getVectorNumElements()) == 0;
4869}
4870
4871bool X86TargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
4872 // If the vector op is not supported, try to convert to scalar.
4873 EVT VecVT = VecOp.getValueType();
4874 if (!isOperationLegalOrCustomOrPromote(VecOp.getOpcode(), VecVT))
4875 return true;
4876
4877 // If the vector op is supported, but the scalar op is not, the transform may
4878 // not be worthwhile.
4879 EVT ScalarVT = VecVT.getScalarType();
4880 return isOperationLegalOrCustomOrPromote(VecOp.getOpcode(), ScalarVT);
4881}
4882
4883bool X86TargetLowering::shouldFormOverflowOp(unsigned Opcode, EVT VT) const {
4884 // TODO: Allow vectors?
4885 if (VT.isVector())
4886 return false;
4887 return VT.isSimple() || !isOperationExpand(Opcode, VT);
4888}
4889
4890bool X86TargetLowering::isCheapToSpeculateCttz() const {
4891 // Speculate cttz only if we can directly use TZCNT.
4892 return Subtarget.hasBMI();
4893}
4894
4895bool X86TargetLowering::isCheapToSpeculateCtlz() const {
4896 // Speculate ctlz only if we can directly use LZCNT.
4897 return Subtarget.hasLZCNT();
4898}
4899
4900bool X86TargetLowering::isLoadBitCastBeneficial(EVT LoadVT,
4901 EVT BitcastVT) const {
4902 if (!Subtarget.hasAVX512() && !LoadVT.isVector() && BitcastVT.isVector() &&
4903 BitcastVT.getVectorElementType() == MVT::i1)
4904 return false;
4905
4906 if (!Subtarget.hasDQI() && BitcastVT == MVT::v8i1 && LoadVT == MVT::i8)
4907 return false;
4908
4909 return TargetLowering::isLoadBitCastBeneficial(LoadVT, BitcastVT);
4910}
4911
4912bool X86TargetLowering::canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
4913 const SelectionDAG &DAG) const {
4914  // Do not merge to float value size (128 bits) if no implicit
4915  // float attribute is set.
4916 bool NoFloat = DAG.getMachineFunction().getFunction().hasFnAttribute(
4917 Attribute::NoImplicitFloat);
4918
4919 if (NoFloat) {
4920 unsigned MaxIntSize = Subtarget.is64Bit() ? 64 : 32;
4921 return (MemVT.getSizeInBits() <= MaxIntSize);
4922 }
4923  // Make sure we don't merge stores to a size greater than our preferred
4924  // vector width.
4925 if (MemVT.getSizeInBits() > Subtarget.getPreferVectorWidth())
4926 return false;
4927 return true;
4928}
4929
4930bool X86TargetLowering::isCtlzFast() const {
4931 return Subtarget.hasFastLZCNT();
4932}
4933
4934bool X86TargetLowering::isMaskAndCmp0FoldingBeneficial(
4935 const Instruction &AndI) const {
4936 return true;
4937}
4938
4939bool X86TargetLowering::hasAndNotCompare(SDValue Y) const {
4940 EVT VT = Y.getValueType();
4941
4942 if (VT.isVector())
4943 return false;
4944
4945 if (!Subtarget.hasBMI())
4946 return false;
4947
4948 // There are only 32-bit and 64-bit forms for 'andn'.
4949 if (VT != MVT::i32 && VT != MVT::i64)
4950 return false;
4951
4952 return !isa<ConstantSDNode>(Y);
4953}
4954
4955bool X86TargetLowering::hasAndNot(SDValue Y) const {
4956 EVT VT = Y.getValueType();
4957
4958 if (!VT.isVector())
4959 return hasAndNotCompare(Y);
4960
4961 // Vector.
4962
4963 if (!Subtarget.hasSSE1() || VT.getSizeInBits() < 128)
4964 return false;
4965
4966 if (VT == MVT::v4i32)
4967 return true;
4968
4969 return Subtarget.hasSSE2();
4970}
4971
4972bool X86TargetLowering::shouldFoldConstantShiftPairToMask(
4973 const SDNode *N, CombineLevel Level) const {
4974  assert(((N->getOpcode() == ISD::SHL &&
4975           N->getOperand(0).getOpcode() == ISD::SRL) ||
4976          (N->getOpcode() == ISD::SRL &&
4977           N->getOperand(0).getOpcode() == ISD::SHL)) &&
4978         "Expected shift-shift mask");
4979 EVT VT = N->getValueType(0);
4980 if ((Subtarget.hasFastVectorShiftMasks() && VT.isVector()) ||
4981 (Subtarget.hasFastScalarShiftMasks() && !VT.isVector())) {
4982 // Only fold if the shift values are equal - so it folds to AND.
4983 // TODO - we should fold if either is a non-uniform vector but we don't do
4984 // the fold for non-splats yet.
4985 return N->getOperand(1) == N->getOperand(0).getOperand(1);
4986 }
4987 return TargetLoweringBase::shouldFoldConstantShiftPairToMask(N, Level);
4988}
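
The fold gated here rewrites a constant shift pair with equal amounts into a single AND, using the identities (x >> c) << c == x & (~0u << c) and (x << c) >> c == x & (~0u >> c). A small standalone check of those identities for 32-bit unsigned values (illustration only; the DAG combine itself lives elsewhere):

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t x = 0xDEADBEEFu;
  for (unsigned c = 0; c < 32; ++c) {
    // srl then shl by the same amount clears the low c bits...
    assert(((x >> c) << c) == (x & (0xFFFFFFFFu << c)));
    // ...and shl then srl by the same amount clears the high c bits.
    assert(((x << c) >> c) == (x & (0xFFFFFFFFu >> c)));
  }
  return 0;
}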
4989
4990bool X86TargetLowering::shouldFoldMaskToVariableShiftPair(SDValue Y) const {
4991 EVT VT = Y.getValueType();
4992
4993 // For vectors, we don't have a preference, but we probably want a mask.
4994 if (VT.isVector())
4995 return false;
4996
4997 // 64-bit shifts on 32-bit targets produce really bad bloated code.
4998 if (VT == MVT::i64 && !Subtarget.is64Bit())
4999 return false;
5000
5001 return true;
5002}
5003
5004bool X86TargetLowering::shouldSplatInsEltVarIndex(EVT VT) const {
5005 // Any legal vector type can be splatted more efficiently than
5006 // loading/spilling from memory.
5007 return isTypeLegal(VT);
5008}
5009
5010MVT X86TargetLowering::hasFastEqualityCompare(unsigned NumBits) const {
5011 MVT VT = MVT::getIntegerVT(NumBits);
5012 if (isTypeLegal(VT))
5013 return VT;
5014
5015 // PMOVMSKB can handle this.
5016 if (NumBits == 128 && isTypeLegal(MVT::v16i8))
5017 return MVT::v16i8;
5018
5019 // VPMOVMSKB can handle this.
5020 if (NumBits == 256 && isTypeLegal(MVT::v32i8))
5021 return MVT::v32i8;
5022
5023 // TODO: Allow 64-bit type for 32-bit target.
5024 // TODO: 512-bit types should be allowed, but make sure that those
5025 // cases are handled in combineVectorSizedSetCCEquality().
5026
5027 return MVT::INVALID_SIMPLE_VALUE_TYPE;
5028}
5029
5030/// Val is the undef sentinel value or equal to the specified value.
5031static bool isUndefOrEqual(int Val, int CmpVal) {
5032 return ((Val == SM_SentinelUndef) || (Val == CmpVal));
5033}
5034
5035/// Val is either the undef or zero sentinel value.
5036static bool isUndefOrZero(int Val) {
5037 return ((Val == SM_SentinelUndef) || (Val == SM_SentinelZero));
5038}
5039
5040/// Return true if every element in Mask, beginning from position Pos and ending
5041/// in Pos+Size is the undef sentinel value.
5042static bool isUndefInRange(ArrayRef<int> Mask, unsigned Pos, unsigned Size) {
5043 for (unsigned i = Pos, e = Pos + Size; i != e; ++i)
5044 if (Mask[i] != SM_SentinelUndef)
5045 return false;
5046 return true;
5047}
5048
5049/// Return true if the mask creates a vector whose lower half is undefined.
5050static bool isUndefLowerHalf(ArrayRef<int> Mask) {
5051 unsigned NumElts = Mask.size();
5052 return isUndefInRange(Mask, 0, NumElts / 2);
5053}
5054
5055/// Return true if the mask creates a vector whose upper half is undefined.
5056static bool isUndefUpperHalf(ArrayRef<int> Mask) {
5057 unsigned NumElts = Mask.size();
5058 return isUndefInRange(Mask, NumElts / 2, NumElts / 2);
5059}
5060
5061/// Return true if Val falls within the specified range [Low, Hi).
5062static bool isInRange(int Val, int Low, int Hi) {
5063 return (Val >= Low && Val < Hi);
5064}
5065
5066/// Return true if the value of any element in Mask falls within the specified
5067/// range [Low, Hi).
5068static bool isAnyInRange(ArrayRef<int> Mask, int Low, int Hi) {
5069 for (int M : Mask)
5070 if (isInRange(M, Low, Hi))
5071 return true;
5072 return false;
5073}
5074
5075/// Return true if Val is undef or if its value falls within the
5076/// specified range [Low, Hi).
5077static bool isUndefOrInRange(int Val, int Low, int Hi) {
5078 return (Val == SM_SentinelUndef) || isInRange(Val, Low, Hi);
5079}
5080
5081/// Return true if every element in Mask is undef or if its value
5082/// falls within the specified range [Low, Hi).
5083static bool isUndefOrInRange(ArrayRef<int> Mask,
5084 int Low, int Hi) {
5085 for (int M : Mask)
5086 if (!isUndefOrInRange(M, Low, Hi))
5087 return false;
5088 return true;
5089}
5090
5091/// Return true if Val is undef, zero or if its value falls within the
5092/// specified range [Low, Hi).
5093static bool isUndefOrZeroOrInRange(int Val, int Low, int Hi) {
5094 return isUndefOrZero(Val) || isInRange(Val, Low, Hi);
5095}
5096
5097/// Return true if every element in Mask is undef, zero or if its value
5098/// falls within the specified range [Low, Hi).
5099static bool isUndefOrZeroOrInRange(ArrayRef<int> Mask, int Low, int Hi) {
5100 for (int M : Mask)
5101 if (!isUndefOrZeroOrInRange(M, Low, Hi))
5102 return false;
5103 return true;
5104}
5105
5106/// Return true if every element in Mask, beginning
5107/// from position Pos and ending in Pos + Size, falls within the specified
5108/// sequence (Low, Low + Step, ..., Low + (Size - 1) * Step) or is undef.
5109static bool isSequentialOrUndefInRange(ArrayRef<int> Mask, unsigned Pos,
5110 unsigned Size, int Low, int Step = 1) {
5111 for (unsigned i = Pos, e = Pos + Size; i != e; ++i, Low += Step)
5112 if (!isUndefOrEqual(Mask[i], Low))
5113 return false;
5114 return true;
5115}
5116
5117/// Return true if every element in Mask, beginning
5118/// from position Pos and ending in Pos+Size, falls within the specified
5119/// sequential range [Low, Low+Size), or is undef or is zero.
5120static bool isSequentialOrUndefOrZeroInRange(ArrayRef<int> Mask, unsigned Pos,
5121 unsigned Size, int Low) {
5122 for (unsigned i = Pos, e = Pos + Size; i != e; ++i, ++Low)
5123 if (!isUndefOrZero(Mask[i]) && Mask[i] != Low)
5124 return false;
5125 return true;
5126}
5127
5128/// Return true if every element in Mask, beginning
5129/// from position Pos and ending in Pos+Size is undef or is zero.
5130static bool isUndefOrZeroInRange(ArrayRef<int> Mask, unsigned Pos,
5131 unsigned Size) {
5132 for (unsigned i = Pos, e = Pos + Size; i != e; ++i)
5133 if (!isUndefOrZero(Mask[i]))
5134 return false;
5135 return true;
5136}
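
These predicates all treat shuffle mask elements uniformly: a real lane index, the undef sentinel (-1) for "don't care", or the zero sentinel (-2) for "force this lane to zero" (the latter is noted before canWidenShuffleElements below). A standalone sketch of scanning such a mask, with the sentinel encodings assumed here as stand-ins rather than the LLVM definitions:

#include <cassert>
#include <vector>

// Assumed sentinel encodings, mirroring the convention described above.
constexpr int kSentinelUndef = -1;
constexpr int kSentinelZero  = -2;

// True if every mask element is undef, zero, or a lane index in [Low, Hi).
static bool allUndefZeroOrInRange(const std::vector<int> &Mask, int Low, int Hi) {
  for (int M : Mask)
    if (M != kSentinelUndef && M != kSentinelZero && (M < Low || M >= Hi))
      return false;
  return true;
}

int main() {
  // A v4 shuffle that only reads from the first operand (lanes 0..3).
  assert(allUndefZeroOrInRange({0, kSentinelUndef, kSentinelZero, 3}, 0, 4));
  // Lane 5 would come from the second operand, so it is out of range here.
  assert(!allUndefZeroOrInRange({0, 1, 5, kSentinelUndef}, 0, 4));
  return 0;
}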
5137
5138/// Helper function to test whether a shuffle mask could be
5139/// simplified by widening the elements being shuffled.
5140///
5141/// Appends the mask for wider elements in WidenedMask if valid. Otherwise
5142/// leaves it in an unspecified state.
5143///
5144/// NOTE: This must handle normal vector shuffle masks and *target* vector
5145/// shuffle masks. The latter have the special property of a '-2' representing
5146/// a zeroed lane of a vector.
5147static bool canWidenShuffleElements(ArrayRef<int> Mask,
5148 SmallVectorImpl<int> &WidenedMask) {
5149 WidenedMask.assign(Mask.size() / 2, 0);
5150 for (int i = 0, Size = Mask.size(); i < Size; i += 2) {
5151 int M0 = Mask[i];
5152 int M1 = Mask[i + 1];
5153
5154    // If both elements are undef, it's trivial.
5155 if (M0 == SM_SentinelUndef && M1 == SM_SentinelUndef) {
5156 WidenedMask[i / 2] = SM_SentinelUndef;
5157 continue;
5158 }
5159
5160 // Check for an undef mask and a mask value properly aligned to fit with
5161 // a pair of values. If we find such a case, use the non-undef mask's value.
5162 if (M0 == SM_SentinelUndef && M1 >= 0 && (M1 % 2) == 1) {
5163 WidenedMask[i / 2] = M1 / 2;
5164 continue;
5165 }
5166 if (M1 == SM_SentinelUndef && M0 >= 0 && (M0 % 2) == 0) {
5167 WidenedMask[i / 2] = M0 / 2;
5168 continue;
5169 }
5170
5171 // When zeroing, we need to spread the zeroing across both lanes to widen.
5172 if (M0 == SM_SentinelZero || M1 == SM_SentinelZero) {
5173 if ((M0 == SM_SentinelZero || M0 == SM_SentinelUndef) &&
5174 (M1 == SM_SentinelZero || M1 == SM_SentinelUndef)) {
5175 WidenedMask[i / 2] = SM_SentinelZero;
5176 continue;
5177 }
5178 return false;
5179 }
5180
5181 // Finally check if the two mask values are adjacent and aligned with
5182 // a pair.
5183 if (M0 != SM_SentinelUndef && (M0 % 2) == 0 && (M0 + 1) == M1) {
5184 WidenedMask[i / 2] = M0 / 2;
5185 continue;
5186 }
5187
5188 // Otherwise we can't safely widen the elements used in this shuffle.
5189 return false;
5190 }
5191  assert(WidenedMask.size() == Mask.size() / 2 &&
5192         "Incorrect size of mask after widening the elements!");
5193
5194 return true;
5195}
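
In other words, a pair of adjacent mask elements can be widened when both are undef, both are undef/zero, or they address an aligned (even, even+1) pair of source lanes, in which case the widened index is simply M0/2. A standalone re-implementation of just that pairing rule on plain integer masks (sentinels as assumed above; this is a sketch, not the LLVM routine):

#include <cassert>
#include <vector>

constexpr int kUndef = -1; // assumed stand-in for SM_SentinelUndef
constexpr int kZero  = -2; // assumed stand-in for SM_SentinelZero

// Widens pairs of elements of a width-2N mask into a width-N mask.
static bool widenMask(const std::vector<int> &Mask, std::vector<int> &Wide) {
  Wide.assign(Mask.size() / 2, 0);
  for (size_t i = 0; i + 1 < Mask.size(); i += 2) {
    int M0 = Mask[i], M1 = Mask[i + 1];
    if (M0 == kUndef && M1 == kUndef)           { Wide[i / 2] = kUndef; continue; }
    if (M0 == kUndef && M1 >= 0 && M1 % 2 == 1) { Wide[i / 2] = M1 / 2; continue; }
    if (M1 == kUndef && M0 >= 0 && M0 % 2 == 0) { Wide[i / 2] = M0 / 2; continue; }
    if (M0 == kZero || M1 == kZero) {
      // Only sentinels (undef/zero) may share a lane with a zeroed element.
      if (M0 < 0 && M1 < 0) { Wide[i / 2] = kZero; continue; }
      return false;
    }
    if (M0 >= 0 && M0 % 2 == 0 && M1 == M0 + 1) { Wide[i / 2] = M0 / 2; continue; }
    return false;
  }
  return true;
}

int main() {
  std::vector<int> Wide;
  assert(widenMask({0, 1, 6, 7}, Wide) && Wide == std::vector<int>({0, 3}));
  assert(!widenMask({1, 2, 4, 5}, Wide)); // {1,2} is not an aligned pair
  return 0;
}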
5196
5197static bool canWidenShuffleElements(ArrayRef<int> Mask,
5198 const APInt &Zeroable,
5199 SmallVectorImpl<int> &WidenedMask) {
5200 SmallVector<int, 32> TargetMask(Mask.begin(), Mask.end());
5201 for (int i = 0, Size = TargetMask.size(); i < Size; ++i) {
5202 if (TargetMask[i] == SM_SentinelUndef)
5203 continue;
5204 if (Zeroable[i])
5205 TargetMask[i] = SM_SentinelZero;
5206 }
5207 return canWidenShuffleElements(TargetMask, WidenedMask);
5208}
5209
5210static bool canWidenShuffleElements(ArrayRef<int> Mask) {
5211 SmallVector<int, 32> WidenedMask;
5212 return canWidenShuffleElements(Mask, WidenedMask);
5213}
5214
5215/// Returns true if Elt is a constant zero or a floating point constant +0.0.
5216bool X86::isZeroNode(SDValue Elt) {
5217 return isNullConstant(Elt) || isNullFPConstant(Elt);
5218}
5219
5220// Build a vector of constants.
5221// Use an UNDEF node if MaskElt == -1.
5222// Split 64-bit constants in 32-bit mode.
5223static SDValue getConstVector(ArrayRef<int> Values, MVT VT, SelectionDAG &DAG,
5224 const SDLoc &dl, bool IsMask = false) {
5225
5226 SmallVector<SDValue, 32> Ops;
5227 bool Split = false;
5228
5229 MVT ConstVecVT = VT;
5230 unsigned NumElts = VT.getVectorNumElements();
5231 bool In64BitMode = DAG.getTargetLoweringInfo().isTypeLegal(MVT::i64);
5232 if (!In64BitMode && VT.getVectorElementType() == MVT::i64) {
5233 ConstVecVT = MVT::getVectorVT(MVT::i32, NumElts * 2);
5234 Split = true;
5235 }
5236
5237 MVT EltVT = ConstVecVT.getVectorElementType();
5238 for (unsigned i = 0; i < NumElts; ++i) {
5239 bool IsUndef = Values[i] < 0 && IsMask;
5240 SDValue OpNode = IsUndef ? DAG.getUNDEF(EltVT) :
5241 DAG.getConstant(Values[i], dl, EltVT);
5242 Ops.push_back(OpNode);
5243 if (Split)
5244 Ops.push_back(IsUndef ? DAG.getUNDEF(EltVT) :
5245 DAG.getConstant(0, dl, EltVT));
5246 }
5247 SDValue ConstsNode = DAG.getBuildVector(ConstVecVT, dl, Ops);
5248 if (Split)
5249 ConstsNode = DAG.getBitcast(VT, ConstsNode);
5250 return ConstsNode;
5251}
5252
5253static SDValue getConstVector(ArrayRef<APInt> Bits, APInt &Undefs,
5254 MVT VT, SelectionDAG &DAG, const SDLoc &dl) {
5255  assert(Bits.size() == Undefs.getBitWidth() &&
5256         "Unequal constant and undef arrays");
5257 SmallVector<SDValue, 32> Ops;
5258 bool Split = false;
5259
5260 MVT ConstVecVT = VT;
5261 unsigned NumElts = VT.getVectorNumElements();
5262 bool In64BitMode = DAG.getTargetLoweringInfo().isTypeLegal(MVT::i64);
5263 if (!In64BitMode && VT.getVectorElementType() == MVT::i64) {
5264 ConstVecVT = MVT::getVectorVT(MVT::i32, NumElts * 2);
5265 Split = true;
5266 }
5267
5268 MVT EltVT = ConstVecVT.getVectorElementType();
5269 for (unsigned i = 0, e = Bits.size(); i != e; ++i) {
5270 if (Undefs[i]) {
5271 Ops.append(Split ? 2 : 1, DAG.getUNDEF(EltVT));
5272 continue;
5273 }
5274 const APInt &V = Bits[i];
5275    assert(V.getBitWidth() == VT.getScalarSizeInBits() && "Unexpected sizes");
5276 if (Split) {
5277 Ops.push_back(DAG.getConstant(V.trunc(32), dl, EltVT));
5278 Ops.push_back(DAG.getConstant(V.lshr(32).trunc(32), dl, EltVT));
5279 } else if (EltVT == MVT::f32) {
5280 APFloat FV(APFloat::IEEEsingle(), V);
5281 Ops.push_back(DAG.getConstantFP(FV, dl, EltVT));
5282 } else if (EltVT == MVT::f64) {
5283 APFloat FV(APFloat::IEEEdouble(), V);
5284 Ops.push_back(DAG.getConstantFP(FV, dl, EltVT));
5285 } else {
5286 Ops.push_back(DAG.getConstant(V, dl, EltVT));
5287 }
5288 }
5289
5290 SDValue ConstsNode = DAG.getBuildVector(ConstVecVT, dl, Ops);
5291 return DAG.getBitcast(VT, ConstsNode);
5292}
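
When i64 is not legal (32-bit mode), each 64-bit constant is emitted as a low/high pair of i32 elements via V.trunc(32) and V.lshr(32).trunc(32), and the build vector is bitcast back to the requested type. A standalone illustration of that low/high split on a plain uint64_t, assuming the little-endian element order the code above relies on:

#include <cassert>
#include <cstdint>

int main() {
  const uint64_t V = 0x1122334455667788ULL;
  // Scalar equivalents of V.trunc(32) and V.lshr(32).trunc(32) above.
  const uint32_t Lo = static_cast<uint32_t>(V);
  const uint32_t Hi = static_cast<uint32_t>(V >> 32);
  assert(Lo == 0x55667788u && Hi == 0x11223344u);
  // Reassembling the {Lo, Hi} pair reproduces the original 64-bit value.
  assert(((static_cast<uint64_t>(Hi) << 32) | Lo) == V);
  return 0;
}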
5293
5294/// Returns a vector of specified type with all zero elements.
5295static SDValue getZeroVector(MVT VT, const X86Subtarget &Subtarget,
5296 SelectionDAG &DAG, const SDLoc &dl) {
5297  assert((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector() ||
5298          VT.getVectorElementType() == MVT::i1) &&
5299         "Unexpected vector type");
5300
5301 // Try to build SSE/AVX zero vectors as <N x i32> bitcasted to their dest
5302 // type. This ensures they get CSE'd. But if the integer type is not
5303 // available, use a floating-point +0.0 instead.
5304 SDValue Vec;
5305 if (!Subtarget.hasSSE2() && VT.is128BitVector()) {
5306 Vec = DAG.getConstantFP(+0.0, dl, MVT::v4f32);
5307 } else if (VT.getVectorElementType() == MVT::i1) {
5308    assert((Subtarget.hasBWI() || VT.getVectorNumElements() <= 16) &&
5309           "Unexpected vector type");
5310 Vec = DAG.getConstant(0, dl, VT);
5311 } else {
5312 unsigned Num32BitElts = VT.getSizeInBits() / 32;
5313 Vec = DAG.getConstant(0, dl, MVT::getVectorVT(MVT::i32, Num32BitElts));
5314 }
5315 return DAG.getBitcast(VT, Vec);
5316}
5317
5318static SDValue extractSubVector(SDValue Vec, unsigned IdxVal, SelectionDAG &DAG,
5319 const SDLoc &dl, unsigned vectorWidth) {
5320 EVT VT = Vec.getValueType();
5321 EVT ElVT = VT.getVectorElementType();
5322 unsigned Factor = VT.getSizeInBits()/vectorWidth;
5323 EVT ResultVT = EVT::getVectorVT(*DAG.getContext(), ElVT,
5324 VT.getVectorNumElements()/Factor);
5325
5326 // Extract the relevant vectorWidth bits. Generate an EXTRACT_SUBVECTOR
5327 unsigned ElemsPerChunk = vectorWidth / ElVT.getSizeInBits();
5328  assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2");
5329
5330 // This is the index of the first element of the vectorWidth-bit chunk
5331  // we want. Since ElemsPerChunk is a power of 2, we just need to clear bits.
5332 IdxVal &= ~(ElemsPerChunk - 1);
5333
5334 // If the input is a buildvector just emit a smaller one.
5335 if (Vec.getOpcode() == ISD::BUILD_VECTOR)
5336 return DAG.getBuildVector(ResultVT, dl,
5337 Vec->ops().slice(IdxVal, ElemsPerChunk));
5338
5339 SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, dl);
5340 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResultVT, Vec, VecIdx);
5341}
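
Because ElemsPerChunk is a power of two, rounding IdxVal down to a chunk boundary is a single AND with ~(ElemsPerChunk - 1). A standalone check of that alignment trick (illustration only):

#include <cassert>

int main() {
  // e.g. extracting a 128-bit chunk of v16i16: 8 elements per chunk.
  const unsigned ElemsPerChunk = 8;
  for (unsigned Idx = 0; Idx < 16; ++Idx) {
    unsigned Aligned = Idx & ~(ElemsPerChunk - 1);
    // Same result as ordinary rounding down, but with no division.
    assert(Aligned == (Idx / ElemsPerChunk) * ElemsPerChunk);
  }
  return 0;
}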
5342
5343/// Generate a DAG to grab 128-bits from a vector > 128 bits. This
5344/// sets things up to match to an AVX VEXTRACTF128 / VEXTRACTI128
5345/// or AVX-512 VEXTRACTF32x4 / VEXTRACTI32x4
5346/// instructions or a simple subregister reference. Idx is an index in the
5347/// 128 bits we want. It need not be aligned to a 128-bit boundary. That makes
5348/// lowering EXTRACT_VECTOR_ELT operations easier.
5349static SDValue extract128BitVector(SDValue Vec, unsigned IdxVal,
5350 SelectionDAG &DAG, const SDLoc &dl) {
5351  assert((Vec.getValueType().is256BitVector() ||
5352          Vec.getValueType().is512BitVector()) && "Unexpected vector size!");
5353 return extractSubVector(Vec, IdxVal, DAG, dl, 128);
5354}
5355
5356/// Generate a DAG to grab 256-bits from a 512-bit vector.
5357static SDValue extract256BitVector(SDValue Vec, unsigned IdxVal,
5358 SelectionDAG &DAG, const SDLoc &dl) {
5359  assert(Vec.getValueType().is512BitVector() && "Unexpected vector size!");
5360 return extractSubVector(Vec, IdxVal, DAG, dl, 256);
5361}
5362
5363static SDValue insertSubVector(SDValue Result, SDValue Vec, unsigned IdxVal,
5364 SelectionDAG &DAG, const SDLoc &dl,
5365 unsigned vectorWidth) {
5366  assert((vectorWidth == 128 || vectorWidth == 256) &&
5367         "Unsupported vector width");
5368  // Inserting an UNDEF subvector is a no-op; just return Result.
5369 if (Vec.isUndef())
5370 return Result;
5371 EVT VT = Vec.getValueType();
5372 EVT ElVT = VT.getVectorElementType();
5373 EVT ResultVT = Result.getValueType();
5374
5375 // Insert the relevant vectorWidth bits.
5376 unsigned ElemsPerChunk = vectorWidth/ElVT.getSizeInBits();
5377  assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2");
5378
5379 // This is the index of the first element of the vectorWidth-bit chunk
5380  // we want. Since ElemsPerChunk is a power of 2, we just need to clear bits.
5381 IdxVal &= ~(ElemsPerChunk - 1);
5382
5383 SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, dl);
5384 return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Result, Vec, VecIdx);
5385}
5386
5387/// Generate a DAG to put 128-bits into a vector > 128 bits. This
5388/// sets things up to match to an AVX VINSERTF128/VINSERTI128 or
5389/// AVX-512 VINSERTF32x4/VINSERTI32x4 instructions or a
5390/// simple superregister reference. Idx is an index in the 128 bits
5391/// we want. It need not be aligned to a 128-bit boundary. That makes
5392/// lowering INSERT_VECTOR_ELT operations easier.
5393static SDValue insert128BitVector(SDValue Result, SDValue Vec, unsigned IdxVal,
5394 SelectionDAG &DAG, const SDLoc &dl) {
5395  assert(Vec.getValueType().is128BitVector() && "Unexpected vector size!");
5396 return insertSubVector(Result, Vec, IdxVal, DAG, dl, 128);
5397}
5398
5399/// Widen a vector to a larger size with the same scalar type, with the new
5400/// elements either zero or undef.
5401static SDValue widenSubVector(MVT VT, SDValue Vec, bool ZeroNewElements,
5402 const X86Subtarget &Subtarget, SelectionDAG &DAG,
5403 const SDLoc &dl) {
5404  assert(Vec.getValueSizeInBits() < VT.getSizeInBits() &&
5405         Vec.getValueType().getScalarType() == VT.getScalarType() &&
5406         "Unsupported vector widening type");
5407 SDValue Res = ZeroNewElements ? getZeroVector(VT, Subtarget, DAG, dl)
5408 : DAG.getUNDEF(VT);
5409 return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VT, Res, Vec,
5410 DAG.getIntPtrConstant(0, dl));
5411}
5412
5413// Helper function to collect subvector ops that are concatenated together,
5414// either by ISD::CONCAT_VECTORS or an ISD::INSERT_SUBVECTOR series.
5415// The subvectors in Ops are guaranteed to be the same type.
5416static bool collectConcatOps(SDNode *N, SmallVectorImpl<SDValue> &Ops) {
5417  assert(Ops.empty() && "Expected an empty ops vector");
5418
5419 if (N->getOpcode() == ISD::CONCAT_VECTORS) {
5420 Ops.append(N->op_begin(), N->op_end());
5421 return true;
5422 }
5423
5424 if (N->getOpcode() == ISD::INSERT_SUBVECTOR &&
5425 isa<ConstantSDNode>(N->getOperand(2))) {
5426 SDValue Src = N->getOperand(0);
5427 SDValue Sub = N->getOperand(1);
5428 const APInt &Idx = N->getConstantOperandAPInt(2);
5429 EVT VT = Src.getValueType();
5430 EVT SubVT = Sub.getValueType();
5431
5432 // TODO - Handle more general insert_subvector chains.
5433 if (VT.getSizeInBits() == (SubVT.getSizeInBits() * 2) &&
5434 Idx == (VT.getVectorNumElements() / 2) &&
5435 Src.getOpcode() == ISD::INSERT_SUBVECTOR &&
5436 isNullConstant(Src.getOperand(2))) {
5437 Ops.push_back(Src.getOperand(1));
5438 Ops.push_back(Sub);
5439 return true;
5440 }
5441 }
5442
5443 return false;
5444}
5445
5446// Helper for splitting operands of an operation into legal target-sized pieces
5447// and applying a function on each part.
5448// Useful for operations that are available on SSE2 in 128-bit, on AVX2 in
5449// 256-bit and on AVX512BW in 512-bit. The argument VT is the type used for
5450// deciding if/how to split Ops. Ops elements do *not* have to be of type VT.
5451// The argument Builder is a function that will be applied on each split part:
5452// SDValue Builder(SelectionDAG &G, SDLoc, ArrayRef<SDValue>)
5453template <typename F>
5454SDValue SplitOpsAndApply(SelectionDAG &DAG, const X86Subtarget &Subtarget,
5455 const SDLoc &DL, EVT VT, ArrayRef<SDValue> Ops,
5456 F Builder, bool CheckBWI = true) {
5457  assert(Subtarget.hasSSE2() && "Target assumed to support at least SSE2");
5458 unsigned NumSubs = 1;
5459 if ((CheckBWI && Subtarget.useBWIRegs()) ||
5460 (!CheckBWI && Subtarget.useAVX512Regs())) {
5461 if (VT.getSizeInBits() > 512) {
5462 NumSubs = VT.getSizeInBits() / 512;
5463      assert((VT.getSizeInBits() % 512) == 0 && "Illegal vector size");
5464 }
5465 } else if (Subtarget.hasAVX2()) {
5466 if (VT.getSizeInBits() > 256) {
5467 NumSubs = VT.getSizeInBits() / 256;
5468      assert((VT.getSizeInBits() % 256) == 0 && "Illegal vector size");
5469 }
5470 } else {
5471 if (VT.getSizeInBits() > 128) {
5472 NumSubs = VT.getSizeInBits() / 128;
5473      assert((VT.getSizeInBits() % 128) == 0 && "Illegal vector size");
5474 }
5475 }
5476
5477 if (NumSubs == 1)
5478 return Builder(DAG, DL, Ops);
5479
5480 SmallVector<SDValue, 4> Subs;
5481 for (unsigned i = 0; i != NumSubs; ++i) {
5482 SmallVector<SDValue, 2> SubOps;
5483 for (SDValue Op : Ops) {
5484 EVT OpVT = Op.getValueType();
5485 unsigned NumSubElts = OpVT.getVectorNumElements() / NumSubs;
5486 unsigned SizeSub = OpVT.getSizeInBits() / NumSubs;
5487 SubOps.push_back(extractSubVector(Op, i * NumSubElts, DAG, DL, SizeSub));
5488 }
5489 Subs.push_back(Builder(DAG, DL, SubOps));
5490 }
5491 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Subs);
5492}
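
The split count is simply the ratio of the requested width to the widest vector size the feature set supports, so a 512-bit operation becomes two 256-bit parts on AVX2 and four 128-bit parts on plain SSE2. A standalone sketch of that calculation (the booleans stand in for the Subtarget.useBWIRegs()/useAVX512Regs() and Subtarget.hasAVX2() queries above and are assumptions of this example):

#include <cassert>

// Mirrors the NumSubs computation above for a given operation width.
static unsigned numSplitParts(unsigned SizeInBits, bool HasAVX512, bool HasAVX2) {
  unsigned Legal = HasAVX512 ? 512 : HasAVX2 ? 256 : 128;
  return SizeInBits > Legal ? SizeInBits / Legal : 1;
}

int main() {
  assert(numSplitParts(512, /*AVX512*/ false, /*AVX2*/ true) == 2);
  assert(numSplitParts(512, false, false) == 4);
  assert(numSplitParts(256, true, true) == 1); // already legal, no split
  return 0;
}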
5493
5494// Return true if the instruction zeroes the unused upper part of the
5495// destination and accepts a mask.
5496static bool isMaskedZeroUpperBitsvXi1(unsigned int Opcode) {
5497 switch (Opcode) {
5498 default:
5499 return false;
5500 case X86ISD::CMPM:
5501 case X86ISD::CMPM_SAE:
5502 case ISD::SETCC:
5503 return true;
5504 }
5505}
5506
5507/// Insert i1-subvector to i1-vector.
5508static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG,
5509 const X86Subtarget &Subtarget) {
5510
5511 SDLoc dl(Op);
5512 SDValue Vec = Op.getOperand(0);
5513 SDValue SubVec = Op.getOperand(1);
5514 SDValue Idx = Op.getOperand(2);
5515
5516 if (!isa<ConstantSDNode>(Idx))
5517 return SDValue();
5518
5519 // Inserting undef is a nop. We can just return the original vector.
5520 if (SubVec.isUndef())
5521 return Vec;
5522
5523 unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
5524 if (IdxVal == 0 && Vec.isUndef()) // the operation is legal
5525 return Op;
5526
5527 MVT OpVT = Op.getSimpleValueType();
5528 unsigned NumElems = OpVT.getVectorNumElements();
5529
5530 SDValue ZeroIdx = DAG.getIntPtrConstant(0, dl);
5531
5532 // Extend to natively supported kshift.
5533 MVT WideOpVT = OpVT;
5534 if ((!Subtarget.hasDQI() && NumElems == 8) || NumElems < 8)
5535 WideOpVT = Subtarget.hasDQI() ? MVT::v8i1 : MVT::v16i1;
5536
5537 // Inserting into the lsbs of a zero vector is legal. ISel will insert shifts
5538 // if necessary.
5539 if (IdxVal == 0 && ISD::isBuildVectorAllZeros(Vec.getNode())) {
5540 // May need to promote to a legal type.
5541 Op = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
5542 getZeroVector(WideOpVT, Subtarget, DAG, dl),
5543 SubVec, Idx);
5544 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, Op, ZeroIdx);
5545 }
5546
5547 MVT SubVecVT = SubVec.getSimpleValueType();
5548 unsigned SubVecNumElems = SubVecVT.getVectorNumElements();
5549
5550  assert(IdxVal + SubVecNumElems <= NumElems &&
5551         IdxVal % SubVecVT.getSizeInBits() == 0 &&
5552         "Unexpected index value in INSERT_SUBVECTOR");
5553
5554 SDValue Undef = DAG.getUNDEF(WideOpVT);
5555
5556 if (IdxVal == 0) {
5557 // Zero lower bits of the Vec
5558 SDValue ShiftBits = DAG.getConstant(SubVecNumElems, dl, MVT::i8);
5559 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, Undef, Vec,
5560 ZeroIdx);
5561 Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec, ShiftBits);
5562 Vec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Vec, ShiftBits);
5563 // Merge them together, SubVec should be zero extended.
5564 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
5565 getZeroVector(WideOpVT, Subtarget, DAG, dl),
5566 SubVec, ZeroIdx);
5567 Op = DAG.getNode(ISD::OR, dl, WideOpVT, Vec, SubVec);
5568 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, Op, ZeroIdx);
5569 }
5570
5571 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
5572 Undef, SubVec, ZeroIdx);
5573
5574 if (Vec.isUndef()) {
5575    assert(IdxVal != 0 && "Unexpected index");
5576 SubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, SubVec,
5577 DAG.getConstant(IdxVal, dl, MVT::i8));
5578 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, SubVec, ZeroIdx);
5579 }
5580
5581 if (ISD::isBuildVectorAllZeros(Vec.getNode())) {
5582    assert(IdxVal != 0 && "Unexpected index");
5583 NumElems = WideOpVT.getVectorNumElements();
5584 unsigned ShiftLeft = NumElems - SubVecNumElems;
5585 unsigned ShiftRight = NumElems - SubVecNumElems - IdxVal;
5586 SubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, SubVec,
5587 DAG.getConstant(ShiftLeft, dl, MVT::i8));
5588 if (ShiftRight != 0)
5589 SubVec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, SubVec,
5590 DAG.getConstant(ShiftRight, dl, MVT::i8));
5591 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, SubVec, ZeroIdx);
5592 }
5593
5594  // Simple case where we put the subvector in the upper part.
5595 if (IdxVal + SubVecNumElems == NumElems) {
5596 SubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, SubVec,
5597 DAG.getConstant(IdxVal, dl, MVT::i8));
5598 if (SubVecNumElems * 2 == NumElems) {
5599      // Special case: use a legal zero-extending insert_subvector. This allows
5600      // isel to optimize when bits are known zero.
5601 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVecVT, Vec, ZeroIdx);
5602 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
5603 getZeroVector(WideOpVT, Subtarget, DAG, dl),
5604 Vec, ZeroIdx);
5605 } else {
5606 // Otherwise use explicit shifts to zero the bits.
5607 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
5608 Undef, Vec, ZeroIdx);
5609 NumElems = WideOpVT.getVectorNumElements();
5610 SDValue ShiftBits = DAG.getConstant(NumElems - IdxVal, dl, MVT::i8);
5611 Vec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Vec, ShiftBits);
5612 Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec, ShiftBits);
5613 }
5614 Op = DAG.getNode(ISD::OR, dl, WideOpVT, Vec, SubVec);
5615 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, Op, ZeroIdx);
5616 }
5617
5618 // Inserting into the middle is more complicated.
5619
5620 NumElems = WideOpVT.getVectorNumElements();
5621
5622 // Widen the vector if needed.
5623 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, Undef, Vec, ZeroIdx);
5624  // Move the current value of the bits being replaced to the lsbs.
5625 Op = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec,
5626 DAG.getConstant(IdxVal, dl, MVT::i8));
5627 // Xor with the new bit.
5628 Op = DAG.getNode(ISD::XOR, dl, WideOpVT, Op, SubVec);
5629 // Shift to MSB, filling bottom bits with 0.
5630 unsigned ShiftLeft = NumElems - SubVecNumElems;
5631 Op = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Op,
5632 DAG.getConstant(ShiftLeft, dl, MVT::i8));
5633 // Shift to the final position, filling upper bits with 0.
5634 unsigned ShiftRight = NumElems - SubVecNumElems - IdxVal;
5635 Op = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Op,
5636 DAG.getConstant(ShiftRight, dl, MVT::i8));
5637 // Xor with original vector leaving the new value.
5638 Op = DAG.getNode(ISD::XOR, dl, WideOpVT, Vec, Op);
5639 // Reduce to original width if needed.
5640 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, Op, ZeroIdx);
5641}
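
The middle-insertion path is the classic "replace a bit-field with two XORs": bring the old field to the lsbs, XOR in the new field to form the difference, shift that difference back into position (clearing the stray bits on each side), and XOR it onto the original so only the targeted bits change. A standalone demonstration on a 16-bit mask value, with plain unsigned shifts standing in for KSHIFTR/KSHIFTL (this is an illustration, not the DAG code above):

#include <cassert>
#include <cstdint>

int main() {
  const unsigned NumElems = 16, SubElems = 4, Idx = 4;
  const uint16_t Vec = 0xFFFF;   // original k-mask bits
  const uint16_t Sub = 0x5;      // new 4-bit field, already zero-extended

  uint16_t Op = Vec >> Idx;                                 // old field at the lsbs
  Op ^= Sub;                                                // difference old ^ new
  Op = static_cast<uint16_t>(Op << (NumElems - SubElems));  // to the msbs, low bits cleared
  Op >>= (NumElems - SubElems - Idx);                       // back to bit position Idx
  uint16_t Result = Vec ^ Op;                               // flip only the targeted bits

  assert(Result == 0xFF5F); // bits [4..7] of 0xFFFF replaced by 0b0101
  return 0;
}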
5642
5643static SDValue concatSubVectors(SDValue V1, SDValue V2, EVT VT,
5644 unsigned NumElems, SelectionDAG &DAG,
5645 const SDLoc &dl, unsigned VectorWidth) {
5646 SDValue V = insertSubVector(DAG.getUNDEF(VT), V1, 0, DAG, dl, VectorWidth);
5647 return insertSubVector(V, V2, NumElems / 2, DAG, dl, VectorWidth);
5648}
5649
5650/// Returns a vector of specified type with all bits set.
5651/// Always build ones vectors as <4 x i32>, <8 x i32> or <16 x i32>.
5652/// Then bitcast to their original type, ensuring they get CSE'd.
5653static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, const SDLoc &dl) {
5654  assert((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) &&
5655         "Expected a 128/256/512-bit vector type");
5656
5657 APInt Ones = APInt::getAllOnesValue(32);
5658 unsigned NumElts = VT.getSizeInBits() / 32;
5659 SDValue Vec = DAG.getConstant(Ones, dl, MVT::getVectorVT(MVT::i32, NumElts));
5660 return DAG.getBitcast(VT, Vec);
5661}
5662
5663static SDValue getExtendInVec(bool Signed, const SDLoc &DL, EVT VT, SDValue In,
5664 SelectionDAG &DAG) {
5665 EVT InVT = In.getValueType();
5666  assert(VT.isVector() && InVT.isVector() && "Expected vector VTs.");
5667
5668 // For 256-bit vectors, we only need the lower (128-bit) input half.
5669 // For 512-bit vectors, we only need the lower input half or quarter.
5670 if (InVT.getSizeInBits() > 128) {
5671    assert(VT.getSizeInBits() == InVT.getSizeInBits() &&
5672           "Expected VTs to be the same size!");
5673 unsigned Scale = VT.getScalarSizeInBits() / InVT.getScalarSizeInBits();
5674 In = extractSubVector(In, 0, DAG, DL,
5675 std::max(128U, VT.getSizeInBits() / Scale));
5676 InVT = In.getValueType();
5677 }
5678
5679 if (VT.getVectorNumElements() == InVT.getVectorNumElements())
5680 return DAG.getNode(Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
5681 DL, VT, In);
5682
5683 return DAG.getNode(Signed ? ISD::SIGN_EXTEND_VECTOR_INREG
5684 : ISD::ZERO_EXTEND_VECTOR_INREG,
5685 DL, VT, In);
5686}
5687
5688/// Returns a vector_shuffle node for an unpackl operation.
5689static SDValue getUnpackl(SelectionDAG &DAG, const SDLoc &dl, MVT VT,
5690 SDValue V1, SDValue V2) {
5691 SmallVector<int, 8> Mask;
5692 createUnpackShuffleMask(VT, Mask, /* Lo = */ true, /* Unary = */ false);
5693 return DAG.getVectorShuffle(VT, dl, V1, V2, Mask);
5694}
5695
5696/// Returns a vector_shuffle node for an unpackh operation.
5697static SDValue getUnpackh(SelectionDAG &DAG, const SDLoc &dl, MVT VT,
5698 SDValue V1, SDValue V2) {
5699 SmallVector<int, 8> Mask;
5700 createUnpackShuffleMask(VT, Mask, /* Lo = */ false, /* Unary = */ false);
5701 return DAG.getVectorShuffle(VT, dl, V1, V2, Mask);
5702}
5703
5704/// Return a vector_shuffle of the specified vector and a zero or undef vector.
5705/// This produces a shuffle where the low element of V2 is swizzled into the
5706/// zero/undef vector, landing at element Idx.
5707/// This produces a shuffle mask like 4,1,2,3 (idx=0) or 0,1,2,4 (idx=3).
5708static SDValue getShuffleVectorZeroOrUndef(SDValue V2, int Idx,
5709 bool IsZero,
5710 const X86Subtarget &Subtarget,
5711 SelectionDAG &DAG) {
5712 MVT VT = V2.getSimpleValueType();
5713 SDValue V1 = IsZero
5714 ? getZeroVector(VT, Subtarget, DAG, SDLoc(V2)) : DAG.getUNDEF(VT);
5715 int NumElems = VT.getVectorNumElements();
5716 SmallVector<int, 16> MaskVec(NumElems);
5717 for (int i = 0; i != NumElems; ++i)
5718 // If this is the insertion idx, put the low elt of V2 here.
5719 MaskVec[i] = (i == Idx) ? NumElems : i;
5720 return DAG.getVectorShuffle(VT, SDLoc(V2), V1, V2, MaskVec);
5721}
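
For a v4 type this builds masks such as {4,1,2,3} (Idx = 0) or {0,1,2,4} (Idx = 3): index NumElems selects the low lane of V2, and every other position keeps the zero/undef operand V1. A standalone sketch of just the mask construction (illustration only):

#include <cassert>
#include <vector>

// Builds the mask described above: V2's low element lands at position Idx,
// all other positions keep V1 (the zero/undef operand).
static std::vector<int> zeroOrUndefInsertMask(int NumElems, int Idx) {
  std::vector<int> Mask(NumElems);
  for (int i = 0; i != NumElems; ++i)
    Mask[i] = (i == Idx) ? NumElems : i;
  return Mask;
}

int main() {
  assert(zeroOrUndefInsertMask(4, 0) == std::vector<int>({4, 1, 2, 3}));
  assert(zeroOrUndefInsertMask(4, 3) == std::vector<int>({0, 1, 2, 4}));
  return 0;
}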
5722
5723static const Constant *getTargetConstantFromNode(LoadSDNode *Load) {
5724 if (!Load)
5725 return nullptr;
5726
5727 SDValue Ptr = Load->getBasePtr();
5728 if (Ptr->getOpcode() == X86ISD::Wrapper ||
5729 Ptr->getOpcode() == X86ISD::WrapperRIP)
5730 Ptr = Ptr->getOperand(0);
5731
5732 auto *CNode = dyn_cast<ConstantPoolSDNode>(Ptr);
5733 if (!CNode || CNode->isMachineConstantPoolEntry() || CNode->getOffset() != 0)
5734 return nullptr;
5735
5736 return CNode->getConstVal();
5737}
5738
5739static const Constant *getTargetConstantFromNode(SDValue Op) {
5740 Op = peekThroughBitcasts(Op);
5741 return getTargetConstantFromNode(dyn_cast<LoadSDNode>(Op));
5742}
5743
5744const Constant *
5745X86TargetLowering::getTargetConstantFromLoad(LoadSDNode *LD) const {
5746  assert(LD && "Unexpected null LoadSDNode");
5747 return getTargetConstantFromNode(LD);
5748}
5749
5750// Extract raw constant bits from constant pools.
5751static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
5752 APInt &UndefElts,
5753 SmallVectorImpl<APInt> &EltBits,
5754 bool AllowWholeUndefs = true,
5755 bool AllowPartialUndefs = true) {
5756  assert(EltBits.empty() && "Expected an empty EltBits vector");
5757
5758 Op = peekThroughBitcasts(Op);
5759
5760 EVT VT = Op.getValueType();
5761 unsigned SizeInBits = VT.getSizeInBits();
5762  assert((SizeInBits % EltSizeInBits) == 0 && "Can't split constant!");
5763 unsigned NumElts = SizeInBits / EltSizeInBits;
5764
5765 // Bitcast a source array of element bits to the target size.
5766 auto CastBitData = [&](APInt &UndefSrcElts, ArrayRef<APInt> SrcEltBits) {
5767 unsigned NumSrcElts = UndefSrcElts.getBitWidth();
5768 unsigned SrcEltSizeInBits = SrcEltBits[0].getBitWidth();
5769    assert((NumSrcElts * SrcEltSizeInBits) == SizeInBits &&
5770           "Constant bit sizes don't match");
5771
5772 // Don't split if we don't allow undef bits.
5773 bool AllowUndefs = AllowWholeUndefs || AllowPartialUndefs;
5774 if (UndefSrcElts.getBoolValue() && !AllowUndefs)
5775 return false;
5776
5777 // If we're already the right size, don't bother bitcasting.
5778 if (NumSrcElts == NumElts) {
5779 UndefElts = UndefSrcElts;
5780 EltBits.assign(SrcEltBits.begin(), SrcEltBits.end());
5781 return true;
5782 }
5783
5784 // Extract all the undef/constant element data and pack into single bitsets.
5785 APInt UndefBits(SizeInBits, 0);
5786 APInt MaskBits(SizeInBits, 0);
5787
5788 for (unsigned i = 0; i != NumSrcElts; ++i) {
5789 unsigned BitOffset = i * SrcEltSizeInBits;
5790 if (UndefSrcElts[i])
5791 UndefBits.setBits(BitOffset, BitOffset + SrcEltSizeInBits);
5792 MaskBits.insertBits(SrcEltBits[i], BitOffset);
5793 }
5794
5795 // Split the undef/constant single bitset data into the target elements.
5796 UndefElts = APInt(NumElts, 0);
5797 EltBits.resize(NumElts, APInt(EltSizeInBits, 0));
5798
5799 for (unsigned i = 0; i != NumElts; ++i) {
5800 unsigned BitOffset = i * EltSizeInBits;
5801 APInt UndefEltBits = UndefBits.extractBits(EltSizeInBits, BitOffset);
5802
5803 // Only treat an element as UNDEF if all bits are UNDEF.
5804 if (UndefEltBits.isAllOnesValue()) {
5805 if (!AllowWholeUndefs)
5806 return false;
5807 UndefElts.setBit(i);
5808 continue;
5809 }
5810
5811 // If only some bits are UNDEF then treat them as zero (or bail if not
5812 // supported).
5813 if (UndefEltBits.getBoolValue() && !AllowPartialUndefs)
5814 return false;
5815
5816 APInt Bits = MaskBits.extractBits(EltSizeInBits, BitOffset);
5817 EltBits[i] = Bits.getZExtValue();
5818 }
5819 return true;
5820 };
5821
5822 // Collect constant bits and insert into mask/undef bit masks.
5823 auto CollectConstantBits = [](const Constant *Cst, APInt &Mask, APInt &Undefs,
5824 unsigned UndefBitIndex) {
5825 if (!Cst)
5826 return false;
5827 if (isa<UndefValue>(Cst)) {
5828 Undefs.setBit(UndefBitIndex);
5829 return true;
5830 }
5831 if (auto *CInt = dyn_cast<ConstantInt>(Cst)) {
5832 Mask = CInt->getValue();
5833 return true;
5834 }
5835 if (auto *CFP = dyn_cast<ConstantFP>(Cst)) {
5836 Mask = CFP->getValueAPF().bitcastToAPInt();
5837 return true;
5838 }
5839 return false;
5840 };
5841
5842 // Handle UNDEFs.
5843 if (Op.isUndef()) {
5844 APInt UndefSrcElts = APInt::getAllOnesValue(NumElts);
5845 SmallVector<APInt, 64> SrcEltBits(NumElts, APInt(EltSizeInBits, 0));
5846 return CastBitData(UndefSrcElts, SrcEltBits);
5847 }
5848
5849 // Extract scalar constant bits.
5850 if (auto *Cst = dyn_cast<ConstantSDNode>(Op)) {
5851 APInt UndefSrcElts = APInt::getNullValue(1);
5852 SmallVector<APInt, 64> SrcEltBits(1, Cst->getAPIntValue());
5853 return CastBitData(UndefSrcElts, SrcEltBits);
5854 }
5855 if (auto *Cst = dyn_cast<ConstantFPSDNode>(Op)) {
5856 APInt UndefSrcElts = APInt::getNullValue(1);
5857 APInt RawBits = Cst->getValueAPF().bitcastToAPInt();
5858 SmallVector<APInt, 64> SrcEltBits(1, RawBits);
5859 return CastBitData(UndefSrcElts, SrcEltBits);
5860 }
5861
5862 // Extract constant bits from build vector.
5863 if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
5864 unsigned SrcEltSizeInBits = VT.getScalarSizeInBits();
5865 unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits;
5866
5867 APInt UndefSrcElts(NumSrcElts, 0);
5868 SmallVector<APInt, 64> SrcEltBits(NumSrcElts, APInt(SrcEltSizeInBits, 0));
5869 for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
5870 const SDValue &Src = Op.getOperand(i);
5871 if (Src.isUndef()) {
5872 UndefSrcElts.setBit(i);
5873 continue;
5874 }
5875 auto *Cst = cast<ConstantSDNode>(Src);
5876 SrcEltBits[i] = Cst->getAPIntValue().zextOrTrunc(SrcEltSizeInBits);
5877 }
5878 return CastBitData(UndefSrcElts, SrcEltBits);
5879 }
5880 if (ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode())) {
5881 unsigned SrcEltSizeInBits = VT.getScalarSizeInBits();
5882 unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits;
5883
5884 APInt UndefSrcElts(NumSrcElts, 0);
5885 SmallVector<APInt, 64> SrcEltBits(NumSrcElts, APInt(SrcEltSizeInBits, 0));
5886 for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
5887 const SDValue &Src = Op.getOperand(i);
5888 if (Src.isUndef()) {
5889 UndefSrcElts.setBit(i);
5890 continue;
5891 }
5892 auto *Cst = cast<ConstantFPSDNode>(Src);
5893 APInt RawBits = Cst->getValueAPF().bitcastToAPInt();
5894 SrcEltBits[i] = RawBits.zextOrTrunc(SrcEltSizeInBits);
5895 }
5896 return CastBitData(UndefSrcElts, SrcEltBits);
5897 }
5898
5899 // Extract constant bits from constant pool vector.
5900 if (auto *Cst = getTargetConstantFromNode(Op)) {
5901 Type *CstTy = Cst->getType();
5902 unsigned CstSizeInBits = CstTy->getPrimitiveSizeInBits();
5903 if (!CstTy->isVectorTy() || (CstSizeInBits % SizeInBits) != 0)
5904 return false