Bug Summary

File: include/llvm/ADT/SmallBitVector.h
Warning: line 120, column 3
Potential memory leak
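
The warning above only names the sink (SmallBitVector.h, line 120, column 3); the path that reaches it is threaded through the X86ISelLowering.cpp listing below. As background, here is a minimal sketch of the small/large storage idiom that SmallBitVector is built around: bits are kept inline in a single uintptr_t when they fit, otherwise that word holds a pointer to a heap-allocated vector. This is illustration only, not the actual llvm/ADT/SmallBitVector.h code, and the names (TinyBitVector, switchToLarge, getPointer) are hypothetical; it is simply the kind of pattern on which the analyzer's malloc/new-delete checkers tend to report "potential memory leak" when they cannot prove, along some path, that the heap allocation stays reachable and is eventually freed.

// Illustrative sketch only -- NOT the actual llvm/ADT/SmallBitVector.h source.
#include <cstdint>
#include <vector>

class TinyBitVector {
  // Low bit set   => "small" mode: any bits are kept inline in the upper bits of X.
  // Low bit clear => "large" mode: X holds a pointer to a heap-allocated
  // std::vector<bool> (new'd objects are sufficiently aligned that the low
  // tag bit of the pointer is always zero).
  uintptr_t X = 1;

  bool isSmall() const { return X & 1; }

  std::vector<bool> *getPointer() const {
    return reinterpret_cast<std::vector<bool> *>(X);
  }

  void switchToLarge(std::vector<bool> *BV) {
    // Ownership of BV is hidden inside an integer; the analyzer must track
    // the raw pointer through this cast to see that it is not leaked.
    X = reinterpret_cast<uintptr_t>(BV);
  }

public:
  TinyBitVector() = default;
  TinyBitVector &operator=(const TinyBitVector &) = delete; // keep the sketch simple

  explicit TinyBitVector(unsigned NumBits) {
    // Fall back to the heap when the requested size does not fit inline.
    if (NumBits >= sizeof(uintptr_t) * 8 - 1)
      switchToLarge(new std::vector<bool>(NumBits));
  }

  TinyBitVector(const TinyBitVector &RHS) {
    if (RHS.isSmall())
      X = RHS.X;
    else
      // A copy like this is the classic spot for a "potential memory leak"
      // report: the new'd buffer's ownership disappears into a uintptr_t.
      switchToLarge(new std::vector<bool>(*RHS.getPointer()));
  }

  ~TinyBitVector() {
    if (!isSmall())
      delete getPointer();
  }
};

Because the real SmallBitVector releases its large buffer in the destructor much as sketched here, a report like this one is often a false positive caused by the pointer/integer casts obscuring ownership from the checker; it still needs to be read against the full path through the listing below before being dismissed.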

Annotated Source Code

clang -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name X86ISelLowering.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mthread-model posix -fmath-errno -masm-verbose -mconstructor-aliases -munwind-tables -fuse-init-array -target-cpu x86-64 -dwarf-column-info -debugger-tuning=gdb -momit-leaf-frame-pointer -ffunction-sections -fdata-sections -resource-dir /usr/lib/llvm-9/lib/clang/9.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-9~svn358860/build-llvm/lib/Target/X86 -I /build/llvm-toolchain-snapshot-9~svn358860/lib/Target/X86 -I /build/llvm-toolchain-snapshot-9~svn358860/build-llvm/include -I /build/llvm-toolchain-snapshot-9~svn358860/include -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0/backward -internal-isystem /usr/include/clang/9.0.0/include/ -internal-isystem /usr/local/include -internal-isystem /usr/lib/llvm-9/lib/clang/9.0.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-comment -std=c++11 -fdeprecated-macro -fdebug-compilation-dir /build/llvm-toolchain-snapshot-9~svn358860/build-llvm/lib/Target/X86 -fdebug-prefix-map=/build/llvm-toolchain-snapshot-9~svn358860=. -ferror-limit 19 -fmessage-length 0 -fvisibility-inlines-hidden -stack-protector 2 -fobjc-runtime=gcc -fdiagnostics-show-option -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -o /tmp/scan-build-2019-04-22-050718-5320-1 -x c++ /build/llvm-toolchain-snapshot-9~svn358860/lib/Target/X86/X86ISelLowering.cpp -faddrsig

/build/llvm-toolchain-snapshot-9~svn358860/lib/Target/X86/X86ISelLowering.cpp

1//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that X86 uses to lower LLVM code into a
10// selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
14#include "X86ISelLowering.h"
15#include "Utils/X86ShuffleDecode.h"
16#include "X86CallingConv.h"
17#include "X86FrameLowering.h"
18#include "X86InstrBuilder.h"
19#include "X86IntrinsicsInfo.h"
20#include "X86MachineFunctionInfo.h"
21#include "X86TargetMachine.h"
22#include "X86TargetObjectFile.h"
23#include "llvm/ADT/SmallBitVector.h"
24#include "llvm/ADT/SmallSet.h"
25#include "llvm/ADT/Statistic.h"
26#include "llvm/ADT/StringExtras.h"
27#include "llvm/ADT/StringSwitch.h"
28#include "llvm/Analysis/EHPersonalities.h"
29#include "llvm/CodeGen/IntrinsicLowering.h"
30#include "llvm/CodeGen/MachineFrameInfo.h"
31#include "llvm/CodeGen/MachineFunction.h"
32#include "llvm/CodeGen/MachineInstrBuilder.h"
33#include "llvm/CodeGen/MachineJumpTableInfo.h"
34#include "llvm/CodeGen/MachineModuleInfo.h"
35#include "llvm/CodeGen/MachineRegisterInfo.h"
36#include "llvm/CodeGen/TargetLowering.h"
37#include "llvm/CodeGen/WinEHFuncInfo.h"
38#include "llvm/IR/CallSite.h"
39#include "llvm/IR/CallingConv.h"
40#include "llvm/IR/Constants.h"
41#include "llvm/IR/DerivedTypes.h"
42#include "llvm/IR/DiagnosticInfo.h"
43#include "llvm/IR/Function.h"
44#include "llvm/IR/GlobalAlias.h"
45#include "llvm/IR/GlobalVariable.h"
46#include "llvm/IR/Instructions.h"
47#include "llvm/IR/Intrinsics.h"
48#include "llvm/MC/MCAsmInfo.h"
49#include "llvm/MC/MCContext.h"
50#include "llvm/MC/MCExpr.h"
51#include "llvm/MC/MCSymbol.h"
52#include "llvm/Support/CommandLine.h"
53#include "llvm/Support/Debug.h"
54#include "llvm/Support/ErrorHandling.h"
55#include "llvm/Support/KnownBits.h"
56#include "llvm/Support/MathExtras.h"
57#include "llvm/Target/TargetOptions.h"
58#include <algorithm>
59#include <bitset>
60#include <cctype>
61#include <numeric>
62using namespace llvm;
63
64#define DEBUG_TYPE "x86-isel"
65
66STATISTIC(NumTailCalls, "Number of tail calls");
67
68static cl::opt<bool> ExperimentalVectorWideningLegalization(
69 "x86-experimental-vector-widening-legalization", cl::init(false),
70 cl::desc("Enable an experimental vector type legalization through widening "
71 "rather than promotion."),
72 cl::Hidden);
73
74static cl::opt<int> ExperimentalPrefLoopAlignment(
75 "x86-experimental-pref-loop-alignment", cl::init(4),
76 cl::desc("Sets the preferable loop alignment for experiments "
77 "(the last x86-experimental-pref-loop-alignment bits"
78 " of the loop header PC will be 0)."),
79 cl::Hidden);
80
81static cl::opt<bool> MulConstantOptimization(
82 "mul-constant-optimization", cl::init(true),
83 cl::desc("Replace 'mul x, Const' with more effective instructions like "
84 "SHIFT, LEA, etc."),
85 cl::Hidden);
86
87/// Call this when the user attempts to do something unsupported, like
88/// returning a double without SSE2 enabled on x86_64. This is not fatal, unlike
89/// report_fatal_error, so calling code should attempt to recover without
90/// crashing.
91static void errorUnsupported(SelectionDAG &DAG, const SDLoc &dl,
92 const char *Msg) {
93 MachineFunction &MF = DAG.getMachineFunction();
94 DAG.getContext()->diagnose(
95 DiagnosticInfoUnsupported(MF.getFunction(), Msg, dl.getDebugLoc()));
96}
97
98X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
99 const X86Subtarget &STI)
100 : TargetLowering(TM), Subtarget(STI) {
101 bool UseX87 = !Subtarget.useSoftFloat() && Subtarget.hasX87();
102 X86ScalarSSEf64 = Subtarget.hasSSE2();
103 X86ScalarSSEf32 = Subtarget.hasSSE1();
104 MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0));
105
106 // Set up the TargetLowering object.
107
108 // X86 is weird. It always uses i8 for shift amounts and setcc results.
109 setBooleanContents(ZeroOrOneBooleanContent);
110 // X86-SSE is even stranger. It uses -1 or 0 for vector masks.
111 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
112
113 // For 64-bit, since we have so many registers, use the ILP scheduler.
114 // For 32-bit, use the register pressure specific scheduling.
115 // For Atom, always use ILP scheduling.
116 if (Subtarget.isAtom())
117 setSchedulingPreference(Sched::ILP);
118 else if (Subtarget.is64Bit())
119 setSchedulingPreference(Sched::ILP);
120 else
121 setSchedulingPreference(Sched::RegPressure);
122 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
123 setStackPointerRegisterToSaveRestore(RegInfo->getStackRegister());
124
125 // Bypass expensive divides and use cheaper ones.
126 if (TM.getOptLevel() >= CodeGenOpt::Default) {
127 if (Subtarget.hasSlowDivide32())
128 addBypassSlowDiv(32, 8);
129 if (Subtarget.hasSlowDivide64() && Subtarget.is64Bit())
130 addBypassSlowDiv(64, 32);
131 }
132
133 if (Subtarget.isTargetKnownWindowsMSVC() ||
134 Subtarget.isTargetWindowsItanium()) {
135 // Setup Windows compiler runtime calls.
136 setLibcallName(RTLIB::SDIV_I64, "_alldiv");
137 setLibcallName(RTLIB::UDIV_I64, "_aulldiv");
138 setLibcallName(RTLIB::SREM_I64, "_allrem");
139 setLibcallName(RTLIB::UREM_I64, "_aullrem");
140 setLibcallName(RTLIB::MUL_I64, "_allmul");
141 setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::X86_StdCall);
142 setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::X86_StdCall);
143 setLibcallCallingConv(RTLIB::SREM_I64, CallingConv::X86_StdCall);
144 setLibcallCallingConv(RTLIB::UREM_I64, CallingConv::X86_StdCall);
145 setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::X86_StdCall);
146 }
147
148 if (Subtarget.isTargetDarwin()) {
149 // Darwin should use _setjmp/_longjmp instead of setjmp/longjmp.
150 setUseUnderscoreSetJmp(false);
151 setUseUnderscoreLongJmp(false);
152 } else if (Subtarget.isTargetWindowsGNU()) {
153 // MS runtime is weird: it exports _setjmp, but longjmp!
154 setUseUnderscoreSetJmp(true);
155 setUseUnderscoreLongJmp(false);
156 } else {
157 setUseUnderscoreSetJmp(true);
158 setUseUnderscoreLongJmp(true);
159 }
160
161 // If we don't have cmpxchg8b (meaning this is a 386/486), limit atomic size to
162 // 32 bits so the AtomicExpandPass will expand it so we don't need cmpxchg8b.
163 // FIXME: Should we be limiting the atomic size on other configs? Default is
164 // 1024.
165 if (!Subtarget.hasCmpxchg8b())
166 setMaxAtomicSizeInBitsSupported(32);
167
168 // Set up the register classes.
169 addRegisterClass(MVT::i8, &X86::GR8RegClass);
170 addRegisterClass(MVT::i16, &X86::GR16RegClass);
171 addRegisterClass(MVT::i32, &X86::GR32RegClass);
172 if (Subtarget.is64Bit())
173 addRegisterClass(MVT::i64, &X86::GR64RegClass);
174
175 for (MVT VT : MVT::integer_valuetypes())
176 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
177
178 // We don't accept any truncstore of integer registers.
179 setTruncStoreAction(MVT::i64, MVT::i32, Expand);
180 setTruncStoreAction(MVT::i64, MVT::i16, Expand);
181 setTruncStoreAction(MVT::i64, MVT::i8 , Expand);
182 setTruncStoreAction(MVT::i32, MVT::i16, Expand);
183 setTruncStoreAction(MVT::i32, MVT::i8 , Expand);
184 setTruncStoreAction(MVT::i16, MVT::i8, Expand);
185
186 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
187
188 // SETOEQ and SETUNE require checking two conditions.
189 setCondCodeAction(ISD::SETOEQ, MVT::f32, Expand);
190 setCondCodeAction(ISD::SETOEQ, MVT::f64, Expand);
191 setCondCodeAction(ISD::SETOEQ, MVT::f80, Expand);
192 setCondCodeAction(ISD::SETUNE, MVT::f32, Expand);
193 setCondCodeAction(ISD::SETUNE, MVT::f64, Expand);
194 setCondCodeAction(ISD::SETUNE, MVT::f80, Expand);
195
196 // Integer absolute.
197 if (Subtarget.hasCMov()) {
198 setOperationAction(ISD::ABS , MVT::i16 , Custom);
199 setOperationAction(ISD::ABS , MVT::i32 , Custom);
200 }
201 setOperationAction(ISD::ABS , MVT::i64 , Custom);
202
203 // Funnel shifts.
204 for (auto ShiftOp : {ISD::FSHL, ISD::FSHR}) {
205 setOperationAction(ShiftOp , MVT::i16 , Custom);
206 setOperationAction(ShiftOp , MVT::i32 , Custom);
207 if (Subtarget.is64Bit())
208 setOperationAction(ShiftOp , MVT::i64 , Custom);
209 }
210
211 // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
212 // operation.
213 setOperationAction(ISD::UINT_TO_FP , MVT::i1 , Promote);
214 setOperationAction(ISD::UINT_TO_FP , MVT::i8 , Promote);
215 setOperationAction(ISD::UINT_TO_FP , MVT::i16 , Promote);
216
217 if (Subtarget.is64Bit()) {
218 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512())
219 // f32/f64 are legal, f80 is custom.
220 setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Custom);
221 else
222 setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote);
223 setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom);
224 } else if (!Subtarget.useSoftFloat()) {
225 // We have an algorithm for SSE2->double, and we turn this into a
226 // 64-bit FILD followed by conditional FADD for other targets.
227 setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom);
228 // We have an algorithm for SSE2, and we turn this into a 64-bit
229 // FILD or VCVTUSI2SS/SD for other targets.
230 setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Custom);
231 } else {
232 setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Expand);
233 }
234
235 // Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
236 // this operation.
237 setOperationAction(ISD::SINT_TO_FP , MVT::i1 , Promote);
238 setOperationAction(ISD::SINT_TO_FP , MVT::i8 , Promote);
239
240 if (!Subtarget.useSoftFloat()) {
241 // SSE has no i16 to fp conversion, only i32.
242 if (X86ScalarSSEf32) {
243 setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote);
244 // f32 and f64 cases are Legal, f80 case is not
245 setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom);
246 } else {
247 setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Custom);
248 setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom);
249 }
250 } else {
251 setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote);
252 setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Expand);
253 }
254
255 // Promote i1/i8 FP_TO_SINT to larger FP_TO_SINT's, as X86 doesn't have
256 // this operation.
257 setOperationAction(ISD::FP_TO_SINT , MVT::i1 , Promote);
258 setOperationAction(ISD::FP_TO_SINT , MVT::i8 , Promote);
259
260 if (!Subtarget.useSoftFloat()) {
261 // In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64
262 // are Legal, f80 is custom lowered.
263 setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Custom);
264 setOperationAction(ISD::SINT_TO_FP , MVT::i64 , Custom);
265
266 setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Custom);
267 setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom);
268 } else {
269 setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Promote);
270 setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Expand);
271 setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Expand);
272 }
273
274 // Handle FP_TO_UINT by promoting the destination to a larger signed
275 // conversion.
276 setOperationAction(ISD::FP_TO_UINT , MVT::i1 , Promote);
277 setOperationAction(ISD::FP_TO_UINT , MVT::i8 , Promote);
278 setOperationAction(ISD::FP_TO_UINT , MVT::i16 , Promote);
279
280 if (Subtarget.is64Bit()) {
281 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
282 // FP_TO_UINT-i32/i64 is legal for f32/f64, but custom for f80.
283 setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Custom);
284 setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Custom);
285 } else {
286 setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote);
287 setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Expand);
288 }
289 } else if (!Subtarget.useSoftFloat()) {
290 // Since AVX is a superset of SSE3, only check for SSE here.
291 if (Subtarget.hasSSE1() && !Subtarget.hasSSE3())
292 // Expand FP_TO_UINT into a select.
293 // FIXME: We would like to use a Custom expander here eventually to do
294 // the optimal thing for SSE vs. the default expansion in the legalizer.
295 setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Expand);
296 else
297 // With AVX512 we can use vcvts[ds]2usi for f32/f64->i32, f80 is custom.
298 // With SSE3 we can use fisttpll to convert to a signed i64; without
299 // SSE, we're stuck with a fistpll.
300 setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Custom);
301
302 setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Custom);
303 }
304
305 // TODO: when we have SSE, these could be more efficient, by using movd/movq.
306 if (!X86ScalarSSEf64) {
307 setOperationAction(ISD::BITCAST , MVT::f32 , Expand);
308 setOperationAction(ISD::BITCAST , MVT::i32 , Expand);
309 if (Subtarget.is64Bit()) {
310 setOperationAction(ISD::BITCAST , MVT::f64 , Expand);
311 // Without SSE, i64->f64 goes through memory.
312 setOperationAction(ISD::BITCAST , MVT::i64 , Expand);
313 }
314 } else if (!Subtarget.is64Bit())
315 setOperationAction(ISD::BITCAST , MVT::i64 , Custom);
316
317 // Scalar integer divide and remainder are lowered to use operations that
318 // produce two results, to match the available instructions. This exposes
319 // the two-result form to trivial CSE, which is able to combine x/y and x%y
320 // into a single instruction.
321 //
322 // Scalar integer multiply-high is also lowered to use two-result
323 // operations, to match the available instructions. However, plain multiply
324 // (low) operations are left as Legal, as there are single-result
325 // instructions for this in x86. Using the two-result multiply instructions
326 // when both high and low results are needed must be arranged by dagcombine.
327 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
328 setOperationAction(ISD::MULHS, VT, Expand);
329 setOperationAction(ISD::MULHU, VT, Expand);
330 setOperationAction(ISD::SDIV, VT, Expand);
331 setOperationAction(ISD::UDIV, VT, Expand);
332 setOperationAction(ISD::SREM, VT, Expand);
333 setOperationAction(ISD::UREM, VT, Expand);
334 }
335
336 setOperationAction(ISD::BR_JT , MVT::Other, Expand);
337 setOperationAction(ISD::BRCOND , MVT::Other, Custom);
338 for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128,
339 MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
340 setOperationAction(ISD::BR_CC, VT, Expand);
341 setOperationAction(ISD::SELECT_CC, VT, Expand);
342 }
343 if (Subtarget.is64Bit())
344 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
345 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16 , Legal);
346 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal);
347 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);
348 setOperationAction(ISD::FP_ROUND_INREG , MVT::f32 , Expand);
349
350 setOperationAction(ISD::FREM , MVT::f32 , Expand);
351 setOperationAction(ISD::FREM , MVT::f64 , Expand);
352 setOperationAction(ISD::FREM , MVT::f80 , Expand);
353 setOperationAction(ISD::FLT_ROUNDS_ , MVT::i32 , Custom);
354
355 // Promote the i8 variants and force them on up to i32 which has a shorter
356 // encoding.
357 setOperationPromotedToType(ISD::CTTZ , MVT::i8 , MVT::i32);
358 setOperationPromotedToType(ISD::CTTZ_ZERO_UNDEF, MVT::i8 , MVT::i32);
359 if (!Subtarget.hasBMI()) {
360 setOperationAction(ISD::CTTZ , MVT::i16 , Custom);
361 setOperationAction(ISD::CTTZ , MVT::i32 , Custom);
362 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16 , Legal);
363 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32 , Legal);
364 if (Subtarget.is64Bit()) {
365 setOperationAction(ISD::CTTZ , MVT::i64 , Custom);
366 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Legal);
367 }
368 }
369
370 if (Subtarget.hasLZCNT()) {
371 // When promoting the i8 variants, force them to i32 for a shorter
372 // encoding.
373 setOperationPromotedToType(ISD::CTLZ , MVT::i8 , MVT::i32);
374 setOperationPromotedToType(ISD::CTLZ_ZERO_UNDEF, MVT::i8 , MVT::i32);
375 } else {
376 setOperationAction(ISD::CTLZ , MVT::i8 , Custom);
377 setOperationAction(ISD::CTLZ , MVT::i16 , Custom);
378 setOperationAction(ISD::CTLZ , MVT::i32 , Custom);
379 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8 , Custom);
380 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16 , Custom);
381 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32 , Custom);
382 if (Subtarget.is64Bit()) {
383 setOperationAction(ISD::CTLZ , MVT::i64 , Custom);
384 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Custom);
385 }
386 }
387
388 // Special handling for half-precision floating point conversions.
389 // If we don't have F16C support, then lower half float conversions
390 // into library calls.
391 if (Subtarget.useSoftFloat() || !Subtarget.hasF16C()) {
392 setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
393 setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
394 }
395
396 // There's never any support for operations beyond MVT::f32.
397 setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
398 setOperationAction(ISD::FP16_TO_FP, MVT::f80, Expand);
399 setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
400 setOperationAction(ISD::FP_TO_FP16, MVT::f80, Expand);
401
402 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
403 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
404 setLoadExtAction(ISD::EXTLOAD, MVT::f80, MVT::f16, Expand);
405 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
406 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
407 setTruncStoreAction(MVT::f80, MVT::f16, Expand);
408
409 if (Subtarget.hasPOPCNT()) {
410 setOperationPromotedToType(ISD::CTPOP, MVT::i8, MVT::i32);
411 } else {
412 setOperationAction(ISD::CTPOP , MVT::i8 , Expand);
413 setOperationAction(ISD::CTPOP , MVT::i16 , Expand);
414 setOperationAction(ISD::CTPOP , MVT::i32 , Expand);
415 if (Subtarget.is64Bit())
416 setOperationAction(ISD::CTPOP , MVT::i64 , Expand);
417 else
418 setOperationAction(ISD::CTPOP , MVT::i64 , Custom);
419 }
420
421 setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom);
422
423 if (!Subtarget.hasMOVBE())
424 setOperationAction(ISD::BSWAP , MVT::i16 , Expand);
425
426 // These should be promoted to a larger select which is supported.
427 setOperationAction(ISD::SELECT , MVT::i1 , Promote);
428 // X86 wants to expand cmov itself.
429 for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128 }) {
430 setOperationAction(ISD::SELECT, VT, Custom);
431 setOperationAction(ISD::SETCC, VT, Custom);
432 }
433 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
434 if (VT == MVT::i64 && !Subtarget.is64Bit())
435 continue;
436 setOperationAction(ISD::SELECT, VT, Custom);
437 setOperationAction(ISD::SETCC, VT, Custom);
438 }
439
440 // Custom action for SELECT MMX and expand action for SELECT_CC MMX
441 setOperationAction(ISD::SELECT, MVT::x86mmx, Custom);
442 setOperationAction(ISD::SELECT_CC, MVT::x86mmx, Expand);
443
444 setOperationAction(ISD::EH_RETURN , MVT::Other, Custom);
445 // NOTE: EH_SJLJ_SETJMP/_LONGJMP are not recommended, since
446 // LLVM/Clang supports zero-cost DWARF and SEH exception handling.
447 setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
448 setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
449 setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom);
450 if (TM.Options.ExceptionModel == ExceptionHandling::SjLj)
451 setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
452
453 // Darwin ABI issue.
454 for (auto VT : { MVT::i32, MVT::i64 }) {
455 if (VT == MVT::i64 && !Subtarget.is64Bit())
456 continue;
457 setOperationAction(ISD::ConstantPool , VT, Custom);
458 setOperationAction(ISD::JumpTable , VT, Custom);
459 setOperationAction(ISD::GlobalAddress , VT, Custom);
460 setOperationAction(ISD::GlobalTLSAddress, VT, Custom);
461 setOperationAction(ISD::ExternalSymbol , VT, Custom);
462 setOperationAction(ISD::BlockAddress , VT, Custom);
463 }
464
465 // 64-bit shl, sra, srl (iff 32-bit x86)
466 for (auto VT : { MVT::i32, MVT::i64 }) {
467 if (VT == MVT::i64 && !Subtarget.is64Bit())
468 continue;
469 setOperationAction(ISD::SHL_PARTS, VT, Custom);
470 setOperationAction(ISD::SRA_PARTS, VT, Custom);
471 setOperationAction(ISD::SRL_PARTS, VT, Custom);
472 }
473
474 if (Subtarget.hasSSEPrefetch() || Subtarget.has3DNow())
475 setOperationAction(ISD::PREFETCH , MVT::Other, Legal);
476
477 setOperationAction(ISD::ATOMIC_FENCE , MVT::Other, Custom);
478
479 // Expand certain atomics
480 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
481 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Custom);
482 setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);
483 setOperationAction(ISD::ATOMIC_LOAD_ADD, VT, Custom);
484 setOperationAction(ISD::ATOMIC_LOAD_OR, VT, Custom);
485 setOperationAction(ISD::ATOMIC_LOAD_XOR, VT, Custom);
486 setOperationAction(ISD::ATOMIC_LOAD_AND, VT, Custom);
487 setOperationAction(ISD::ATOMIC_STORE, VT, Custom);
488 }
489
490 if (!Subtarget.is64Bit())
491 setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Custom);
492
493 if (Subtarget.hasCmpxchg16b()) {
494 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom);
495 }
496
497 // FIXME - use subtarget debug flags
498 if (!Subtarget.isTargetDarwin() && !Subtarget.isTargetELF() &&
499 !Subtarget.isTargetCygMing() && !Subtarget.isTargetWin64() &&
500 TM.Options.ExceptionModel != ExceptionHandling::SjLj) {
501 setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
502 }
503
504 setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom);
505 setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i64, Custom);
506
507 setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
508 setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
509
510 setOperationAction(ISD::TRAP, MVT::Other, Legal);
511 setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
512
513 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
514 setOperationAction(ISD::VASTART , MVT::Other, Custom);
515 setOperationAction(ISD::VAEND , MVT::Other, Expand);
516 bool Is64Bit = Subtarget.is64Bit();
517 setOperationAction(ISD::VAARG, MVT::Other, Is64Bit ? Custom : Expand);
518 setOperationAction(ISD::VACOPY, MVT::Other, Is64Bit ? Custom : Expand);
519
520 setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
521 setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
522
523 setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom);
524
525 // GC_TRANSITION_START and GC_TRANSITION_END need custom lowering.
526 setOperationAction(ISD::GC_TRANSITION_START, MVT::Other, Custom);
527 setOperationAction(ISD::GC_TRANSITION_END, MVT::Other, Custom);
528
529 if (!Subtarget.useSoftFloat() && X86ScalarSSEf64) {
530 // f32 and f64 use SSE.
531 // Set up the FP register classes.
532 addRegisterClass(MVT::f32, Subtarget.hasAVX512() ? &X86::FR32XRegClass
533 : &X86::FR32RegClass);
534 addRegisterClass(MVT::f64, Subtarget.hasAVX512() ? &X86::FR64XRegClass
535 : &X86::FR64RegClass);
536
537 for (auto VT : { MVT::f32, MVT::f64 }) {
538 // Use ANDPD to simulate FABS.
539 setOperationAction(ISD::FABS, VT, Custom);
540
541 // Use XORP to simulate FNEG.
542 setOperationAction(ISD::FNEG, VT, Custom);
543
544 // Use ANDPD and ORPD to simulate FCOPYSIGN.
545 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
546
547 // These might be better off as horizontal vector ops.
548 setOperationAction(ISD::FADD, VT, Custom);
549 setOperationAction(ISD::FSUB, VT, Custom);
550
551 // We don't support sin/cos/fmod
552 setOperationAction(ISD::FSIN , VT, Expand);
553 setOperationAction(ISD::FCOS , VT, Expand);
554 setOperationAction(ISD::FSINCOS, VT, Expand);
555 }
556
557 // Lower this to MOVMSK plus an AND.
558 setOperationAction(ISD::FGETSIGN, MVT::i64, Custom);
559 setOperationAction(ISD::FGETSIGN, MVT::i32, Custom);
560
561 } else if (!Subtarget.useSoftFloat() && X86ScalarSSEf32 && (UseX87 || Is64Bit)) {
562 // Use SSE for f32, x87 for f64.
563 // Set up the FP register classes.
564 addRegisterClass(MVT::f32, &X86::FR32RegClass);
565 if (UseX87)
566 addRegisterClass(MVT::f64, &X86::RFP64RegClass);
567
568 // Use ANDPS to simulate FABS.
569 setOperationAction(ISD::FABS , MVT::f32, Custom);
570
571 // Use XORP to simulate FNEG.
572 setOperationAction(ISD::FNEG , MVT::f32, Custom);
573
574 if (UseX87)
575 setOperationAction(ISD::UNDEF, MVT::f64, Expand);
576
577 // Use ANDPS and ORPS to simulate FCOPYSIGN.
578 if (UseX87)
579 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
580 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
581
582 // We don't support sin/cos/fmod
583 setOperationAction(ISD::FSIN , MVT::f32, Expand);
584 setOperationAction(ISD::FCOS , MVT::f32, Expand);
585 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
586
587 if (UseX87) {
588 // Always expand sin/cos functions even though x87 has an instruction.
589 setOperationAction(ISD::FSIN, MVT::f64, Expand);
590 setOperationAction(ISD::FCOS, MVT::f64, Expand);
591 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
592 }
593 } else if (UseX87) {
594 // f32 and f64 in x87.
595 // Set up the FP register classes.
596 addRegisterClass(MVT::f64, &X86::RFP64RegClass);
597 addRegisterClass(MVT::f32, &X86::RFP32RegClass);
598
599 for (auto VT : { MVT::f32, MVT::f64 }) {
600 setOperationAction(ISD::UNDEF, VT, Expand);
601 setOperationAction(ISD::FCOPYSIGN, VT, Expand);
602
603 // Always expand sin/cos functions even though x87 has an instruction.
604 setOperationAction(ISD::FSIN , VT, Expand);
605 setOperationAction(ISD::FCOS , VT, Expand);
606 setOperationAction(ISD::FSINCOS, VT, Expand);
607 }
608 }
609
610 // Expand FP32 immediates into loads from the stack, save special cases.
611 if (isTypeLegal(MVT::f32)) {
612 if (UseX87 && (getRegClassFor(MVT::f32) == &X86::RFP32RegClass)) {
613 addLegalFPImmediate(APFloat(+0.0f)); // FLD0
614 addLegalFPImmediate(APFloat(+1.0f)); // FLD1
615 addLegalFPImmediate(APFloat(-0.0f)); // FLD0/FCHS
616 addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS
617 } else // SSE immediates.
618 addLegalFPImmediate(APFloat(+0.0f)); // xorps
619 }
620 // Expand FP64 immediates into loads from the stack, save special cases.
621 if (isTypeLegal(MVT::f64)) {
622 if (UseX87 && getRegClassFor(MVT::f64) == &X86::RFP64RegClass) {
623 addLegalFPImmediate(APFloat(+0.0)); // FLD0
624 addLegalFPImmediate(APFloat(+1.0)); // FLD1
625 addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
626 addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
627 } else // SSE immediates.
628 addLegalFPImmediate(APFloat(+0.0)); // xorpd
629 }
630
631 // We don't support FMA.
632 setOperationAction(ISD::FMA, MVT::f64, Expand);
633 setOperationAction(ISD::FMA, MVT::f32, Expand);
634
635 // Long double always uses X87, except f128 in MMX.
636 if (UseX87) {
637 if (Subtarget.is64Bit() && Subtarget.hasMMX()) {
638 addRegisterClass(MVT::f128, Subtarget.hasVLX() ? &X86::VR128XRegClass
639 : &X86::VR128RegClass);
640 ValueTypeActions.setTypeAction(MVT::f128, TypeSoftenFloat);
641 setOperationAction(ISD::FABS , MVT::f128, Custom);
642 setOperationAction(ISD::FNEG , MVT::f128, Custom);
643 setOperationAction(ISD::FCOPYSIGN, MVT::f128, Custom);
644 }
645
646 addRegisterClass(MVT::f80, &X86::RFP80RegClass);
647 setOperationAction(ISD::UNDEF, MVT::f80, Expand);
648 setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand);
649 {
650 APFloat TmpFlt = APFloat::getZero(APFloat::x87DoubleExtended());
651 addLegalFPImmediate(TmpFlt); // FLD0
652 TmpFlt.changeSign();
653 addLegalFPImmediate(TmpFlt); // FLD0/FCHS
654
655 bool ignored;
656 APFloat TmpFlt2(+1.0);
657 TmpFlt2.convert(APFloat::x87DoubleExtended(), APFloat::rmNearestTiesToEven,
658 &ignored);
659 addLegalFPImmediate(TmpFlt2); // FLD1
660 TmpFlt2.changeSign();
661 addLegalFPImmediate(TmpFlt2); // FLD1/FCHS
662 }
663
664 // Always expand sin/cos functions even though x87 has an instruction.
665 setOperationAction(ISD::FSIN , MVT::f80, Expand);
666 setOperationAction(ISD::FCOS , MVT::f80, Expand);
667 setOperationAction(ISD::FSINCOS, MVT::f80, Expand);
668
669 setOperationAction(ISD::FFLOOR, MVT::f80, Expand);
670 setOperationAction(ISD::FCEIL, MVT::f80, Expand);
671 setOperationAction(ISD::FTRUNC, MVT::f80, Expand);
672 setOperationAction(ISD::FRINT, MVT::f80, Expand);
673 setOperationAction(ISD::FNEARBYINT, MVT::f80, Expand);
674 setOperationAction(ISD::FMA, MVT::f80, Expand);
675 }
676
677 // Always use a library call for pow.
678 setOperationAction(ISD::FPOW , MVT::f32 , Expand);
679 setOperationAction(ISD::FPOW , MVT::f64 , Expand);
680 setOperationAction(ISD::FPOW , MVT::f80 , Expand);
681
682 setOperationAction(ISD::FLOG, MVT::f80, Expand);
683 setOperationAction(ISD::FLOG2, MVT::f80, Expand);
684 setOperationAction(ISD::FLOG10, MVT::f80, Expand);
685 setOperationAction(ISD::FEXP, MVT::f80, Expand);
686 setOperationAction(ISD::FEXP2, MVT::f80, Expand);
687 setOperationAction(ISD::FMINNUM, MVT::f80, Expand);
688 setOperationAction(ISD::FMAXNUM, MVT::f80, Expand);
689
690 // Some FP actions are always expanded for vector types.
691 for (auto VT : { MVT::v4f32, MVT::v8f32, MVT::v16f32,
692 MVT::v2f64, MVT::v4f64, MVT::v8f64 }) {
693 setOperationAction(ISD::FSIN, VT, Expand);
694 setOperationAction(ISD::FSINCOS, VT, Expand);
695 setOperationAction(ISD::FCOS, VT, Expand);
696 setOperationAction(ISD::FREM, VT, Expand);
697 setOperationAction(ISD::FCOPYSIGN, VT, Expand);
698 setOperationAction(ISD::FPOW, VT, Expand);
699 setOperationAction(ISD::FLOG, VT, Expand);
700 setOperationAction(ISD::FLOG2, VT, Expand);
701 setOperationAction(ISD::FLOG10, VT, Expand);
702 setOperationAction(ISD::FEXP, VT, Expand);
703 setOperationAction(ISD::FEXP2, VT, Expand);
704 }
705
706 // First set operation action for all vector types to either promote
707 // (for widening) or expand (for scalarization). Then we will selectively
708 // turn on ones that can be effectively codegen'd.
709 for (MVT VT : MVT::vector_valuetypes()) {
710 setOperationAction(ISD::SDIV, VT, Expand);
711 setOperationAction(ISD::UDIV, VT, Expand);
712 setOperationAction(ISD::SREM, VT, Expand);
713 setOperationAction(ISD::UREM, VT, Expand);
714 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT,Expand);
715 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
716 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT,Expand);
717 setOperationAction(ISD::INSERT_SUBVECTOR, VT,Expand);
718 setOperationAction(ISD::FMA, VT, Expand);
719 setOperationAction(ISD::FFLOOR, VT, Expand);
720 setOperationAction(ISD::FCEIL, VT, Expand);
721 setOperationAction(ISD::FTRUNC, VT, Expand);
722 setOperationAction(ISD::FRINT, VT, Expand);
723 setOperationAction(ISD::FNEARBYINT, VT, Expand);
724 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
725 setOperationAction(ISD::MULHS, VT, Expand);
726 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
727 setOperationAction(ISD::MULHU, VT, Expand);
728 setOperationAction(ISD::SDIVREM, VT, Expand);
729 setOperationAction(ISD::UDIVREM, VT, Expand);
730 setOperationAction(ISD::CTPOP, VT, Expand);
731 setOperationAction(ISD::CTTZ, VT, Expand);
732 setOperationAction(ISD::CTLZ, VT, Expand);
733 setOperationAction(ISD::ROTL, VT, Expand);
734 setOperationAction(ISD::ROTR, VT, Expand);
735 setOperationAction(ISD::BSWAP, VT, Expand);
736 setOperationAction(ISD::SETCC, VT, Expand);
737 setOperationAction(ISD::FP_TO_UINT, VT, Expand);
738 setOperationAction(ISD::FP_TO_SINT, VT, Expand);
739 setOperationAction(ISD::UINT_TO_FP, VT, Expand);
740 setOperationAction(ISD::SINT_TO_FP, VT, Expand);
741 setOperationAction(ISD::SIGN_EXTEND_INREG, VT,Expand);
742 setOperationAction(ISD::TRUNCATE, VT, Expand);
743 setOperationAction(ISD::SIGN_EXTEND, VT, Expand);
744 setOperationAction(ISD::ZERO_EXTEND, VT, Expand);
745 setOperationAction(ISD::ANY_EXTEND, VT, Expand);
746 setOperationAction(ISD::SELECT_CC, VT, Expand);
747 for (MVT InnerVT : MVT::vector_valuetypes()) {
748 setTruncStoreAction(InnerVT, VT, Expand);
749
750 setLoadExtAction(ISD::SEXTLOAD, InnerVT, VT, Expand);
751 setLoadExtAction(ISD::ZEXTLOAD, InnerVT, VT, Expand);
752
753 // N.b. ISD::EXTLOAD legality is basically ignored except for i1-like
754 // types, we have to deal with them whether we ask for Expansion or not.
755 // Setting Expand causes its own optimisation problems though, so leave
756 // them legal.
757 if (VT.getVectorElementType() == MVT::i1)
758 setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
759
760 // EXTLOAD for MVT::f16 vectors is not legal because f16 vectors are
761 // split/scalarized right now.
762 if (VT.getVectorElementType() == MVT::f16)
763 setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
764 }
765 }
766
767 // FIXME: In order to prevent SSE instructions being expanded to MMX ones
768 // with -msoft-float, disable use of MMX as well.
769 if (!Subtarget.useSoftFloat() && Subtarget.hasMMX()) {
770 addRegisterClass(MVT::x86mmx, &X86::VR64RegClass);
771 // No operations on x86mmx supported, everything uses intrinsics.
772 }
773
774 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE1()) {
775 addRegisterClass(MVT::v4f32, Subtarget.hasVLX() ? &X86::VR128XRegClass
776 : &X86::VR128RegClass);
777
778 setOperationAction(ISD::FNEG, MVT::v4f32, Custom);
779 setOperationAction(ISD::FABS, MVT::v4f32, Custom);
780 setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Custom);
781 setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
782 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
783 setOperationAction(ISD::VSELECT, MVT::v4f32, Custom);
784 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
785 setOperationAction(ISD::SELECT, MVT::v4f32, Custom);
786 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom);
787 }
788
789 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) {
790 addRegisterClass(MVT::v2f64, Subtarget.hasVLX() ? &X86::VR128XRegClass
791 : &X86::VR128RegClass);
792
793 // FIXME: Unfortunately, -soft-float and -no-implicit-float mean XMM
794 // registers cannot be used even for integer operations.
795 addRegisterClass(MVT::v16i8, Subtarget.hasVLX() ? &X86::VR128XRegClass
796 : &X86::VR128RegClass);
797 addRegisterClass(MVT::v8i16, Subtarget.hasVLX() ? &X86::VR128XRegClass
798 : &X86::VR128RegClass);
799 addRegisterClass(MVT::v4i32, Subtarget.hasVLX() ? &X86::VR128XRegClass
800 : &X86::VR128RegClass);
801 addRegisterClass(MVT::v2i64, Subtarget.hasVLX() ? &X86::VR128XRegClass
802 : &X86::VR128RegClass);
803
804 for (auto VT : { MVT::v2i8, MVT::v4i8, MVT::v8i8,
805 MVT::v2i16, MVT::v4i16, MVT::v2i32 }) {
806 setOperationAction(ISD::SDIV, VT, Custom);
807 setOperationAction(ISD::SREM, VT, Custom);
808 setOperationAction(ISD::UDIV, VT, Custom);
809 setOperationAction(ISD::UREM, VT, Custom);
810 }
811
812 setOperationAction(ISD::MUL, MVT::v2i8, Custom);
813 setOperationAction(ISD::MUL, MVT::v2i16, Custom);
814 setOperationAction(ISD::MUL, MVT::v2i32, Custom);
815 setOperationAction(ISD::MUL, MVT::v4i8, Custom);
816 setOperationAction(ISD::MUL, MVT::v4i16, Custom);
817 setOperationAction(ISD::MUL, MVT::v8i8, Custom);
818
819 setOperationAction(ISD::MUL, MVT::v16i8, Custom);
820 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
821 setOperationAction(ISD::MUL, MVT::v2i64, Custom);
822 setOperationAction(ISD::MULHU, MVT::v4i32, Custom);
823 setOperationAction(ISD::MULHS, MVT::v4i32, Custom);
824 setOperationAction(ISD::MULHU, MVT::v16i8, Custom);
825 setOperationAction(ISD::MULHS, MVT::v16i8, Custom);
826 setOperationAction(ISD::MULHU, MVT::v8i16, Legal);
827 setOperationAction(ISD::MULHS, MVT::v8i16, Legal);
828 setOperationAction(ISD::MUL, MVT::v8i16, Legal);
829 setOperationAction(ISD::FNEG, MVT::v2f64, Custom);
830 setOperationAction(ISD::FABS, MVT::v2f64, Custom);
831 setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Custom);
832
833 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
834 setOperationAction(ISD::SMAX, VT, VT == MVT::v8i16 ? Legal : Custom);
835 setOperationAction(ISD::SMIN, VT, VT == MVT::v8i16 ? Legal : Custom);
836 setOperationAction(ISD::UMAX, VT, VT == MVT::v16i8 ? Legal : Custom);
837 setOperationAction(ISD::UMIN, VT, VT == MVT::v16i8 ? Legal : Custom);
838 }
839
840 setOperationAction(ISD::UADDSAT, MVT::v16i8, Legal);
841 setOperationAction(ISD::SADDSAT, MVT::v16i8, Legal);
842 setOperationAction(ISD::USUBSAT, MVT::v16i8, Legal);
843 setOperationAction(ISD::SSUBSAT, MVT::v16i8, Legal);
844 setOperationAction(ISD::UADDSAT, MVT::v8i16, Legal);
845 setOperationAction(ISD::SADDSAT, MVT::v8i16, Legal);
846 setOperationAction(ISD::USUBSAT, MVT::v8i16, Legal);
847 setOperationAction(ISD::SSUBSAT, MVT::v8i16, Legal);
848 setOperationAction(ISD::UADDSAT, MVT::v4i32, Custom);
849 setOperationAction(ISD::USUBSAT, MVT::v4i32, Custom);
850 setOperationAction(ISD::UADDSAT, MVT::v2i64, Custom);
851 setOperationAction(ISD::USUBSAT, MVT::v2i64, Custom);
852
853 if (!ExperimentalVectorWideningLegalization) {
854 // Use widening instead of promotion.
855 for (auto VT : { MVT::v8i8, MVT::v4i8, MVT::v2i8,
856 MVT::v4i16, MVT::v2i16 }) {
857 setOperationAction(ISD::UADDSAT, VT, Custom);
858 setOperationAction(ISD::SADDSAT, VT, Custom);
859 setOperationAction(ISD::USUBSAT, VT, Custom);
860 setOperationAction(ISD::SSUBSAT, VT, Custom);
861 }
862 }
863
864 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
865 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
866 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
867
868 // Provide custom widening for v2f32 setcc. This is really for VLX, where the
869 // setcc result type is a v2i1/v4i1 vector for v2f32/v4f32, leading to
870 // type legalization changing the result type to v4i1 during widening.
871 // It works fine for SSE2 and is probably faster so no need to qualify with
872 // VLX support.
873 setOperationAction(ISD::SETCC, MVT::v2i32, Custom);
874
875 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
876 setOperationAction(ISD::SETCC, VT, Custom);
877 setOperationAction(ISD::CTPOP, VT, Custom);
878 setOperationAction(ISD::ABS, VT, Custom);
879
880 // The condition codes aren't legal in SSE/AVX and under AVX512 we use
881 // setcc all the way to isel and prefer SETGT in some isel patterns.
882 setCondCodeAction(ISD::SETLT, VT, Custom);
883 setCondCodeAction(ISD::SETLE, VT, Custom);
884 }
885
886 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
887 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
888 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
889 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
890 setOperationAction(ISD::VSELECT, VT, Custom);
891 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
892 }
893
894 // We support custom legalizing of sext and anyext loads for specific
895 // memory vector types which we can load as a scalar (or sequence of
896 // scalars) and extend in-register to a legal 128-bit vector type. For sext
897 // loads these must work with a single scalar load.
898 for (MVT VT : MVT::integer_vector_valuetypes()) {
899 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i8, Custom);
900 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i16, Custom);
901 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i32, Custom);
902 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i8, Custom);
903 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i16, Custom);
904 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v8i8, Custom);
905 }
906
907 for (auto VT : { MVT::v2f64, MVT::v2i64 }) {
908 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
909 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
910 setOperationAction(ISD::VSELECT, VT, Custom);
911
912 if (VT == MVT::v2i64 && !Subtarget.is64Bit())
913 continue;
914
915 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
916 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
917 }
918
919 // Custom lower v2i64 and v2f64 selects.
920 setOperationAction(ISD::SELECT, MVT::v2f64, Custom);
921 setOperationAction(ISD::SELECT, MVT::v2i64, Custom);
922 setOperationAction(ISD::SELECT, MVT::v4i32, Custom);
923 setOperationAction(ISD::SELECT, MVT::v8i16, Custom);
924 setOperationAction(ISD::SELECT, MVT::v16i8, Custom);
925
926 setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
927 setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Custom);
928 setOperationAction(ISD::FP_TO_SINT, MVT::v2i16, Custom);
929
930 // Custom legalize these to avoid over promotion or custom promotion.
931 setOperationAction(ISD::FP_TO_SINT, MVT::v2i8, Custom);
932 setOperationAction(ISD::FP_TO_SINT, MVT::v4i8, Custom);
933 setOperationAction(ISD::FP_TO_SINT, MVT::v8i8, Custom);
934 setOperationAction(ISD::FP_TO_SINT, MVT::v2i16, Custom);
935 setOperationAction(ISD::FP_TO_SINT, MVT::v4i16, Custom);
936 setOperationAction(ISD::FP_TO_UINT, MVT::v2i8, Custom);
937 setOperationAction(ISD::FP_TO_UINT, MVT::v4i8, Custom);
938 setOperationAction(ISD::FP_TO_UINT, MVT::v8i8, Custom);
939 setOperationAction(ISD::FP_TO_UINT, MVT::v2i16, Custom);
940 setOperationAction(ISD::FP_TO_UINT, MVT::v4i16, Custom);
941
942 // By marking FP_TO_SINT v8i16 as Custom, we trick type legalization into
943 // promoting v8i8 FP_TO_UINT into FP_TO_SINT. When the v8i16 FP_TO_SINT is
944 // split again based on the input type, this will cause an AssertSExt i16 to
945 // be emitted instead of an AssertZExt. This will allow packssdw followed by
946 // packuswb to be used to truncate to v8i8. This is necessary since packusdw
947 // isn't available until sse4.1.
948 setOperationAction(ISD::FP_TO_SINT, MVT::v8i16, Custom);
949
950 setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
951 setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
952
953 setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom);
954
955 // Fast v2f32 UINT_TO_FP( v2i32 ) custom conversion.
956 setOperationAction(ISD::UINT_TO_FP, MVT::v2f32, Custom);
957
958 setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
959 setOperationAction(ISD::FP_ROUND, MVT::v2f32, Custom);
960
961 for (MVT VT : MVT::fp_vector_valuetypes())
962 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2f32, Legal);
963
964 // We want to legalize this to an f64 load rather than an i64 load on
965 // 64-bit targets and two 32-bit loads on a 32-bit target. Similar for
966 // store.
967 setOperationAction(ISD::LOAD, MVT::v2f32, Custom);
968 setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
969 setOperationAction(ISD::LOAD, MVT::v4i16, Custom);
970 setOperationAction(ISD::LOAD, MVT::v8i8, Custom);
971 setOperationAction(ISD::STORE, MVT::v2f32, Custom);
972 setOperationAction(ISD::STORE, MVT::v2i32, Custom);
973 setOperationAction(ISD::STORE, MVT::v4i16, Custom);
974 setOperationAction(ISD::STORE, MVT::v8i8, Custom);
975
976 setOperationAction(ISD::BITCAST, MVT::v2i32, Custom);
977 setOperationAction(ISD::BITCAST, MVT::v4i16, Custom);
978 setOperationAction(ISD::BITCAST, MVT::v8i8, Custom);
979 if (!Subtarget.hasAVX512())
980 setOperationAction(ISD::BITCAST, MVT::v16i1, Custom);
981
982 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v2i64, Custom);
983 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i32, Custom);
984 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i16, Custom);
985
986 if (ExperimentalVectorWideningLegalization) {
987 setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom);
988
989 setOperationAction(ISD::TRUNCATE, MVT::v2i8, Custom);
990 setOperationAction(ISD::TRUNCATE, MVT::v2i16, Custom);
991 setOperationAction(ISD::TRUNCATE, MVT::v2i32, Custom);
992 setOperationAction(ISD::TRUNCATE, MVT::v4i8, Custom);
993 setOperationAction(ISD::TRUNCATE, MVT::v4i16, Custom);
994 setOperationAction(ISD::TRUNCATE, MVT::v8i8, Custom);
995 } else {
996 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i64, Custom);
997 }
998
999 // In the customized shift lowering, the legal v4i32/v2i64 cases
1000 // in AVX2 will be recognized.
1001 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1002 setOperationAction(ISD::SRL, VT, Custom);
1003 setOperationAction(ISD::SHL, VT, Custom);
1004 setOperationAction(ISD::SRA, VT, Custom);
1005 }
1006
1007 setOperationAction(ISD::ROTL, MVT::v4i32, Custom);
1008 setOperationAction(ISD::ROTL, MVT::v8i16, Custom);
1009
1010 // With AVX512, expanding (and promoting the shifts) is better.
1011 if (!Subtarget.hasAVX512())
1012 setOperationAction(ISD::ROTL, MVT::v16i8, Custom);
1013 }
1014
1015 if (!Subtarget.useSoftFloat() && Subtarget.hasSSSE3()) {
1016 setOperationAction(ISD::ABS, MVT::v16i8, Legal);
1017 setOperationAction(ISD::ABS, MVT::v8i16, Legal);
1018 setOperationAction(ISD::ABS, MVT::v4i32, Legal);
1019 setOperationAction(ISD::BITREVERSE, MVT::v16i8, Custom);
1020 setOperationAction(ISD::CTLZ, MVT::v16i8, Custom);
1021 setOperationAction(ISD::CTLZ, MVT::v8i16, Custom);
1022 setOperationAction(ISD::CTLZ, MVT::v4i32, Custom);
1023 setOperationAction(ISD::CTLZ, MVT::v2i64, Custom);
1024
1025 // These might be better off as horizontal vector ops.
1026 setOperationAction(ISD::ADD, MVT::i16, Custom);
1027 setOperationAction(ISD::ADD, MVT::i32, Custom);
1028 setOperationAction(ISD::SUB, MVT::i16, Custom);
1029 setOperationAction(ISD::SUB, MVT::i32, Custom);
1030 }
1031
1032 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE41()) {
1033 for (MVT RoundedTy : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64}) {
1034 setOperationAction(ISD::FFLOOR, RoundedTy, Legal);
1035 setOperationAction(ISD::FCEIL, RoundedTy, Legal);
1036 setOperationAction(ISD::FTRUNC, RoundedTy, Legal);
1037 setOperationAction(ISD::FRINT, RoundedTy, Legal);
1038 setOperationAction(ISD::FNEARBYINT, RoundedTy, Legal);
1039 }
1040
1041 setOperationAction(ISD::SMAX, MVT::v16i8, Legal);
1042 setOperationAction(ISD::SMAX, MVT::v4i32, Legal);
1043 setOperationAction(ISD::UMAX, MVT::v8i16, Legal);
1044 setOperationAction(ISD::UMAX, MVT::v4i32, Legal);
1045 setOperationAction(ISD::SMIN, MVT::v16i8, Legal);
1046 setOperationAction(ISD::SMIN, MVT::v4i32, Legal);
1047 setOperationAction(ISD::UMIN, MVT::v8i16, Legal);
1048 setOperationAction(ISD::UMIN, MVT::v4i32, Legal);
1049
1050 // FIXME: Do we need to handle scalar-to-vector here?
1051 setOperationAction(ISD::MUL, MVT::v4i32, Legal);
1052
1053 // We directly match byte blends in the backend as they match the VSELECT
1054 // condition form.
1055 setOperationAction(ISD::VSELECT, MVT::v16i8, Legal);
1056
1057 // SSE41 brings specific instructions for doing vector sign extend even in
1058 // cases where we don't have SRA.
1059 for (auto VT : { MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1060 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Legal);
1061 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Legal);
1062 }
1063
1064 if (!ExperimentalVectorWideningLegalization) {
1065 // Avoid narrow result types when widening. The legal types are listed
1066 // in the next loop.
1067 for (MVT VT : MVT::integer_vector_valuetypes()) {
1068 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i8, Custom);
1069 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i16, Custom);
1070 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i32, Custom);
1071 }
1072 }
1073
1074 // SSE41 also has vector sign/zero extending loads, PMOV[SZ]X
1075 for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) {
1076 setLoadExtAction(LoadExtOp, MVT::v8i16, MVT::v8i8, Legal);
1077 setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i8, Legal);
1078 if (!ExperimentalVectorWideningLegalization)
1079 setLoadExtAction(LoadExtOp, MVT::v2i32, MVT::v2i8, Legal);
1080 setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i8, Legal);
1081 setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i16, Legal);
1082 setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i16, Legal);
1083 setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i32, Legal);
1084 }
1085
1086 // i8 vectors are custom because the source register and source
1087 // memory operand types are not the same width.
1088 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
1089 }
1090
1091 if (!Subtarget.useSoftFloat() && Subtarget.hasXOP()) {
1092 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
1093 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 })
1094 setOperationAction(ISD::ROTL, VT, Custom);
1095
1096 // XOP can efficiently perform BITREVERSE with VPPERM.
1097 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 })
1098 setOperationAction(ISD::BITREVERSE, VT, Custom);
1099
1100 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
1101 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 })
1102 setOperationAction(ISD::BITREVERSE, VT, Custom);
1103 }
1104
1105 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX()) {
1106 bool HasInt256 = Subtarget.hasInt256();
1107
1108 addRegisterClass(MVT::v32i8, Subtarget.hasVLX() ? &X86::VR256XRegClass
1109 : &X86::VR256RegClass);
1110 addRegisterClass(MVT::v16i16, Subtarget.hasVLX() ? &X86::VR256XRegClass
1111 : &X86::VR256RegClass);
1112 addRegisterClass(MVT::v8i32, Subtarget.hasVLX() ? &X86::VR256XRegClass
1113 : &X86::VR256RegClass);
1114 addRegisterClass(MVT::v8f32, Subtarget.hasVLX() ? &X86::VR256XRegClass
1115 : &X86::VR256RegClass);
1116 addRegisterClass(MVT::v4i64, Subtarget.hasVLX() ? &X86::VR256XRegClass
1117 : &X86::VR256RegClass);
1118 addRegisterClass(MVT::v4f64, Subtarget.hasVLX() ? &X86::VR256XRegClass
1119 : &X86::VR256RegClass);
1120
1121 for (auto VT : { MVT::v8f32, MVT::v4f64 }) {
1122 setOperationAction(ISD::FFLOOR, VT, Legal);
1123 setOperationAction(ISD::FCEIL, VT, Legal);
1124 setOperationAction(ISD::FTRUNC, VT, Legal);
1125 setOperationAction(ISD::FRINT, VT, Legal);
1126 setOperationAction(ISD::FNEARBYINT, VT, Legal);
1127 setOperationAction(ISD::FNEG, VT, Custom);
1128 setOperationAction(ISD::FABS, VT, Custom);
1129 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
1130 }
1131
1132 // (fp_to_int:v8i16 (v8f32 ..)) requires the result type to be promoted
1133 // even though v8i16 is a legal type.
1134 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i16, MVT::v8i32);
1135 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i16, MVT::v8i32);
1136 setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal);
1137
1138 setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
1139 setOperationAction(ISD::FP_ROUND, MVT::v4f32, Legal);
1140
1141 if (!Subtarget.hasAVX512())
1142 setOperationAction(ISD::BITCAST, MVT::v32i1, Custom);
1143
1144 for (MVT VT : MVT::fp_vector_valuetypes())
1145 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4f32, Legal);
1146
1147 // In the customized shift lowering, the legal v8i32/v4i64 cases
1148 // in AVX2 will be recognized.
1149 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1150 setOperationAction(ISD::SRL, VT, Custom);
1151 setOperationAction(ISD::SHL, VT, Custom);
1152 setOperationAction(ISD::SRA, VT, Custom);
1153 }
1154
1155 // These types need custom splitting if their input is a 128-bit vector.
1156 setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
1157 setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
1158 setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom);
1159 setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
1160
1161 setOperationAction(ISD::ROTL, MVT::v8i32, Custom);
1162 setOperationAction(ISD::ROTL, MVT::v16i16, Custom);
1163
1164 // With BWI, expanding (and promoting the shifts) is better.
1165 if (!Subtarget.hasBWI())
1166 setOperationAction(ISD::ROTL, MVT::v32i8, Custom);
1167
1168 setOperationAction(ISD::SELECT, MVT::v4f64, Custom);
1169 setOperationAction(ISD::SELECT, MVT::v4i64, Custom);
1170 setOperationAction(ISD::SELECT, MVT::v8i32, Custom);
1171 setOperationAction(ISD::SELECT, MVT::v16i16, Custom);
1172 setOperationAction(ISD::SELECT, MVT::v32i8, Custom);
1173 setOperationAction(ISD::SELECT, MVT::v8f32, Custom);
1174
1175 for (auto VT : { MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1176 setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
1177 setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
1178 setOperationAction(ISD::ANY_EXTEND, VT, Custom);
1179 }
1180
1181 setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom);
1182 setOperationAction(ISD::TRUNCATE, MVT::v8i16, Custom);
1183 setOperationAction(ISD::TRUNCATE, MVT::v4i32, Custom);
1184 setOperationAction(ISD::BITREVERSE, MVT::v32i8, Custom);
1185
1186 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1187 setOperationAction(ISD::SETCC, VT, Custom);
1188 setOperationAction(ISD::CTPOP, VT, Custom);
1189 setOperationAction(ISD::CTLZ, VT, Custom);
1190
1191 // TODO - remove this once 256-bit X86ISD::ANDNP is correctly split.
1192 setOperationAction(ISD::CTTZ, VT, HasInt256 ? Expand : Custom);
1193
1194 // The condition codes aren't legal in SSE/AVX and under AVX512 we use
1195 // setcc all the way to isel and prefer SETGT in some isel patterns.
1196 setCondCodeAction(ISD::SETLT, VT, Custom);
1197 setCondCodeAction(ISD::SETLE, VT, Custom);
1198 }
1199
1200 if (Subtarget.hasAnyFMA()) {
1201 for (auto VT : { MVT::f32, MVT::f64, MVT::v4f32, MVT::v8f32,
1202 MVT::v2f64, MVT::v4f64 })
1203 setOperationAction(ISD::FMA, VT, Legal);
1204 }
1205
1206 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1207 setOperationAction(ISD::ADD, VT, HasInt256 ? Legal : Custom);
1208 setOperationAction(ISD::SUB, VT, HasInt256 ? Legal : Custom);
1209 }
1210
1211 setOperationAction(ISD::MUL, MVT::v4i64, Custom);
1212 setOperationAction(ISD::MUL, MVT::v8i32, HasInt256 ? Legal : Custom);
1213 setOperationAction(ISD::MUL, MVT::v16i16, HasInt256 ? Legal : Custom);
1214 setOperationAction(ISD::MUL, MVT::v32i8, Custom);
1215
1216 setOperationAction(ISD::MULHU, MVT::v8i32, Custom);
1217 setOperationAction(ISD::MULHS, MVT::v8i32, Custom);
1218 setOperationAction(ISD::MULHU, MVT::v16i16, HasInt256 ? Legal : Custom);
1219 setOperationAction(ISD::MULHS, MVT::v16i16, HasInt256 ? Legal : Custom);
1220 setOperationAction(ISD::MULHU, MVT::v32i8, Custom);
1221 setOperationAction(ISD::MULHS, MVT::v32i8, Custom);
1222
1223 setOperationAction(ISD::ABS, MVT::v4i64, Custom);
1224 setOperationAction(ISD::SMAX, MVT::v4i64, Custom);
1225 setOperationAction(ISD::UMAX, MVT::v4i64, Custom);
1226 setOperationAction(ISD::SMIN, MVT::v4i64, Custom);
1227 setOperationAction(ISD::UMIN, MVT::v4i64, Custom);
1228
1229 setOperationAction(ISD::UADDSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
1230 setOperationAction(ISD::SADDSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
1231 setOperationAction(ISD::USUBSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
1232 setOperationAction(ISD::SSUBSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
1233 setOperationAction(ISD::UADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
1234 setOperationAction(ISD::SADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
1235 setOperationAction(ISD::USUBSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
1236 setOperationAction(ISD::SSUBSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
1237
1238 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32 }) {
1239 setOperationAction(ISD::ABS, VT, HasInt256 ? Legal : Custom);
1240 setOperationAction(ISD::SMAX, VT, HasInt256 ? Legal : Custom);
1241 setOperationAction(ISD::UMAX, VT, HasInt256 ? Legal : Custom);
1242 setOperationAction(ISD::SMIN, VT, HasInt256 ? Legal : Custom);
1243 setOperationAction(ISD::UMIN, VT, HasInt256 ? Legal : Custom);
1244 }
1245
1246 for (auto VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64}) {
1247 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom);
1248 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom);
1249 }
1250
1251 if (HasInt256) {
1252 // The custom lowering for UINT_TO_FP for v8i32 becomes interesting
1253 // when we have a 256-bit-wide blend with immediate.
1254 setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Custom);
1255
1256 // AVX2 also has wider vector sign/zero extending loads, VPMOV[SZ]X
1257 for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) {
1258 setLoadExtAction(LoadExtOp, MVT::v16i16, MVT::v16i8, Legal);
1259 setLoadExtAction(LoadExtOp, MVT::v8i32, MVT::v8i8, Legal);
1260 setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i8, Legal);
1261 setLoadExtAction(LoadExtOp, MVT::v8i32, MVT::v8i16, Legal);
1262 setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i16, Legal);
1263 setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i32, Legal);
1264 }
1265 }
1266
1267 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1268 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 }) {
1269 setOperationAction(ISD::MLOAD, VT, Legal);
1270 setOperationAction(ISD::MSTORE, VT, Legal);
1271 }
1272
1273 // Extract subvector is special because the value type
1274 // (result) is 128-bit but the source is 256-bit wide.
1275 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
1276 MVT::v4f32, MVT::v2f64 }) {
1277 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
1278 }
1279
1280 // Custom lower several nodes for 256-bit types.
1281 for (MVT VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,
1282 MVT::v8f32, MVT::v4f64 }) {
1283 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1284 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1285 setOperationAction(ISD::VSELECT, VT, Custom);
1286 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1287 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1288 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
1289 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal);
1290 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1291 }
1292
1293 if (HasInt256)
1294 setOperationAction(ISD::VSELECT, MVT::v32i8, Legal);
1295
1296 if (HasInt256) {
1297 // Custom legalize 2x32 to get a little better code.
1298 setOperationAction(ISD::MGATHER, MVT::v2f32, Custom);
1299 setOperationAction(ISD::MGATHER, MVT::v2i32, Custom);
1300
1301 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1302 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 })
1303 setOperationAction(ISD::MGATHER, VT, Custom);
1304 }
1305 }
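// Note: HasInt256 is the AVX2 check; on AVX1-only targets the 256-bit integer
// operations marked Custom above are generally lowered by splitting them into
// two 128-bit halves.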
1306
1307 // This block controls legalization of the mask vector sizes that are
1308 // available with AVX512. 512-bit vectors are in a separate block controlled
1309 // by useAVX512Regs.
1310 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
1311 addRegisterClass(MVT::v1i1, &X86::VK1RegClass);
1312 addRegisterClass(MVT::v2i1, &X86::VK2RegClass);
1313 addRegisterClass(MVT::v4i1, &X86::VK4RegClass);
1314 addRegisterClass(MVT::v8i1, &X86::VK8RegClass);
1315 addRegisterClass(MVT::v16i1, &X86::VK16RegClass);
1316
1317 setOperationAction(ISD::SELECT, MVT::v1i1, Custom);
1318 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v1i1, Custom);
1319 setOperationAction(ISD::BUILD_VECTOR, MVT::v1i1, Custom);
1320
1321 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i1, MVT::v8i32);
1322 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i1, MVT::v8i32);
1323 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v4i1, MVT::v4i32);
1324 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v4i1, MVT::v4i32);
1325 setOperationAction(ISD::FP_TO_SINT, MVT::v2i1, Custom);
1326 setOperationAction(ISD::FP_TO_UINT, MVT::v2i1, Custom);
1327
1328 // There is no byte sized k-register load or store without AVX512DQ.
1329 if (!Subtarget.hasDQI()) {
1330 setOperationAction(ISD::LOAD, MVT::v1i1, Custom);
1331 setOperationAction(ISD::LOAD, MVT::v2i1, Custom);
1332 setOperationAction(ISD::LOAD, MVT::v4i1, Custom);
1333 setOperationAction(ISD::LOAD, MVT::v8i1, Custom);
1334
1335 setOperationAction(ISD::STORE, MVT::v1i1, Custom);
1336 setOperationAction(ISD::STORE, MVT::v2i1, Custom);
1337 setOperationAction(ISD::STORE, MVT::v4i1, Custom);
1338 setOperationAction(ISD::STORE, MVT::v8i1, Custom);
1339 }
1340
1341 // Extends of v16i1/v8i1/v4i1/v2i1 to 128-bit vectors.
1342 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1343 setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
1344 setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
1345 setOperationAction(ISD::ANY_EXTEND, VT, Custom);
1346 }
1347
1348 for (auto VT : { MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v16i1 }) {
1349 setOperationAction(ISD::ADD, VT, Custom);
1350 setOperationAction(ISD::SUB, VT, Custom);
1351 setOperationAction(ISD::MUL, VT, Custom);
1352 setOperationAction(ISD::SETCC, VT, Custom);
1353 setOperationAction(ISD::SELECT, VT, Custom);
1354 setOperationAction(ISD::TRUNCATE, VT, Custom);
1355 setOperationAction(ISD::UADDSAT, VT, Custom);
1356 setOperationAction(ISD::SADDSAT, VT, Custom);
1357 setOperationAction(ISD::USUBSAT, VT, Custom);
1358 setOperationAction(ISD::SSUBSAT, VT, Custom);
1359
1360 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1361 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1362 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1363 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1364 setOperationAction(ISD::VSELECT, VT, Expand);
1365 }
1366
1367 setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i1, Custom);
1368 setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i1, Custom);
1369 setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i1, Custom);
1370 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v2i1, Custom);
1371 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v4i1, Custom);
1372 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v8i1, Custom);
1373 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v16i1, Custom);
1374 for (auto VT : { MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1 })
1375 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
1376 }
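// Note: the vXi1 types registered above live in the AVX-512 mask (k)
// registers. Without AVX512DQ there is no byte-sized KMOV, which is why the
// v1i1..v8i1 loads and stores are Custom-lowered in that case.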
1377
1378 // This block controls legalization for 512-bit operations with 32/64 bit
1379 // elements. 512-bits can be disabled based on prefer-vector-width and
1380 // required-vector-width function attributes.
1381 if (!Subtarget.useSoftFloat() && Subtarget.useAVX512Regs()) {
1382 addRegisterClass(MVT::v16i32, &X86::VR512RegClass);
1383 addRegisterClass(MVT::v16f32, &X86::VR512RegClass);
1384 addRegisterClass(MVT::v8i64, &X86::VR512RegClass);
1385 addRegisterClass(MVT::v8f64, &X86::VR512RegClass);
1386
1387 for (MVT VT : MVT::fp_vector_valuetypes())
1388 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v8f32, Legal);
1389
1390 for (auto ExtType : {ISD::ZEXTLOAD, ISD::SEXTLOAD}) {
1391 setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i8, Legal);
1392 setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i16, Legal);
1393 setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i8, Legal);
1394 setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i16, Legal);
1395 setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i32, Legal);
1396 }
1397
1398 for (MVT VT : { MVT::v16f32, MVT::v8f64 }) {
1399 setOperationAction(ISD::FNEG, VT, Custom);
1400 setOperationAction(ISD::FABS, VT, Custom);
1401 setOperationAction(ISD::FMA, VT, Legal);
1402 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
1403 }
1404
1405 setOperationAction(ISD::FP_TO_SINT, MVT::v16i32, Legal);
1406 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v16i16, MVT::v16i32);
1407 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v16i8, MVT::v16i32);
1408 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v16i1, MVT::v16i32);
1409 setOperationAction(ISD::FP_TO_UINT, MVT::v16i32, Legal);
1410 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v16i1, MVT::v16i32);
1411 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v16i8, MVT::v16i32);
1412 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v16i16, MVT::v16i32);
1413 setOperationAction(ISD::SINT_TO_FP, MVT::v16i32, Legal);
1414 setOperationAction(ISD::UINT_TO_FP, MVT::v16i32, Legal);
1415
1416 setTruncStoreAction(MVT::v8i64, MVT::v8i8, Legal);
1417 setTruncStoreAction(MVT::v8i64, MVT::v8i16, Legal);
1418 setTruncStoreAction(MVT::v8i64, MVT::v8i32, Legal);
1419 setTruncStoreAction(MVT::v16i32, MVT::v16i8, Legal);
1420 setTruncStoreAction(MVT::v16i32, MVT::v16i16, Legal);
1421
1422 if (!Subtarget.hasVLX()) {
1423 // With 512-bit vectors and no VLX, we prefer to widen MLOAD/MSTORE
1424 // to 512-bit rather than use the AVX2 instructions so that we can use
1425 // k-masks.
1426 for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1427 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64}) {
1428 setOperationAction(ISD::MLOAD, VT, Custom);
1429 setOperationAction(ISD::MSTORE, VT, Custom);
1430 }
1431 }
1432
1433 setOperationAction(ISD::TRUNCATE, MVT::v8i32, Custom);
1434 setOperationAction(ISD::TRUNCATE, MVT::v16i16, Custom);
1435 setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
1436 setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom);
1437 setOperationAction(ISD::ANY_EXTEND, MVT::v16i32, Custom);
1438 setOperationAction(ISD::ANY_EXTEND, MVT::v8i64, Custom);
1439 setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
1440 setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
1441
1442 if (ExperimentalVectorWideningLegalization) {
1443 // Need to custom widen this if we don't have AVX512BW.
1444 setOperationAction(ISD::ANY_EXTEND, MVT::v8i8, Custom);
1445 setOperationAction(ISD::ZERO_EXTEND, MVT::v8i8, Custom);
1446 setOperationAction(ISD::SIGN_EXTEND, MVT::v8i8, Custom);
1447 }
1448
1449 for (auto VT : { MVT::v16f32, MVT::v8f64 }) {
1450 setOperationAction(ISD::FFLOOR, VT, Legal);
1451 setOperationAction(ISD::FCEIL, VT, Legal);
1452 setOperationAction(ISD::FTRUNC, VT, Legal);
1453 setOperationAction(ISD::FRINT, VT, Legal);
1454 setOperationAction(ISD::FNEARBYINT, VT, Legal);
1455 }
1456
1457 // Without BWI we need to use custom lowering to handle MVT::v64i8 input.
1458 for (auto VT : {MVT::v16i32, MVT::v8i64, MVT::v64i8}) {
1459 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom);
1460 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom);
1461 }
1462
1463 setOperationAction(ISD::CONCAT_VECTORS, MVT::v8f64, Custom);
1464 setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i64, Custom);
1465 setOperationAction(ISD::CONCAT_VECTORS, MVT::v16f32, Custom);
1466 setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i32, Custom);
1467
1468 setOperationAction(ISD::MUL, MVT::v8i64, Custom);
1469 setOperationAction(ISD::MUL, MVT::v16i32, Legal);
1470
1471 setOperationAction(ISD::MULHU, MVT::v16i32, Custom);
1472 setOperationAction(ISD::MULHS, MVT::v16i32, Custom);
1473
1474 setOperationAction(ISD::SELECT, MVT::v8f64, Custom);
1475 setOperationAction(ISD::SELECT, MVT::v8i64, Custom);
1476 setOperationAction(ISD::SELECT, MVT::v16i32, Custom);
1477 setOperationAction(ISD::SELECT, MVT::v32i16, Custom);
1478 setOperationAction(ISD::SELECT, MVT::v64i8, Custom);
1479 setOperationAction(ISD::SELECT, MVT::v16f32, Custom);
1480
1481 for (auto VT : { MVT::v16i32, MVT::v8i64 }) {
1482 setOperationAction(ISD::SMAX, VT, Legal);
1483 setOperationAction(ISD::UMAX, VT, Legal);
1484 setOperationAction(ISD::SMIN, VT, Legal);
1485 setOperationAction(ISD::UMIN, VT, Legal);
1486 setOperationAction(ISD::ABS, VT, Legal);
1487 setOperationAction(ISD::SRL, VT, Custom);
1488 setOperationAction(ISD::SHL, VT, Custom);
1489 setOperationAction(ISD::SRA, VT, Custom);
1490 setOperationAction(ISD::CTPOP, VT, Custom);
1491 setOperationAction(ISD::ROTL, VT, Custom);
1492 setOperationAction(ISD::ROTR, VT, Custom);
1493 setOperationAction(ISD::SETCC, VT, Custom);
1494
1495 // The condition codes aren't legal in SSE/AVX and under AVX512 we use
1496 // setcc all the way to isel and prefer SETGT in some isel patterns.
1497 setCondCodeAction(ISD::SETLT, VT, Custom);
1498 setCondCodeAction(ISD::SETLE, VT, Custom);
1499 }
1500
1501 if (Subtarget.hasDQI()) {
1502 setOperationAction(ISD::SINT_TO_FP, MVT::v8i64, Legal);
1503 setOperationAction(ISD::UINT_TO_FP, MVT::v8i64, Legal);
1504 setOperationAction(ISD::FP_TO_SINT, MVT::v8i64, Legal);
1505 setOperationAction(ISD::FP_TO_UINT, MVT::v8i64, Legal);
1506
1507 setOperationAction(ISD::MUL, MVT::v8i64, Legal);
1508 }
1509
1510 if (Subtarget.hasCDI()) {
1511 // NonVLX sub-targets extend 128/256 vectors to use the 512 version.
1512 for (auto VT : { MVT::v16i32, MVT::v8i64} ) {
1513 setOperationAction(ISD::CTLZ, VT, Legal);
1514 }
1515 } // Subtarget.hasCDI()
1516
1517 if (Subtarget.hasVPOPCNTDQ()) {
1518 for (auto VT : { MVT::v16i32, MVT::v8i64 })
1519 setOperationAction(ISD::CTPOP, VT, Legal);
1520 }
1521
1522 // Extract subvector is special because the value type
1523 // (result) is 256-bit but the source is 512-bit wide.
1524 // 128-bit was made Legal under AVX1.
1525 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,
1526 MVT::v8f32, MVT::v4f64 })
1527 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
1528
1529 for (auto VT : { MVT::v16i32, MVT::v8i64, MVT::v16f32, MVT::v8f64 }) {
1530 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1531 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1532 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1533 setOperationAction(ISD::VSELECT, VT, Custom);
1534 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1535 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
1536 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal);
1537 setOperationAction(ISD::MLOAD, VT, Legal);
1538 setOperationAction(ISD::MSTORE, VT, Legal);
1539 setOperationAction(ISD::MGATHER, VT, Custom);
1540 setOperationAction(ISD::MSCATTER, VT, Custom);
1541 }
1542 // Need to custom split v32i16/v64i8 bitcasts.
1543 if (!Subtarget.hasBWI()) {
1544 setOperationAction(ISD::BITCAST, MVT::v32i16, Custom);
1545 setOperationAction(ISD::BITCAST, MVT::v64i8, Custom);
1546 }
1547
1548 if (Subtarget.hasVBMI2()) {
1549 for (auto VT : { MVT::v16i32, MVT::v8i64 }) {
1550 setOperationAction(ISD::FSHL, VT, Custom);
1551 setOperationAction(ISD::FSHR, VT, Custom);
1552 }
1553 }
1554 }// has AVX-512
1555
1556 // This block controls legalization for operations that don't have
1557 // pre-AVX512 equivalents. Without VLX we use 512-bit operations for
1558 // narrower widths.
1559 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
1560 // These operations are handled on non-VLX by artificially widening in
1561 // isel patterns.
1562 // TODO: Custom widen in lowering on non-VLX and drop the isel patterns?
1563
1564 setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Legal);
1565 setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
1566 setOperationAction(ISD::FP_TO_UINT, MVT::v2i32, Custom);
1567 setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Legal);
1568 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
1569
1570 for (auto VT : { MVT::v2i64, MVT::v4i64 }) {
1571 setOperationAction(ISD::SMAX, VT, Legal);
1572 setOperationAction(ISD::UMAX, VT, Legal);
1573 setOperationAction(ISD::SMIN, VT, Legal);
1574 setOperationAction(ISD::UMIN, VT, Legal);
1575 setOperationAction(ISD::ABS, VT, Legal);
1576 }
1577
1578 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) {
1579 setOperationAction(ISD::ROTL, VT, Custom);
1580 setOperationAction(ISD::ROTR, VT, Custom);
1581 }
1582
1583 // Custom legalize 2x32 to get a little better code.
1584 setOperationAction(ISD::MSCATTER, MVT::v2f32, Custom);
1585 setOperationAction(ISD::MSCATTER, MVT::v2i32, Custom);
1586
1587 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1588 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 })
1589 setOperationAction(ISD::MSCATTER, VT, Custom);
1590
1591 if (Subtarget.hasDQI()) {
1592 for (auto VT : { MVT::v2i64, MVT::v4i64 }) {
1593 setOperationAction(ISD::SINT_TO_FP, VT, Legal);
1594 setOperationAction(ISD::UINT_TO_FP, VT, Legal);
1595 setOperationAction(ISD::FP_TO_SINT, VT, Legal);
1596 setOperationAction(ISD::FP_TO_UINT, VT, Legal);
1597
1598 setOperationAction(ISD::MUL, VT, Legal);
1599 }
1600 }
1601
1602 if (Subtarget.hasCDI()) {
1603 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) {
1604 setOperationAction(ISD::CTLZ, VT, Legal);
1605 }
1606 } // Subtarget.hasCDI()
1607
1608 if (Subtarget.hasVPOPCNTDQ()) {
1609 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 })
1610 setOperationAction(ISD::CTPOP, VT, Legal);
1611 }
1612 }
1613
1614 // This block controls legalization of v32i1/v64i1, which are available with
1615 // AVX512BW. 512-bit v32i16 and v64i8 vector legalization is controlled with
1616 // useBWIRegs.
1617 if (!Subtarget.useSoftFloat() && Subtarget.hasBWI()) {
1618 addRegisterClass(MVT::v32i1, &X86::VK32RegClass);
1619 addRegisterClass(MVT::v64i1, &X86::VK64RegClass);
1620
1621 for (auto VT : { MVT::v32i1, MVT::v64i1 }) {
1622 setOperationAction(ISD::ADD, VT, Custom);
1623 setOperationAction(ISD::SUB, VT, Custom);
1624 setOperationAction(ISD::MUL, VT, Custom);
1625 setOperationAction(ISD::VSELECT, VT, Expand);
1626 setOperationAction(ISD::UADDSAT, VT, Custom);
1627 setOperationAction(ISD::SADDSAT, VT, Custom);
1628 setOperationAction(ISD::USUBSAT, VT, Custom);
1629 setOperationAction(ISD::SSUBSAT, VT, Custom);
1630
1631 setOperationAction(ISD::TRUNCATE, VT, Custom);
1632 setOperationAction(ISD::SETCC, VT, Custom);
1633 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1634 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1635 setOperationAction(ISD::SELECT, VT, Custom);
1636 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1637 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1638 }
1639
1640 setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i1, Custom);
1641 setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i1, Custom);
1642 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32i1, Custom);
1643 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64i1, Custom);
1644 for (auto VT : { MVT::v16i1, MVT::v32i1 })
1645 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
1646
1647 // Extends from v32i1 masks to 256-bit vectors.
1648 setOperationAction(ISD::SIGN_EXTEND, MVT::v32i8, Custom);
1649 setOperationAction(ISD::ZERO_EXTEND, MVT::v32i8, Custom);
1650 setOperationAction(ISD::ANY_EXTEND, MVT::v32i8, Custom);
1651 }
1652
1653 // This block controls legalization for v32i16 and v64i8. 512-bits can be
1654 // disabled based on prefer-vector-width and required-vector-width function
1655 // attributes.
1656 if (!Subtarget.useSoftFloat() && Subtarget.useBWIRegs()) {
1657 addRegisterClass(MVT::v32i16, &X86::VR512RegClass);
1658 addRegisterClass(MVT::v64i8, &X86::VR512RegClass);
1659
1660 // Extends from v64i1 masks to 512-bit vectors.
1661 setOperationAction(ISD::SIGN_EXTEND, MVT::v64i8, Custom);
1662 setOperationAction(ISD::ZERO_EXTEND, MVT::v64i8, Custom);
1663 setOperationAction(ISD::ANY_EXTEND, MVT::v64i8, Custom);
1664
1665 setOperationAction(ISD::MUL, MVT::v32i16, Legal);
1666 setOperationAction(ISD::MUL, MVT::v64i8, Custom);
1667 setOperationAction(ISD::MULHS, MVT::v32i16, Legal);
1668 setOperationAction(ISD::MULHU, MVT::v32i16, Legal);
1669 setOperationAction(ISD::MULHS, MVT::v64i8, Custom);
1670 setOperationAction(ISD::MULHU, MVT::v64i8, Custom);
1671 setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i16, Custom);
1672 setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i8, Custom);
1673 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32i16, Legal);
1674 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64i8, Legal);
1675 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v32i16, Custom);
1676 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v64i8, Custom);
1677 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v32i16, Custom);
1678 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v64i8, Custom);
1679 setOperationAction(ISD::SIGN_EXTEND, MVT::v32i16, Custom);
1680 setOperationAction(ISD::ZERO_EXTEND, MVT::v32i16, Custom);
1681 setOperationAction(ISD::ANY_EXTEND, MVT::v32i16, Custom);
1682 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v32i16, Custom);
1683 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v64i8, Custom);
1684 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v32i16, Custom);
1685 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v64i8, Custom);
1686 setOperationAction(ISD::TRUNCATE, MVT::v32i8, Custom);
1687 setOperationAction(ISD::BITREVERSE, MVT::v64i8, Custom);
1688
1689 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v32i16, Custom);
1690 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, MVT::v32i16, Custom);
1691
1692 setTruncStoreAction(MVT::v32i16, MVT::v32i8, Legal);
1693
1694 for (auto VT : { MVT::v64i8, MVT::v32i16 }) {
1695 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1696 setOperationAction(ISD::VSELECT, VT, Custom);
1697 setOperationAction(ISD::ABS, VT, Legal);
1698 setOperationAction(ISD::SRL, VT, Custom);
1699 setOperationAction(ISD::SHL, VT, Custom);
1700 setOperationAction(ISD::SRA, VT, Custom);
1701 setOperationAction(ISD::MLOAD, VT, Legal);
1702 setOperationAction(ISD::MSTORE, VT, Legal);
1703 setOperationAction(ISD::CTPOP, VT, Custom);
1704 setOperationAction(ISD::CTLZ, VT, Custom);
1705 setOperationAction(ISD::SMAX, VT, Legal);
1706 setOperationAction(ISD::UMAX, VT, Legal);
1707 setOperationAction(ISD::SMIN, VT, Legal);
1708 setOperationAction(ISD::UMIN, VT, Legal);
1709 setOperationAction(ISD::SETCC, VT, Custom);
1710 setOperationAction(ISD::UADDSAT, VT, Legal);
1711 setOperationAction(ISD::SADDSAT, VT, Legal);
1712 setOperationAction(ISD::USUBSAT, VT, Legal);
1713 setOperationAction(ISD::SSUBSAT, VT, Legal);
1714
1715 // The condition codes aren't legal in SSE/AVX and under AVX512 we use
1716 // setcc all the way to isel and prefer SETGT in some isel patterns.
1717 setCondCodeAction(ISD::SETLT, VT, Custom);
1718 setCondCodeAction(ISD::SETLE, VT, Custom);
1719 }
1720
1721 for (auto ExtType : {ISD::ZEXTLOAD, ISD::SEXTLOAD}) {
1722 setLoadExtAction(ExtType, MVT::v32i16, MVT::v32i8, Legal);
1723 }
1724
1725 if (Subtarget.hasBITALG()) {
1726 for (auto VT : { MVT::v64i8, MVT::v32i16 })
1727 setOperationAction(ISD::CTPOP, VT, Legal);
1728 }
1729
1730 if (Subtarget.hasVBMI2()) {
1731 setOperationAction(ISD::FSHL, MVT::v32i16, Custom);
1732 setOperationAction(ISD::FSHR, MVT::v32i16, Custom);
1733 }
1734 }
1735
1736 if (!Subtarget.useSoftFloat() && Subtarget.hasBWI()) {
1737 for (auto VT : { MVT::v32i8, MVT::v16i8, MVT::v16i16, MVT::v8i16 }) {
1738 setOperationAction(ISD::MLOAD, VT, Subtarget.hasVLX() ? Legal : Custom);
1739 setOperationAction(ISD::MSTORE, VT, Subtarget.hasVLX() ? Legal : Custom);
1740 }
1741
1742 // These operations are handled on non-VLX by artificially widening in
1743 // isel patterns.
1744 // TODO: Custom widen in lowering on non-VLX and drop the isel patterns?
1745
1746 if (Subtarget.hasBITALG()) {
1747 for (auto VT : { MVT::v16i8, MVT::v32i8, MVT::v8i16, MVT::v16i16 })
1748 setOperationAction(ISD::CTPOP, VT, Legal);
1749 }
1750 }
1751
1752 if (!Subtarget.useSoftFloat() && Subtarget.hasVLX()) {
1753 setTruncStoreAction(MVT::v4i64, MVT::v4i8, Legal);
1754 setTruncStoreAction(MVT::v4i64, MVT::v4i16, Legal);
1755 setTruncStoreAction(MVT::v4i64, MVT::v4i32, Legal);
1756 setTruncStoreAction(MVT::v8i32, MVT::v8i8, Legal);
1757 setTruncStoreAction(MVT::v8i32, MVT::v8i16, Legal);
1758
1759 setTruncStoreAction(MVT::v2i64, MVT::v2i8, Legal);
1760 setTruncStoreAction(MVT::v2i64, MVT::v2i16, Legal);
1761 setTruncStoreAction(MVT::v2i64, MVT::v2i32, Legal);
1762 setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal);
1763 setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
1764
1765 if (Subtarget.hasDQI()) {
1766 // Fast v2f32 SINT_TO_FP( v2i64 ) custom conversion.
1767 // v2f32 UINT_TO_FP is already custom under SSE2.
1768 setOperationAction(ISD::SINT_TO_FP, MVT::v2f32, Custom);
1769 assert(isOperationCustom(ISD::UINT_TO_FP, MVT::v2f32) &&
1770 "Unexpected operation action!");
1771 // v2i64 FP_TO_S/UINT(v2f32) custom conversion.
1772 setOperationAction(ISD::FP_TO_SINT, MVT::v2f32, Custom);
1773 setOperationAction(ISD::FP_TO_UINT, MVT::v2f32, Custom);
1774 }
1775
1776 if (Subtarget.hasBWI()) {
1777 setTruncStoreAction(MVT::v16i16, MVT::v16i8, Legal);
1778 setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
1779 }
1780
1781 if (Subtarget.hasVBMI2()) {
1782 // TODO: Make these legal even without VLX?
1783 for (auto VT : { MVT::v8i16, MVT::v4i32, MVT::v2i64,
1784 MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1785 setOperationAction(ISD::FSHL, VT, Custom);
1786 setOperationAction(ISD::FSHR, VT, Custom);
1787 }
1788 }
1789 }
1790
1791 // We want to custom lower some of our intrinsics.
1792 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
1793 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
1794 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
1795 if (!Subtarget.is64Bit()) {
1796 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
1797 }
1798
1799 // Only custom-lower 64-bit SADDO and friends on 64-bit because we don't
1800 // handle type legalization for these operations here.
1801 //
1802 // FIXME: We really should do custom legalization for addition and
1803 // subtraction on x86-32 once PR3203 is fixed. We really can't do much better
1804 // than generic legalization for 64-bit multiplication-with-overflow, though.
1805 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
1806 if (VT == MVT::i64 && !Subtarget.is64Bit())
1807 continue;
1808 // Add/Sub/Mul with overflow operations are custom lowered.
1809 setOperationAction(ISD::SADDO, VT, Custom);
1810 setOperationAction(ISD::UADDO, VT, Custom);
1811 setOperationAction(ISD::SSUBO, VT, Custom);
1812 setOperationAction(ISD::USUBO, VT, Custom);
1813 setOperationAction(ISD::SMULO, VT, Custom);
1814 setOperationAction(ISD::UMULO, VT, Custom);
1815
1816 // Support carry in as value rather than glue.
1817 setOperationAction(ISD::ADDCARRY, VT, Custom);
1818 setOperationAction(ISD::SUBCARRY, VT, Custom);
1819 setOperationAction(ISD::SETCCCARRY, VT, Custom);
1820 }
1821
1822 if (!Subtarget.is64Bit()) {
1823 // These libcalls are not available in 32-bit.
1824 setLibcallName(RTLIB::SHL_I128, nullptr);
1825 setLibcallName(RTLIB::SRL_I128, nullptr);
1826 setLibcallName(RTLIB::SRA_I128, nullptr);
1827 setLibcallName(RTLIB::MUL_I128, nullptr);
1828 }
1829
1830 // Combine sin / cos into _sincos_stret if it is available.
1831 if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
1832 getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) {
1833 setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
1834 setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
1835 }
1836
1837 if (Subtarget.isTargetWin64()) {
1838 setOperationAction(ISD::SDIV, MVT::i128, Custom);
1839 setOperationAction(ISD::UDIV, MVT::i128, Custom);
1840 setOperationAction(ISD::SREM, MVT::i128, Custom);
1841 setOperationAction(ISD::UREM, MVT::i128, Custom);
1842 setOperationAction(ISD::SDIVREM, MVT::i128, Custom);
1843 setOperationAction(ISD::UDIVREM, MVT::i128, Custom);
1844 }
1845
1846 // On 32 bit MSVC, `fmodf(f32)` is not defined - only `fmod(f64)`
1847 // is. We should promote the value to 64-bits to solve this.
1848 // This is what the CRT headers do - `fmodf` is an inline header
1849 // function casting to f64 and calling `fmod`.
1850 if (Subtarget.is32Bit() && (Subtarget.isTargetKnownWindowsMSVC() ||
1851 Subtarget.isTargetWindowsItanium()))
1852 for (ISD::NodeType Op :
1853 {ISD::FCEIL, ISD::FCOS, ISD::FEXP, ISD::FFLOOR, ISD::FREM, ISD::FLOG,
1854 ISD::FLOG10, ISD::FPOW, ISD::FSIN})
1855 if (isOperationExpand(Op, MVT::f32))
1856 setOperationAction(Op, MVT::f32, Promote);
1857
1858 // We have target-specific dag combine patterns for the following nodes:
1859 setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
1860 setTargetDAGCombine(ISD::SCALAR_TO_VECTOR);
1861 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
1862 setTargetDAGCombine(ISD::CONCAT_VECTORS);
1863 setTargetDAGCombine(ISD::INSERT_SUBVECTOR);
1864 setTargetDAGCombine(ISD::EXTRACT_SUBVECTOR);
1865 setTargetDAGCombine(ISD::BITCAST);
1866 setTargetDAGCombine(ISD::VSELECT);
1867 setTargetDAGCombine(ISD::SELECT);
1868 setTargetDAGCombine(ISD::SHL);
1869 setTargetDAGCombine(ISD::SRA);
1870 setTargetDAGCombine(ISD::SRL);
1871 setTargetDAGCombine(ISD::OR);
1872 setTargetDAGCombine(ISD::AND);
1873 setTargetDAGCombine(ISD::ADD);
1874 setTargetDAGCombine(ISD::FADD);
1875 setTargetDAGCombine(ISD::FSUB);
1876 setTargetDAGCombine(ISD::FNEG);
1877 setTargetDAGCombine(ISD::FMA);
1878 setTargetDAGCombine(ISD::FMINNUM);
1879 setTargetDAGCombine(ISD::FMAXNUM);
1880 setTargetDAGCombine(ISD::SUB);
1881 setTargetDAGCombine(ISD::LOAD);
1882 setTargetDAGCombine(ISD::MLOAD);
1883 setTargetDAGCombine(ISD::STORE);
1884 setTargetDAGCombine(ISD::MSTORE);
1885 setTargetDAGCombine(ISD::TRUNCATE);
1886 setTargetDAGCombine(ISD::ZERO_EXTEND);
1887 setTargetDAGCombine(ISD::ANY_EXTEND);
1888 setTargetDAGCombine(ISD::SIGN_EXTEND);
1889 setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
1890 setTargetDAGCombine(ISD::ANY_EXTEND_VECTOR_INREG);
1891 setTargetDAGCombine(ISD::ZERO_EXTEND_VECTOR_INREG);
1892 setTargetDAGCombine(ISD::SINT_TO_FP);
1893 setTargetDAGCombine(ISD::UINT_TO_FP);
1894 setTargetDAGCombine(ISD::SETCC);
1895 setTargetDAGCombine(ISD::MUL);
1896 setTargetDAGCombine(ISD::XOR);
1897 setTargetDAGCombine(ISD::MSCATTER);
1898 setTargetDAGCombine(ISD::MGATHER);
1899
1900 computeRegisterProperties(Subtarget.getRegisterInfo());
1901
1902 MaxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
1903 MaxStoresPerMemsetOptSize = 8;
1904 MaxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores
1905 MaxStoresPerMemcpyOptSize = 4;
1906 MaxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores
1907 MaxStoresPerMemmoveOptSize = 4;
1908
1909 // TODO: These control memcmp expansion in CGP and could be raised higher, but
1910 // that needs to be benchmarked and balanced with the potential use of vector
1911 // load/store types (PR33329, PR33914).
1912 MaxLoadsPerMemcmp = 2;
1913 MaxLoadsPerMemcmpOptSize = 2;
1914
1915 // Set loop alignment to 2^ExperimentalPrefLoopAlignment bytes (default: 2^4).
1916 setPrefLoopAlignment(ExperimentalPrefLoopAlignment);
1917
1918 // An out-of-order CPU can speculatively execute past a predictable branch,
1919 // but a conditional move could be stalled by an expensive earlier operation.
1920 PredictableSelectIsExpensive = Subtarget.getSchedModel().isOutOfOrder();
1921 EnableExtLdPromotion = true;
1922 setPrefFunctionAlignment(4); // 2^4 bytes.
1923
1924 verifyIntrinsicTables();
1925}
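// Illustrative sketch (not part of the original file): how the action tables
// built by the constructor above are consulted later. wantsCustomLowering is a
// hypothetical helper; getOperationAction() is the real TargetLoweringBase
// query used during SelectionDAG legalization.
static bool wantsCustomLowering(const TargetLowering &TLI, unsigned Opcode,
                                EVT VT) {
  // Custom means X86TargetLowering::LowerOperation will be invoked for this
  // node; Legal means the node is left as-is for instruction selection.
  return TLI.getOperationAction(Opcode, VT) == TargetLowering::Custom;
}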
1926
1927// This has so far only been implemented for 64-bit MachO.
1928bool X86TargetLowering::useLoadStackGuardNode() const {
1929 return Subtarget.isTargetMachO() && Subtarget.is64Bit();
1930}
1931
1932bool X86TargetLowering::useStackGuardXorFP() const {
1933 // Currently only MSVC CRTs XOR the frame pointer into the stack guard value.
1934 return Subtarget.getTargetTriple().isOSMSVCRT();
1935}
1936
1937SDValue X86TargetLowering::emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
1938 const SDLoc &DL) const {
1939 EVT PtrTy = getPointerTy(DAG.getDataLayout());
1940 unsigned XorOp = Subtarget.is64Bit() ? X86::XOR64_FP : X86::XOR32_FP;
1941 MachineSDNode *Node = DAG.getMachineNode(XorOp, DL, PtrTy, Val);
1942 return SDValue(Node, 0);
1943}
1944
1945TargetLoweringBase::LegalizeTypeAction
1946X86TargetLowering::getPreferredVectorAction(MVT VT) const {
1947 if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI())
1948 return TypeSplitVector;
1949
1950 if (ExperimentalVectorWideningLegalization &&
1951 VT.getVectorNumElements() != 1 &&
1952 VT.getVectorElementType() != MVT::i1)
1953 return TypeWidenVector;
1954
1955 return TargetLoweringBase::getPreferredVectorAction(VT);
1956}
1957
1958MVT X86TargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
1959 CallingConv::ID CC,
1960 EVT VT) const {
1961 if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI())
1962 return MVT::v32i8;
1963 return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
1964}
1965
1966unsigned X86TargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
1967 CallingConv::ID CC,
1968 EVT VT) const {
1969 if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI())
1970 return 1;
1971 return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
1972}
1973
1974EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL,
1975 LLVMContext& Context,
1976 EVT VT) const {
1977 if (!VT.isVector())
1978 return MVT::i8;
1979
1980 if (Subtarget.hasAVX512()) {
1981 const unsigned NumElts = VT.getVectorNumElements();
1982
1983 // Figure out what this type will be legalized to.
1984 EVT LegalVT = VT;
1985 while (getTypeAction(Context, LegalVT) != TypeLegal)
1986 LegalVT = getTypeToTransformTo(Context, LegalVT);
1987
1988 // If we got a 512-bit vector then we'll definitely have a vXi1 compare.
1989 if (LegalVT.getSimpleVT().is512BitVector())
1990 return EVT::getVectorVT(Context, MVT::i1, NumElts);
1991
1992 if (LegalVT.getSimpleVT().isVector() && Subtarget.hasVLX()) {
1993 // If we legalized to less than a 512-bit vector, then we will use a vXi1
1994 // compare for vXi32/vXi64 for sure. If we have BWI we will also support
1995 // vXi16/vXi8.
1996 MVT EltVT = LegalVT.getSimpleVT().getVectorElementType();
1997 if (Subtarget.hasBWI() || EltVT.getSizeInBits() >= 32)
1998 return EVT::getVectorVT(Context, MVT::i1, NumElts);
1999 }
2000 }
2001
2002 return VT.changeVectorElementTypeToInteger();
2003}
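// Note: in practice this means vector compares produce vXi1 mask results on
// AVX-512 (always for 512-bit vectors, and for narrower vectors with VLX,
// subject to the BWI check for i8/i16 elements); otherwise the result is an
// integer vector of the same width as the operands.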
2004
2005/// Helper for getByValTypeAlignment to determine
2006/// the desired ByVal argument alignment.
2007static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign) {
2008 if (MaxAlign == 16)
2009 return;
2010 if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
2011 if (VTy->getBitWidth() == 128)
2012 MaxAlign = 16;
2013 } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
2014 unsigned EltAlign = 0;
2015 getMaxByValAlign(ATy->getElementType(), EltAlign);
2016 if (EltAlign > MaxAlign)
2017 MaxAlign = EltAlign;
2018 } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
2019 for (auto *EltTy : STy->elements()) {
2020 unsigned EltAlign = 0;
2021 getMaxByValAlign(EltTy, EltAlign);
2022 if (EltAlign > MaxAlign)
2023 MaxAlign = EltAlign;
2024 if (MaxAlign == 16)
2025 break;
2026 }
2027 }
2028}
2029
2030/// Return the desired alignment for ByVal aggregate
2031/// function arguments in the caller parameter area. For X86, aggregates
2032/// that contain SSE vectors are placed at 16-byte boundaries while the rest
2033/// are at 4-byte boundaries.
2034unsigned X86TargetLowering::getByValTypeAlignment(Type *Ty,
2035 const DataLayout &DL) const {
2036 if (Subtarget.is64Bit()) {
2037 // Max of 8 and alignment of type.
2038 unsigned TyAlign = DL.getABITypeAlignment(Ty);
2039 if (TyAlign > 8)
2040 return TyAlign;
2041 return 8;
2042 }
2043
2044 unsigned Align = 4;
2045 if (Subtarget.hasSSE1())
2046 getMaxByValAlign(Ty, Align);
2047 return Align;
2048}
2049
2050/// Returns the target specific optimal type for load
2051/// and store operations as a result of memset, memcpy, and memmove
2052 /// lowering. If DstAlign is zero, it is safe to assume the destination
2053 /// alignment can satisfy any constraint. Similarly, if SrcAlign is zero there
2054 /// is no need to check it against an alignment requirement,
2055/// probably because the source does not need to be loaded. If 'IsMemset' is
2056/// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
2057/// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
2058/// source is constant so it does not need to be loaded.
2059/// It returns EVT::Other if the type should be determined using generic
2060/// target-independent logic.
2061EVT
2062X86TargetLowering::getOptimalMemOpType(uint64_t Size,
2063 unsigned DstAlign, unsigned SrcAlign,
2064 bool IsMemset, bool ZeroMemset,
2065 bool MemcpyStrSrc,
2066 MachineFunction &MF) const {
2067 const Function &F = MF.getFunction();
2068 if (!F.hasFnAttribute(Attribute::NoImplicitFloat)) {
2069 if (Size >= 16 &&
2070 (!Subtarget.isUnalignedMem16Slow() ||
2071 ((DstAlign == 0 || DstAlign >= 16) &&
2072 (SrcAlign == 0 || SrcAlign >= 16)))) {
2073 // FIXME: Check if unaligned 32-byte accesses are slow.
2074 if (Size >= 32 && Subtarget.hasAVX()) {
2075 // Although this isn't a well-supported type for AVX1, we'll let
2076 // legalization and shuffle lowering produce the optimal codegen. If we
2077 // choose an optimal type with a vector element larger than a byte,
2078 // getMemsetStores() may create an intermediate splat (using an integer
2079 // multiply) before we splat as a vector.
2080 return MVT::v32i8;
2081 }
2082 if (Subtarget.hasSSE2())
2083 return MVT::v16i8;
2084 // TODO: Can SSE1 handle a byte vector?
2085 // If we have SSE1 registers we should be able to use them.
2086 if (Subtarget.hasSSE1() && (Subtarget.is64Bit() || Subtarget.hasX87()))
2087 return MVT::v4f32;
2088 } else if ((!IsMemset || ZeroMemset) && !MemcpyStrSrc && Size >= 8 &&
2089 !Subtarget.is64Bit() && Subtarget.hasSSE2()) {
2090 // Do not use f64 to lower memcpy if source is string constant. It's
2091 // better to use i32 to avoid the loads.
2092 // Also, do not use f64 to lower memset unless this is a memset of zeros.
2093 // The gymnastics of splatting a byte value into an XMM register and then
2094 // only using 8-byte stores (because this is a CPU with slow unaligned
2095 // 16-byte accesses) makes that a loser.
2096 return MVT::f64;
2097 }
2098 }
2099 // This is a compromise. If we reach here, unaligned accesses may be slow on
2100 // this target. However, creating smaller, aligned accesses could be even
2101 // slower and would certainly be a lot more code.
2102 if (Subtarget.is64Bit() && Size >= 8)
2103 return MVT::i64;
2104 return MVT::i32;
2105}
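// Note (worked example of the policy above): a 64-byte memcpy on an AVX target
// with fast unaligned 16-byte access is expanded with MVT::v32i8, i.e. two
// 32-byte vector load/store pairs, while a slow-unaligned-access target falls
// through to the scalar i64/i32 choices at the bottom.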
2106
2107bool X86TargetLowering::isSafeMemOpType(MVT VT) const {
2108 if (VT == MVT::f32)
2109 return X86ScalarSSEf32;
2110 else if (VT == MVT::f64)
2111 return X86ScalarSSEf64;
2112 return true;
2113}
2114
2115bool
2116X86TargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
2117 unsigned,
2118 unsigned,
2119 bool *Fast) const {
2120 if (Fast) {
2121 switch (VT.getSizeInBits()) {
2122 default:
2123 // 8-byte and under are always assumed to be fast.
2124 *Fast = true;
2125 break;
2126 case 128:
2127 *Fast = !Subtarget.isUnalignedMem16Slow();
2128 break;
2129 case 256:
2130 *Fast = !Subtarget.isUnalignedMem32Slow();
2131 break;
2132 // TODO: What about AVX-512 (512-bit) accesses?
2133 }
2134 }
2135 // Misaligned accesses of any size are always allowed.
2136 return true;
2137}
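// Note: returning true here means misaligned accesses are always treated as
// legal on x86; the *Fast flag only feeds the cost model, steering callers
// away from unaligned 16/32-byte accesses on subtargets where they are slow.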
2138
2139/// Return the entry encoding for a jump table in the
2140/// current function. The returned value is a member of the
2141/// MachineJumpTableInfo::JTEntryKind enum.
2142unsigned X86TargetLowering::getJumpTableEncoding() const {
2143 // In GOT pic mode, each entry in the jump table is emitted as a @GOTOFF
2144 // symbol.
2145 if (isPositionIndependent() && Subtarget.isPICStyleGOT())
2146 return MachineJumpTableInfo::EK_Custom32;
2147
2148 // Otherwise, use the normal jump table encoding heuristics.
2149 return TargetLowering::getJumpTableEncoding();
2150}
2151
2152bool X86TargetLowering::useSoftFloat() const {
2153 return Subtarget.useSoftFloat();
2154}
2155
2156void X86TargetLowering::markLibCallAttributes(MachineFunction *MF, unsigned CC,
2157 ArgListTy &Args) const {
2158
2159 // Only relabel X86-32 for C / Stdcall CCs.
2160 if (Subtarget.is64Bit())
2161 return;
2162 if (CC != CallingConv::C && CC != CallingConv::X86_StdCall)
2163 return;
2164 unsigned ParamRegs = 0;
2165 if (auto *M = MF->getFunction().getParent())
2166 ParamRegs = M->getNumberRegisterParameters();
2167
2168 // Mark the first N int arguments as being passed in registers.
2169 for (unsigned Idx = 0; Idx < Args.size(); Idx++) {
2170 Type *T = Args[Idx].Ty;
2171 if (T->isIntOrPtrTy())
2172 if (MF->getDataLayout().getTypeAllocSize(T) <= 8) {
2173 unsigned numRegs = 1;
2174 if (MF->getDataLayout().getTypeAllocSize(T) > 4)
2175 numRegs = 2;
2176 if (ParamRegs < numRegs)
2177 return;
2178 ParamRegs -= numRegs;
2179 Args[Idx].IsInReg = true;
2180 }
2181 }
2182}
2183
2184const MCExpr *
2185X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
2186 const MachineBasicBlock *MBB,
2187 unsigned uid,MCContext &Ctx) const{
2188 assert(isPositionIndependent() && Subtarget.isPICStyleGOT());
2189 // In 32-bit ELF systems, our jump table entries are formed with @GOTOFF
2190 // entries.
2191 return MCSymbolRefExpr::create(MBB->getSymbol(),
2192 MCSymbolRefExpr::VK_GOTOFF, Ctx);
2193}
2194
2195/// Returns relocation base for the given PIC jumptable.
2196SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table,
2197 SelectionDAG &DAG) const {
2198 if (!Subtarget.is64Bit())
2199 // This doesn't have SDLoc associated with it, but is not really the
2200 // same as a Register.
2201 return DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
2202 getPointerTy(DAG.getDataLayout()));
2203 return Table;
2204}
2205
2206/// This returns the relocation base for the given PIC jumptable,
2207/// the same as getPICJumpTableRelocBase, but as an MCExpr.
2208const MCExpr *X86TargetLowering::
2209getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI,
2210 MCContext &Ctx) const {
2211 // X86-64 uses RIP relative addressing based on the jump table label.
2212 if (Subtarget.isPICStyleRIPRel())
2213 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
2214
2215 // Otherwise, the reference is relative to the PIC base.
2216 return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
2217}
2218
2219std::pair<const TargetRegisterClass *, uint8_t>
2220X86TargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
2221 MVT VT) const {
2222 const TargetRegisterClass *RRC = nullptr;
2223 uint8_t Cost = 1;
2224 switch (VT.SimpleTy) {
2225 default:
2226 return TargetLowering::findRepresentativeClass(TRI, VT);
2227 case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64:
2228 RRC = Subtarget.is64Bit() ? &X86::GR64RegClass : &X86::GR32RegClass;
2229 break;
2230 case MVT::x86mmx:
2231 RRC = &X86::VR64RegClass;
2232 break;
2233 case MVT::f32: case MVT::f64:
2234 case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
2235 case MVT::v4f32: case MVT::v2f64:
2236 case MVT::v32i8: case MVT::v16i16: case MVT::v8i32: case MVT::v4i64:
2237 case MVT::v8f32: case MVT::v4f64:
2238 case MVT::v64i8: case MVT::v32i16: case MVT::v16i32: case MVT::v8i64:
2239 case MVT::v16f32: case MVT::v8f64:
2240 RRC = &X86::VR128XRegClass;
2241 break;
2242 }
2243 return std::make_pair(RRC, Cost);
2244}
2245
2246unsigned X86TargetLowering::getAddressSpace() const {
2247 if (Subtarget.is64Bit())
2248 return (getTargetMachine().getCodeModel() == CodeModel::Kernel) ? 256 : 257;
2249 return 256;
2250}
2251
2252static bool hasStackGuardSlotTLS(const Triple &TargetTriple) {
2253 return TargetTriple.isOSGlibc() || TargetTriple.isOSFuchsia() ||
2254 (TargetTriple.isAndroid() && !TargetTriple.isAndroidVersionLT(17));
2255}
2256
2257static Constant* SegmentOffset(IRBuilder<> &IRB,
2258 unsigned Offset, unsigned AddressSpace) {
2259 return ConstantExpr::getIntToPtr(
2260 ConstantInt::get(Type::getInt32Ty(IRB.getContext()), Offset),
2261 Type::getInt8PtrTy(IRB.getContext())->getPointerTo(AddressSpace));
2262}
2263
2264Value *X86TargetLowering::getIRStackGuard(IRBuilder<> &IRB) const {
2265 // glibc, bionic, and Fuchsia have a special slot for the stack guard in
2266 // tcbhead_t; use it instead of the usual global variable (see
2267 // sysdeps/{i386,x86_64}/nptl/tls.h)
2268 if (hasStackGuardSlotTLS(Subtarget.getTargetTriple())) {
2269 if (Subtarget.isTargetFuchsia()) {
2270 // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
2271 return SegmentOffset(IRB, 0x10, getAddressSpace());
2272 } else {
2273 // %fs:0x28, unless we're using a Kernel code model, in which case
2274 // it's %gs:0x28. gs:0x14 on i386.
2275 unsigned Offset = (Subtarget.is64Bit()) ? 0x28 : 0x14;
2276 return SegmentOffset(IRB, Offset, getAddressSpace());
2277 }
2278 }
2279
2280 return TargetLowering::getIRStackGuard(IRB);
2281}
2282
2283void X86TargetLowering::insertSSPDeclarations(Module &M) const {
2284 // The MSVC CRT provides functionality for stack protection.
2285 if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
2286 Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
2287 // MSVC CRT has a global variable holding security cookie.
2288 M.getOrInsertGlobal("__security_cookie",
2289 Type::getInt8PtrTy(M.getContext()));
2290
2291 // MSVC CRT has a function to validate security cookie.
2292 FunctionCallee SecurityCheckCookie = M.getOrInsertFunction(
2293 "__security_check_cookie", Type::getVoidTy(M.getContext()),
2294 Type::getInt8PtrTy(M.getContext()));
2295 if (Function *F = dyn_cast<Function>(SecurityCheckCookie.getCallee())) {
2296 F->setCallingConv(CallingConv::X86_FastCall);
2297 F->addAttribute(1, Attribute::AttrKind::InReg);
2298 }
2299 return;
2300 }
2301 // glibc, bionic, and Fuchsia have a special slot for the stack guard.
2302 if (hasStackGuardSlotTLS(Subtarget.getTargetTriple()))
2303 return;
2304 TargetLowering::insertSSPDeclarations(M);
2305}
2306
2307Value *X86TargetLowering::getSDagStackGuard(const Module &M) const {
2308 // MSVC CRT has a global variable holding security cookie.
2309 if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
2310 Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
2311 return M.getGlobalVariable("__security_cookie");
2312 }
2313 return TargetLowering::getSDagStackGuard(M);
2314}
2315
2316Function *X86TargetLowering::getSSPStackGuardCheck(const Module &M) const {
2317 // MSVC CRT has a function to validate security cookie.
2318 if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
2319 Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
2320 return M.getFunction("__security_check_cookie");
2321 }
2322 return TargetLowering::getSSPStackGuardCheck(M);
2323}
2324
2325Value *X86TargetLowering::getSafeStackPointerLocation(IRBuilder<> &IRB) const {
2326 if (Subtarget.getTargetTriple().isOSContiki())
2327 return getDefaultSafeStackPointerLocation(IRB, false);
2328
2329 // Android provides a fixed TLS slot for the SafeStack pointer. See the
2330 // definition of TLS_SLOT_SAFESTACK in
2331 // https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
2332 if (Subtarget.isTargetAndroid()) {
2333 // %fs:0x48, unless we're using a Kernel code model, in which case it's %gs:0x48.
2334 // %gs:0x24 on i386
2335 unsigned Offset = (Subtarget.is64Bit()) ? 0x48 : 0x24;
2336 return SegmentOffset(IRB, Offset, getAddressSpace());
2337 }
2338
2339 // Fuchsia is similar.
2340 if (Subtarget.isTargetFuchsia()) {
2341 // <zircon/tls.h> defines ZX_TLS_UNSAFE_SP_OFFSET with this value.
2342 return SegmentOffset(IRB, 0x18, getAddressSpace());
2343 }
2344
2345 return TargetLowering::getSafeStackPointerLocation(IRB);
2346}
2347
2348bool X86TargetLowering::isNoopAddrSpaceCast(unsigned SrcAS,
2349 unsigned DestAS) const {
2350 assert(SrcAS != DestAS && "Expected different address spaces!");
2351
2352 return SrcAS < 256 && DestAS < 256;
2353}
2354
2355//===----------------------------------------------------------------------===//
2356// Return Value Calling Convention Implementation
2357//===----------------------------------------------------------------------===//
2358
2359bool X86TargetLowering::CanLowerReturn(
2360 CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
2361 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
2362 SmallVector<CCValAssign, 16> RVLocs;
2363 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
2364 return CCInfo.CheckReturn(Outs, RetCC_X86);
2365}
2366
2367const MCPhysReg *X86TargetLowering::getScratchRegisters(CallingConv::ID) const {
2368 static const MCPhysReg ScratchRegs[] = { X86::R11, 0 };
2369 return ScratchRegs;
2370}
2371
2372 /// Lowers mask values (v*i1) to the local register values
2373/// \returns DAG node after lowering to register type
2374static SDValue lowerMasksToReg(const SDValue &ValArg, const EVT &ValLoc,
2375 const SDLoc &Dl, SelectionDAG &DAG) {
2376 EVT ValVT = ValArg.getValueType();
2377
2378 if (ValVT == MVT::v1i1)
2379 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, Dl, ValLoc, ValArg,
2380 DAG.getIntPtrConstant(0, Dl));
2381
2382 if ((ValVT == MVT::v8i1 && (ValLoc == MVT::i8 || ValLoc == MVT::i32)) ||
2383 (ValVT == MVT::v16i1 && (ValLoc == MVT::i16 || ValLoc == MVT::i32))) {
2384 // Two stage lowering might be required
2385 // bitcast: v8i1 -> i8 / v16i1 -> i16
2386 // anyextend: i8 -> i32 / i16 -> i32
2387 EVT TempValLoc = ValVT == MVT::v8i1 ? MVT::i8 : MVT::i16;
2388 SDValue ValToCopy = DAG.getBitcast(TempValLoc, ValArg);
2389 if (ValLoc == MVT::i32)
2390 ValToCopy = DAG.getNode(ISD::ANY_EXTEND, Dl, ValLoc, ValToCopy);
2391 return ValToCopy;
2392 }
2393
2394 if ((ValVT == MVT::v32i1 && ValLoc == MVT::i32) ||
2395 (ValVT == MVT::v64i1 && ValLoc == MVT::i64)) {
2396 // One stage lowering is required
2397 // bitcast: v32i1 -> i32 / v64i1 -> i64
2398 return DAG.getBitcast(ValLoc, ValArg);
2399 }
2400
2401 return DAG.getNode(ISD::ANY_EXTEND, Dl, ValLoc, ValArg);
2402}
2403
2404 /// Breaks a v64i1 value into two registers and adds the new nodes to the DAG
2405static void Passv64i1ArgInRegs(
2406 const SDLoc &Dl, SelectionDAG &DAG, SDValue Chain, SDValue &Arg,
2407 SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, CCValAssign &VA,
2408 CCValAssign &NextVA, const X86Subtarget &Subtarget) {
2409 assert(Subtarget.hasBWI() && "Expected AVX512BW target!");
2410 assert(Subtarget.is32Bit() && "Expecting 32 bit target");
2411 assert(Arg.getValueType() == MVT::i64 && "Expecting 64 bit value");
2412 assert(VA.isRegLoc() && NextVA.isRegLoc() &&
2413 "The value should reside in two registers");
2414
2415 // Before splitting the value we cast it to i64
2416 Arg = DAG.getBitcast(MVT::i64, Arg);
2417
2418 // Splitting the value into two i32 types
2419 SDValue Lo, Hi;
2420 Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32, Arg,
2421 DAG.getConstant(0, Dl, MVT::i32));
2422 Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32, Arg,
2423 DAG.getConstant(1, Dl, MVT::i32));
2424
2425 // Attach the two i32 types into corresponding registers
2426 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Lo));
2427 RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), Hi));
2428}
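// Note: ISD::EXTRACT_ELEMENT with constant index 0/1 selects the low/high
// 32 bits of the bitcast i64, so on a 32-bit target the v64i1 mask ends up
// split across the two GPRs described by VA and NextVA.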
2429
2430SDValue
2431X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
2432 bool isVarArg,
2433 const SmallVectorImpl<ISD::OutputArg> &Outs,
2434 const SmallVectorImpl<SDValue> &OutVals,
2435 const SDLoc &dl, SelectionDAG &DAG) const {
2436 MachineFunction &MF = DAG.getMachineFunction();
2437 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
2438
2439 // In some cases we need to disable registers from the default CSR list.
2440 // For example, when they are used for argument passing.
2441 bool ShouldDisableCalleeSavedRegister =
2442 CallConv == CallingConv::X86_RegCall ||
2443 MF.getFunction().hasFnAttribute("no_caller_saved_registers");
2444
2445 if (CallConv == CallingConv::X86_INTR && !Outs.empty())
2446 report_fatal_error("X86 interrupts may not return any value");
2447
2448 SmallVector<CCValAssign, 16> RVLocs;
2449 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.getContext());
2450 CCInfo.AnalyzeReturn(Outs, RetCC_X86);
2451
2452 SDValue Flag;
2453 SmallVector<SDValue, 6> RetOps;
2454 RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
2455 // Operand #1 = Bytes To Pop
2456 RetOps.push_back(DAG.getTargetConstant(FuncInfo->getBytesToPopOnReturn(), dl,
2457 MVT::i32));
2458
2459 // Copy the result values into the output registers.
2460 for (unsigned I = 0, OutsIndex = 0, E = RVLocs.size(); I != E;
2461 ++I, ++OutsIndex) {
2462 CCValAssign &VA = RVLocs[I];
2463 assert(VA.isRegLoc() && "Can only return in registers!");
2464
2465 // Add the register to the CalleeSaveDisableRegs list.
2466 if (ShouldDisableCalleeSavedRegister)
2467 MF.getRegInfo().disableCalleeSavedRegister(VA.getLocReg());
2468
2469 SDValue ValToCopy = OutVals[OutsIndex];
2470 EVT ValVT = ValToCopy.getValueType();
2471
2472 // Promote values to the appropriate types.
2473 if (VA.getLocInfo() == CCValAssign::SExt)
2474 ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy);
2475 else if (VA.getLocInfo() == CCValAssign::ZExt)
2476 ValToCopy = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), ValToCopy);
2477 else if (VA.getLocInfo() == CCValAssign::AExt) {
2478 if (ValVT.isVector() && ValVT.getVectorElementType() == MVT::i1)
2479 ValToCopy = lowerMasksToReg(ValToCopy, VA.getLocVT(), dl, DAG);
2480 else
2481 ValToCopy = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ValToCopy);
2482 }
2483 else if (VA.getLocInfo() == CCValAssign::BCvt)
2484 ValToCopy = DAG.getBitcast(VA.getLocVT(), ValToCopy);
2485
2486 assert(VA.getLocInfo() != CCValAssign::FPExt &&
2487 "Unexpected FP-extend for return value.");
2488
2489 // If this is x86-64, and we disabled SSE, we can't return FP values,
2490 // or SSE or MMX vectors.
2491 if ((ValVT == MVT::f32 || ValVT == MVT::f64 ||
2492 VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) &&
2493 (Subtarget.is64Bit() && !Subtarget.hasSSE1())) {
2494 errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
2495 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
2496 } else if (ValVT == MVT::f64 &&
2497 (Subtarget.is64Bit() && !Subtarget.hasSSE2())) {
2498 // Likewise we can't return F64 values with SSE1 only. gcc does so, but
2499 // llvm-gcc has never done it right and no one has noticed, so this
2500 // should be OK for now.
2501 errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");
2502 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
2503 }
2504
2505 // Returns in ST0/ST1 are handled specially: these are pushed as operands to
2506 // the RET instruction and handled by the FP Stackifier.
2507 if (VA.getLocReg() == X86::FP0 ||
2508 VA.getLocReg() == X86::FP1) {
2509 // If this is a copy from an xmm register to ST(0), use an FPExtend to
2510 // change the value to the FP stack register class.
2511 if (isScalarFPTypeInSSEReg(VA.getValVT()))
2512 ValToCopy = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f80, ValToCopy);
2513 RetOps.push_back(ValToCopy);
2514 // Don't emit a copytoreg.
2515 continue;
2516 }
2517
2518 // 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64
2519 // which is returned in RAX / RDX.
2520 if (Subtarget.is64Bit()) {
2521 if (ValVT == MVT::x86mmx) {
2522 if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
2523 ValToCopy = DAG.getBitcast(MVT::i64, ValToCopy);
2524 ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
2525 ValToCopy);
2526 // If we don't have SSE2 available, convert to v4f32 so the generated
2527 // register is legal.
2528 if (!Subtarget.hasSSE2())
2529 ValToCopy = DAG.getBitcast(MVT::v4f32, ValToCopy);
2530 }
2531 }
2532 }
2533
2534 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
2535
2536 if (VA.needsCustom()) {
2537 assert(VA.getValVT() == MVT::v64i1 &&
2538 "Currently the only custom case is when we split v64i1 to 2 regs");
2539
2540 Passv64i1ArgInRegs(dl, DAG, Chain, ValToCopy, RegsToPass, VA, RVLocs[++I],
2541 Subtarget);
2542
2543 assert(2 == RegsToPass.size() &&
2544 "Expecting two registers after Pass64BitArgInRegs");
2545
2546 // Add the second register to the CalleeSaveDisableRegs list.
2547 if (ShouldDisableCalleeSavedRegister)
2548 MF.getRegInfo().disableCalleeSavedRegister(RVLocs[I].getLocReg());
2549 } else {
2550 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
2551 }
2552
2553 // Add nodes to the DAG and add the values into the RetOps list
2554 for (auto &Reg : RegsToPass) {
2555 Chain = DAG.getCopyToReg(Chain, dl, Reg.first, Reg.second, Flag);
2556 Flag = Chain.getValue(1);
2557 RetOps.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
2558 }
2559 }
2560
2561 // Swift calling convention does not require we copy the sret argument
2562 // into %rax/%eax for the return, and SRetReturnReg is not set for Swift.
2563
2564 // All x86 ABIs require that for returning structs by value we copy
2565 // the sret argument into %rax/%eax (depending on ABI) for the return.
2566 // We saved the argument into a virtual register in the entry block,
2567 // so now we copy the value out and into %rax/%eax.
2568 //
2569 // Checking Function.hasStructRetAttr() here is insufficient because the IR
2570 // may not have an explicit sret argument. If FuncInfo.CanLowerReturn is
2571 // false, then an sret argument may be implicitly inserted in the SelDAG. In
2572 // either case FuncInfo->setSRetReturnReg() will have been called.
2573 if (unsigned SRetReg = FuncInfo->getSRetReturnReg()) {
2574 // When we have both sret and another return value, we should use the
2575 // original Chain stored in RetOps[0], instead of the current Chain updated
2576 // in the above loop. If we only have sret, RetOps[0] equals to Chain.
2577
2578 // For the case of sret and another return value, we have
2579 // Chain_0 at the function entry
2580 // Chain_1 = getCopyToReg(Chain_0) in the above loop
2581 // If we use Chain_1 in getCopyFromReg, we will have
2582 // Val = getCopyFromReg(Chain_1)
2583 // Chain_2 = getCopyToReg(Chain_1, Val) from below
2584
2585 // getCopyToReg(Chain_0) will be glued together with
2586 // getCopyToReg(Chain_1, Val) into Unit A, getCopyFromReg(Chain_1) will be
2587 // in Unit B, and we will have cyclic dependency between Unit A and Unit B:
2588 // Data dependency from Unit B to Unit A due to usage of Val in
2589 // getCopyToReg(Chain_1, Val)
2590 // Chain dependency from Unit A to Unit B
2591
2592 // So here, we use RetOps[0] (i.e. Chain_0) for getCopyFromReg.
2593 SDValue Val = DAG.getCopyFromReg(RetOps[0], dl, SRetReg,
2594 getPointerTy(MF.getDataLayout()));
2595
2596 unsigned RetValReg
2597 = (Subtarget.is64Bit() && !Subtarget.isTarget64BitILP32()) ?
2598 X86::RAX : X86::EAX;
2599 Chain = DAG.getCopyToReg(Chain, dl, RetValReg, Val, Flag);
2600 Flag = Chain.getValue(1);
2601
2602 // RAX/EAX now acts like a return value.
2603 RetOps.push_back(
2604 DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout())));
2605
2606 // Add the returned register to the CalleeSaveDisableRegs list.
2607 if (ShouldDisableCalleeSavedRegister)
2608 MF.getRegInfo().disableCalleeSavedRegister(RetValReg);
2609 }
2610
2611 const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
2612 const MCPhysReg *I =
2613 TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
2614 if (I) {
2615 for (; *I; ++I) {
2616 if (X86::GR64RegClass.contains(*I))
2617 RetOps.push_back(DAG.getRegister(*I, MVT::i64));
2618 else
2619 llvm_unreachable("Unexpected register class in CSRsViaCopy!")::llvm::llvm_unreachable_internal("Unexpected register class in CSRsViaCopy!"
, "/build/llvm-toolchain-snapshot-9~svn358860/lib/Target/X86/X86ISelLowering.cpp"
, 2619)
;
2620 }
2621 }
2622
2623 RetOps[0] = Chain; // Update chain.
2624
2625 // Add the flag if we have it.
2626 if (Flag.getNode())
2627 RetOps.push_back(Flag);
2628
2629 X86ISD::NodeType opcode = X86ISD::RET_FLAG;
2630 if (CallConv == CallingConv::X86_INTR)
2631 opcode = X86ISD::IRET;
2632 return DAG.getNode(opcode, dl, MVT::Other, RetOps);
2633}
2634
2635bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
2636 if (N->getNumValues() != 1 || !N->hasNUsesOfValue(1, 0))
2637 return false;
2638
2639 SDValue TCChain = Chain;
2640 SDNode *Copy = *N->use_begin();
2641 if (Copy->getOpcode() == ISD::CopyToReg) {
2642 // If the copy has a glue operand, we conservatively assume it isn't safe to
2643 // perform a tail call.
2644 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
2645 return false;
2646 TCChain = Copy->getOperand(0);
2647 } else if (Copy->getOpcode() != ISD::FP_EXTEND)
2648 return false;
2649
2650 bool HasRet = false;
2651 for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
2652 UI != UE; ++UI) {
2653 if (UI->getOpcode() != X86ISD::RET_FLAG)
2654 return false;
2655 // If we are returning more than one value, we can definitely
2656 // not make a tail call; see PR19530.
2657 if (UI->getNumOperands() > 4)
2658 return false;
2659 if (UI->getNumOperands() == 4 &&
2660 UI->getOperand(UI->getNumOperands()-1).getValueType() != MVT::Glue)
2661 return false;
2662 HasRet = true;
2663 }
2664
2665 if (!HasRet)
2666 return false;
2667
2668 Chain = TCChain;
2669 return true;
2670}
2671
2672EVT X86TargetLowering::getTypeForExtReturn(LLVMContext &Context, EVT VT,
2673 ISD::NodeType ExtendKind) const {
2674 MVT ReturnMVT = MVT::i32;
2675
2676 bool Darwin = Subtarget.getTargetTriple().isOSDarwin();
2677 if (VT == MVT::i1 || (!Darwin && (VT == MVT::i8 || VT == MVT::i16))) {
2678 // The ABI does not require i1, i8 or i16 to be extended.
2679 //
2680 // On Darwin, there is code in the wild relying on Clang's old behaviour of
2681 // always extending i8/i16 return values, so keep doing that for now.
2682 // (PR26665).
2683 ReturnMVT = MVT::i8;
2684 }
2685
2686 EVT MinVT = getRegisterType(Context, ReturnMVT);
2687 return VT.bitsLT(MinVT) ? MinVT : VT;
2688}
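// For example: an i1 return value is widened to i8 on every target; an i8 or
// i16 return value is left unwidened on non-Darwin targets, while on Darwin
// ReturnMVT stays i32 and the value is widened to i32 (see PR26665).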
2689
2690/// Reads two 32 bit registers and creates a 64 bit mask value.
2691/// \param VA The current 32 bit value that needs to be assigned.
2692/// \param NextVA The next 32 bit value that needs to be assigned.
2693/// \param Root The parent DAG node.
2694/// \param [in,out] InFlag Represents an SDValue in the parent DAG node for
2695///                        glue purposes. If the DAG is already using a
2696///                        physical register instead of a virtual one, we
2697///                        should glue our new SDValue to the InFlag SDValue.
2698/// \return a new 64 bit SDValue.
2699static SDValue getv64i1Argument(CCValAssign &VA, CCValAssign &NextVA,
2700 SDValue &Root, SelectionDAG &DAG,
2701 const SDLoc &Dl, const X86Subtarget &Subtarget,
2702 SDValue *InFlag = nullptr) {
2703 assert((Subtarget.hasBWI()) && "Expected AVX512BW target!");
2704 assert(Subtarget.is32Bit() && "Expecting 32 bit target");
2705 assert(VA.getValVT() == MVT::v64i1 &&
2706 "Expecting first location of 64 bit width type");
2707 assert(NextVA.getValVT() == VA.getValVT() &&
2708 "The locations should have the same type");
2709 assert(VA.isRegLoc() && NextVA.isRegLoc() &&
2710 "The values should reside in two registers");
2711
2712 SDValue Lo, Hi;
2713 unsigned Reg;
2714 SDValue ArgValueLo, ArgValueHi;
2715
2716 MachineFunction &MF = DAG.getMachineFunction();
2717 const TargetRegisterClass *RC = &X86::GR32RegClass;
2718
2719 // Read a 32 bit value from the registers.
2720 if (nullptr == InFlag) {
2721 // When no physical register is present,
2722 // create an intermediate virtual register.
2723 Reg = MF.addLiveIn(VA.getLocReg(), RC);
2724 ArgValueLo = DAG.getCopyFromReg(Root, Dl, Reg, MVT::i32);
2725 Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
2726 ArgValueHi = DAG.getCopyFromReg(Root, Dl, Reg, MVT::i32);
2727 } else {
2728 // When a physical register is available read the value from it and glue
2729 // the reads together.
2730 ArgValueLo =
2731 DAG.getCopyFromReg(Root, Dl, VA.getLocReg(), MVT::i32, *InFlag);
2732 *InFlag = ArgValueLo.getValue(2);
2733 ArgValueHi =
2734 DAG.getCopyFromReg(Root, Dl, NextVA.getLocReg(), MVT::i32, *InFlag);
2735 *InFlag = ArgValueHi.getValue(2);
2736 }
2737
2738 // Convert the i32 value into a v32i1 value.
2739 Lo = DAG.getBitcast(MVT::v32i1, ArgValueLo);
2740
2741 // Convert the i32 value into a v32i1 value.
2742 Hi = DAG.getBitcast(MVT::v32i1, ArgValueHi);
2743
2744 // Concatenate the two values together.
2745 return DAG.getNode(ISD::CONCAT_VECTORS, Dl, MVT::v64i1, Lo, Hi);
2746}
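// A minimal scalar sketch of the recombination above, using only standard
// C++ for illustration: the low and high 32 bit register values are joined
// into one 64 bit mask, mirroring the v32i1/v32i1 -> v64i1 CONCAT_VECTORS.
//
//   #include <cstdint>
//   // Join the two 32 bit halves read from the registers into a 64 bit mask.
//   static uint64_t joinMask64(uint32_t Lo, uint32_t Hi) {
//     return static_cast<uint64_t>(Lo) | (static_cast<uint64_t>(Hi) << 32);
//   }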
2747
2748/// The function will lower a register of various sizes (8/16/32/64)
2749/// to a mask value of the expected size (v8i1/v16i1/v32i1/v64i1).
2750/// \returns a DAG node containing the operand after lowering to a mask type.
2751static SDValue lowerRegToMasks(const SDValue &ValArg, const EVT &ValVT,
2752 const EVT &ValLoc, const SDLoc &Dl,
2753 SelectionDAG &DAG) {
2754 SDValue ValReturned = ValArg;
2755
2756 if (ValVT == MVT::v1i1)
2757 return DAG.getNode(ISD::SCALAR_TO_VECTOR, Dl, MVT::v1i1, ValReturned);
2758
2759 if (ValVT == MVT::v64i1) {
2760 // On a 32 bit machine, this case is handled by getv64i1Argument.
2761 assert(ValLoc == MVT::i64 && "Expecting only i64 locations");
2762 // On a 64 bit machine, there is no need to truncate the value, only bitcast it.
2763 } else {
2764 MVT maskLen;
2765 switch (ValVT.getSimpleVT().SimpleTy) {
2766 case MVT::v8i1:
2767 maskLen = MVT::i8;
2768 break;
2769 case MVT::v16i1:
2770 maskLen = MVT::i16;
2771 break;
2772 case MVT::v32i1:
2773 maskLen = MVT::i32;
2774 break;
2775 default:
2776 llvm_unreachable("Expecting a vector of i1 types")::llvm::llvm_unreachable_internal("Expecting a vector of i1 types"
, "/build/llvm-toolchain-snapshot-9~svn358860/lib/Target/X86/X86ISelLowering.cpp"
, 2776)
;
2777 }
2778
2779 ValReturned = DAG.getNode(ISD::TRUNCATE, Dl, maskLen, ValReturned);
2780 }
2781 return DAG.getBitcast(ValVT, ValReturned);
2782}
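// Worked example for the function above: a v32i1 value whose location type is
// i64 selects maskLen = i32, so the value is truncated from i64 to i32 and
// then bitcast to v32i1; a v64i1 value located in i64 is bitcast directly.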
2783
2784/// Lower the result values of a call into the
2785/// appropriate copies out of appropriate physical registers.
2786///
2787SDValue X86TargetLowering::LowerCallResult(
2788 SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
2789 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
2790 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
2791 uint32_t *RegMask) const {
2792
2793 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
2794 // Assign locations to each value returned by this call.
2795 SmallVector<CCValAssign, 16> RVLocs;
2796 bool Is64Bit = Subtarget.is64Bit();
2797 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
2798 *DAG.getContext());
2799 CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
2800
2801 // Copy all of the result registers out of their specified physreg.
2802 for (unsigned I = 0, InsIndex = 0, E = RVLocs.size(); I != E;
2803 ++I, ++InsIndex) {
2804 CCValAssign &VA = RVLocs[I];
2805 EVT CopyVT = VA.getLocVT();
2806
2807 // In some calling conventions we need to remove the used registers
2808 // from the register mask.
2809 if (RegMask) {
2810 for (MCSubRegIterator SubRegs(VA.getLocReg(), TRI, /*IncludeSelf=*/true);
2811 SubRegs.isValid(); ++SubRegs)
2812 RegMask[*SubRegs / 32] &= ~(1u << (*SubRegs % 32));
2813 }
2814
2815 // If this is x86-64, and we disabled SSE, we can't return FP values
2816 if ((CopyVT == MVT::f32 || CopyVT == MVT::f64 || CopyVT == MVT::f128) &&
2817 ((Is64Bit || Ins[InsIndex].Flags.isInReg()) && !Subtarget.hasSSE1())) {
2818 errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
2819 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
2820 }
2821
2822 // If we prefer to use the value in xmm registers, copy it out as f80 and
2823 // use a truncate to move it from fp stack reg to xmm reg.
2824 bool RoundAfterCopy = false;
2825 if ((VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) &&
2826 isScalarFPTypeInSSEReg(VA.getValVT())) {
2827 if (!Subtarget.hasX87())
2828 report_fatal_error("X87 register return with X87 disabled");
2829 CopyVT = MVT::f80;
2830 RoundAfterCopy = (CopyVT != VA.getLocVT());
2831 }
2832
2833 SDValue Val;
2834 if (VA.needsCustom()) {
2835 assert(VA.getValVT() == MVT::v64i1 &&
2836 "Currently the only custom case is when we split v64i1 to 2 regs");
2837 Val =
2838 getv64i1Argument(VA, RVLocs[++I], Chain, DAG, dl, Subtarget, &InFlag);
2839 } else {
2840 Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), CopyVT, InFlag)
2841 .getValue(1);
2842 Val = Chain.getValue(0);
2843 InFlag = Chain.getValue(2);
2844 }
2845
2846 if (RoundAfterCopy)
2847 Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val,
2848 // This truncation won't change the value.
2849 DAG.getIntPtrConstant(1, dl));
2850
2851 if (VA.isExtInLoc() && (VA.getValVT().getScalarType() == MVT::i1)) {
2852 if (VA.getValVT().isVector() &&
2853 ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
2854 (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
2855 // promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
2856 Val = lowerRegToMasks(Val, VA.getValVT(), VA.getLocVT(), dl, DAG);
2857 } else
2858 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
2859 }
2860
2861 InVals.push_back(Val);
2862 }
2863
2864 return Chain;
2865}
2866
2867//===----------------------------------------------------------------------===//
2868// C & StdCall & Fast Calling Convention implementation
2869//===----------------------------------------------------------------------===//
2870// The StdCall calling convention is the standard for many Windows API
2871// routines. It differs from the C calling convention only slightly: the
2872// callee cleans up the stack instead of the caller, and symbols are
2873// decorated in a particular way. It does not support vector arguments.
2874// For info on the fast calling convention see the Fast Calling Convention
2875// (tail call) implementation in LowerX86_32FastCCCallTo.
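// For example (illustrative only): a function declared as
//   int __stdcall Add(int a, int b);
// has the callee pop the 8 bytes of arguments on return, and 32 bit Windows
// toolchains conventionally decorate the symbol as _Add@8.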
2876
2877/// CallIsStructReturn - Determines whether a call uses struct return
2878/// semantics.
2879enum StructReturnType {
2880 NotStructReturn,
2881 RegStructReturn,
2882 StackStructReturn
2883};
2884static StructReturnType
2885callIsStructReturn(ArrayRef<ISD::OutputArg> Outs, bool IsMCU) {
2886 if (Outs.empty())
2887 return NotStructReturn;
2888
2889 const ISD::ArgFlagsTy &Flags = Outs[0].Flags;
2890 if (!Flags.isSRet())
2891 return NotStructReturn;
2892 if (Flags.isInReg() || IsMCU)
2893 return RegStructReturn;
2894 return StackStructReturn;
2895}
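// For example: a call whose first outgoing argument carries the sret flag is
// RegStructReturn when that argument is also marked inreg (or the target is
// an MCU), StackStructReturn otherwise; a call without an sret argument is
// NotStructReturn.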
2896
2897/// Determines whether a function uses struct return semantics.
2898static StructReturnType
2899argsAreStructReturn(ArrayRef<ISD::InputArg> Ins, bool IsMCU) {
2900 if (Ins.empty())
2901 return NotStructReturn;
2902
2903 const ISD::ArgFlagsTy &Flags = Ins[0].Flags;
2904 if (!Flags.isSRet())
2905 return NotStructReturn;
2906 if (Flags.isInReg() || IsMCU)
2907 return RegStructReturn;
2908 return StackStructReturn;
2909}
2910
2911/// Make a copy of an aggregate at the address specified by "Src" to the
2912/// address "Dst" with size and alignment information specified by the
2913/// parameter attribute. The copy will be passed as a byval function parameter.
2914static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
2915 SDValue Chain, ISD::ArgFlagsTy Flags,
2916 SelectionDAG &DAG, const SDLoc &dl) {
2917 SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
2918
2919 return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
2920 /*isVolatile*/false, /*AlwaysInline=*/true,
2921 /*isTailCall*/false,
2922 MachinePointerInfo(), MachinePointerInfo());
2923}
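// Illustrative caller-side example (hypothetical types): for a C aggregate
//   struct S { int v[8]; };
//   void callee(struct S s);   // aggregate passed by value
// on targets where the aggregate is lowered as a byval argument, the memcpy
// built above copies getByValSize() bytes from Src (the caller's copy) to Dst
// (the outgoing argument slot), honoring getByValAlign().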
2924
2925/// Return true if the calling convention is one that we can guarantee TCO for.
2926static bool canGuaranteeTCO(CallingConv::ID CC) {
2927 return (CC == CallingConv::Fast || CC == CallingConv::GHC ||
2928 CC == CallingConv::X86_RegCall || CC == CallingConv::HiPE ||
2929 CC == CallingConv::HHVM);
2930}
2931
2932/// Return true if we might ever do TCO for calls with this calling convention.
2933static bool mayTailCallThisCC(CallingConv::ID CC) {
2934 switch (CC) {
2935 // C calling conventions:
2936 case CallingConv::C:
2937 case CallingConv::Win64:
2938 case CallingConv::X86_64_SysV:
2939 // Callee pop conventions:
2940 case CallingConv::X86_ThisCall:
2941 case CallingConv::X86_StdCall:
2942 case CallingConv::X86_VectorCall:
2943 case CallingConv::X86_FastCall:
2944 // Swift:
2945 case CallingConv::Swift:
2946 return true;
2947 default:
2948 return canGuaranteeTCO(CC);
2949 }
2950}
2951
2952/// Return true if the function is being made into a tailcall target by
2953/// changing its ABI.
2954static bool shouldGuaranteeTCO(CallingConv::ID CC, bool GuaranteedTailCallOpt) {
2955 return GuaranteedTailCallOpt && canGuaranteeTCO(CC);
2956}
2957
2958bool X86TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
2959 auto Attr =
2960 CI->getParent()->getParent()->getFnAttribute("disable-tail-calls");
2961 if (!CI->isTailCall() || Attr.getValueAsString() == "true")
2962 return false;
2963
2964 ImmutableCallSite CS(CI);
2965 CallingConv::ID CalleeCC = CS.getCallingConv();
2966 if (!mayTailCallThisCC(CalleeCC))
2967 return false;
2968
2969 return true;
2970}
2971
2972SDValue
2973X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
2974 const SmallVectorImpl<ISD::InputArg> &Ins,
2975 const SDLoc &dl, SelectionDAG &DAG,
2976 const CCValAssign &VA,
2977 MachineFrameInfo &MFI, unsigned i) const {
2978 // Create the nodes corresponding to a load from this parameter slot.
2979 ISD::ArgFlagsTy Flags = Ins[i].Flags;
2980 bool AlwaysUseMutable = shouldGuaranteeTCO(
2981 CallConv, DAG.getTarget().Options.GuaranteedTailCallOpt);
2982 bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
2983 EVT ValVT;
2984 MVT PtrVT = getPointerTy(DAG.getDataLayout());
2985
2986 // If the value is passed by pointer, the address is passed instead of the
2987 // value itself. No need to extend if the mask value and location share the
2988 // same absolute size.
2989 bool ExtendedInMem =
2990 VA.isExtInLoc() && VA.getValVT().getScalarType() == MVT::i1 &&
2991 VA.getValVT().getSizeInBits() != VA.getLocVT().getSizeInBits();
2992
2993 if (VA.getLocInfo() == CCValAssign::Indirect || ExtendedInMem)
2994 ValVT = VA.getLocVT();
2995 else
2996 ValVT = VA.getValVT();
2997
2998 // FIXME: For now, all byval parameter objects are marked mutable. This can be
2999 // changed with more analysis.
3000 // In case of tail call optimization, mark all arguments mutable, since they
3001 // could be overwritten when the arguments of a tail call are lowered.
3002 if (Flags.isByVal()) {
3003 unsigned Bytes = Flags.getByValSize();
3004 if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects.
3005
3006 // FIXME: For now, all byval parameter objects are marked as aliasing. This
3007 // can be improved with deeper analysis.
3008 int FI = MFI.CreateFixedObject(Bytes, VA.getLocMemOffset(), isImmutable,
3009 /*isAliased=*/true);
3010 return DAG.getFrameIndex(FI, PtrVT);
3011 }
3012
3013 // This is an argument in memory. We might be able to perform copy elision.
3014 // If the argument is passed directly in memory without any extension, then we
3015 // can perform copy elision. Large vector types, for example, may be passed
3016 // indirectly by pointer.
3017 if (Flags.isCopyElisionCandidate() &&
3018 VA.getLocInfo() != CCValAssign::Indirect && !ExtendedInMem) {
3019 EVT ArgVT = Ins[i].ArgVT;
3020 SDValue PartAddr;
3021 if (Ins[i].PartOffset == 0) {
3022 // If this is a one-part value or the first part of a multi-part value,
3023 // create a stack object for the entire argument value type and return a
3024 // load from our portion of it. This assumes that if the first part of an
3025 // argument is in memory, the rest will also be in memory.
3026 int FI = MFI.CreateFixedObject(ArgVT.getStoreSize(), VA.getLocMemOffset(),
3027 /*Immutable=*/false);
3028 PartAddr = DAG.getFrameIndex(FI, PtrVT);
3029 return DAG.getLoad(
3030 ValVT, dl, Chain, PartAddr,
3031 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
3032 } else {
3033 // This is not the first piece of an argument in memory. See if there is
3034 // already a fixed stack object including this offset. If so, assume it
3035 // was created by the PartOffset == 0 branch above and create a load from
3036 // the appropriate offset into it.
3037 int64_t PartBegin = VA.getLocMemOffset();
3038 int64_t PartEnd = PartBegin + ValVT.getSizeInBits() / 8;
3039 int FI = MFI.getObjectIndexBegin();
3040 for (; MFI.isFixedObjectIndex(FI); ++FI) {
3041 int64_t ObjBegin = MFI.getObjectOffset(FI);
3042 int64_t ObjEnd = ObjBegin + MFI.getObjectSize(FI);
3043 if (ObjBegin <= PartBegin && PartEnd <= ObjEnd)
3044 break;
3045 }
3046 if (MFI.isFixedObjectIndex(FI)) {
3047 SDValue Addr =
3048 DAG.getNode(ISD::ADD, dl, PtrVT, DAG.getFrameIndex(FI, PtrVT),
3049 DAG.getIntPtrConstant(Ins[i].PartOffset, dl));
3050 return DAG.getLoad(
3051 ValVT, dl, Chain, Addr,
3052 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI,
3053 Ins[i].PartOffset));
3054 }
3055 }
3056 }
3057
3058 int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8,
3059 VA.getLocMemOffset(), isImmutable);
3060
3061 // Set SExt or ZExt flag.
3062 if (VA.getLocInfo() == CCValAssign::ZExt) {
3063 MFI.setObjectZExt(FI, true);
3064 } else if (VA.getLocInfo() == CCValAssign::SExt) {
3065 MFI.setObjectSExt(FI, true);
3066 }
3067
3068 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3069 SDValue Val = DAG.getLoad(
3070 ValVT, dl, Chain, FIN,
3071 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
3072 return ExtendedInMem
3073 ? (VA.getValVT().isVector()
3074 ? DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VA.getValVT(), Val)
3075 : DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val))
3076 : Val;
3077}
3078
3079// FIXME: Get this from tablegen.
3080static ArrayRef<MCPhysReg> get64BitArgumentGPRs(CallingConv::ID CallConv,
3081 const X86Subtarget &Subtarget) {
3082 assert(Subtarget.is64Bit());
3083
3084 if (Subtarget.isCallingConvWin64(CallConv)) {
3085 static const MCPhysReg GPR64ArgRegsWin64[] = {
3086 X86::RCX, X86::RDX, X86::R8, X86::R9
3087 };
3088 return makeArrayRef(std::begin(GPR64ArgRegsWin64), std::end(GPR64ArgRegsWin64));
3089 }
3090
3091 static const MCPhysReg GPR64ArgRegs64Bit[] = {
3092 X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
3093 };
3094 return makeArrayRef(std::begin(GPR64ArgRegs64Bit), std::end(GPR64ArgRegs64Bit));
3095}
3096
3097// FIXME: Get this from tablegen.
3098static ArrayRef<MCPhysReg> get64BitArgumentXMMs(MachineFunction &MF,
3099 CallingConv::ID CallConv,
3100 const X86Subtarget &Subtarget) {
3101 assert(Subtarget.is64Bit());
3102 if (Subtarget.isCallingConvWin64(CallConv)) {
3103 // The XMM registers which might contain var arg parameters are shadowed
3104 // in their paired GPR. So we only need to save the GPR to their home
3105 // slots.
3106 // TODO: __vectorcall will change this.
3107 return None;
3108 }
3109
3110 const Function &F = MF.getFunction();
3111 bool NoImplicitFloatOps = F.hasFnAttribute(Attribute::NoImplicitFloat);
3112 bool isSoftFloat = Subtarget.useSoftFloat();
3113 assert(!(isSoftFloat && NoImplicitFloatOps) &&
3114 "SSE register cannot be used when SSE is disabled!");
3115 if (isSoftFloat || NoImplicitFloatOps || !Subtarget.hasSSE1())
3116 // Kernel mode asks for SSE to be disabled, so there are no XMM argument
3117 // registers.
3118 return None;
3119
3120 static const MCPhysReg XMMArgRegs64Bit[] = {
3121 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
3122 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
3123 };
3124 return makeArrayRef(std::begin(XMMArgRegs64Bit), std::end(XMMArgRegs64Bit));
3125}
3126
3127#ifndef NDEBUG
3128static bool isSortedByValueNo(ArrayRef<CCValAssign> ArgLocs) {
3129 return std::is_sorted(ArgLocs.begin(), ArgLocs.end(),
3130 [](const CCValAssign &A, const CCValAssign &B) -> bool {
3131 return A.getValNo() < B.getValNo();
3132 });
3133}
3134#endif
3135
3136SDValue X86TargetLowering::LowerFormalArguments(
3137 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3138 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3139 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3140 MachineFunction &MF = DAG.getMachineFunction();
3141 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
3142 const TargetFrameLowering &TFI = *Subtarget.getFrameLowering();
3143
3144 const Function &F = MF.getFunction();
3145 if (F.hasExternalLinkage() && Subtarget.isTargetCygMing() &&
3146 F.getName() == "main")
3147 FuncInfo->setForceFramePointer(true);
3148
3149 MachineFrameInfo &MFI = MF.getFrameInfo();
3150 bool Is64Bit = Subtarget.is64Bit();
3151 bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
3152
3153 assert(
3154 !(isVarArg && canGuaranteeTCO(CallConv)) &&
3155 "Var args not supported with calling conv' regcall, fastcc, ghc or hipe");
3156
3157 // Assign locations to all of the incoming arguments.
3158 SmallVector<CCValAssign, 16> ArgLocs;
3159 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
3160
3161 // Allocate shadow area for Win64.
3162 if (IsWin64)
3163 CCInfo.AllocateStack(32, 8);
3164
3165 CCInfo.AnalyzeArguments(Ins, CC_X86);
3166
3167 // In vectorcall calling convention a second pass is required for the HVA
3168 // types.
3169 if (CallingConv::X86_VectorCall == CallConv) {
3170 CCInfo.AnalyzeArgumentsSecondPass(Ins, CC_X86);
3171 }
3172
3173 // The next loop assumes that the locations are in the same order as the
3174 // input arguments.
3175 assert(isSortedByValueNo(ArgLocs) &&
3176 "Argument Location list must be sorted before lowering");
3177
3178 SDValue ArgValue;
3179 for (unsigned I = 0, InsIndex = 0, E = ArgLocs.size(); I != E;
3180 ++I, ++InsIndex) {
3181 assert(InsIndex < Ins.size() && "Invalid Ins index");
3182 CCValAssign &VA = ArgLocs[I];
3183
3184 if (VA.isRegLoc()) {
3185 EVT RegVT = VA.getLocVT();
3186 if (VA.needsCustom()) {
3187 assert(
3188 VA.getValVT() == MVT::v64i1 &&
3189 "Currently the only custom case is when we split v64i1 to 2 regs");
3190
3191 // In the regcall calling convention, v64i1 values compiled for a
3192 // 32 bit arch are split up into two registers.
3193 ArgValue =
3194 getv64i1Argument(VA, ArgLocs[++I], Chain, DAG, dl, Subtarget);
3195 } else {
3196 const TargetRegisterClass *RC;
3197 if (RegVT == MVT::i8)
3198 RC = &X86::GR8RegClass;
3199 else if (RegVT == MVT::i16)
3200 RC = &X86::GR16RegClass;
3201 else if (RegVT == MVT::i32)
3202 RC = &X86::GR32RegClass;
3203 else if (Is64Bit && RegVT == MVT::i64)
3204 RC = &X86::GR64RegClass;
3205 else if (RegVT == MVT::f32)
3206 RC = Subtarget.hasAVX512() ? &X86::FR32XRegClass : &X86::FR32RegClass;
3207 else if (RegVT == MVT::f64)
3208 RC = Subtarget.hasAVX512() ? &X86::FR64XRegClass : &X86::FR64RegClass;
3209 else if (RegVT == MVT::f80)
3210 RC = &X86::RFP80RegClass;
3211 else if (RegVT == MVT::f128)
3212 RC = &X86::VR128RegClass;
3213 else if (RegVT.is512BitVector())
3214 RC = &X86::VR512RegClass;
3215 else if (RegVT.is256BitVector())
3216 RC = Subtarget.hasVLX() ? &X86::VR256XRegClass : &X86::VR256RegClass;
3217 else if (RegVT.is128BitVector())
3218 RC = Subtarget.hasVLX() ? &X86::VR128XRegClass : &X86::VR128RegClass;
3219 else if (RegVT == MVT::x86mmx)
3220 RC = &X86::VR64RegClass;
3221 else if (RegVT == MVT::v1i1)
3222 RC = &X86::VK1RegClass;
3223 else if (RegVT == MVT::v8i1)
3224 RC = &X86::VK8RegClass;
3225 else if (RegVT == MVT::v16i1)
3226 RC = &X86::VK16RegClass;
3227 else if (RegVT == MVT::v32i1)
3228 RC = &X86::VK32RegClass;
3229 else if (RegVT == MVT::v64i1)
3230 RC = &X86::VK64RegClass;
3231 else
3232 llvm_unreachable("Unknown argument type!")::llvm::llvm_unreachable_internal("Unknown argument type!", "/build/llvm-toolchain-snapshot-9~svn358860/lib/Target/X86/X86ISelLowering.cpp"
, 3232)
;
3233
3234 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
3235 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
3236 }
3237
3238 // If this is an 8 or 16-bit value, it is really passed promoted to 32
3239 // bits. Insert an assert[sz]ext to capture this, then truncate to the
3240 // right size.
3241 if (VA.getLocInfo() == CCValAssign::SExt)
3242 ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
3243 DAG.getValueType(VA.getValVT()));
3244 else if (VA.getLocInfo() == CCValAssign::ZExt)
3245 ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
3246 DAG.getValueType(VA.getValVT()));
3247 else if (VA.getLocInfo() == CCValAssign::BCvt)
3248 ArgValue = DAG.getBitcast(VA.getValVT(), ArgValue);
3249
3250 if (VA.isExtInLoc()) {
3251 // Handle MMX values passed in XMM regs.
3252 if (RegVT.isVector() && VA.getValVT().getScalarType() != MVT::i1)
3253 ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(), ArgValue);
3254 else if (VA.getValVT().isVector() &&
3255 VA.getValVT().getScalarType() == MVT::i1 &&
3256 ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
3257 (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
3258 // Promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
3259 ArgValue = lowerRegToMasks(ArgValue, VA.getValVT(), RegVT, dl, DAG);
3260 } else
3261 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
3262 }
3263 } else {
3264 assert(VA.isMemLoc());
;
3265 ArgValue =
3266 LowerMemArgument(Chain, CallConv, Ins, dl, DAG, VA, MFI, InsIndex);
3267 }
3268
3269 // If value is passed via pointer - do a load.
3270 if (VA.getLocInfo() == CCValAssign::Indirect && !Ins[I].Flags.isByVal())
3271 ArgValue =
3272 DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue, MachinePointerInfo());
3273
3274 InVals.push_back(ArgValue);
3275 }
3276
3277 for (unsigned I = 0, E = Ins.size(); I != E; ++I) {
3278 // Swift calling convention does not require we copy the sret argument
3279 // into %rax/%eax for the return. We don't set SRetReturnReg for Swift.
3280 if (CallConv == CallingConv::Swift)
3281 continue;
3282
3283 // All x86 ABIs require that for returning structs by value we copy the
3284 // sret argument into %rax/%eax (depending on ABI) for the return. Save
3285 // the argument into a virtual register so that we can access it from the
3286 // return points.
3287 if (Ins[I].Flags.isSRet()) {
3288 unsigned Reg = FuncInfo->getSRetReturnReg();
3289 if (!Reg) {
3290 MVT PtrTy = getPointerTy(DAG.getDataLayout());
3291 Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
3292 FuncInfo->setSRetReturnReg(Reg);
3293 }
3294 SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[I]);
3295 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
3296 break;
3297 }
3298 }
3299
3300 unsigned StackSize = CCInfo.getNextStackOffset();
3301 // Align stack specially for tail calls.
3302 if (shouldGuaranteeTCO(CallConv,
3303 MF.getTarget().Options.GuaranteedTailCallOpt))
3304 StackSize = GetAlignedArgumentStackSize(StackSize, DAG);
3305
3306 // If the function takes a variable number of arguments, make a frame index for
3307 // the start of the first vararg value... for expansion of llvm.va_start. We
3308 // can skip this if there are no va_start calls.
3309 if (MFI.hasVAStart() &&
3310 (Is64Bit || (CallConv != CallingConv::X86_FastCall &&
3311 CallConv != CallingConv::X86_ThisCall))) {
3312 FuncInfo->setVarArgsFrameIndex(MFI.CreateFixedObject(1, StackSize, true));
3313 }
3314
3315 // Figure out if XMM registers are in use.
3316 assert(!(Subtarget.useSoftFloat() &&
3317 F.hasFnAttribute(Attribute::NoImplicitFloat)) &&
3318 "SSE register cannot be used when SSE is disabled!");
3319
3320 // 64-bit calling conventions support varargs and register parameters, so we
3321 // have to do extra work to spill them in the prologue.
3322 if (Is64Bit && isVarArg && MFI.hasVAStart()) {
3323 // Find the first unallocated argument registers.
3324 ArrayRef<MCPhysReg> ArgGPRs = get64BitArgumentGPRs(CallConv, Subtarget);
3325 ArrayRef<MCPhysReg> ArgXMMs = get64BitArgumentXMMs(MF, CallConv, Subtarget);
3326 unsigned NumIntRegs = CCInfo.getFirstUnallocated(ArgGPRs);
3327 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(ArgXMMs);
3328 assert(!(NumXMMRegs && !Subtarget.hasSSE1()) &&
3329 "SSE register cannot be used when SSE is disabled!");
3330
3331 // Gather all the live in physical registers.
3332 SmallVector<SDValue, 6> LiveGPRs;
3333 SmallVector<SDValue, 8> LiveXMMRegs;
3334 SDValue ALVal;
3335 for (MCPhysReg Reg : ArgGPRs.slice(NumIntRegs)) {
3336 unsigned GPR = MF.addLiveIn(Reg, &X86::GR64RegClass);
3337 LiveGPRs.push_back(
3338 DAG.getCopyFromReg(Chain, dl, GPR, MVT::i64));
3339 }
3340 if (!ArgXMMs.empty()) {
3341 unsigned AL = MF.addLiveIn(X86::AL, &X86::GR8RegClass);
3342 ALVal = DAG.getCopyFromReg(Chain, dl, AL, MVT::i8);
3343 for (MCPhysReg Reg : ArgXMMs.slice(NumXMMRegs)) {
3344 unsigned XMMReg = MF.addLiveIn(Reg, &X86::VR128RegClass);
3345 LiveXMMRegs.push_back(
3346 DAG.getCopyFromReg(Chain, dl, XMMReg, MVT::v4f32));
3347 }
3348 }
3349
3350 if (IsWin64) {
3351 // Get to the caller-allocated home save location. Add 8 to account
3352 // for the return address.
3353 int HomeOffset = TFI.getOffsetOfLocalArea() + 8;
3354 FuncInfo->setRegSaveFrameIndex(
3355 MFI.CreateFixedObject(1, NumIntRegs * 8 + HomeOffset, false));
3356 // Fixup to set vararg frame on shadow area (4 x i64).
3357 if (NumIntRegs < 4)
3358 FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex());
3359 } else {
3360 // For X86-64, if there are vararg parameters that are passed via
3361 // registers, then we must store them to their spots on the stack so
3362 // they may be loaded by dereferencing the result of va_next.
3363 FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);
3364 FuncInfo->setVarArgsFPOffset(ArgGPRs.size() * 8 + NumXMMRegs * 16);
3365 FuncInfo->setRegSaveFrameIndex(MFI.CreateStackObject(
3366 ArgGPRs.size() * 8 + ArgXMMs.size() * 16, 16, false));
3367 }
3368
3369 // Store the integer parameter registers.
3370 SmallVector<SDValue, 8> MemOps;
3371 SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
3372 getPointerTy(DAG.getDataLayout()));
3373 unsigned Offset = FuncInfo->getVarArgsGPOffset();
3374 for (SDValue Val : LiveGPRs) {
3375 SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
3376 RSFIN, DAG.getIntPtrConstant(Offset, dl));
3377 SDValue Store =
3378 DAG.getStore(Val.getValue(1), dl, Val, FIN,
3379 MachinePointerInfo::getFixedStack(
3380 DAG.getMachineFunction(),
3381 FuncInfo->getRegSaveFrameIndex(), Offset));
3382 MemOps.push_back(Store);
3383 Offset += 8;
3384 }
3385
3386 if (!ArgXMMs.empty() && NumXMMRegs != ArgXMMs.size()) {
3387 // Now store the XMM (fp + vector) parameter registers.
3388 SmallVector<SDValue, 12> SaveXMMOps;
3389 SaveXMMOps.push_back(Chain);
3390 SaveXMMOps.push_back(ALVal);
3391 SaveXMMOps.push_back(DAG.getIntPtrConstant(
3392 FuncInfo->getRegSaveFrameIndex(), dl));
3393 SaveXMMOps.push_back(DAG.getIntPtrConstant(
3394 FuncInfo->getVarArgsFPOffset(), dl));
3395 SaveXMMOps.insert(SaveXMMOps.end(), LiveXMMRegs.begin(),
3396 LiveXMMRegs.end());
3397 MemOps.push_back(DAG.getNode(X86ISD::VASTART_SAVE_XMM_REGS, dl,
3398 MVT::Other, SaveXMMOps));
3399 }
3400
3401 if (!MemOps.empty())
3402 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
3403 }
3404
3405 if (isVarArg && MFI.hasMustTailInVarArgFunc()) {
3406 // Find the largest legal vector type.
3407 MVT VecVT = MVT::Other;
3408 // FIXME: Only some x86_32 calling conventions support AVX512.
3409 if (Subtarget.hasAVX512() &&
3410 (Is64Bit || (CallConv == CallingConv::X86_VectorCall ||
3411 CallConv == CallingConv::Intel_OCL_BI)))
3412 VecVT = MVT::v16f32;
3413 else if (Subtarget.hasAVX())
3414 VecVT = MVT::v8f32;
3415 else if (Subtarget.hasSSE2())
3416 VecVT = MVT::v4f32;
3417
3418 // We forward some GPRs and some vector types.
3419 SmallVector<MVT, 2> RegParmTypes;
3420 MVT IntVT = Is64Bit ? MVT::i64 : MVT::i32;
3421 RegParmTypes.push_back(IntVT);
3422 if (VecVT != MVT::Other)
3423 RegParmTypes.push_back(VecVT);
3424
3425 // Compute the set of forwarded registers. The rest are scratch.
3426 SmallVectorImpl<ForwardedRegister> &Forwards =
3427 FuncInfo->getForwardedMustTailRegParms();
3428 CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, CC_X86);
3429
3430 // Conservatively forward AL on x86_64, since it might be used for varargs.
3431 if (Is64Bit && !CCInfo.isAllocated(X86::AL)) {
3432 unsigned ALVReg = MF.addLiveIn(X86::AL, &X86::GR8RegClass);
3433 Forwards.push_back(ForwardedRegister(ALVReg, X86::AL, MVT::i8));
3434 }
3435
3436 // Copy all forwards from physical to virtual registers.
3437 for (ForwardedRegister &F : Forwards) {
3438 // FIXME: Can we use a less constrained schedule?
3439 SDValue RegVal = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT);
3440 F.VReg = MF.getRegInfo().createVirtualRegister(getRegClassFor(F.VT));
3441 Chain = DAG.getCopyToReg(Chain, dl, F.VReg, RegVal);
3442 }
3443 }
3444
3445 // Some CCs need callee pop.
3446 if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
3447 MF.getTarget().Options.GuaranteedTailCallOpt)) {
3448 FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything.
3449 } else if (CallConv == CallingConv::X86_INTR && Ins.size() == 2) {
3450 // X86 interrupts must pop the error code (and the alignment padding) if
3451 // present.
3452 FuncInfo->setBytesToPopOnReturn(Is64Bit ? 16 : 4);
3453 } else {
3454 FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing.
3455 // If this is an sret function, the return should pop the hidden pointer.
3456 if (!Is64Bit && !canGuaranteeTCO(CallConv) &&
3457 !Subtarget.getTargetTriple().isOSMSVCRT() &&
3458 argsAreStructReturn(Ins, Subtarget.isTargetMCU()) == StackStructReturn)
3459 FuncInfo->setBytesToPopOnReturn(4);
3460 }
3461
3462 if (!Is64Bit) {
3463 // RegSaveFrameIndex is X86-64 only.
3464 FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
3465 if (CallConv == CallingConv::X86_FastCall ||
3466 CallConv == CallingConv::X86_ThisCall)
3467 // fastcc functions can't have varargs.
3468 FuncInfo->setVarArgsFrameIndex(0xAAAAAAA);
3469 }
3470
3471 FuncInfo->setArgumentStackSize(StackSize);
3472
3473 if (WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo()) {
3474 EHPersonality Personality = classifyEHPersonality(F.getPersonalityFn());
3475 if (Personality == EHPersonality::CoreCLR) {
3476 assert(Is64Bit);
3477 // TODO: Add a mechanism to frame lowering that will allow us to indicate
3478 // that we'd prefer this slot be allocated towards the bottom of the frame
3479 // (i.e. near the stack pointer after allocating the frame). Every
3480 // funclet needs a copy of this slot in its (mostly empty) frame, and the
3481 // offset from the bottom of this and each funclet's frame must be the
3482 // same, so the size of funclets' (mostly empty) frames is dictated by
3483 // how far this slot is from the bottom (since they allocate just enough
3484 // space to accommodate holding this slot at the correct offset).
3485 int PSPSymFI = MFI.CreateStackObject(8, 8, /*isSS=*/false);
3486 EHInfo->PSPSymFrameIdx = PSPSymFI;
3487 }
3488 }
3489
3490 if (CallConv == CallingConv::X86_RegCall ||
3491 F.hasFnAttribute("no_caller_saved_registers")) {
3492 MachineRegisterInfo &MRI = MF.getRegInfo();
3493 for (std::pair<unsigned, unsigned> Pair : MRI.liveins())
3494 MRI.disableCalleeSavedRegister(Pair.first);
3495 }
3496
3497 return Chain;
3498}
3499
3500SDValue X86TargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr,
3501 SDValue Arg, const SDLoc &dl,
3502 SelectionDAG &DAG,
3503 const CCValAssign &VA,
3504 ISD::ArgFlagsTy Flags) const {
3505 unsigned LocMemOffset = VA.getLocMemOffset();
3506 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
3507 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
3508 StackPtr, PtrOff);
3509 if (Flags.isByVal())
3510 return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
3511
3512 return DAG.getStore(
3513 Chain, dl, Arg, PtrOff,
3514 MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset));
3515}
3516
3517/// Emit a load of the return address if tail call
3518/// optimization is performed and it is required.
3519SDValue X86TargetLowering::EmitTailCallLoadRetAddr(
3520 SelectionDAG &DAG, SDValue &OutRetAddr, SDValue Chain, bool IsTailCall,
3521 bool Is64Bit, int FPDiff, const SDLoc &dl) const {
3522 // Adjust the Return address stack slot.
3523 EVT VT = getPointerTy(DAG.getDataLayout());
3524 OutRetAddr = getReturnAddressFrameIndex(DAG);
3525
3526 // Load the "old" Return address.
3527 OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, MachinePointerInfo());
3528 return SDValue(OutRetAddr.getNode(), 1);
3529}
3530
3531/// Emit a store of the return address if tail call
3532/// optimization is performed and it is required (FPDiff!=0).
3533static SDValue EmitTailCallStoreRetAddr(SelectionDAG &DAG, MachineFunction &MF,
3534 SDValue Chain, SDValue RetAddrFrIdx,
3535 EVT PtrVT, unsigned SlotSize,
3536 int FPDiff, const SDLoc &dl) {
3537 // Store the return address to the appropriate stack slot.
3538 if (!FPDiff) return Chain;
3539 // Calculate the new stack slot for the return address.
3540 int NewReturnAddrFI =
3541 MF.getFrameInfo().CreateFixedObject(SlotSize, (int64_t)FPDiff - SlotSize,
3542 false);
3543 SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, PtrVT);
3544 Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx,
3545 MachinePointerInfo::getFixedStack(
3546 DAG.getMachineFunction(), NewReturnAddrFI));
3547 return Chain;
3548}
3549
3550/// Returns a vector_shuffle mask for a movs{s|d} or movd
3551/// operation of the specified width.
3552static SDValue getMOVL(SelectionDAG &DAG, const SDLoc &dl, MVT VT, SDValue V1,
3553 SDValue V2) {
3554 unsigned NumElems = VT.getVectorNumElements();
3555 SmallVector<int, 8> Mask;
3556 Mask.push_back(NumElems);
3557 for (unsigned i = 1; i != NumElems; ++i)
3558 Mask.push_back(i);
3559 return DAG.getVectorShuffle(VT, dl, V1, V2, Mask);
3560}
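// Worked example: for a 4-element type the mask is {4, 1, 2, 3}, i.e. element
// 0 is taken from V2 and elements 1-3 from V1, matching the movs{s|d}
// behavior of replacing only the low element.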
3561
3562SDValue
3563X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
3564 SmallVectorImpl<SDValue> &InVals) const {
3565 SelectionDAG &DAG = CLI.DAG;
3566 SDLoc &dl = CLI.DL;
3567 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
3568 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
3569 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
3570 SDValue Chain = CLI.Chain;
3571 SDValue Callee = CLI.Callee;
3572 CallingConv::ID CallConv = CLI.CallConv;
3573 bool &isTailCall = CLI.IsTailCall;
3574 bool isVarArg = CLI.IsVarArg;
3575
3576 MachineFunction &MF = DAG.getMachineFunction();
3577 bool Is64Bit = Subtarget.is64Bit();
3578 bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
3579 StructReturnType SR = callIsStructReturn(Outs, Subtarget.isTargetMCU());
3580 bool IsSibcall = false;
3581 X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>();
3582 auto Attr = MF.getFunction().getFnAttribute("disable-tail-calls");
3583 const auto *CI = dyn_cast_or_null<CallInst>(CLI.CS.getInstruction());
3584 const Function *Fn = CI ? CI->getCalledFunction() : nullptr;
3585 bool HasNCSR = (CI && CI->hasFnAttr("no_caller_saved_registers")) ||
3586 (Fn && Fn->hasFnAttribute("no_caller_saved_registers"));
3587 const auto *II = dyn_cast_or_null<InvokeInst>(CLI.CS.getInstruction());
3588 bool HasNoCfCheck =
3589 (CI && CI->doesNoCfCheck()) || (II && II->doesNoCfCheck());
3590 const Module *M = MF.getMMI().getModule();
3591 Metadata *IsCFProtectionSupported = M->getModuleFlag("cf-protection-branch");
3592
3593 if (CallConv == CallingConv::X86_INTR)
3594 report_fatal_error("X86 interrupts may not be called directly");
3595
3596 if (Attr.getValueAsString() == "true")
3597 isTailCall = false;
3598
3599 if (Subtarget.isPICStyleGOT() &&
3600 !MF.getTarget().Options.GuaranteedTailCallOpt) {
3601 // If we are using a GOT, disable tail calls to external symbols with
3602 // default visibility. Tail calling such a symbol requires using a GOT
3603 // relocation, which forces early binding of the symbol. This breaks code
3604 // that requires lazy function symbol resolution. Using musttail or
3605 // GuaranteedTailCallOpt will override this.
3606 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
3607 if (!G || (!G->getGlobal()->hasLocalLinkage() &&
3608 G->getGlobal()->hasDefaultVisibility()))
3609 isTailCall = false;
3610 }
3611
3612 bool IsMustTail = CLI.CS && CLI.CS.isMustTailCall();
3613 if (IsMustTail) {
3614 // Force this to be a tail call. The verifier rules are enough to ensure
3615 // that we can lower this successfully without moving the return address
3616 // around.
3617 isTailCall = true;
3618 } else if (isTailCall) {
3619 // Check if it's really possible to do a tail call.
3620 isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
3621 isVarArg, SR != NotStructReturn,
3622 MF.getFunction().hasStructRetAttr(), CLI.RetTy,
3623 Outs, OutVals, Ins, DAG);
3624
3625 // Sibcalls are automatically detected tailcalls which do not require
3626 // ABI changes.
3627 if (!MF.getTarget().Options.GuaranteedTailCallOpt && isTailCall)
3628 IsSibcall = true;
3629
3630 if (isTailCall)
3631 ++NumTailCalls;
3632 }
3633
3634  assert(!(isVarArg && canGuaranteeTCO(CallConv)) &&
3635         "Var args not supported with calling convention fastcc, ghc or hipe");
3636
3637 // Analyze operands of the call, assigning locations to each operand.
3638 SmallVector<CCValAssign, 16> ArgLocs;
3639 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
3640
3641 // Allocate shadow area for Win64.
3642 if (IsWin64)
3643 CCInfo.AllocateStack(32, 8);
3644
3645 CCInfo.AnalyzeArguments(Outs, CC_X86);
3646
3647 // In vectorcall calling convention a second pass is required for the HVA
3648 // types.
3649 if (CallingConv::X86_VectorCall == CallConv) {
3650 CCInfo.AnalyzeArgumentsSecondPass(Outs, CC_X86);
3651 }
3652
3653 // Get a count of how many bytes are to be pushed on the stack.
3654 unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
3655 if (IsSibcall)
3656 // This is a sibcall. The memory operands are already available in the
3657 // caller's own incoming argument stack.
3658 NumBytes = 0;
3659 else if (MF.getTarget().Options.GuaranteedTailCallOpt &&
3660 canGuaranteeTCO(CallConv))
3661 NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
3662
3663 int FPDiff = 0;
3664 if (isTailCall && !IsSibcall && !IsMustTail) {
3665 // Lower arguments at fp - stackoffset + fpdiff.
3666 unsigned NumBytesCallerPushed = X86Info->getBytesToPopOnReturn();
3667
3668 FPDiff = NumBytesCallerPushed - NumBytes;
3669
3670 // Set the delta of movement of the returnaddr stackslot.
3671 // But only update it if the movement is larger (the delta is more negative).
3672 if (FPDiff < X86Info->getTCReturnAddrDelta())
3673 X86Info->setTCReturnAddrDelta(FPDiff);
3674 }
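
A standalone sketch (assumed sizes, not from a real compilation) of the delta bookkeeping above: if the caller's own incoming argument area is 24 bytes and this tail call needs 40, FPDiff is -16, and the recorded delta only ever moves toward the most negative value seen.

// Standalone sketch (not LLVM code): FPDiff bookkeeping with assumed sizes.
#include <algorithm>
#include <cassert>

int main() {
  int BytesCallerPushed = 24; // caller's incoming argument area (assumed)
  int NumBytes = 40;          // bytes this tail call wants to push (assumed)
  int FPDiff = BytesCallerPushed - NumBytes; // -16: callee needs more space

  int TCReturnAddrDelta = 0;  // per-function running minimum
  TCReturnAddrDelta = std::min(TCReturnAddrDelta, FPDiff);
  assert(FPDiff == -16 && TCReturnAddrDelta == -16);

  // A later, smaller tail call must not shrink the reserved area.
  FPDiff = 24 - 32;           // -8
  TCReturnAddrDelta = std::min(TCReturnAddrDelta, FPDiff);
  assert(TCReturnAddrDelta == -16);
  return 0;
}
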
3675
3676 unsigned NumBytesToPush = NumBytes;
3677 unsigned NumBytesToPop = NumBytes;
3678
3679 // If we have an inalloca argument, all stack space has already been allocated
3680 // for us and is right at the top of the stack. We don't support multiple
3681 // arguments passed in memory when using inalloca.
3682 if (!Outs.empty() && Outs.back().Flags.isInAlloca()) {
3683 NumBytesToPush = 0;
3684 if (!ArgLocs.back().isMemLoc())
3685 report_fatal_error("cannot use inalloca attribute on a register "
3686 "parameter");
3687 if (ArgLocs.back().getLocMemOffset() != 0)
3688 report_fatal_error("any parameter with the inalloca attribute must be "
3689 "the only memory argument");
3690 }
3691
3692 if (!IsSibcall)
3693 Chain = DAG.getCALLSEQ_START(Chain, NumBytesToPush,
3694 NumBytes - NumBytesToPush, dl);
3695
3696 SDValue RetAddrFrIdx;
3697 // Load return address for tail calls.
3698 if (isTailCall && FPDiff)
3699 Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall,
3700 Is64Bit, FPDiff, dl);
3701
3702 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
3703 SmallVector<SDValue, 8> MemOpChains;
3704 SDValue StackPtr;
3705
3706 // The next loop assumes that the locations are in the same order of the
3707 // input arguments.
3708  assert(isSortedByValueNo(ArgLocs) &&
3709         "Argument Location list must be sorted before lowering");
3710
3711 // Walk the register/memloc assignments, inserting copies/loads. In the case
3712 // of tail call optimization, arguments are handled later.
3713 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
3714 for (unsigned I = 0, OutIndex = 0, E = ArgLocs.size(); I != E;
3715 ++I, ++OutIndex) {
3716    assert(OutIndex < Outs.size() && "Invalid Out index");
3717 // Skip inalloca arguments, they have already been written.
3718 ISD::ArgFlagsTy Flags = Outs[OutIndex].Flags;
3719 if (Flags.isInAlloca())
3720 continue;
3721
3722 CCValAssign &VA = ArgLocs[I];
3723 EVT RegVT = VA.getLocVT();
3724 SDValue Arg = OutVals[OutIndex];
3725 bool isByVal = Flags.isByVal();
3726
3727 // Promote the value if needed.
3728 switch (VA.getLocInfo()) {
3729    default: llvm_unreachable("Unknown loc info!");
3730 case CCValAssign::Full: break;
3731 case CCValAssign::SExt:
3732 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);
3733 break;
3734 case CCValAssign::ZExt:
3735 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg);
3736 break;
3737 case CCValAssign::AExt:
3738 if (Arg.getValueType().isVector() &&
3739 Arg.getValueType().getVectorElementType() == MVT::i1)
3740 Arg = lowerMasksToReg(Arg, RegVT, dl, DAG);
3741 else if (RegVT.is128BitVector()) {
3742 // Special case: passing MMX values in XMM registers.
3743 Arg = DAG.getBitcast(MVT::i64, Arg);
3744 Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg);
3745 Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg);
3746 } else
3747 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg);
3748 break;
3749 case CCValAssign::BCvt:
3750 Arg = DAG.getBitcast(RegVT, Arg);
3751 break;
3752 case CCValAssign::Indirect: {
3753 if (isByVal) {
3754 // Memcpy the argument to a temporary stack slot to prevent
3755 // the caller from seeing any modifications the callee may make
3756 // as guaranteed by the `byval` attribute.
3757 int FrameIdx = MF.getFrameInfo().CreateStackObject(
3758 Flags.getByValSize(), std::max(16, (int)Flags.getByValAlign()),
3759 false);
3760 SDValue StackSlot =
3761 DAG.getFrameIndex(FrameIdx, getPointerTy(DAG.getDataLayout()));
3762 Chain =
3763 CreateCopyOfByValArgument(Arg, StackSlot, Chain, Flags, DAG, dl);
3764 // From now on treat this as a regular pointer
3765 Arg = StackSlot;
3766 isByVal = false;
3767 } else {
3768 // Store the argument.
3769 SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
3770 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
3771 Chain = DAG.getStore(
3772 Chain, dl, Arg, SpillSlot,
3773 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
3774 Arg = SpillSlot;
3775 }
3776 break;
3777 }
3778 }
3779
3780 if (VA.needsCustom()) {
3781      assert(VA.getValVT() == MVT::v64i1 &&
3782             "Currently the only custom case is when we split v64i1 to 2 regs");
3783 // Split v64i1 value into two registers
3784 Passv64i1ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++I],
3785 Subtarget);
3786 } else if (VA.isRegLoc()) {
3787 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
3788 if (isVarArg && IsWin64) {
3789 // Win64 ABI requires argument XMM reg to be copied to the corresponding
3790 // shadow reg if callee is a varargs function.
3791 unsigned ShadowReg = 0;
3792 switch (VA.getLocReg()) {
3793 case X86::XMM0: ShadowReg = X86::RCX; break;
3794 case X86::XMM1: ShadowReg = X86::RDX; break;
3795 case X86::XMM2: ShadowReg = X86::R8; break;
3796 case X86::XMM3: ShadowReg = X86::R9; break;
3797 }
3798 if (ShadowReg)
3799 RegsToPass.push_back(std::make_pair(ShadowReg, Arg));
3800 }
3801 } else if (!IsSibcall && (!isTailCall || isByVal)) {
3802      assert(VA.isMemLoc());
3803 if (!StackPtr.getNode())
3804 StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
3805 getPointerTy(DAG.getDataLayout()));
3806 MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
3807 dl, DAG, VA, Flags));
3808 }
3809 }
3810
3811 if (!MemOpChains.empty())
3812 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
3813
3814 if (Subtarget.isPICStyleGOT()) {
3815 // ELF / PIC requires the GOT pointer in the EBX register before function
3816 // calls made via the PLT.
3817 if (!isTailCall) {
3818 RegsToPass.push_back(std::make_pair(
3819 unsigned(X86::EBX), DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
3820 getPointerTy(DAG.getDataLayout()))));
3821 } else {
3822 // If we are tail calling and generating PIC/GOT style code load the
3823 // address of the callee into ECX. The value in ecx is used as target of
3824 // the tail jump. This is done to circumvent the ebx/callee-saved problem
3825 // for tail calls on PIC/GOT architectures. Normally we would just put the
3826 // address of GOT into ebx and then call target@PLT. But for tail calls
3827 // ebx would be restored (since ebx is callee saved) before jumping to the
3828 // target@PLT.
3829
3830 // Note: The actual moving to ECX is done further down.
3831 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
3832 if (G && !G->getGlobal()->hasLocalLinkage() &&
3833 G->getGlobal()->hasDefaultVisibility())
3834 Callee = LowerGlobalAddress(Callee, DAG);
3835 else if (isa<ExternalSymbolSDNode>(Callee))
3836 Callee = LowerExternalSymbol(Callee, DAG);
3837 }
3838 }
3839
3840 if (Is64Bit && isVarArg && !IsWin64 && !IsMustTail) {
3841 // From AMD64 ABI document:
3842 // For calls that may call functions that use varargs or stdargs
3843 // (prototype-less calls or calls to functions containing ellipsis (...) in
3844 // the declaration) %al is used as a hidden argument to specify the number
3845 // of SSE registers used. The contents of %al do not need to match exactly
3846 // the number of registers, but must be an upper bound on the number of SSE
3847 // registers used and is in the range 0 - 8 inclusive.
3848
3849 // Count the number of XMM registers allocated.
3850 static const MCPhysReg XMMArgRegs[] = {
3851 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
3852 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
3853 };
3854 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs);
3855    assert((Subtarget.hasSSE1() || !NumXMMRegs) &&
3856           "SSE registers cannot be used when SSE is disabled");
3857
3858 RegsToPass.push_back(std::make_pair(unsigned(X86::AL),
3859 DAG.getConstant(NumXMMRegs, dl,
3860 MVT::i8)));
3861 }
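
For illustration only, and independent of the SelectionDAG code above: a minimal sketch of the counting rule, using a made-up argument classification. Any value in AL that is an upper bound of at most 8 satisfies the ABI.

// Standalone sketch (not LLVM code): upper bound placed in AL for an AMD64 varargs call.
#include <cassert>
#include <vector>

enum class ArgClass { Integer, SSE }; // hypothetical classification, not LLVM API

static unsigned countXMMRegsUsed(const std::vector<ArgClass> &Args) {
  unsigned NumXMM = 0;
  for (ArgClass C : Args)
    if (C == ArgClass::SSE && NumXMM < 8) // only XMM0..XMM7 carry arguments
      ++NumXMM;
  return NumXMM; // the caller places this (or any upper bound <= 8) in %al
}

int main() {
  // e.g. printf("%d %f %f", i, d1, d2): one integer, two doubles.
  assert(countXMMRegsUsed({ArgClass::Integer, ArgClass::SSE, ArgClass::SSE}) == 2);
  return 0;
}
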
3862
3863 if (isVarArg && IsMustTail) {
3864 const auto &Forwards = X86Info->getForwardedMustTailRegParms();
3865 for (const auto &F : Forwards) {
3866 SDValue Val = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT);
3867 RegsToPass.push_back(std::make_pair(unsigned(F.PReg), Val));
3868 }
3869 }
3870
3871 // For tail calls lower the arguments to the 'real' stack slots. Sibcalls
3872 // don't need this because the eligibility check rejects calls that require
3873 // shuffling arguments passed in memory.
3874 if (!IsSibcall && isTailCall) {
3875 // Force all the incoming stack arguments to be loaded from the stack
3876 // before any new outgoing arguments are stored to the stack, because the
3877 // outgoing stack slots may alias the incoming argument stack slots, and
3878 // the alias isn't otherwise explicit. This is slightly more conservative
3879 // than necessary, because it means that each store effectively depends
3880 // on every argument instead of just those arguments it would clobber.
3881 SDValue ArgChain = DAG.getStackArgumentTokenFactor(Chain);
3882
3883 SmallVector<SDValue, 8> MemOpChains2;
3884 SDValue FIN;
3885 int FI = 0;
3886 for (unsigned I = 0, OutsIndex = 0, E = ArgLocs.size(); I != E;
3887 ++I, ++OutsIndex) {
3888 CCValAssign &VA = ArgLocs[I];
3889
3890 if (VA.isRegLoc()) {
3891 if (VA.needsCustom()) {
3892          assert((CallConv == CallingConv::X86_RegCall) &&
3893                 "Expecting custom case only in regcall calling convention");
3894          // This means that we are in a special case where one argument was
3895          // passed through two register locations - skip the next location.
3896 ++I;
3897 }
3898
3899 continue;
3900 }
3901
3902      assert(VA.isMemLoc());
3903 SDValue Arg = OutVals[OutsIndex];
3904 ISD::ArgFlagsTy Flags = Outs[OutsIndex].Flags;
3905 // Skip inalloca arguments. They don't require any work.
3906 if (Flags.isInAlloca())
3907 continue;
3908 // Create frame index.
3909 int32_t Offset = VA.getLocMemOffset()+FPDiff;
3910 uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8;
3911 FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
3912 FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
3913
3914 if (Flags.isByVal()) {
3915 // Copy relative to framepointer.
3916 SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset(), dl);
3917 if (!StackPtr.getNode())
3918 StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
3919 getPointerTy(DAG.getDataLayout()));
3920 Source = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
3921 StackPtr, Source);
3922
3923 MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN,
3924 ArgChain,
3925 Flags, DAG, dl));
3926 } else {
3927 // Store relative to framepointer.
3928 MemOpChains2.push_back(DAG.getStore(
3929 ArgChain, dl, Arg, FIN,
3930 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
3931 }
3932 }
3933
3934 if (!MemOpChains2.empty())
3935 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
3936
3937 // Store the return address to the appropriate stack slot.
3938 Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx,
3939 getPointerTy(DAG.getDataLayout()),
3940 RegInfo->getSlotSize(), FPDiff, dl);
3941 }
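
A standalone sketch (assumed numbers, not from a real compilation) of the two small computations in the loop above: the relowered slot sits at the incoming offset shifted by FPDiff, and the object size is the location type's bit width rounded up to whole bytes.

// Standalone sketch (not part of X86ISelLowering.cpp): offset/size arithmetic
// for relowered tail-call stack arguments, using assumed values.
#include <cassert>
#include <cstdint>

int main() {
  int32_t LocMemOffset = 16; // where the outgoing arg would normally go (assumed)
  int32_t FPDiff = -16;      // callee needs 16 more bytes than the caller had (assumed)
  int32_t Offset = LocMemOffset + FPDiff;
  assert(Offset == 0);       // the fixed object is created at this offset

  // Location size is rounded up to whole bytes.
  assert((32u + 7) / 8 == 4); // an i32 location occupies 4 bytes
  assert((1u + 7) / 8 == 1);  // an i1 location still occupies a whole byte
  return 0;
}
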
3942
3943 // Build a sequence of copy-to-reg nodes chained together with token chain
3944 // and flag operands which copy the outgoing args into registers.
3945 SDValue InFlag;
3946 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
3947 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
3948 RegsToPass[i].second, InFlag);
3949 InFlag = Chain.getValue(1);
3950 }
3951
3952 if (DAG.getTarget().getCodeModel() == CodeModel::Large) {
3953    assert(Is64Bit && "Large code model is only legal in 64-bit mode.");
3954 // In the 64-bit large code model, we have to make all calls
3955 // through a register, since the call instruction's 32-bit
3956 // pc-relative offset may not be large enough to hold the whole
3957 // address.
3958 } else if (Callee->getOpcode() == ISD::GlobalAddress) {
3959 // If the callee is a GlobalAddress node (quite common, every direct call
3960 // is) turn it into a TargetGlobalAddress node so that legalize doesn't hack
3961 // it.
3962 GlobalAddressSDNode* G = cast<GlobalAddressSDNode>(Callee);
3963
3964 // We should use extra load for direct calls to dllimported functions in
3965 // non-JIT mode.
3966 const GlobalValue *GV = G->getGlobal();
3967 if (!GV->hasDLLImportStorageClass()) {
3968 unsigned char OpFlags = Subtarget.classifyGlobalFunctionReference(GV);
3969
3970 Callee = DAG.getTargetGlobalAddress(
3971 GV, dl, getPointerTy(DAG.getDataLayout()), G->getOffset(), OpFlags);
3972
3973 if (OpFlags == X86II::MO_GOTPCREL) {
3974 // Add a wrapper.
3975 Callee = DAG.getNode(X86ISD::WrapperRIP, dl,
3976 getPointerTy(DAG.getDataLayout()), Callee);
3977 // Add extra indirection
3978 Callee = DAG.getLoad(
3979 getPointerTy(DAG.getDataLayout()), dl, DAG.getEntryNode(), Callee,
3980 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3981 }
3982 }
3983 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
3984 const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
3985 unsigned char OpFlags =
3986 Subtarget.classifyGlobalFunctionReference(nullptr, *Mod);
3987
3988 Callee = DAG.getTargetExternalSymbol(
3989 S->getSymbol(), getPointerTy(DAG.getDataLayout()), OpFlags);
3990
3991 if (OpFlags == X86II::MO_GOTPCREL) {
3992 Callee = DAG.getNode(X86ISD::WrapperRIP, dl,
3993 getPointerTy(DAG.getDataLayout()), Callee);
3994 Callee = DAG.getLoad(
3995 getPointerTy(DAG.getDataLayout()), dl, DAG.getEntryNode(), Callee,
3996 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3997 }
3998 } else if (Subtarget.isTarget64BitILP32() &&
3999 Callee->getValueType(0) == MVT::i32) {
4000 // Zero-extend the 32-bit Callee address into a 64-bit one according to the x32 ABI.
4001 Callee = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Callee);
4002 }
4003
4004 // Returns a chain & a flag for retval copy to use.
4005 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
4006 SmallVector<SDValue, 8> Ops;
4007
4008 if (!IsSibcall && isTailCall) {
4009 Chain = DAG.getCALLSEQ_END(Chain,
4010 DAG.getIntPtrConstant(NumBytesToPop, dl, true),
4011 DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
4012 InFlag = Chain.getValue(1);
4013 }
4014
4015 Ops.push_back(Chain);
4016 Ops.push_back(Callee);
4017
4018 if (isTailCall)
4019 Ops.push_back(DAG.getConstant(FPDiff, dl, MVT::i32));
4020
4021 // Add argument registers to the end of the list so that they are known live
4022 // into the call.
4023 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
4024 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
4025 RegsToPass[i].second.getValueType()));
4026
4027 // Add a register mask operand representing the call-preserved registers.
4028 // If HasNCSR is asserted (attribute NoCallerSavedRegisters exists) then we
4029 // set X86_INTR calling convention because it has the same CSR mask
4030 // (same preserved registers).
4031 const uint32_t *Mask = RegInfo->getCallPreservedMask(
4032 MF, HasNCSR ? (CallingConv::ID)CallingConv::X86_INTR : CallConv);
4033  assert(Mask && "Missing call preserved mask for calling convention");
4034
4035 // If this is an invoke in a 32-bit function using a funclet-based
4036 // personality, assume the function clobbers all registers. If an exception
4037 // is thrown, the runtime will not restore CSRs.
4038 // FIXME: Model this more precisely so that we can register allocate across
4039 // the normal edge and spill and fill across the exceptional edge.
4040 if (!Is64Bit && CLI.CS && CLI.CS.isInvoke()) {
4041 const Function &CallerFn = MF.getFunction();
4042 EHPersonality Pers =
4043 CallerFn.hasPersonalityFn()
4044 ? classifyEHPersonality(CallerFn.getPersonalityFn())
4045 : EHPersonality::Unknown;
4046 if (isFuncletEHPersonality(Pers))
4047 Mask = RegInfo->getNoPreservedMask();
4048 }
4049
4050 // Define a new register mask from the existing mask.
4051 uint32_t *RegMask = nullptr;
4052
4053 // In some calling conventions we need to remove the used physical registers
4054 // from the reg mask.
4055 if (CallConv == CallingConv::X86_RegCall || HasNCSR) {
4056 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
4057
4058 // Allocate a new Reg Mask and copy Mask.
4059 RegMask = MF.allocateRegMask();
4060 unsigned RegMaskSize = MachineOperand::getRegMaskSize(TRI->getNumRegs());
4061 memcpy(RegMask, Mask, sizeof(RegMask[0]) * RegMaskSize);
4062
4063 // Make sure all sub registers of the argument registers are reset
4064 // in the RegMask.
4065 for (auto const &RegPair : RegsToPass)
4066 for (MCSubRegIterator SubRegs(RegPair.first, TRI, /*IncludeSelf=*/true);
4067 SubRegs.isValid(); ++SubRegs)
4068 RegMask[*SubRegs / 32] &= ~(1u << (*SubRegs % 32));
4069
4070 // Create the RegMask Operand according to our updated mask.
4071 Ops.push_back(DAG.getRegisterMask(RegMask));
4072 } else {
4073 // Create the RegMask Operand according to the static mask.
4074 Ops.push_back(DAG.getRegisterMask(Mask));
4075 }
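
A standalone sketch (not part of the file) of the bit manipulation in the loop above; the packing assumption (one bit per register, 32 per uint32_t word) matches the indexing used there, and the register numbers below are made up.

// Standalone sketch (not LLVM code): clear a register's bit in a packed reg mask.
#include <cassert>
#include <cstdint>
#include <vector>

static void clearRegInMask(std::vector<uint32_t> &Mask, unsigned Reg) {
  Mask[Reg / 32] &= ~(1u << (Reg % 32)); // one bit per register, 32 per word
}

int main() {
  std::vector<uint32_t> Mask(4, 0xFFFFFFFFu); // pretend 128 registers, all preserved
  clearRegInMask(Mask, 37);                   // hypothetical register number
  assert((Mask[1] & (1u << 5)) == 0);         // 37 = 32 + 5
  assert(Mask[0] == 0xFFFFFFFFu);             // other words untouched
  return 0;
}
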
4076
4077 if (InFlag.getNode())
4078 Ops.push_back(InFlag);
4079
4080 if (isTailCall) {
4081 // We used to do:
4082 //// If this is the first return lowered for this function, add the regs
4083 //// to the liveout set for the function.
4084 // This isn't right, although it's probably harmless on x86; liveouts
4085 // should be computed from returns not tail calls. Consider a void
4086 // function making a tail call to a function returning int.
4087 MF.getFrameInfo().setHasTailCall();
4088 return DAG.getNode(X86ISD::TC_RETURN, dl, NodeTys, Ops);
4089 }
4090
4091 if (HasNoCfCheck && IsCFProtectionSupported) {
4092 Chain = DAG.getNode(X86ISD::NT_CALL, dl, NodeTys, Ops);
4093 } else {
4094 Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, Ops);
4095 }
4096 InFlag = Chain.getValue(1);
4097
4098 // Create the CALLSEQ_END node.
4099 unsigned NumBytesForCalleeToPop;
4100 if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
4101 DAG.getTarget().Options.GuaranteedTailCallOpt))
4102 NumBytesForCalleeToPop = NumBytes; // Callee pops everything
4103 else if (!Is64Bit && !canGuaranteeTCO(CallConv) &&
4104 !Subtarget.getTargetTriple().isOSMSVCRT() &&
4105 SR == StackStructReturn)
4106 // If this is a call to a struct-return function, the callee
4107 // pops the hidden struct pointer, so we have to push it back.
4108 // This is common for Darwin/X86, Linux & Mingw32 targets.
4109 // For MSVC Win32 targets, the caller pops the hidden struct pointer.
4110 NumBytesForCalleeToPop = 4;
4111 else
4112 NumBytesForCalleeToPop = 0; // Callee pops nothing.
4113
4114 if (CLI.DoesNotReturn && !getTargetMachine().Options.TrapUnreachable) {
4115 // No need to reset the stack after the call if the call doesn't return. To
4116 // keep the MI verifier happy, we'll pretend the callee does it for us.
4117 NumBytesForCalleeToPop = NumBytes;
4118 }
4119
4120 // Returns a flag for retval copy to use.
4121 if (!IsSibcall) {
4122 Chain = DAG.getCALLSEQ_END(Chain,
4123 DAG.getIntPtrConstant(NumBytesToPop, dl, true),
4124 DAG.getIntPtrConstant(NumBytesForCalleeToPop, dl,
4125 true),
4126 InFlag, dl);
4127 InFlag = Chain.getValue(1);
4128 }
4129
4130 // Handle result values, copying them out of physregs into vregs that we
4131 // return.
4132 return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
4133 InVals, RegMask);
4134}
4135
4136//===----------------------------------------------------------------------===//
4137// Fast Calling Convention (tail call) implementation
4138//===----------------------------------------------------------------------===//
4139
4140// Like stdcall, the callee cleans up the arguments in this convention, except that ECX is
4141// reserved for storing the tail called function address. Only 2 registers are
4142// free for argument passing (inreg). Tail call optimization is performed
4143// provided:
4144// * tailcallopt is enabled
4145// * caller/callee are fastcc
4146// On X86_64 architecture with GOT-style position independent code only local
4147// (within module) calls are supported at the moment.
4148// To keep the stack aligned according to the platform ABI, the function
4149// GetAlignedArgumentStackSize ensures that the argument delta is always a
4150// multiple of the stack alignment. (Dynamic linkers need this - darwin's dyld for example)
4151// If the tail-called callee has more arguments than the caller, the
4152// caller needs to make sure that there is room to move the RETADDR to. This is
4153// achieved by reserving an area the size of the argument delta right after the
4154// original RETADDR, but before the saved framepointer or the spilled registers
4155// e.g. caller(arg1, arg2) calls callee(arg1, arg2,arg3,arg4)
4156// stack layout:
4157// arg1
4158// arg2
4159// RETADDR
4160// [ new RETADDR
4161// move area ]
4162// (possible EBP)
4163// ESI
4164// EDI
4165// local1 ..
4166
4167/// Align the stack size, e.g. to 16n + 12 for a 16-byte alignment
4168/// requirement.
4169unsigned
4170X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize,
4171 SelectionDAG& DAG) const {
4172 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
4173 const TargetFrameLowering &TFI = *Subtarget.getFrameLowering();
4174 unsigned StackAlignment = TFI.getStackAlignment();
4175 uint64_t AlignMask = StackAlignment - 1;
4176 int64_t Offset = StackSize;
4177 unsigned SlotSize = RegInfo->getSlotSize();
4178 if ( (Offset & AlignMask) <= (StackAlignment - SlotSize) ) {
4179 // Number smaller than 12 so just add the difference.
4180 Offset += ((StackAlignment - SlotSize) - (Offset & AlignMask));
4181 } else {
4182 // Mask out lower bits, add stackalignment once plus the 12 bytes.
4183 Offset = ((~AlignMask) & Offset) + StackAlignment +
4184 (StackAlignment-SlotSize);
4185 }
4186 return Offset;
4187}
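
A standalone worked example of the alignment arithmetic above (not part of the file); the 16-byte alignment and 4- or 8-byte slot sizes are assumed values chosen to match the "16n + 12" comment.

// Standalone sketch (not LLVM code): align an argument-area size so that the
// size plus one return-address slot is a multiple of the stack alignment.
#include <cassert>
#include <cstdint>

static int64_t alignArgStackSize(int64_t Offset, unsigned StackAlignment,
                                 unsigned SlotSize) {
  uint64_t AlignMask = StackAlignment - 1;
  if ((Offset & AlignMask) <= (StackAlignment - SlotSize))
    Offset += (StackAlignment - SlotSize) - (Offset & AlignMask);
  else
    Offset = (Offset & ~AlignMask) + StackAlignment + (StackAlignment - SlotSize);
  return Offset;
}

int main() {
  // 32-bit: 16-byte alignment, 4-byte slot -> result is always 16n + 12.
  assert(alignArgStackSize(20, 16, 4) == 28);
  assert(alignArgStackSize(30, 16, 4) == 44);
  // 64-bit: 16-byte alignment, 8-byte slot -> result is always 16n + 8.
  assert(alignArgStackSize(26, 16, 8) == 40);
  return 0;
}
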
4188
4189/// Return true if the given stack call argument is already available in the
4190/// same position (relatively) of the caller's incoming argument stack.
4191static
4192bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
4193 MachineFrameInfo &MFI, const MachineRegisterInfo *MRI,
4194 const X86InstrInfo *TII, const CCValAssign &VA) {
4195 unsigned Bytes = Arg.getValueSizeInBits() / 8;
4196
4197 for (;;) {
4198 // Look through nodes that don't alter the bits of the incoming value.
4199 unsigned Op = Arg.getOpcode();
4200 if (Op == ISD::ZERO_EXTEND || Op == ISD::ANY_EXTEND || Op == ISD::BITCAST) {
4201 Arg = Arg.getOperand(0);
4202 continue;
4203 }
4204 if (Op == ISD::TRUNCATE) {
4205 const SDValue &TruncInput = Arg.getOperand(0);
4206 if (TruncInput.getOpcode() == ISD::AssertZext &&
4207 cast<VTSDNode>(TruncInput.getOperand(1))->getVT() ==
4208 Arg.getValueType()) {
4209 Arg = TruncInput.getOperand(0);
4210 continue;
4211 }
4212 }
4213 break;
4214 }
4215
4216  int FI = INT_MAX;
4217 if (Arg.getOpcode() == ISD::CopyFromReg) {
4218 unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
4219 if (!TargetRegisterInfo::isVirtualRegister(VR))
4220 return false;
4221 MachineInstr *Def = MRI->getVRegDef(VR);
4222 if (!Def)
4223 return false;
4224 if (!Flags.isByVal()) {
4225 if (!TII->isLoadFromStackSlot(*Def, FI))
4226 return false;
4227 } else {
4228 unsigned Opcode = Def->getOpcode();
4229 if ((Opcode == X86::LEA32r || Opcode == X86::LEA64r ||
4230 Opcode == X86::LEA64_32r) &&
4231 Def->getOperand(1).isFI()) {
4232 FI = Def->getOperand(1).getIndex();
4233 Bytes = Flags.getByValSize();
4234 } else
4235 return false;
4236 }
4237 } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
4238 if (Flags.isByVal())
4239 // ByVal argument is passed in as a pointer but it's now being
4240 // dereferenced. e.g.
4241 // define @foo(%struct.X* %A) {
4242 // tail call @bar(%struct.X* byval %A)
4243 // }
4244 return false;
4245 SDValue Ptr = Ld->getBasePtr();
4246 FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
4247 if (!FINode)
4248 return false;
4249 FI = FINode->getIndex();
4250 } else if (Arg.getOpcode() == ISD::FrameIndex && Flags.isByVal()) {
4251 FrameIndexSDNode *FINode = cast<FrameIndexSDNode>(Arg);
4252 FI = FINode->getIndex();
4253 Bytes = Flags.getByValSize();
4254 } else
4255 return false;
4256
4257  assert(FI != INT_MAX);
4258 if (!MFI.isFixedObjectIndex(FI))
4259 return false;
4260
4261 if (Offset != MFI.getObjectOffset(FI))
4262 return false;
4263
4264 // If this is not byval, check that the argument stack object is immutable.
4265 // inalloca and argument copy elision can create mutable argument stack
4266 // objects. Byval objects can be mutated, but a byval call intends to pass the
4267 // mutated memory.
4268 if (!Flags.isByVal() && !MFI.isImmutableObjectIndex(FI))
4269 return false;
4270
4271 if (VA.getLocVT().getSizeInBits() > Arg.getValueSizeInBits()) {
4272 // If the argument location is wider than the argument type, check that any
4273 // extension flags match.
4274 if (Flags.isZExt() != MFI.isObjectZExt(FI) ||
4275 Flags.isSExt() != MFI.isObjectSExt(FI)) {
4276 return false;
4277 }
4278 }
4279
4280 return Bytes == MFI.getObjectSize(FI);
4281}
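
A deliberately simplified, standalone restatement (not the LLVM implementation) of what the final checks above require: the argument must refer to a fixed stack object whose offset and size match, and that object must be immutable unless the argument is byval. The FixedObject record below is a made-up stand-in for MachineFrameInfo state.

// Standalone sketch (not LLVM code): the core offset/size/immutability check.
#include <cassert>
#include <cstdint>

struct FixedObject {          // stand-in for one MachineFrameInfo fixed object
  int64_t Offset;
  uint64_t Size;
  bool Immutable;
};

static bool matchesIncomingSlot(const FixedObject &FO, int64_t ArgOffset,
                                uint64_t ArgBytes, bool IsByVal) {
  if (FO.Offset != ArgOffset)
    return false;
  if (!IsByVal && !FO.Immutable)   // inalloca / argument copy elision can make slots mutable
    return false;
  return FO.Size == ArgBytes;
}

int main() {
  FixedObject Slot{/*Offset=*/8, /*Size=*/4, /*Immutable=*/true};
  assert(matchesIncomingSlot(Slot, 8, 4, /*IsByVal=*/false));
  assert(!matchesIncomingSlot(Slot, 12, 4, false)); // different offset
  assert(!matchesIncomingSlot(Slot, 8, 8, false));  // different size
  return 0;
}
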
4282
4283/// Check whether the call is eligible for tail call optimization. Targets
4284/// that want to do tail call optimization should implement this function.
4285bool X86TargetLowering::IsEligibleForTailCallOptimization(
4286 SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
4287 bool isCalleeStructRet, bool isCallerStructRet, Type *RetTy,
4288 const SmallVectorImpl<ISD::OutputArg> &Outs,
4289 const SmallVectorImpl<SDValue> &OutVals,
4290 const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
4291 if (!mayTailCallThisCC(CalleeCC))
4292 return false;
4293
4294 // If -tailcallopt is specified, make fastcc functions tail-callable.
4295 MachineFunction &MF = DAG.getMachineFunction();
4296 const Function &CallerF = MF.getFunction();
4297
4298 // If the function return type is x86_fp80 and the callee return type is not,
4299 // then the FP_EXTEND of the call result is not a nop. It's not safe to
4300 // perform a tailcall optimization here.
4301 if (CallerF.getReturnType()->isX86_FP80Ty() && !RetTy->isX86_FP80Ty())
4302 return false;
4303
4304 CallingConv::ID CallerCC = CallerF.getCallingConv();
4305 bool CCMatch = CallerCC == CalleeCC;
4306 bool IsCalleeWin64 = Subtarget.isCallingConvWin64(CalleeCC);
4307 bool IsCallerWin64 = Subtarget.isCallingConvWin64(CallerCC);
4308
4309 // Win64 functions have extra shadow space for argument homing. Don't do the
4310 // sibcall if the caller and callee have mismatched expectations for this
4311 // space.
4312 if (IsCalleeWin64 != IsCallerWin64)
4313 return false;
4314
4315 if (DAG.getTarget().Options.GuaranteedTailCallOpt) {
4316 if (canGuaranteeTCO(CalleeCC) && CCMatch)
4317 return true;
4318 return false;
4319 }
4320
4321 // Look for obvious safe cases to perform tail call optimization that do not
4322 // require ABI changes. This is what gcc calls sibcall.
4323
4324 // Can't do sibcall if stack needs to be dynamically re-aligned. PEI needs to
4325 // emit a special epilogue.
4326 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
4327 if (RegInfo->needsStackRealignment(MF))
4328 return false;
4329
4330 // Also avoid sibcall optimization if either caller or callee uses struct
4331 // return semantics.
4332 if (isCalleeStructRet || isCallerStructRet)
4333 return false;
4334
4335 // Do not sibcall optimize vararg calls unless all arguments are passed via
4336 // registers.
4337 LLVMContext &C = *DAG.getContext();
4338 if (isVarArg && !Outs.empty()) {
4339 // Optimizing for varargs on Win64 is unlikely to be safe without
4340 // additional testing.
4341 if (IsCalleeWin64 || IsCallerWin64)
4342 return false;
4343
4344 SmallVector<CCValAssign, 16> ArgLocs;
4345 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
4346
4347 CCInfo.AnalyzeCallOperands(Outs, CC_X86);
4348 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i)
4349 if (!ArgLocs[i].isRegLoc())
4350 return false;
4351 }
4352
4353 // If the call result is in ST0 / ST1, it needs to be popped off the x87
4354 // stack. Therefore, if it's not used by the call it is not safe to optimize
4355 // this into a sibcall.
4356 bool Unused = false;
4357 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
4358 if (!Ins[i].Used) {
4359 Unused = true;
4360 break;
4361 }
4362 }
4363 if (Unused) {
4364 SmallVector<CCValAssign, 16> RVLocs;
4365 CCState CCInfo(CalleeCC, false, MF, RVLocs, C);
4366 CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
4367 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
4368 CCValAssign &VA = RVLocs[i];
4369 if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1)
4370 return false;
4371 }
4372 }
4373
4374 // Check that the call results are passed in the same way.
4375 if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
4376 RetCC_X86, RetCC_X86))
4377 return false;
4378 // The callee has to preserve all registers the caller needs to preserve.
4379 const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
4380 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
4381 if (!CCMatch) {
4382 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
4383 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
4384 return false;
4385 }
4386
4387 unsigned StackArgsSize = 0;
4388
4389 // If the callee takes no arguments then go on to check the results of the
4390 // call.
4391 if (!Outs.empty()) {
4392 // Check if stack adjustment is needed. For now, do not do this if any
4393 // argument is passed on the stack.
4394 SmallVector<CCValAssign, 16> ArgLocs;
4395 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
4396
4397 // Allocate shadow area for Win64
4398 if (IsCalleeWin64)
4399 CCInfo.AllocateStack(32, 8);
4400
4401 CCInfo.AnalyzeCallOperands(Outs, CC_X86);
4402 StackArgsSize = CCInfo.getNextStackOffset();
4403
4404 if (CCInfo.getNextStackOffset()) {
4405 // Check if the arguments are already laid out in the right way as
4406 // the caller's fixed stack objects.
4407 MachineFrameInfo &MFI = MF.getFrameInfo();
4408 const MachineRegisterInfo *MRI = &MF.getRegInfo();
4409 const X86InstrInfo *TII = Subtarget.getInstrInfo();
4410 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4411 CCValAssign &VA = ArgLocs[i];
4412 SDValue Arg = OutVals[i];
4413 ISD::ArgFlagsTy Flags = Outs[i].Flags;
4414 if (VA.getLocInfo() == CCValAssign::Indirect)
4415 return false;
4416 if (!VA.isRegLoc()) {
4417 if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
4418 MFI, MRI, TII, VA))
4419 return false;
4420 }
4421 }
4422 }
4423
4424 bool PositionIndependent = isPositionIndependent();
4425 // If the tailcall address may be in a register, then make sure it's
4426 // possible to register allocate for it. In 32-bit, the call address can
4427 // only target EAX, EDX, or ECX since the tail call must be scheduled after
4428 // callee-saved registers are restored. These happen to be the same
4429 // registers used to pass 'inreg' arguments so watch out for those.
4430 if (!Subtarget.is64Bit() && ((!isa<GlobalAddressSDNode>(Callee) &&
4431 !isa<ExternalSymbolSDNode>(Callee)) ||
4432 PositionIndependent)) {
4433 unsigned NumInRegs = 0;
4434 // In PIC we need an extra register to formulate the address computation
4435 // for the callee.
4436 unsigned MaxInRegs = PositionIndependent ? 2 : 3;
4437
4438 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4439 CCValAssign &VA = ArgLocs[i];
4440 if (!VA.isRegLoc())
4441 continue;
4442 unsigned Reg = VA.getLocReg();
4443 switch (Reg) {
4444 default: break;
4445 case X86::EAX: case X86::EDX: case X86::ECX:
4446 if (++NumInRegs == MaxInRegs)
4447 return false;
4448 break;
4449 }
4450 }
4451 }
4452
4453 const MachineRegisterInfo &MRI = MF.getRegInfo();
4454 if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
4455 return false;
4456 }
4457
4458 bool CalleeWillPop =
4459 X86::isCalleePop(CalleeCC, Subtarget.is64Bit(), isVarArg,
4460 MF.getTarget().Options.GuaranteedTailCallOpt);
4461
4462 if (unsigned BytesToPop =
4463 MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn()) {
4464 // If we have bytes to pop, the callee must pop them.
4465 bool CalleePopMatches = CalleeWillPop && BytesToPop == StackArgsSize;
4466 if (!CalleePopMatches)
4467 return false;
4468 } else if (CalleeWillPop && StackArgsSize > 0) {
4469 // If we don't have bytes to pop, make sure the callee doesn't pop any.
4470 return false;
4471 }
4472
4473 return true;
4474}
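
A standalone summary sketch (not the LLVM code) of the closing callee-pop compatibility rule, with assumed byte counts.

// Standalone sketch (not LLVM code): caller/callee "who pops the bytes" check.
#include <cassert>

static bool calleePopCompatible(unsigned CallerBytesToPop, bool CalleeWillPop,
                                unsigned StackArgsSize) {
  if (CallerBytesToPop != 0)   // caller must return with exactly these bytes popped
    return CalleeWillPop && StackArgsSize == CallerBytesToPop;
  // Caller pops nothing, so a popping callee with stack args would leave the
  // stack pointer off by StackArgsSize when control returns.
  return !(CalleeWillPop && StackArgsSize > 0);
}

int main() {
  assert(calleePopCompatible(12, /*CalleeWillPop=*/true, /*StackArgsSize=*/12));
  assert(!calleePopCompatible(12, true, 8));
  assert(!calleePopCompatible(0, true, 8));
  assert(calleePopCompatible(0, false, 8));
  return 0;
}
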
4475
4476FastISel *
4477X86TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
4478 const TargetLibraryInfo *libInfo) const {
4479 return X86::createFastISel(funcInfo, libInfo);
4480}
4481
4482//===----------------------------------------------------------------------===//
4483// Other Lowering Hooks
4484//===----------------------------------------------------------------------===//
4485
4486static bool MayFoldLoad(SDValue Op) {
4487 return Op.hasOneUse() && ISD::isNormalLoad(Op.getNode());
4488}
4489
4490static bool MayFoldIntoStore(SDValue Op) {
4491 return Op.hasOneUse() && ISD::isNormalStore(*Op.getNode()->use_begin());
4492}
4493
4494static bool MayFoldIntoZeroExtend(SDValue Op) {
4495 if (Op.hasOneUse()) {
4496 unsigned Opcode = Op.getNode()->use_begin()->getOpcode();
4497 return (ISD::ZERO_EXTEND == Opcode);
4498 }
4499 return false;
4500}
4501
4502static bool isTargetShuffle(unsigned Opcode) {
4503 switch(Opcode) {
4504 default: return false;
4505 case X86ISD::BLENDI:
4506 case X86ISD::PSHUFB:
4507 case X86ISD::PSHUFD:
4508 case X86ISD::PSHUFHW:
4509 case X86ISD::PSHUFLW:
4510 case X86ISD::SHUFP:
4511 case X86ISD::INSERTPS:
4512 case X86ISD::EXTRQI:
4513 case X86ISD::INSERTQI:
4514 case X86ISD::PALIGNR:
4515 case X86ISD::VSHLDQ:
4516 case X86ISD::VSRLDQ:
4517 case X86ISD::MOVLHPS:
4518 case X86ISD::MOVHLPS:
4519 case X86ISD::MOVSHDUP:
4520 case X86ISD::MOVSLDUP:
4521 case X86ISD::MOVDDUP:
4522 case X86ISD::MOVSS:
4523 case X86ISD::MOVSD:
4524 case X86ISD::UNPCKL:
4525 case X86ISD::UNPCKH:
4526 case X86ISD::VBROADCAST:
4527 case X86ISD::VPERMILPI:
4528 case X86ISD::VPERMILPV:
4529 case X86ISD::VPERM2X128:
4530 case X86ISD::SHUF128:
4531 case X86ISD::VPERMIL2:
4532 case X86ISD::VPERMI:
4533 case X86ISD::VPPERM:
4534 case X86ISD::VPERMV:
4535 case X86ISD::VPERMV3:
4536 case X86ISD::VZEXT_MOVL:
4537 return true;
4538 }
4539}
4540
4541static bool isTargetShuffleVariableMask(unsigned Opcode) {
4542 switch (Opcode) {
4543 default: return false;
4544 // Target Shuffles.
4545 case X86ISD::PSHUFB:
4546 case X86ISD::VPERMILPV:
4547 case X86ISD::VPERMIL2:
4548 case X86ISD::VPPERM:
4549 case X86ISD::VPERMV:
4550 case X86ISD::VPERMV3:
4551 return true;
4552 // 'Faux' Target Shuffles.
4553 case ISD::OR:
4554 case ISD::AND:
4555 case X86ISD::ANDNP:
4556 return true;
4557 }
4558}
4559
4560SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const {
4561 MachineFunction &MF = DAG.getMachineFunction();
4562 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
4563 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
4564 int ReturnAddrIndex = FuncInfo->getRAIndex();
4565
4566 if (ReturnAddrIndex == 0) {
4567 // Set up a frame object for the return address.
4568 unsigned SlotSize = RegInfo->getSlotSize();
4569 ReturnAddrIndex = MF.getFrameInfo().CreateFixedObject(SlotSize,
4570 -(int64_t)SlotSize,
4571 false);
4572 FuncInfo->setRAIndex(ReturnAddrIndex);
4573 }
4574
4575 return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy(DAG.getDataLayout()));
4576}
4577
4578bool X86::isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
4579 bool hasSymbolicDisplacement) {
4580 // Offset should fit into 32 bit immediate field.
4581 if (!isInt<32>(Offset))
4582 return false;
4583
4584 // If we don't have a symbolic displacement - we don't have any extra
4585 // restrictions.
4586 if (!hasSymbolicDisplacement)
4587 return true;
4588
4589 // FIXME: Some tweaks might be needed for medium code model.
4590 if (M != CodeModel::Small && M != CodeModel::Kernel)
4591 return false;
4592
4593 // For the small code model we assume that the last object ends 16MB before
4594 // the 31-bit boundary. We may also accept pretty large negative constants,
4595 // knowing that all objects are in the positive half of the address space.
4596 if (M == CodeModel::Small && Offset < 16*1024*1024)
4597 return true;
4598
4599 // For the kernel code model we know that all objects reside in the negative
4600 // half of the 32-bit address space. We do not accept negative offsets, since
4601 // they may fall out of range, but we can accept pretty large positive ones.
4602 if (M == CodeModel::Kernel && Offset >= 0)
4603 return true;
4604
4605 return false;
4606}
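
A standalone sketch of the same rules with example offsets; the enum and helper below are illustrative stand-ins, not LLVM API.

// Standalone sketch (not LLVM code): offset rules per code model.
#include <cassert>
#include <cstdint>
#include <limits>

enum class Model { Small, Kernel, Large };

static bool offsetSuitable(int64_t Offset, Model M, bool HasSymbolicDisp) {
  if (Offset < std::numeric_limits<int32_t>::min() ||
      Offset > std::numeric_limits<int32_t>::max())
    return false;                       // must fit a 32-bit immediate
  if (!HasSymbolicDisp)
    return true;
  if (M == Model::Small)
    return Offset < 16 * 1024 * 1024;   // objects assumed to end 16MB below 2^31
  if (M == Model::Kernel)
    return Offset >= 0;                 // objects live in the negative half
  return false;
}

int main() {
  assert(offsetSuitable(1 << 20, Model::Small, true));
  assert(!offsetSuitable(32 * 1024 * 1024, Model::Small, true));
  assert(!offsetSuitable(-8, Model::Kernel, true));
  assert(offsetSuitable(-8, Model::Kernel, false));
  return 0;
}
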
4607
4608/// Determines whether the callee is required to pop its own arguments.
4609/// Callee pop is necessary to support tail calls.
4610bool X86::isCalleePop(CallingConv::ID CallingConv,
4611 bool is64Bit, bool IsVarArg, bool GuaranteeTCO) {
4612 // If GuaranteeTCO is true, we force some calls to be callee pop so that we
4613 // can guarantee TCO.
4614 if (!IsVarArg && shouldGuaranteeTCO(CallingConv, GuaranteeTCO))
4615 return true;
4616
4617 switch (CallingConv) {
4618 default:
4619 return false;
4620 case CallingConv::X86_StdCall:
4621 case CallingConv::X86_FastCall:
4622 case CallingConv::X86_ThisCall:
4623 case CallingConv::X86_VectorCall:
4624 return !is64Bit;
4625 }
4626}
4627
4628/// Return true if the condition is an unsigned comparison operation.
4629static bool isX86CCUnsigned(unsigned X86CC) {
4630 switch (X86CC) {
4631 default:
4632    llvm_unreachable("Invalid integer condition!");
4633 case X86::COND_E:
4634 case X86::COND_NE:
4635 case X86::COND_B:
4636 case X86::COND_A:
4637 case X86::COND_BE:
4638 case X86::COND_AE:
4639 return true;
4640 case X86::COND_G:
4641 case X86::COND_GE:
4642 case X86::COND_L:
4643 case X86::COND_LE:
4644 return false;
4645 }
4646}
4647
4648static X86::CondCode TranslateIntegerX86CC(ISD::CondCode SetCCOpcode) {
4649 switch (SetCCOpcode) {
4650  default: llvm_unreachable("Invalid integer condition!");
4651 case ISD::SETEQ: return X86::COND_E;
4652 case ISD::SETGT: return X86::COND_G;
4653 case ISD::SETGE: return X86::COND_GE;
4654 case ISD::SETLT: return X86::COND_L;
4655 case ISD::SETLE: return X86::COND_LE;
4656 case ISD::SETNE: return X86::COND_NE;
4657 case ISD::SETULT: return X86::COND_B;
4658 case ISD::SETUGT: return X86::COND_A;
4659 case ISD::SETULE: return X86::COND_BE;
4660 case ISD::SETUGE: return X86::COND_AE;
4661 }
4662}
4663
4664/// Do a one-to-one translation of a ISD::CondCode to the X86-specific
4665/// condition code, returning the condition code and the LHS/RHS of the
4666/// comparison to make.
4667static X86::CondCode TranslateX86CC(ISD::CondCode SetCCOpcode, const SDLoc &DL,
4668 bool isFP, SDValue &LHS, SDValue &RHS,
4669 SelectionDAG &DAG) {
4670 if (!isFP) {
4671 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
4672 if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnesValue()) {
4673 // X > -1 -> X == 0, jump !sign.
4674 RHS = DAG.getConstant(0, DL, RHS.getValueType());
4675 return X86::COND_NS;
4676 }
4677 if (SetCCOpcode == ISD::SETLT && RHSC->isNullValue()) {
4678 // X < 0 -> X == 0, jump on sign.
4679 return X86::COND_S;
4680 }
4681 if (SetCCOpcode == ISD::SETLT && RHSC->getZExtValue() == 1) {
4682 // X < 1 -> X <= 0
4683 RHS = DAG.getConstant(0, DL, RHS.getValueType());
4684 return X86::COND_LE;
4685 }
4686 }
4687
4688 return TranslateIntegerX86CC(SetCCOpcode);
4689 }
4690
4691 // First determine if it is required or is profitable to flip the operands.
4692
4693 // If LHS is a foldable load, but RHS is not, flip the condition.
4694 if (ISD::isNON_EXTLoad(LHS.getNode()) &&
4695 !ISD::isNON_EXTLoad(RHS.getNode())) {
4696 SetCCOpcode = getSetCCSwappedOperands(SetCCOpcode);
4697 std::swap(LHS, RHS);
4698 }
4699
4700 switch (SetCCOpcode) {
4701 default: break;
4702 case ISD::SETOLT:
4703 case ISD::SETOLE:
4704 case ISD::SETUGT:
4705 case ISD::SETUGE:
4706 std::swap(LHS, RHS);
4707 break;
4708 }
4709
4710 // On a floating point condition, the flags are set as follows:
4711 // ZF PF CF op
4712 // 0 | 0 | 0 | X > Y
4713 // 0 | 0 | 1 | X < Y
4714 // 1 | 0 | 0 | X == Y
4715 // 1 | 1 | 1 | unordered
4716 switch (SetCCOpcode) {
4717  default: llvm_unreachable("Condcode should be pre-legalized away");
4718 case ISD::SETUEQ:
4719 case ISD::SETEQ: return X86::COND_E;
4720 case ISD::SETOLT: // flipped
4721 case ISD::SETOGT:
4722 case ISD::SETGT: return X86::COND_A;
4723 case ISD::SETOLE: // flipped
4724 case ISD::SETOGE:
4725 case ISD::SETGE: return X86::COND_AE;
4726 case ISD::SETUGT: // flipped
4727 case ISD::SETULT:
4728 case ISD::SETLT: return X86::COND_B;
4729 case ISD::SETUGE: // flipped
4730 case ISD::SETULE:
4731 case ISD::SETLE: return X86::COND_BE;
4732 case ISD::SETONE:
4733 case ISD::SETNE: return X86::COND_NE;
4734 case ISD::SETUO: return X86::COND_P;
4735 case ISD::SETO: return X86::COND_NP;
4736 case ISD::SETOEQ:
4737 case ISD::SETUNE: return X86::COND_INVALID;
4738 }
4739}
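
A standalone model (not LLVM code) of the ZF/PF/CF table in the comment above, assuming ucomisd-style flag behavior: unordered sets all three flags, "greater" clears them, so an ordered X > Y maps to the unsigned "above" condition and X < Y is handled by swapping operands first.

// Standalone sketch (not LLVM code): ZF/PF/CF after a ucomisd-style compare,
// showing why X > Y lowers to the unsigned "above" condition.
#include <cassert>
#include <cmath>
#include <utility>

struct Flags { bool ZF, PF, CF; };

static Flags fpCompare(double X, double Y) {
  if (std::isnan(X) || std::isnan(Y)) return {true, true, true};  // unordered
  if (X > Y)  return {false, false, false};
  if (X < Y)  return {false, false, true};
  return {true, false, false};                                    // equal
}

static bool condA(Flags F)  { return !F.CF && !F.ZF; } // COND_A: above
static bool condNP(Flags F) { return !F.PF; }          // COND_NP: ordered

int main() {
  // SETOGT: X > Y -> COND_A directly.
  assert(condA(fpCompare(2.0, 1.0)));
  // SETOLT: operands are swapped first, then COND_A is used (see switch above).
  double LHS = 1.0, RHS = 2.0;
  std::swap(LHS, RHS);
  assert(condA(fpCompare(LHS, RHS)));
  // SETO -> COND_NP: fails when either input is NaN.
  assert(!condNP(fpCompare(NAN, 1.0)));
  return 0;
}
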
4740
4741/// Is there a floating point cmov for the specific X86 condition code?
4742/// Current x86 isa includes the following FP cmov instructions:
4743/// fcmovb, fcmovbe, fcmove, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu.
4744static bool hasFPCMov(unsigned X86CC) {
4745 switch (X86CC) {
4746 default:
4747 return false;
4748 case X86::COND_B:
4749 case X86::COND_BE:
4750 case X86::COND_E:
4751 case X86::COND_P:
4752 case X86::COND_A:
4753 case X86::COND_AE:
4754 case X86::COND_NE:
4755 case X86::COND_NP:
4756 return true;
4757 }
4758}
4759
4760
4761bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
4762 const CallInst &I,
4763 MachineFunction &MF,
4764 unsigned Intrinsic) const {
4765
4766 const IntrinsicData* IntrData = getIntrinsicWithChain(Intrinsic);
4767 if (!IntrData)
4768 return false;
4769
4770 Info.flags = MachineMemOperand::MONone;
4771 Info.offset = 0;
4772
4773 switch (IntrData->Type) {
4774 case TRUNCATE_TO_MEM_VI8:
4775 case TRUNCATE_TO_MEM_VI16:
4776 case TRUNCATE_TO_MEM_VI32: {
4777 Info.opc = ISD::INTRINSIC_VOID;
4778 Info.ptrVal = I.getArgOperand(0);
4779 MVT VT = MVT::getVT(I.getArgOperand(1)->getType());
4780 MVT ScalarVT = MVT::INVALID_SIMPLE_VALUE_TYPE;
4781 if (IntrData->Type == TRUNCATE_TO_MEM_VI8)
4782 ScalarVT = MVT::i8;
4783 else if (IntrData->Type == TRUNCATE_TO_MEM_VI16)
4784 ScalarVT = MVT::i16;
4785 else if (IntrData->Type == TRUNCATE_TO_MEM_VI32)
4786 ScalarVT = MVT::i32;
4787
4788 Info.memVT = MVT::getVectorVT(ScalarVT, VT.getVectorNumElements());
4789 Info.align = 1;
4790 Info.flags |= MachineMemOperand::MOStore;
4791 break;
4792 }
4793 case GATHER:
4794 case GATHER_AVX2: {
4795 Info.opc = ISD::INTRINSIC_W_CHAIN;
4796 Info.ptrVal = nullptr;
4797 MVT DataVT = MVT::getVT(I.getType());
4798 MVT IndexVT = MVT::getVT(I.getArgOperand(2)->getType());
4799 unsigned NumElts = std::min(DataVT.getVectorNumElements(),
4800 IndexVT.getVectorNumElements());
4801 Info.memVT = MVT::getVectorVT(DataVT.getVectorElementType(), NumElts);
4802 Info.align = 1;
4803 Info.flags |= MachineMemOperand::MOLoad;
4804 break;
4805 }
4806 case SCATTER: {
4807 Info.opc = ISD::INTRINSIC_VOID;
4808 Info.ptrVal = nullptr;
4809 MVT DataVT = MVT::getVT(I.getArgOperand(3)->getType());
4810 MVT IndexVT = MVT::getVT(I.getArgOperand(2)->getType());
4811 unsigned NumElts = std::min(DataVT.getVectorNumElements(),
4812 IndexVT.getVectorNumElements());
4813 Info.memVT = MVT::getVectorVT(DataVT.getVectorElementType(), NumElts);
4814 Info.align = 1;
4815 Info.flags |= MachineMemOperand::MOStore;
4816 break;
4817 }
4818 default:
4819 return false;
4820 }
4821
4822 return true;
4823}
4824
4825/// Returns true if the target can instruction select the
4826/// specified FP immediate natively. If false, the legalizer will
4827/// materialize the FP immediate as a load from a constant pool.
4828bool X86TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
4829 bool ForCodeSize) const {
4830 for (unsigned i = 0, e = LegalFPImmediates.size(); i != e; ++i) {
4831 if (Imm.bitwiseIsEqual(LegalFPImmediates[i]))
4832 return true;
4833 }
4834 return false;
4835}
4836
4837bool X86TargetLowering::shouldReduceLoadWidth(SDNode *Load,
4838 ISD::LoadExtType ExtTy,
4839 EVT NewVT) const {
4840 // "ELF Handling for Thread-Local Storage" specifies that R_X86_64_GOTTPOFF
4841 // relocation targets a movq or addq instruction: don't let the load shrink.
4842 SDValue BasePtr = cast<LoadSDNode>(Load)->getBasePtr();
4843 if (BasePtr.getOpcode() == X86ISD::WrapperRIP)
4844 if (const auto *GA = dyn_cast<GlobalAddressSDNode>(BasePtr.getOperand(0)))
4845 return GA->getTargetFlags() != X86II::MO_GOTTPOFF;
4846 return true;
4847}
4848
4849/// Returns true if it is beneficial to convert a load of a constant
4850/// to just the constant itself.
4851bool X86TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
4852 Type *Ty) const {
4853  assert(Ty->isIntegerTy());
4854
4855 unsigned BitSize = Ty->getPrimitiveSizeInBits();
4856 if (BitSize == 0 || BitSize > 64)
4857 return false;
4858 return true;
4859}
4860
4861bool X86TargetLowering::reduceSelectOfFPConstantLoads(bool IsFPSetCC) const {
4862 // If we are using XMM registers in the ABI and the condition of the select is
4863 // a floating-point compare and we have blendv or conditional move, then it is
4864 // cheaper to select instead of doing a cross-register move and creating a
4865 // load that depends on the compare result.
4866 return !IsFPSetCC || !Subtarget.isTarget64BitLP64() || !Subtarget.hasAVX();
4867}
4868
4869bool X86TargetLowering::convertSelectOfConstantsToMath(EVT VT) const {
4870 // TODO: It might be a win to ease or lift this restriction, but the generic
4871 // folds in DAGCombiner conflict with vector folds for an AVX512 target.
4872 if (VT.isVector() && Subtarget.hasAVX512())
4873 return false;
4874
4875 return true;
4876}
4877
4878bool X86TargetLowering::decomposeMulByConstant(EVT VT, SDValue C) const {
4879 // TODO: We handle scalars using custom code, but generic combining could make
4880 // that unnecessary.
4881 APInt MulC;
4882 if (!ISD::isConstantSplatVector(C.getNode(), MulC))
4883 return false;
4884
4885 // If vector multiply is legal, assume that's faster than shl + add/sub.
4886 // TODO: Multiply is a complex op with higher latency and lower througput in
4887 // most implementations, so this check could be loosened based on type
4888 // and/or a CPU attribute.
4889 if (isOperationLegal(ISD::MUL, VT))
4890 return false;
4891
4892 // shl+add, shl+sub, shl+add+neg
4893 return (MulC + 1).isPowerOf2() || (MulC - 1).isPowerOf2() ||
4894 (1 - MulC).isPowerOf2() || (-(MulC + 1)).isPowerOf2();
4895}
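
A standalone illustration (not the DAG combine itself) of which splat constants the predicate above accepts and why: the multiply can be rebuilt from a single shift plus one add or sub.

// Standalone sketch (not LLVM code): shl+add / shl+sub decompositions that the
// predicate above accepts for a splat constant C.
#include <cassert>
#include <cstdint>

static bool isPow2(int64_t V) { return V > 0 && (V & (V - 1)) == 0; }

static bool decomposable(int64_t C) {
  return isPow2(C + 1) || isPow2(C - 1) || isPow2(1 - C) || isPow2(-(C + 1));
}

int main() {
  assert(decomposable(9));    // x*9  == (x << 3) + x
  assert(decomposable(7));    // x*7  == (x << 3) - x
  assert(decomposable(-3));   // x*-3 == x - (x << 2)
  assert(!decomposable(10));  // would need two shifts, so it is rejected

  int32_t x = 13;
  assert(x * 9 == (x << 3) + x);
  assert(x * 7 == (x << 3) - x);
  assert(x * -3 == x - (x << 2));
  return 0;
}
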
4896
4897bool X86TargetLowering::shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT,
4898 bool IsSigned) const {
4899 // f80 UINT_TO_FP is more efficient using Strict code if FCMOV is available.
4900 return !IsSigned && FpVT == MVT::f80 && Subtarget.hasCMov();
4901}
4902
4903bool X86TargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
4904 unsigned Index) const {
4905 if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
4906 return false;
4907
4908 // Mask vectors support all subregister combinations and operations that
4909 // extract half of a vector.
4910 if (ResVT.getVectorElementType() == MVT::i1)
4911 return Index == 0 || ((ResVT.getSizeInBits() == SrcVT.getSizeInBits()*2) &&
4912 (Index == ResVT.getVectorNumElements()));
4913
4914 return (Index % ResVT.getVectorNumElements()) == 0;
4915}
4916
4917bool X86TargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
4918 // If the vector op is not supported, try to convert to scalar.
4919 EVT VecVT = VecOp.getValueType();
4920 if (!isOperationLegalOrCustomOrPromote(VecOp.getOpcode(), VecVT))
4921 return true;
4922
4923 // If the vector op is supported, but the scalar op is not, the transform may
4924 // not be worthwhile.
4925 EVT ScalarVT = VecVT.getScalarType();
4926 return isOperationLegalOrCustomOrPromote(VecOp.getOpcode(), ScalarVT);
4927}
4928
4929bool X86TargetLowering::shouldFormOverflowOp(unsigned Opcode, EVT VT) const {
4930 // TODO: Allow vectors?
4931 if (VT.isVector())
4932 return false;
4933 return VT.isSimple() || !isOperationExpand(Opcode, VT);
4934}
4935
4936bool X86TargetLowering::isCheapToSpeculateCttz() const {
4937 // Speculate cttz only if we can directly use TZCNT.
4938 return Subtarget.hasBMI();
4939}
4940
4941bool X86TargetLowering::isCheapToSpeculateCtlz() const {
4942 // Speculate ctlz only if we can directly use LZCNT.
4943 return Subtarget.hasLZCNT();
4944}
4945
4946bool X86TargetLowering::isLoadBitCastBeneficial(EVT LoadVT,
4947 EVT BitcastVT) const {
4948 if (!Subtarget.hasAVX512() && !LoadVT.isVector() && BitcastVT.isVector() &&
4949 BitcastVT.getVectorElementType() == MVT::i1)
4950 return false;
4951
4952 if (!Subtarget.hasDQI() && BitcastVT == MVT::v8i1 && LoadVT == MVT::i8)
4953 return false;
4954
4955 return TargetLowering::isLoadBitCastBeneficial(LoadVT, BitcastVT);
4956}
4957
4958bool X86TargetLowering::canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
4959 const SelectionDAG &DAG) const {
4960 // Do not merge to float value size (128 bits) if no implicit
4961 // float attribute is set.
4962 bool NoFloat = DAG.getMachineFunction().getFunction().hasFnAttribute(
4963 Attribute::NoImplicitFloat);
4964
4965 if (NoFloat) {
4966 unsigned MaxIntSize = Subtarget.is64Bit() ? 64 : 32;
4967 return (MemVT.getSizeInBits() <= MaxIntSize);
4968 }
4969 return true;
4970}
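// Illustrative example, not from the LLVM sources: with the NoImplicitFloat
// attribute set on a 64-bit target, MaxIntSize is 64, so merging stores into a
// 128-bit (XMM-sized) value is rejected, while merging up to a single i64
// store is still allowed. Without the attribute, no size limit is imposed here.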
4971
4972bool X86TargetLowering::isCtlzFast() const {
4973 return Subtarget.hasFastLZCNT();
4974}
4975
4976bool X86TargetLowering::isMaskAndCmp0FoldingBeneficial(
4977 const Instruction &AndI) const {
4978 return true;
4979}
4980
4981bool X86TargetLowering::hasAndNotCompare(SDValue Y) const {
4982 EVT VT = Y.getValueType();
4983
4984 if (VT.isVector())
4985 return false;
4986
4987 if (!Subtarget.hasBMI())
4988 return false;
4989
4990 // There are only 32-bit and 64-bit forms for 'andn'.
4991 if (VT != MVT::i32 && VT != MVT::i64)
4992 return false;
4993
4994 return !isa<ConstantSDNode>(Y);
4995}
4996
4997bool X86TargetLowering::hasAndNot(SDValue Y) const {
4998 EVT VT = Y.getValueType();
4999
5000 if (!VT.isVector())
5001 return hasAndNotCompare(Y);
5002
5003 // Vector.
5004
5005 if (!Subtarget.hasSSE1() || VT.getSizeInBits() < 128)
5006 return false;
5007
5008 if (VT == MVT::v4i32)
5009 return true;
5010
5011 return Subtarget.hasSSE2();
5012}
5013
5014bool X86TargetLowering::shouldFoldMaskToVariableShiftPair(SDValue Y) const {
5015 EVT VT = Y.getValueType();
5016
5017 // For vectors, we don't have a preference, but we probably want a mask.
5018 if (VT.isVector())
5019 return false;
5020
5021 // 64-bit shifts on 32-bit targets produce really bad bloated code.
5022 if (VT == MVT::i64 && !Subtarget.is64Bit())
5023 return false;
5024
5025 return true;
5026}
5027
5028bool X86TargetLowering::shouldSplatInsEltVarIndex(EVT VT) const {
5029 // Any legal vector type can be splatted more efficiently than
5030 // loading/spilling from memory.
5031 return isTypeLegal(VT);
5032}
5033
5034MVT X86TargetLowering::hasFastEqualityCompare(unsigned NumBits) const {
5035 MVT VT = MVT::getIntegerVT(NumBits);
5036 if (isTypeLegal(VT))
5037 return VT;
5038
5039 // PMOVMSKB can handle this.
5040 if (NumBits == 128 && isTypeLegal(MVT::v16i8))
5041 return MVT::v16i8;
5042
5043 // VPMOVMSKB can handle this.
5044 if (NumBits == 256 && isTypeLegal(MVT::v32i8))
5045 return MVT::v32i8;
5046
5047 // TODO: Allow 64-bit type for 32-bit target.
5048 // TODO: 512-bit types should be allowed, but make sure that those
5049 // cases are handled in combineVectorSizedSetCCEquality().
5050
5051 return MVT::INVALID_SIMPLE_VALUE_TYPE;
5052}
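// Illustrative example, not from the LLVM sources: for a 256-bit equality
// compare on an AVX2 target, no 256-bit scalar integer type is legal but v32i8
// is, so this hook returns MVT::v32i8 and the comparison is lowered as a vector
// compare collapsed with VPMOVMSKB (see combineVectorSizedSetCCEquality). A
// 512-bit request currently falls through to INVALID_SIMPLE_VALUE_TYPE.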
5053
5054/// Val is the undef sentinel value or equal to the specified value.
5055static bool isUndefOrEqual(int Val, int CmpVal) {
5056 return ((Val == SM_SentinelUndef) || (Val == CmpVal));
5057}
5058
5059/// Val is either the undef or zero sentinel value.
5060static bool isUndefOrZero(int Val) {
5061 return ((Val == SM_SentinelUndef) || (Val == SM_SentinelZero));
5062}
5063
5064/// Return true if every element in Mask, beginning from position Pos and ending
5065/// in Pos+Size is the undef sentinel value.
5066static bool isUndefInRange(ArrayRef<int> Mask, unsigned Pos, unsigned Size) {
5067 for (unsigned i = Pos, e = Pos + Size; i != e; ++i)
5068 if (Mask[i] != SM_SentinelUndef)
5069 return false;
5070 return true;
5071}
5072
5073/// Return true if the mask creates a vector whose lower half is undefined.
5074static bool isUndefLowerHalf(ArrayRef<int> Mask) {
5075 unsigned NumElts = Mask.size();
5076 return isUndefInRange(Mask, 0, NumElts / 2);
5077}
5078
5079/// Return true if the mask creates a vector whose upper half is undefined.
5080static bool isUndefUpperHalf(ArrayRef<int> Mask) {
5081 unsigned NumElts = Mask.size();
5082 return isUndefInRange(Mask, NumElts / 2, NumElts / 2);
5083}
5084
5085/// Return true if Val falls within the specified range [Low, Hi).
5086static bool isInRange(int Val, int Low, int Hi) {
5087 return (Val >= Low && Val < Hi);
5088}
5089
5090/// Return true if the value of any element in Mask falls within the specified
5091/// range [Low, Hi).
5092static bool isAnyInRange(ArrayRef<int> Mask, int Low, int Hi) {
5093 for (int M : Mask)
5094 if (isInRange(M, Low, Hi))
5095 return true;
5096 return false;
5097}
5098
5099/// Return true if Val is undef or if its value falls within the
5100/// specified range [Low, Hi).
5101static bool isUndefOrInRange(int Val, int Low, int Hi) {
5102 return (Val == SM_SentinelUndef) || isInRange(Val, Low, Hi);
5103}
5104
5105/// Return true if every element in Mask is undef or if its value
5106/// falls within the specified range [Low, Hi).
5107static bool isUndefOrInRange(ArrayRef<int> Mask,
5108 int Low, int Hi) {
5109 for (int M : Mask)
5110 if (!isUndefOrInRange(M, Low, Hi))
5111 return false;
5112 return true;
5113}
5114
5115/// Return true if Val is undef, zero or if its value falls within the
5116/// specified range [Low, Hi).
5117static bool isUndefOrZeroOrInRange(int Val, int Low, int Hi) {
5118 return isUndefOrZero(Val) || isInRange(Val, Low, Hi);
5119}
5120
5121/// Return true if every element in Mask is undef, zero or if its value
5122/// falls within the specified range [Low, Hi).
5123static bool isUndefOrZeroOrInRange(ArrayRef<int> Mask, int Low, int Hi) {
5124 for (int M : Mask)
5125 if (!isUndefOrZeroOrInRange(M, Low, Hi))
5126 return false;
5127 return true;
5128}
5129
5130/// Return true if every element in Mask, beginning
5131/// from position Pos and ending in Pos + Size, falls within the specified
5132/// sequence (Low, Low + Step, ..., Low + (Size - 1) * Step) or is undef.
5133static bool isSequentialOrUndefInRange(ArrayRef<int> Mask, unsigned Pos,
5134 unsigned Size, int Low, int Step = 1) {
5135 for (unsigned i = Pos, e = Pos + Size; i != e; ++i, Low += Step)
5136 if (!isUndefOrEqual(Mask[i], Low))
5137 return false;
5138 return true;
5139}
5140
5141/// Return true if every element in Mask, beginning
5142/// from position Pos and ending in Pos+Size, falls within the specified
5143/// sequential range [Low, Low+Size), or is undef or is zero.
5144static bool isSequentialOrUndefOrZeroInRange(ArrayRef<int> Mask, unsigned Pos,
5145 unsigned Size, int Low) {
5146 for (unsigned i = Pos, e = Pos + Size; i != e; ++i, ++Low)
5147 if (!isUndefOrZero(Mask[i]) && Mask[i] != Low)
5148 return false;
5149 return true;
5150}
5151
5152/// Return true if every element in Mask, beginning
5153/// from position Pos and ending in Pos+Size is undef or is zero.
5154static bool isUndefOrZeroInRange(ArrayRef<int> Mask, unsigned Pos,
5155 unsigned Size) {
5156 for (unsigned i = Pos, e = Pos + Size; i != e; ++i)
5157 if (!isUndefOrZero(Mask[i]))
5158 return false;
5159 return true;
5160}
5161
5162/// Helper function to test whether a shuffle mask could be
5163/// simplified by widening the elements being shuffled.
5164///
5165/// Appends the mask for wider elements in WidenedMask if valid. Otherwise
5166/// leaves it in an unspecified state.
5167///
5168/// NOTE: This must handle normal vector shuffle masks and *target* vector
5169/// shuffle masks. The latter have the special property of a '-2' representing
5170/// a zeroed lane of a vector.
5171static bool canWidenShuffleElements(ArrayRef<int> Mask,
5172 SmallVectorImpl<int> &WidenedMask) {
5173 WidenedMask.assign(Mask.size() / 2, 0);
5174 for (int i = 0, Size = Mask.size(); i < Size; i += 2) {
5175 int M0 = Mask[i];
5176 int M1 = Mask[i + 1];
5177
5178 // If both elements are undef, it's trivial.
5179 if (M0 == SM_SentinelUndef && M1 == SM_SentinelUndef) {
5180 WidenedMask[i / 2] = SM_SentinelUndef;
5181 continue;
5182 }
5183
5184 // Check for an undef mask and a mask value properly aligned to fit with
5185 // a pair of values. If we find such a case, use the non-undef mask's value.
5186 if (M0 == SM_SentinelUndef && M1 >= 0 && (M1 % 2) == 1) {
5187 WidenedMask[i / 2] = M1 / 2;
5188 continue;
5189 }
5190 if (M1 == SM_SentinelUndef && M0 >= 0 && (M0 % 2) == 0) {
5191 WidenedMask[i / 2] = M0 / 2;
5192 continue;
5193 }
5194
5195 // When zeroing, we need to spread the zeroing across both lanes to widen.
5196 if (M0 == SM_SentinelZero || M1 == SM_SentinelZero) {
5197 if ((M0 == SM_SentinelZero || M0 == SM_SentinelUndef) &&
5198 (M1 == SM_SentinelZero || M1 == SM_SentinelUndef)) {
5199 WidenedMask[i / 2] = SM_SentinelZero;
5200 continue;
5201 }
5202 return false;
5203 }
5204
5205 // Finally check if the two mask values are adjacent and aligned with
5206 // a pair.
5207 if (M0 != SM_SentinelUndef && (M0 % 2) == 0 && (M0 + 1) == M1) {
5208 WidenedMask[i / 2] = M0 / 2;
5209 continue;
5210 }
5211
5212 // Otherwise we can't safely widen the elements used in this shuffle.
5213 return false;
5214 }
5215 assert(WidenedMask.size() == Mask.size() / 2 &&
5216        "Incorrect size of mask after widening the elements!");
5217
5218 return true;
5219}
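// Illustrative examples, not from the LLVM sources, of masks this helper
// accepts or rejects (element counts halve and surviving indices divide by 2):
//
//   <0, 1, 6, 7>                              -> <0, 3>               (widened)
//   <-1, 1, 4, 5>                             -> <0, 2> (undef paired with odd)
//   <SM_SentinelZero, SM_SentinelUndef, 2, 3> -> <SM_SentinelZero, 1>
//   <1, 2, 4, 5>                              -> rejected: 1 and 2 are not an
//                                                aligned even/odd pair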
5220
5221static bool canWidenShuffleElements(ArrayRef<int> Mask,
5222 const APInt &Zeroable,
5223 SmallVectorImpl<int> &WidenedMask) {
5224 SmallVector<int, 32> TargetMask(Mask.begin(), Mask.end());
5225 for (int i = 0, Size = TargetMask.size(); i < Size; ++i) {
5226 if (TargetMask[i] == SM_SentinelUndef)
5227 continue;
5228 if (Zeroable[i])
5229 TargetMask[i] = SM_SentinelZero;
5230 }
5231 return canWidenShuffleElements(TargetMask, WidenedMask);
5232}
5233
5234static bool canWidenShuffleElements(ArrayRef<int> Mask) {
5235 SmallVector<int, 32> WidenedMask;
5236 return canWidenShuffleElements(Mask, WidenedMask);
5237}
5238
5239/// Returns true if Elt is a constant zero or a floating point constant +0.0.
5240bool X86::isZeroNode(SDValue Elt) {
5241 return isNullConstant(Elt) || isNullFPConstant(Elt);
5242}
5243
5244// Build a vector of constants.
5245// Use an UNDEF node if MaskElt == -1.
5246// Split 64-bit constants in 32-bit mode.
5247static SDValue getConstVector(ArrayRef<int> Values, MVT VT, SelectionDAG &DAG,
5248 const SDLoc &dl, bool IsMask = false) {
5249
5250 SmallVector<SDValue, 32> Ops;
5251 bool Split = false;
5252
5253 MVT ConstVecVT = VT;
5254 unsigned NumElts = VT.getVectorNumElements();
5255 bool In64BitMode = DAG.getTargetLoweringInfo().isTypeLegal(MVT::i64);
5256 if (!In64BitMode && VT.getVectorElementType() == MVT::i64) {
5257 ConstVecVT = MVT::getVectorVT(MVT::i32, NumElts * 2);
5258 Split = true;
5259 }
5260
5261 MVT EltVT = ConstVecVT.getVectorElementType();
5262 for (unsigned i = 0; i < NumElts; ++i) {
5263 bool IsUndef = Values[i] < 0 && IsMask;
5264 SDValue OpNode = IsUndef ? DAG.getUNDEF(EltVT) :
5265 DAG.getConstant(Values[i], dl, EltVT);
5266 Ops.push_back(OpNode);
5267 if (Split)
5268 Ops.push_back(IsUndef ? DAG.getUNDEF(EltVT) :
5269 DAG.getConstant(0, dl, EltVT));
5270 }
5271 SDValue ConstsNode = DAG.getBuildVector(ConstVecVT, dl, Ops);
5272 if (Split)
5273 ConstsNode = DAG.getBitcast(VT, ConstsNode);
5274 return ConstsNode;
5275}
5276
5277static SDValue getConstVector(ArrayRef<APInt> Bits, APInt &Undefs,
5278 MVT VT, SelectionDAG &DAG, const SDLoc &dl) {
5279 assert(Bits.size() == Undefs.getBitWidth() &&
5280        "Unequal constant and undef arrays");
5281 SmallVector<SDValue, 32> Ops;
5282 bool Split = false;
5283
5284 MVT ConstVecVT = VT;
5285 unsigned NumElts = VT.getVectorNumElements();
5286 bool In64BitMode = DAG.getTargetLoweringInfo().isTypeLegal(MVT::i64);
5287 if (!In64BitMode && VT.getVectorElementType() == MVT::i64) {
5288 ConstVecVT = MVT::getVectorVT(MVT::i32, NumElts * 2);
5289 Split = true;
5290 }
5291
5292 MVT EltVT = ConstVecVT.getVectorElementType();
5293 for (unsigned i = 0, e = Bits.size(); i != e; ++i) {
5294 if (Undefs[i]) {
5295 Ops.append(Split ? 2 : 1, DAG.getUNDEF(EltVT));
5296 continue;
5297 }
5298 const APInt &V = Bits[i];
5299 assert(V.getBitWidth() == VT.getScalarSizeInBits() && "Unexpected sizes");
5300 if (Split) {
5301 Ops.push_back(DAG.getConstant(V.trunc(32), dl, EltVT));
5302 Ops.push_back(DAG.getConstant(V.lshr(32).trunc(32), dl, EltVT));
5303 } else if (EltVT == MVT::f32) {
5304 APFloat FV(APFloat::IEEEsingle(), V);
5305 Ops.push_back(DAG.getConstantFP(FV, dl, EltVT));
5306 } else if (EltVT == MVT::f64) {
5307 APFloat FV(APFloat::IEEEdouble(), V);
5308 Ops.push_back(DAG.getConstantFP(FV, dl, EltVT));
5309 } else {
5310 Ops.push_back(DAG.getConstant(V, dl, EltVT));
5311 }
5312 }
5313
5314 SDValue ConstsNode = DAG.getBuildVector(ConstVecVT, dl, Ops);
5315 return DAG.getBitcast(VT, ConstsNode);
5316}
5317
5318/// Returns a vector of specified type with all zero elements.
5319static SDValue getZeroVector(MVT VT, const X86Subtarget &Subtarget,
5320 SelectionDAG &DAG, const SDLoc &dl) {
5321 assert((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector() ||
5322         VT.getVectorElementType() == MVT::i1) &&
5323        "Unexpected vector type");
5324
5325 // Try to build SSE/AVX zero vectors as <N x i32> bitcasted to their dest
5326 // type. This ensures they get CSE'd. But if the integer type is not
5327 // available, use a floating-point +0.0 instead.
5328 SDValue Vec;
5329 if (!Subtarget.hasSSE2() && VT.is128BitVector()) {
5330 Vec = DAG.getConstantFP(+0.0, dl, MVT::v4f32);
5331 } else if (VT.getVectorElementType() == MVT::i1) {
5332 assert((Subtarget.hasBWI() || VT.getVectorNumElements() <= 16) &&
5333        "Unexpected vector type");
5334 Vec = DAG.getConstant(0, dl, VT);
5335 } else {
5336 unsigned Num32BitElts = VT.getSizeInBits() / 32;
5337 Vec = DAG.getConstant(0, dl, MVT::getVectorVT(MVT::i32, Num32BitElts));
5338 }
5339 return DAG.getBitcast(VT, Vec);
5340}
5341
5342static SDValue extractSubVector(SDValue Vec, unsigned IdxVal, SelectionDAG &DAG,
5343 const SDLoc &dl, unsigned vectorWidth) {
5344 EVT VT = Vec.getValueType();
5345 EVT ElVT = VT.getVectorElementType();
5346 unsigned Factor = VT.getSizeInBits()/vectorWidth;
5347 EVT ResultVT = EVT::getVectorVT(*DAG.getContext(), ElVT,
5348 VT.getVectorNumElements()/Factor);
5349
5350 // Extract the relevant vectorWidth bits. Generate an EXTRACT_SUBVECTOR
5351 unsigned ElemsPerChunk = vectorWidth / ElVT.getSizeInBits();
5352 assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2");
5353
5354 // This is the index of the first element of the vectorWidth-bit chunk
5355 // we want. Since ElemsPerChunk is a power of 2 just need to clear bits.
5356 IdxVal &= ~(ElemsPerChunk - 1);
5357
5358 // If the input is a buildvector just emit a smaller one.
5359 if (Vec.getOpcode() == ISD::BUILD_VECTOR)
5360 return DAG.getBuildVector(ResultVT, dl,
5361 Vec->ops().slice(IdxVal, ElemsPerChunk));
5362
5363 SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, dl);
5364 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResultVT, Vec, VecIdx);
5365}
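// Illustrative example, not from the LLVM sources: extracting 128 bits from a
// v8f32 (256-bit) input with IdxVal = 5 gives ElemsPerChunk = 128 / 32 = 4, so
// IdxVal &= ~(4 - 1) rounds the index down to 4 and the result is the upper
// half <4,5,6,7> as an EXTRACT_SUBVECTOR of type v4f32 at index 4.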
5366
5367/// Generate a DAG to grab 128-bits from a vector > 128 bits. This
5368/// sets things up to match to an AVX VEXTRACTF128 / VEXTRACTI128
5369/// or AVX-512 VEXTRACTF32x4 / VEXTRACTI32x4
5370/// instructions or a simple subregister reference. Idx is an index in the
5371/// 128 bits we want. It need not be aligned to a 128-bit boundary. That makes
5372/// lowering EXTRACT_VECTOR_ELT operations easier.
5373static SDValue extract128BitVector(SDValue Vec, unsigned IdxVal,
5374 SelectionDAG &DAG, const SDLoc &dl) {
5375 assert((Vec.getValueType().is256BitVector() ||
5376         Vec.getValueType().is512BitVector()) && "Unexpected vector size!");
5377 return extractSubVector(Vec, IdxVal, DAG, dl, 128);
5378}
5379
5380/// Generate a DAG to grab 256-bits from a 512-bit vector.
5381static SDValue extract256BitVector(SDValue Vec, unsigned IdxVal,
5382 SelectionDAG &DAG, const SDLoc &dl) {
5383 assert(Vec.getValueType().is512BitVector() && "Unexpected vector size!");
5384 return extractSubVector(Vec, IdxVal, DAG, dl, 256);
5385}
5386
5387static SDValue insertSubVector(SDValue Result, SDValue Vec, unsigned IdxVal,
5388 SelectionDAG &DAG, const SDLoc &dl,
5389 unsigned vectorWidth) {
5390 assert((vectorWidth == 128 || vectorWidth == 256) &&
5391        "Unsupported vector width");
5392 // Inserting an UNDEF subvector leaves Result unchanged.
5393 if (Vec.isUndef())
5394 return Result;
5395 EVT VT = Vec.getValueType();
5396 EVT ElVT = VT.getVectorElementType();
5397 EVT ResultVT = Result.getValueType();
5398
5399 // Insert the relevant vectorWidth bits.
5400 unsigned ElemsPerChunk = vectorWidth/ElVT.getSizeInBits();
5401 assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2");
5402
5403 // This is the index of the first element of the vectorWidth-bit chunk
5404 // we want. Since ElemsPerChunk is a power of 2 just need to clear bits.
5405 IdxVal &= ~(ElemsPerChunk - 1);
5406
5407 SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, dl);
5408 return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Result, Vec, VecIdx);
5409}
5410
5411/// Generate a DAG to put 128-bits into a vector > 128 bits. This
5412/// sets things up to match to an AVX VINSERTF128/VINSERTI128 or
5413/// AVX-512 VINSERTF32x4/VINSERTI32x4 instructions or a
5414/// simple superregister reference. Idx is an index in the 128 bits
5415/// we want. It need not be aligned to a 128-bit boundary. That makes
5416/// lowering INSERT_VECTOR_ELT operations easier.
5417static SDValue insert128BitVector(SDValue Result, SDValue Vec, unsigned IdxVal,
5418 SelectionDAG &DAG, const SDLoc &dl) {
5419 assert(Vec.getValueType().is128BitVector() && "Unexpected vector size!");
5420 return insertSubVector(Result, Vec, IdxVal, DAG, dl, 128);
5421}
5422
5423/// Widen a vector to a larger size with the same scalar type, with the new
5424/// elements either zero or undef.
5425static SDValue widenSubVector(MVT VT, SDValue Vec, bool ZeroNewElements,
5426 const X86Subtarget &Subtarget, SelectionDAG &DAG,
5427 const SDLoc &dl) {
5428 assert(Vec.getValueSizeInBits() < VT.getSizeInBits() &&
5429        Vec.getValueType().getScalarType() == VT.getScalarType() &&
5430        "Unsupported vector widening type");
5431 SDValue Res = ZeroNewElements ? getZeroVector(VT, Subtarget, DAG, dl)
5432 : DAG.getUNDEF(VT);
5433 return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VT, Res, Vec,
5434 DAG.getIntPtrConstant(0, dl));
5435}
5436
5437// Helper for splitting the operands of an operation to the legal target size
5438// and applying a function on each part.
5439// Useful for operations that are available on SSE2 in 128-bit, on AVX2 in
5440// 256-bit and on AVX512BW in 512-bit. The argument VT is the type used for
5441// deciding if/how to split Ops. Ops elements do *not* have to be of type VT.
5442// The argument Builder is a function that will be applied on each split part:
5443// SDValue Builder(SelectionDAG &G, const SDLoc &DL, ArrayRef<SDValue> Ops)
5444template <typename F>
5445SDValue SplitOpsAndApply(SelectionDAG &DAG, const X86Subtarget &Subtarget,
5446 const SDLoc &DL, EVT VT, ArrayRef<SDValue> Ops,
5447 F Builder, bool CheckBWI = true) {
5448 assert(Subtarget.hasSSE2() && "Target assumed to support at least SSE2");
5449 unsigned NumSubs = 1;
5450 if ((CheckBWI && Subtarget.useBWIRegs()) ||
5451 (!CheckBWI && Subtarget.useAVX512Regs())) {
5452 if (VT.getSizeInBits() > 512) {
5453 NumSubs = VT.getSizeInBits() / 512;
5454 assert((VT.getSizeInBits() % 512) == 0 && "Illegal vector size");
5455 }
5456 } else if (Subtarget.hasAVX2()) {
5457 if (VT.getSizeInBits() > 256) {
5458 NumSubs = VT.getSizeInBits() / 256;
5459 assert((VT.getSizeInBits() % 256) == 0 && "Illegal vector size");
5460 }
5461 } else {
5462 if (VT.getSizeInBits() > 128) {
5463 NumSubs = VT.getSizeInBits() / 128;
5464 assert((VT.getSizeInBits() % 128) == 0 && "Illegal vector size");
5465 }
5466 }
5467
5468 if (NumSubs == 1)
5469 return Builder(DAG, DL, Ops);
5470
5471 SmallVector<SDValue, 4> Subs;
5472 for (unsigned i = 0; i != NumSubs; ++i) {
5473 SmallVector<SDValue, 2> SubOps;
5474 for (SDValue Op : Ops) {
5475 EVT OpVT = Op.getValueType();
5476 unsigned NumSubElts = OpVT.getVectorNumElements() / NumSubs;
5477 unsigned SizeSub = OpVT.getSizeInBits() / NumSubs;
5478 SubOps.push_back(extractSubVector(Op, i * NumSubElts, DAG, DL, SizeSub));
5479 }
5480 Subs.push_back(Builder(DAG, DL, SubOps));
5481 }
5482 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Subs);
5483}
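// Illustrative usage sketch, not from this file; DAG, Subtarget, DL, VT, A and
// B are assumed to be in scope, as they are at the existing call sites. For a
// 512-bit VT on an AVX2-only target the builder lambda runs once per 256-bit
// half and the results are re-joined with CONCAT_VECTORS:
//
//   auto AddBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
//                        ArrayRef<SDValue> Ops) {
//     return DAG.getNode(ISD::ADD, DL, Ops[0].getValueType(), Ops[0], Ops[1]);
//   };
//   SDValue Sum = SplitOpsAndApply(DAG, Subtarget, DL, VT, {A, B}, AddBuilder);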
5484
5485// Return true if the instruction zeroes the unused upper part of the
5486// destination and accepts a mask.
5487static bool isMaskedZeroUpperBitsvXi1(unsigned int Opcode) {
5488 switch (Opcode) {
5489 default:
5490 return false;
5491 case X86ISD::CMPM:
5492 case X86ISD::CMPM_SAE:
5493 case ISD::SETCC:
5494 return true;
5495 }
5496}
5497
5498/// Insert i1-subvector to i1-vector.
5499static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG,
5500 const X86Subtarget &Subtarget) {
5501
5502 SDLoc dl(Op);
5503 SDValue Vec = Op.getOperand(0);
5504 SDValue SubVec = Op.getOperand(1);
5505 SDValue Idx = Op.getOperand(2);
5506
5507 if (!isa<ConstantSDNode>(Idx))
5508 return SDValue();
5509
5510 // Inserting undef is a nop. We can just return the original vector.
5511 if (SubVec.isUndef())
5512 return Vec;
5513
5514 unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
5515 if (IdxVal == 0 && Vec.isUndef()) // the operation is legal
5516 return Op;
5517
5518 MVT OpVT = Op.getSimpleValueType();
5519 unsigned NumElems = OpVT.getVectorNumElements();
5520
5521 SDValue ZeroIdx = DAG.getIntPtrConstant(0, dl);
5522
5523 // Extend to natively supported kshift.
5524 MVT WideOpVT = OpVT;
5525 if ((!Subtarget.hasDQI() && NumElems == 8) || NumElems < 8)
5526 WideOpVT = Subtarget.hasDQI() ? MVT::v8i1 : MVT::v16i1;
5527
5528 // Inserting into the lsbs of a zero vector is legal. ISel will insert shifts
5529 // if necessary.
5530 if (IdxVal == 0 && ISD::isBuildVectorAllZeros(Vec.getNode())) {
5531 // May need to promote to a legal type.
5532 Op = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
5533 getZeroVector(WideOpVT, Subtarget, DAG, dl),
5534 SubVec, Idx);
5535 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, Op, ZeroIdx);
5536 }
5537
5538 MVT SubVecVT = SubVec.getSimpleValueType();
5539 unsigned SubVecNumElems = SubVecVT.getVectorNumElements();
5540
5541 assert(IdxVal + SubVecNumElems <= NumElems &&
5542        IdxVal % SubVecVT.getSizeInBits() == 0 &&
5543        "Unexpected index value in INSERT_SUBVECTOR");
5544
5545 SDValue Undef = DAG.getUNDEF(WideOpVT);
5546
5547 if (IdxVal == 0) {
5548 // Zero lower bits of the Vec
5549 SDValue ShiftBits = DAG.getConstant(SubVecNumElems, dl, MVT::i8);
5550 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, Undef, Vec,
5551 ZeroIdx);
5552 Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec, ShiftBits);
5553 Vec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Vec, ShiftBits);
5554 // Merge them together, SubVec should be zero extended.
5555 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
5556 getZeroVector(WideOpVT, Subtarget, DAG, dl),
5557 SubVec, ZeroIdx);
5558 Op = DAG.getNode(ISD::OR, dl, WideOpVT, Vec, SubVec);
5559 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, Op, ZeroIdx);
5560 }
5561
5562 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
5563 Undef, SubVec, ZeroIdx);
5564
5565 if (Vec.isUndef()) {
5566 assert(IdxVal != 0 && "Unexpected index");
5567 SubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, SubVec,
5568 DAG.getConstant(IdxVal, dl, MVT::i8));
5569 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, SubVec, ZeroIdx);
5570 }
5571
5572 if (ISD::isBuildVectorAllZeros(Vec.getNode())) {
5573 assert(IdxVal != 0 && "Unexpected index");
5574 NumElems = WideOpVT.getVectorNumElements();
5575 unsigned ShiftLeft = NumElems - SubVecNumElems;
5576 unsigned ShiftRight = NumElems - SubVecNumElems - IdxVal;
5577 SubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, SubVec,
5578 DAG.getConstant(ShiftLeft, dl, MVT::i8));
5579 if (ShiftRight != 0)
5580 SubVec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, SubVec,
5581 DAG.getConstant(ShiftRight, dl, MVT::i8));
5582 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, SubVec, ZeroIdx);
5583 }
5584
5585 // Simple case when we put the subvector in the upper part
5586 if (IdxVal + SubVecNumElems == NumElems) {
5587 SubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, SubVec,
5588 DAG.getConstant(IdxVal, dl, MVT::i8));
5589 if (SubVecNumElems * 2 == NumElems) {
5590 // Special case, use legal zero extending insert_subvector. This allows
5591 // isel to optimize when bits are known zero.
5592 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVecVT, Vec, ZeroIdx);
5593 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
5594 getZeroVector(WideOpVT, Subtarget, DAG, dl),
5595 Vec, ZeroIdx);
5596 } else {
5597 // Otherwise use explicit shifts to zero the bits.
5598 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
5599 Undef, Vec, ZeroIdx);
5600 NumElems = WideOpVT.getVectorNumElements();
5601 SDValue ShiftBits = DAG.getConstant(NumElems - IdxVal, dl, MVT::i8);
5602 Vec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Vec, ShiftBits);
5603 Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec, ShiftBits);
5604 }
5605 Op = DAG.getNode(ISD::OR, dl, WideOpVT, Vec, SubVec);
5606 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, Op, ZeroIdx);
5607 }
5608
5609 // Inserting into the middle is more complicated.
5610
5611 NumElems = WideOpVT.getVectorNumElements();
5612
5613 // Widen the vector if needed.
5614 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, Undef, Vec, ZeroIdx);
5615 // Move the current value of the bits to be replaced down to the LSBs.
5616 Op = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec,
5617 DAG.getConstant(IdxVal, dl, MVT::i8));
5618 // Xor with the new bit.
5619 Op = DAG.getNode(ISD::XOR, dl, WideOpVT, Op, SubVec);
5620 // Shift to MSB, filling bottom bits with 0.
5621 unsigned ShiftLeft = NumElems - SubVecNumElems;
5622 Op = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Op,
5623 DAG.getConstant(ShiftLeft, dl, MVT::i8));
5624 // Shift to the final position, filling upper bits with 0.
5625 unsigned ShiftRight = NumElems - SubVecNumElems - IdxVal;
5626 Op = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Op,
5627 DAG.getConstant(ShiftRight, dl, MVT::i8));
5628 // Xor with original vector leaving the new value.
5629 Op = DAG.getNode(ISD::XOR, dl, WideOpVT, Vec, Op);
5630 // Reduce to original width if needed.
5631 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, Op, ZeroIdx);
5632}
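// Illustrative trace, not from the LLVM sources, of the middle-insertion path
// above for OpVT = v16i1, SubVecVT = v4i1, IdxVal = 4 (bit i of the k-register
// holds element i, so the low bit is element 0):
//
//   Vec            = 0xFFFF        SubVec = 0x5 (elements <1,0,1,0>)
//   KSHIFTR by 4   : 0x0FFF        bits to be replaced moved down to the LSBs
//   XOR SubVec     : 0x0FFA        low 4 bits now hold Vec ^ SubVec
//   KSHIFTL by 12  : 0xA000        keep only those 4 bits, parked at the MSBs
//   KSHIFTR by 8   : 0x00A0        field lands at bits 4..7, zeros elsewhere
//   XOR Vec        : 0xFF5F        Vec ^ (Vec ^ SubVec) = SubVec in the field,
//                                  Vec unchanged everywhere else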
5633
5634static SDValue concatSubVectors(SDValue V1, SDValue V2, EVT VT,
5635 unsigned NumElems, SelectionDAG &DAG,
5636 const SDLoc &dl, unsigned VectorWidth) {
5637 SDValue V = insertSubVector(DAG.getUNDEF(VT), V1, 0, DAG, dl, VectorWidth);
5638 return insertSubVector(V, V2, NumElems / 2, DAG, dl, VectorWidth);
5639}
5640
5641/// Returns a vector of specified type with all bits set.
5642/// Always build ones vectors as <4 x i32>, <8 x i32> or <16 x i32>.
5643/// Then bitcast to their original type, ensuring they get CSE'd.
5644static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, const SDLoc &dl) {
5645 assert((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) &&
5646        "Expected a 128/256/512-bit vector type");
5647
5648 APInt Ones = APInt::getAllOnesValue(32);
5649 unsigned NumElts = VT.getSizeInBits() / 32;
5650 SDValue Vec = DAG.getConstant(Ones, dl, MVT::getVectorVT(MVT::i32, NumElts));
5651 return DAG.getBitcast(VT, Vec);
5652}
5653
5654static SDValue getExtendInVec(bool Signed, const SDLoc &DL, EVT VT, SDValue In,
5655 SelectionDAG &DAG) {
5656 EVT InVT = In.getValueType();
5657 assert(VT.isVector() && InVT.isVector() && "Expected vector VTs.");
5658
5659 // For 256-bit vectors, we only need the lower (128-bit) input half.
5660 // For 512-bit vectors, we only need the lower input half or quarter.
5661 if (InVT.getSizeInBits() > 128) {
5662 assert(VT.getSizeInBits() == InVT.getSizeInBits() &&
5663        "Expected VTs to be the same size!");
5664 unsigned Scale = VT.getScalarSizeInBits() / InVT.getScalarSizeInBits();
5665 In = extractSubVector(In, 0, DAG, DL,
5666 std::max(128U, VT.getSizeInBits() / Scale));
5667 InVT = In.getValueType();
5668 }
5669
5670 if (VT.getVectorNumElements() == InVT.getVectorNumElements())
5671 return DAG.getNode(Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
5672 DL, VT, In);
5673
5674 return DAG.getNode(Signed ? ISD::SIGN_EXTEND_VECTOR_INREG
5675 : ISD::ZERO_EXTEND_VECTOR_INREG,
5676 DL, VT, In);
5677}
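// Illustrative example, not from the LLVM sources: for VT = v16i16 and
// In = v32i8 (both 256 bits wide), Scale = 16 / 8 = 2, so only the low
// max(128, 256 / 2) = 128 bits of In are extracted (v16i8). The element counts
// then match and a plain SIGN_EXTEND / ZERO_EXTEND to v16i16 is emitted; when
// the counts still differ, the *_EXTEND_VECTOR_INREG form is used instead.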
5678
5679/// Returns a vector_shuffle node for an unpackl operation.
5680static SDValue getUnpackl(SelectionDAG &DAG, const SDLoc &dl, MVT VT,
5681 SDValue V1, SDValue V2) {
5682 SmallVector<int, 8> Mask;
5683 createUnpackShuffleMask(VT, Mask, /* Lo = */ true, /* Unary = */ false);
5684 return DAG.getVectorShuffle(VT, dl, V1, V2, Mask);
5685}
5686
5687/// Returns a vector_shuffle node for an unpackh operation.
5688static SDValue getUnpackh(SelectionDAG &DAG, const SDLoc &dl, MVT VT,
5689 SDValue V1, SDValue V2) {
5690 SmallVector<int, 8> Mask;
5691 createUnpackShuffleMask(VT, Mask, /* Lo = */ false, /* Unary = */ false);
5692 return DAG.getVectorShuffle(VT, dl, V1, V2, Mask);
5693}
5694
5695/// Return a vector_shuffle of the specified vector and a zero or undef vector.
5696/// This produces a shuffle where the low element of V2 is swizzled into the
5697/// zero/undef vector, landing at element Idx.
5698/// This produces a shuffle mask like 4,1,2,3 (idx=0) or 0,1,2,4 (idx=3).
5699static SDValue getShuffleVectorZeroOrUndef(SDValue V2, int Idx,
5700 bool IsZero,
5701 const X86Subtarget &Subtarget,
5702 SelectionDAG &DAG) {
5703 MVT VT = V2.getSimpleValueType();
5704 SDValue V1 = IsZero
5705 ? getZeroVector(VT, Subtarget, DAG, SDLoc(V2)) : DAG.getUNDEF(VT);
5706 int NumElems = VT.getVectorNumElements();
5707 SmallVector<int, 16> MaskVec(NumElems);
5708 for (int i = 0; i != NumElems; ++i)
5709 // If this is the insertion idx, put the low elt of V2 here.
5710 MaskVec[i] = (i == Idx) ? NumElems : i;
5711 return DAG.getVectorShuffle(VT, SDLoc(V2), V1, V2, MaskVec);
5712}
5713
5714// Peek through EXTRACT_SUBVECTORs - typically used for AVX1 256-bit intops.
5715static SDValue peekThroughEXTRACT_SUBVECTORs(SDValue V) {
5716 while (V.getOpcode() == ISD::EXTRACT_SUBVECTOR)
5717 V = V.getOperand(0);
5718 return V;
5719}
5720
5721static const Constant *getTargetConstantFromNode(SDValue Op) {
5722 Op = peekThroughBitcasts(Op);
5723
5724 auto *Load = dyn_cast<LoadSDNode>(Op);
5725 if (!Load)
5726 return nullptr;
5727
5728 SDValue Ptr = Load->getBasePtr();
5729 if (Ptr->getOpcode() == X86ISD::Wrapper ||
5730 Ptr->getOpcode() == X86ISD::WrapperRIP)
5731 Ptr = Ptr->getOperand(0);
5732
5733 auto *CNode = dyn_cast<ConstantPoolSDNode>(Ptr);
5734 if (!CNode || CNode->isMachineConstantPoolEntry() || CNode->getOffset() != 0)
5735 return nullptr;
5736
5737 return CNode->getConstVal();
5738}
5739
5740// Extract raw constant bits from constant pools.
5741static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
5742 APInt &UndefElts,
5743 SmallVectorImpl<APInt> &EltBits,
5744 bool AllowWholeUndefs = true,
5745 bool AllowPartialUndefs = true) {
5746 assert(EltBits.empty() && "Expected an empty EltBits vector");
5747
5748 Op = peekThroughBitcasts(Op);
5749
5750 EVT VT = Op.getValueType();
5751 unsigned SizeInBits = VT.getSizeInBits();
5752 assert((SizeInBits % EltSizeInBits) == 0 && "Can't split constant!");
5753 unsigned NumElts = SizeInBits / EltSizeInBits;
5754
5755 // Bitcast a source array of element bits to the target size.
5756 auto CastBitData = [&](APInt &UndefSrcElts, ArrayRef<APInt> SrcEltBits) {
5757 unsigned NumSrcElts = UndefSrcElts.getBitWidth();
5758 unsigned SrcEltSizeInBits = SrcEltBits[0].getBitWidth();
5759 assert((NumSrcElts * SrcEltSizeInBits) == SizeInBits &&
5760        "Constant bit sizes don't match");
5761
5762 // Don't split if we don't allow undef bits.
5763 bool AllowUndefs = AllowWholeUndefs || AllowPartialUndefs;
5764 if (UndefSrcElts.getBoolValue() && !AllowUndefs)
5765 return false;
5766
5767 // If we're already the right size, don't bother bitcasting.
5768 if (NumSrcElts == NumElts) {
5769 UndefElts = UndefSrcElts;
5770 EltBits.assign(SrcEltBits.begin(), SrcEltBits.end());
5771 return true;
5772 }
5773
5774 // Extract all the undef/constant element data and pack into single bitsets.
5775 APInt UndefBits(SizeInBits, 0);
5776 APInt MaskBits(SizeInBits, 0);
5777
5778 for (unsigned i = 0; i != NumSrcElts; ++i) {
5779 unsigned BitOffset = i * SrcEltSizeInBits;
5780 if (UndefSrcElts[i])
5781 UndefBits.setBits(BitOffset, BitOffset + SrcEltSizeInBits);
5782 MaskBits.insertBits(SrcEltBits[i], BitOffset);
5783 }
5784
5785 // Split the undef/constant single bitset data into the target elements.
5786 UndefElts = APInt(NumElts, 0);
5787 EltBits.resize(NumElts, APInt(EltSizeInBits, 0));
5788
5789 for (unsigned i = 0; i != NumElts; ++i) {
5790 unsigned BitOffset = i * EltSizeInBits;
5791 APInt UndefEltBits = UndefBits.extractBits(EltSizeInBits, BitOffset);
5792
5793 // Only treat an element as UNDEF if all bits are UNDEF.
5794 if (UndefEltBits.isAllOnesValue()) {
5795 if (!AllowWholeUndefs)
5796 return false;
5797 UndefElts.setBit(i);
5798 continue;
5799 }
5800
5801 // If only some bits are UNDEF then treat them as zero (or bail if not
5802 // supported).
5803 if (UndefEltBits.getBoolValue() && !AllowPartialUndefs)
5804 return false;
5805
5806 APInt Bits = MaskBits.extractBits(EltSizeInBits, BitOffset);
5807 EltBits[i] = Bits.getZExtValue();
5808 }
5809 return true;
5810 };
5811
5812 // Collect constant bits and insert into mask/undef bit masks.
5813 auto CollectConstantBits = [](const Constant *Cst, APInt &Mask, APInt &Undefs,
5814 unsigned UndefBitIndex) {
5815 if (!Cst)
5816 return false;
5817 if (isa<UndefValue>(Cst)) {
5818 Undefs.setBit(UndefBitIndex);
5819 return true;
5820 }
5821 if (auto *CInt = dyn_cast<ConstantInt>(Cst)) {
5822 Mask = CInt->getValue();
5823 return true;
5824 }
5825 if (auto *CFP = dyn_cast<ConstantFP>(Cst)) {
5826 Mask = CFP->getValueAPF().bitcastToAPInt();
5827 return true;
5828 }
5829 return false;
5830 };
5831
5832 // Handle UNDEFs.
5833 if (Op.isUndef()) {
5834 APInt UndefSrcElts = APInt::getAllOnesValue(NumElts);
5835 SmallVector<APInt, 64> SrcEltBits(NumElts, APInt(EltSizeInBits, 0));
5836 return CastBitData(UndefSrcElts, SrcEltBits);
5837 }
5838
5839 // Extract scalar constant bits.
5840 if (auto *Cst = dyn_cast<ConstantSDNode>(Op)) {
5841 APInt UndefSrcElts = APInt::getNullValue(1);
5842 SmallVector<APInt, 64> SrcEltBits(1, Cst->getAPIntValue());
5843 return CastBitData(UndefSrcElts, SrcEltBits);
5844 }
5845 if (auto *Cst = dyn_cast<ConstantFPSDNode>(Op)) {
5846 APInt UndefSrcElts = APInt::getNullValue(1);
5847 APInt RawBits = Cst->getValueAPF().bitcastToAPInt();
5848 SmallVector<APInt, 64> SrcEltBits(1, RawBits);
5849 return CastBitData(UndefSrcElts, SrcEltBits);
5850 }
5851
5852 // Extract constant bits from build vector.
5853 if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
5854 unsigned SrcEltSizeInBits = VT.getScalarSizeInBits();
5855 unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits;
5856
5857 APInt UndefSrcElts(NumSrcElts, 0);
5858 SmallVector<APInt, 64> SrcEltBits(NumSrcElts, APInt(SrcEltSizeInBits, 0));
5859 for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
5860 const SDValue &Src = Op.getOperand(i);
5861 if (Src.isUndef()) {
5862 UndefSrcElts.setBit(i);
5863 continue;
5864 }
5865 auto *Cst = cast<ConstantSDNode>(Src);
5866 SrcEltBits[i] = Cst->getAPIntValue().zextOrTrunc(SrcEltSizeInBits);
5867 }
5868 return CastBitData(UndefSrcElts, SrcEltBits);
5869 }
5870 if (ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode())) {
5871 unsigned SrcEltSizeInBits = VT.getScalarSizeInBits();
5872 unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits;
5873
5874 APInt UndefSrcElts(NumSrcElts, 0);
5875 SmallVector<APInt, 64> SrcEltBits(NumSrcElts, APInt(SrcEltSizeInBits, 0));
5876 for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
5877 const SDValue &Src = Op.getOperand(i);
5878 if (Src.isUndef()) {
5879 UndefSrcElts.setBit(i);
5880 continue;
5881 }
5882 auto *Cst = cast<ConstantFPSDNode>(Src);
5883 APInt RawBits = Cst->getValueAPF().bitcastToAPInt();
5884 SrcEltBits[i] = RawBits.zextOrTrunc(SrcEltSizeInBits);
5885 }
5886 return CastBitData(UndefSrcElts, SrcEltBits);
5887 }
5888
5889 // Extract constant bits from constant pool vector.
5890 if (auto *Cst = getTargetConstantFromNode(Op)) {
5891 Type *CstTy = Cst->getType();
5892 unsigned CstSizeInBits = CstTy->getPrimitiveSizeInBits();
5893 if (!CstTy->isVectorTy() || (CstSizeInBits % SizeInBits) != 0)
5894 return false;
5895
5896 unsigned SrcEltSizeInBits = CstTy->getScalarSizeInBits();
5897 unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits;
5898
5899 APInt UndefSrcElts(NumSrcElts, 0);
5900 SmallVector<APInt, 64> SrcEltBits(NumSrcElts, APInt(SrcEltSizeInBits, 0));
5901 for (unsigned i = 0; i != NumSrcElts; ++i)
5902 if (!CollectConstantBits(Cst->getAggregateElement(i), SrcEltBits[i],
5903 UndefSrcElts, i))
5904 return false;
5905
5906 return CastBitData(UndefSrcElts, SrcEltBits);
5907 }
5908
5909 // Extract constant bits from a broadcasted constant pool scalar.
5910 if (Op.getOpcode() == X86ISD::VBROADCAST &&
5911 EltSizeInBits <= VT.getScalarSizeInBits()) {
5912 if (auto *Broadcast = getTargetConstantFromNode(Op.getOperand(0))) {
5913 unsigned SrcEltSizeInBits = Broadcast->getType()->getScalarSizeInBits();
5914 unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits;
5915
5916 APInt UndefSrcElts(NumSrcElts, 0);
5917 SmallVector<APInt, 64> SrcEltBits(1, APInt(SrcEltSizeInBits, 0));
5918 if (CollectConstantBits(Broadcast, SrcEltBits[0], UndefSrcElts, 0)) {
5919 if (UndefSrcElts[0])
5920 UndefSrcElts.setBits(0, NumSrcElts);
5921 SrcEltBits.append(NumSrcElts - 1, SrcEltBits[0]);
5922 return CastBitDat