Bug Summary

File: include/llvm/CodeGen/SelectionDAGNodes.h
Warning: line 1139, column 10
Called C++ object pointer is null

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name X86ISelLowering.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mthread-model posix -fmath-errno -masm-verbose -mconstructor-aliases -munwind-tables -fuse-init-array -target-cpu x86-64 -dwarf-column-info -debugger-tuning=gdb -momit-leaf-frame-pointer -ffunction-sections -fdata-sections -resource-dir /usr/lib/llvm-9/lib/clang/9.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-9~svn361465/build-llvm/lib/Target/X86 -I /build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86 -I /build/llvm-toolchain-snapshot-9~svn361465/build-llvm/include -I /build/llvm-toolchain-snapshot-9~svn361465/include -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0/backward -internal-isystem /usr/include/clang/9.0.0/include/ -internal-isystem /usr/local/include -internal-isystem /usr/lib/llvm-9/lib/clang/9.0.0/include 
-internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-comment -std=c++11 -fdeprecated-macro -fdebug-compilation-dir /build/llvm-toolchain-snapshot-9~svn361465/build-llvm/lib/Target/X86 -fdebug-prefix-map=/build/llvm-toolchain-snapshot-9~svn361465=. -ferror-limit 19 -fmessage-length 0 -fvisibility-inlines-hidden -stack-protector 2 -fobjc-runtime=gcc -fdiagnostics-show-option -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -o /tmp/scan-build-2019-05-24-031927-21217-1 -x c++ /build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp -faddrsig

/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp

1//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that X86 uses to lower LLVM code into a
10// selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
14#include "X86ISelLowering.h"
15#include "Utils/X86ShuffleDecode.h"
16#include "X86CallingConv.h"
17#include "X86FrameLowering.h"
18#include "X86InstrBuilder.h"
19#include "X86IntrinsicsInfo.h"
20#include "X86MachineFunctionInfo.h"
21#include "X86TargetMachine.h"
22#include "X86TargetObjectFile.h"
23#include "llvm/ADT/SmallBitVector.h"
24#include "llvm/ADT/SmallSet.h"
25#include "llvm/ADT/Statistic.h"
26#include "llvm/ADT/StringExtras.h"
27#include "llvm/ADT/StringSwitch.h"
28#include "llvm/Analysis/EHPersonalities.h"
29#include "llvm/CodeGen/IntrinsicLowering.h"
30#include "llvm/CodeGen/MachineFrameInfo.h"
31#include "llvm/CodeGen/MachineFunction.h"
32#include "llvm/CodeGen/MachineInstrBuilder.h"
33#include "llvm/CodeGen/MachineJumpTableInfo.h"
34#include "llvm/CodeGen/MachineModuleInfo.h"
35#include "llvm/CodeGen/MachineRegisterInfo.h"
36#include "llvm/CodeGen/TargetLowering.h"
37#include "llvm/CodeGen/WinEHFuncInfo.h"
38#include "llvm/IR/CallSite.h"
39#include "llvm/IR/CallingConv.h"
40#include "llvm/IR/Constants.h"
41#include "llvm/IR/DerivedTypes.h"
42#include "llvm/IR/DiagnosticInfo.h"
43#include "llvm/IR/Function.h"
44#include "llvm/IR/GlobalAlias.h"
45#include "llvm/IR/GlobalVariable.h"
46#include "llvm/IR/Instructions.h"
47#include "llvm/IR/Intrinsics.h"
48#include "llvm/MC/MCAsmInfo.h"
49#include "llvm/MC/MCContext.h"
50#include "llvm/MC/MCExpr.h"
51#include "llvm/MC/MCSymbol.h"
52#include "llvm/Support/CommandLine.h"
53#include "llvm/Support/Debug.h"
54#include "llvm/Support/ErrorHandling.h"
55#include "llvm/Support/KnownBits.h"
56#include "llvm/Support/MathExtras.h"
57#include "llvm/Target/TargetOptions.h"
58#include <algorithm>
59#include <bitset>
60#include <cctype>
61#include <numeric>
62using namespace llvm;
63
// NOTE(review): this listing comes from an analyzer report. The text that
// follows each macro use below (the repeated "x86-isel" literal, and the
// `static llvm::Statistic NumTailCalls = {...}` initializer) appears to be the
// report's rendered macro expansion fused onto the same line rather than part
// of the original source -- confirm against the real file.
64#define DEBUG_TYPE"x86-isel" "x86-isel"

// Statistic counter described as "Number of tail calls".
66STATISTIC(NumTailCalls, "Number of tail calls")static llvm::Statistic NumTailCalls = {"x86-isel", "NumTailCalls"
, "Number of tail calls", {0}, {false}}
;
67
// Hidden boolean command-line option
// "x86-experimental-vector-widening-legalization"; initialized to false.
// Per its description, enabling it selects an experimental vector type
// legalization that widens rather than promotes.
68static cl::opt<bool> ExperimentalVectorWideningLegalization(
69 "x86-experimental-vector-widening-legalization", cl::init(false),
70 cl::desc("Enable an experimental vector type legalization through widening "
71 "rather than promotion."),
72 cl::Hidden);
73
// Hidden integer command-line option "x86-experimental-pref-loop-alignment";
// initialized to 4. Per its description, the value is the number of low bits
// of the loop header PC that will be zero.
74static cl::opt<int> ExperimentalPrefLoopAlignment(
75 "x86-experimental-pref-loop-alignment", cl::init(4),
76 cl::desc("Sets the preferable loop alignment for experiments "
77 "(the last x86-experimental-pref-loop-alignment bits"
78 " of the loop header PC will be 0)."),
79 cl::Hidden);
80
// Hidden boolean command-line option "mul-constant-optimization"; initialized
// to true. Per its description, it controls replacing 'mul x, Const' with
// other instructions such as SHIFT and LEA.
81static cl::opt<bool> MulConstantOptimization(
82 "mul-constant-optimization", cl::init(true),
83 cl::desc("Replace 'mul x, Const' with more effective instructions like "
84 "SHIFT, LEA, etc."),
85 cl::Hidden);
86
87/// Call this when the user attempts to do something unsupported, like
88/// returning a double without SSE2 enabled on x86_64. This is not fatal, unlike
89/// report_fatal_error, so calling code should attempt to recover without
90/// crashing.
///
/// Builds a DiagnosticInfoUnsupported from the current function (taken from
/// DAG's MachineFunction), the message text Msg, and the debug location of dl,
/// and hands it to the diagnose() method of DAG's context. Returns nothing.
91static void errorUnsupported(SelectionDAG &DAG, const SDLoc &dl,
92 const char *Msg) {
93 MachineFunction &MF = DAG.getMachineFunction();
// Report through the context's diagnostic interface instead of aborting, so
// the caller keeps running after the report.
94 DAG.getContext()->diagnose(
95 DiagnosticInfoUnsupported(MF.getFunction(), Msg, dl.getDebugLoc()));
96}
97
98X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
99 const X86Subtarget &STI)
100 : TargetLowering(TM), Subtarget(STI) {
101 bool UseX87 = !Subtarget.useSoftFloat() && Subtarget.hasX87();
102 X86ScalarSSEf64 = Subtarget.hasSSE2();
103 X86ScalarSSEf32 = Subtarget.hasSSE1();
104 MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0));
105
106 // Set up the TargetLowering object.
107
108 // X86 is weird. It always uses i8 for shift amounts and setcc results.
109 setBooleanContents(ZeroOrOneBooleanContent);
110 // X86-SSE is even stranger. It uses -1 or 0 for vector masks.
111 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
112
113 // For 64-bit, since we have so many registers, use the ILP scheduler.
114 // For 32-bit, use the register pressure specific scheduling.
115 // For Atom, always use ILP scheduling.
116 if (Subtarget.isAtom())
117 setSchedulingPreference(Sched::ILP);
118 else if (Subtarget.is64Bit())
119 setSchedulingPreference(Sched::ILP);
120 else
121 setSchedulingPreference(Sched::RegPressure);
122 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
123 setStackPointerRegisterToSaveRestore(RegInfo->getStackRegister());
124
125 // Bypass expensive divides and use cheaper ones.
126 if (TM.getOptLevel() >= CodeGenOpt::Default) {
127 if (Subtarget.hasSlowDivide32())
128 addBypassSlowDiv(32, 8);
129 if (Subtarget.hasSlowDivide64() && Subtarget.is64Bit())
130 addBypassSlowDiv(64, 32);
131 }
132
133 if (Subtarget.isTargetKnownWindowsMSVC() ||
134 Subtarget.isTargetWindowsItanium()) {
135 // Setup Windows compiler runtime calls.
136 setLibcallName(RTLIB::SDIV_I64, "_alldiv");
137 setLibcallName(RTLIB::UDIV_I64, "_aulldiv");
138 setLibcallName(RTLIB::SREM_I64, "_allrem");
139 setLibcallName(RTLIB::UREM_I64, "_aullrem");
140 setLibcallName(RTLIB::MUL_I64, "_allmul");
141 setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::X86_StdCall);
142 setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::X86_StdCall);
143 setLibcallCallingConv(RTLIB::SREM_I64, CallingConv::X86_StdCall);
144 setLibcallCallingConv(RTLIB::UREM_I64, CallingConv::X86_StdCall);
145 setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::X86_StdCall);
146 }
147
148 if (Subtarget.isTargetDarwin()) {
149 // Darwin should use _setjmp/_longjmp instead of setjmp/longjmp.
150 setUseUnderscoreSetJmp(false);
151 setUseUnderscoreLongJmp(false);
152 } else if (Subtarget.isTargetWindowsGNU()) {
153 // MS runtime is weird: it exports _setjmp, but longjmp!
154 setUseUnderscoreSetJmp(true);
155 setUseUnderscoreLongJmp(false);
156 } else {
157 setUseUnderscoreSetJmp(true);
158 setUseUnderscoreLongJmp(true);
159 }
160
161 // If we don't have cmpxchg8b (meaning this is a 386/486), limit atomic size to
162 // 32 bits so the AtomicExpandPass will expand it so we don't need cmpxchg8b.
163 // FIXME: Should we be limiting the atomic size on other configs? Default is
164 // 1024.
165 if (!Subtarget.hasCmpxchg8b())
166 setMaxAtomicSizeInBitsSupported(32);
167
168 // Set up the register classes.
169 addRegisterClass(MVT::i8, &X86::GR8RegClass);
170 addRegisterClass(MVT::i16, &X86::GR16RegClass);
171 addRegisterClass(MVT::i32, &X86::GR32RegClass);
172 if (Subtarget.is64Bit())
173 addRegisterClass(MVT::i64, &X86::GR64RegClass);
174
175 for (MVT VT : MVT::integer_valuetypes())
176 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
177
178 // We don't accept any truncstore of integer registers.
179 setTruncStoreAction(MVT::i64, MVT::i32, Expand);
180 setTruncStoreAction(MVT::i64, MVT::i16, Expand);
181 setTruncStoreAction(MVT::i64, MVT::i8 , Expand);
182 setTruncStoreAction(MVT::i32, MVT::i16, Expand);
183 setTruncStoreAction(MVT::i32, MVT::i8 , Expand);
184 setTruncStoreAction(MVT::i16, MVT::i8, Expand);
185
186 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
187
188 // SETOEQ and SETUNE require checking two conditions.
189 setCondCodeAction(ISD::SETOEQ, MVT::f32, Expand);
190 setCondCodeAction(ISD::SETOEQ, MVT::f64, Expand);
191 setCondCodeAction(ISD::SETOEQ, MVT::f80, Expand);
192 setCondCodeAction(ISD::SETUNE, MVT::f32, Expand);
193 setCondCodeAction(ISD::SETUNE, MVT::f64, Expand);
194 setCondCodeAction(ISD::SETUNE, MVT::f80, Expand);
195
196 // Integer absolute.
197 if (Subtarget.hasCMov()) {
198 setOperationAction(ISD::ABS , MVT::i16 , Custom);
199 setOperationAction(ISD::ABS , MVT::i32 , Custom);
200 }
201 setOperationAction(ISD::ABS , MVT::i64 , Custom);
202
203 // Funnel shifts.
204 for (auto ShiftOp : {ISD::FSHL, ISD::FSHR}) {
205 setOperationAction(ShiftOp , MVT::i16 , Custom);
206 setOperationAction(ShiftOp , MVT::i32 , Custom);
207 if (Subtarget.is64Bit())
208 setOperationAction(ShiftOp , MVT::i64 , Custom);
209 }
210
211 // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
212 // operation.
213 setOperationAction(ISD::UINT_TO_FP , MVT::i1 , Promote);
214 setOperationAction(ISD::UINT_TO_FP , MVT::i8 , Promote);
215 setOperationAction(ISD::UINT_TO_FP , MVT::i16 , Promote);
216
217 if (Subtarget.is64Bit()) {
218 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512())
219 // f32/f64 are legal, f80 is custom.
220 setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Custom);
221 else
222 setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote);
223 setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom);
224 } else if (!Subtarget.useSoftFloat()) {
225 // We have an algorithm for SSE2->double, and we turn this into a
226 // 64-bit FILD followed by conditional FADD for other targets.
227 setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom);
228 // We have an algorithm for SSE2, and we turn this into a 64-bit
229 // FILD or VCVTUSI2SS/SD for other targets.
230 setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Custom);
231 } else {
232 setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Expand);
233 }
234
235 // Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
236 // this operation.
237 setOperationAction(ISD::SINT_TO_FP , MVT::i1 , Promote);
238 setOperationAction(ISD::SINT_TO_FP , MVT::i8 , Promote);
239
240 if (!Subtarget.useSoftFloat()) {
241 // SSE has no i16 to fp conversion, only i32.
242 if (X86ScalarSSEf32) {
243 setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote);
244 // f32 and f64 cases are Legal, f80 case is not
245 setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom);
246 } else {
247 setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Custom);
248 setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom);
249 }
250 } else {
251 setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote);
252 setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Expand);
253 }
254
255 // Promote i1/i8 FP_TO_SINT to larger FP_TO_SINT's, as X86 doesn't have
256 // this operation.
257 setOperationAction(ISD::FP_TO_SINT , MVT::i1 , Promote);
258 setOperationAction(ISD::FP_TO_SINT , MVT::i8 , Promote);
259
260 if (!Subtarget.useSoftFloat()) {
261 // In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64
262 // are Legal, f80 is custom lowered.
263 setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Custom);
264 setOperationAction(ISD::SINT_TO_FP , MVT::i64 , Custom);
265
266 setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Custom);
267 setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom);
268 } else {
269 setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Promote);
270 setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Expand);
271 setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Expand);
272 }
273
274 // Handle FP_TO_UINT by promoting the destination to a larger signed
275 // conversion.
276 setOperationAction(ISD::FP_TO_UINT , MVT::i1 , Promote);
277 setOperationAction(ISD::FP_TO_UINT , MVT::i8 , Promote);
278 setOperationAction(ISD::FP_TO_UINT , MVT::i16 , Promote);
279
280 if (Subtarget.is64Bit()) {
281 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
282 // FP_TO_UINT-i32/i64 is legal for f32/f64, but custom for f80.
283 setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Custom);
284 setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Custom);
285 } else {
286 setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote);
287 setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Expand);
288 }
289 } else if (!Subtarget.useSoftFloat()) {
290 // Since AVX is a superset of SSE3, only check for SSE here.
291 if (Subtarget.hasSSE1() && !Subtarget.hasSSE3())
292 // Expand FP_TO_UINT into a select.
293 // FIXME: We would like to use a Custom expander here eventually to do
294 // the optimal thing for SSE vs. the default expansion in the legalizer.
295 setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Expand);
296 else
297 // With AVX512 we can use vcvts[ds]2usi for f32/f64->i32, f80 is custom.
298 // With SSE3 we can use fisttpll to convert to a signed i64; without
299 // SSE, we're stuck with a fistpll.
300 setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Custom);
301
302 setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Custom);
303 }
304
305 // TODO: when we have SSE, these could be more efficient, by using movd/movq.
306 if (!X86ScalarSSEf64) {
307 setOperationAction(ISD::BITCAST , MVT::f32 , Expand);
308 setOperationAction(ISD::BITCAST , MVT::i32 , Expand);
309 if (Subtarget.is64Bit()) {
310 setOperationAction(ISD::BITCAST , MVT::f64 , Expand);
311 // Without SSE, i64->f64 goes through memory.
312 setOperationAction(ISD::BITCAST , MVT::i64 , Expand);
313 }
314 } else if (!Subtarget.is64Bit())
315 setOperationAction(ISD::BITCAST , MVT::i64 , Custom);
316
317 // Scalar integer divide and remainder are lowered to use operations that
318 // produce two results, to match the available instructions. This exposes
319 // the two-result form to trivial CSE, which is able to combine x/y and x%y
320 // into a single instruction.
321 //
322 // Scalar integer multiply-high is also lowered to use two-result
323 // operations, to match the available instructions. However, plain multiply
324 // (low) operations are left as Legal, as there are single-result
325 // instructions for this in x86. Using the two-result multiply instructions
326 // when both high and low results are needed must be arranged by dagcombine.
327 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
328 setOperationAction(ISD::MULHS, VT, Expand);
329 setOperationAction(ISD::MULHU, VT, Expand);
330 setOperationAction(ISD::SDIV, VT, Expand);
331 setOperationAction(ISD::UDIV, VT, Expand);
332 setOperationAction(ISD::SREM, VT, Expand);
333 setOperationAction(ISD::UREM, VT, Expand);
334 }
335
336 setOperationAction(ISD::BR_JT , MVT::Other, Expand);
337 setOperationAction(ISD::BRCOND , MVT::Other, Custom);
338 for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128,
339 MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
340 setOperationAction(ISD::BR_CC, VT, Expand);
341 setOperationAction(ISD::SELECT_CC, VT, Expand);
342 }
343 if (Subtarget.is64Bit())
344 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
345 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16 , Legal);
346 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal);
347 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);
348 setOperationAction(ISD::FP_ROUND_INREG , MVT::f32 , Expand);
349
350 setOperationAction(ISD::FREM , MVT::f32 , Expand);
351 setOperationAction(ISD::FREM , MVT::f64 , Expand);
352 setOperationAction(ISD::FREM , MVT::f80 , Expand);
353 setOperationAction(ISD::FLT_ROUNDS_ , MVT::i32 , Custom);
354
355 // Promote the i8 variants and force them on up to i32 which has a shorter
356 // encoding.
357 setOperationPromotedToType(ISD::CTTZ , MVT::i8 , MVT::i32);
358 setOperationPromotedToType(ISD::CTTZ_ZERO_UNDEF, MVT::i8 , MVT::i32);
359 if (!Subtarget.hasBMI()) {
360 setOperationAction(ISD::CTTZ , MVT::i16 , Custom);
361 setOperationAction(ISD::CTTZ , MVT::i32 , Custom);
362 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16 , Legal);
363 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32 , Legal);
364 if (Subtarget.is64Bit()) {
365 setOperationAction(ISD::CTTZ , MVT::i64 , Custom);
366 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Legal);
367 }
368 }
369
370 if (Subtarget.hasLZCNT()) {
371 // When promoting the i8 variants, force them to i32 for a shorter
372 // encoding.
373 setOperationPromotedToType(ISD::CTLZ , MVT::i8 , MVT::i32);
374 setOperationPromotedToType(ISD::CTLZ_ZERO_UNDEF, MVT::i8 , MVT::i32);
375 } else {
376 setOperationAction(ISD::CTLZ , MVT::i8 , Custom);
377 setOperationAction(ISD::CTLZ , MVT::i16 , Custom);
378 setOperationAction(ISD::CTLZ , MVT::i32 , Custom);
379 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8 , Custom);
380 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16 , Custom);
381 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32 , Custom);
382 if (Subtarget.is64Bit()) {
383 setOperationAction(ISD::CTLZ , MVT::i64 , Custom);
384 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Custom);
385 }
386 }
387
388 // Special handling for half-precision floating point conversions.
389 // If we don't have F16C support, then lower half float conversions
390 // into library calls.
391 if (Subtarget.useSoftFloat() || !Subtarget.hasF16C()) {
392 setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
393 setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
394 }
395
396 // There's never any support for operations beyond MVT::f32.
397 setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
398 setOperationAction(ISD::FP16_TO_FP, MVT::f80, Expand);
399 setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
400 setOperationAction(ISD::FP_TO_FP16, MVT::f80, Expand);
401
402 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
403 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
404 setLoadExtAction(ISD::EXTLOAD, MVT::f80, MVT::f16, Expand);
405 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
406 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
407 setTruncStoreAction(MVT::f80, MVT::f16, Expand);
408
409 if (Subtarget.hasPOPCNT()) {
410 setOperationPromotedToType(ISD::CTPOP, MVT::i8, MVT::i32);
411 } else {
412 setOperationAction(ISD::CTPOP , MVT::i8 , Expand);
413 setOperationAction(ISD::CTPOP , MVT::i16 , Expand);
414 setOperationAction(ISD::CTPOP , MVT::i32 , Expand);
415 if (Subtarget.is64Bit())
416 setOperationAction(ISD::CTPOP , MVT::i64 , Expand);
417 else
418 setOperationAction(ISD::CTPOP , MVT::i64 , Custom);
419 }
420
421 setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom);
422
423 if (!Subtarget.hasMOVBE())
424 setOperationAction(ISD::BSWAP , MVT::i16 , Expand);
425
426 // These should be promoted to a larger select which is supported.
427 setOperationAction(ISD::SELECT , MVT::i1 , Promote);
428 // X86 wants to expand cmov itself.
429 for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128 }) {
430 setOperationAction(ISD::SELECT, VT, Custom);
431 setOperationAction(ISD::SETCC, VT, Custom);
432 }
433 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
434 if (VT == MVT::i64 && !Subtarget.is64Bit())
435 continue;
436 setOperationAction(ISD::SELECT, VT, Custom);
437 setOperationAction(ISD::SETCC, VT, Custom);
438 }
439
440 // Custom action for SELECT MMX and expand action for SELECT_CC MMX
441 setOperationAction(ISD::SELECT, MVT::x86mmx, Custom);
442 setOperationAction(ISD::SELECT_CC, MVT::x86mmx, Expand);
443
444 setOperationAction(ISD::EH_RETURN , MVT::Other, Custom);
445 // NOTE: EH_SJLJ_SETJMP/_LONGJMP are not recommended, since
446 // LLVM/Clang supports zero-cost DWARF and SEH exception handling.
447 setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
448 setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
449 setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom);
450 if (TM.Options.ExceptionModel == ExceptionHandling::SjLj)
451 setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
452
453 // Darwin ABI issue.
454 for (auto VT : { MVT::i32, MVT::i64 }) {
455 if (VT == MVT::i64 && !Subtarget.is64Bit())
456 continue;
457 setOperationAction(ISD::ConstantPool , VT, Custom);
458 setOperationAction(ISD::JumpTable , VT, Custom);
459 setOperationAction(ISD::GlobalAddress , VT, Custom);
460 setOperationAction(ISD::GlobalTLSAddress, VT, Custom);
461 setOperationAction(ISD::ExternalSymbol , VT, Custom);
462 setOperationAction(ISD::BlockAddress , VT, Custom);
463 }
464
465 // 64-bit shl, sra, srl (iff 32-bit x86)
466 for (auto VT : { MVT::i32, MVT::i64 }) {
467 if (VT == MVT::i64 && !Subtarget.is64Bit())
468 continue;
469 setOperationAction(ISD::SHL_PARTS, VT, Custom);
470 setOperationAction(ISD::SRA_PARTS, VT, Custom);
471 setOperationAction(ISD::SRL_PARTS, VT, Custom);
472 }
473
474 if (Subtarget.hasSSEPrefetch() || Subtarget.has3DNow())
475 setOperationAction(ISD::PREFETCH , MVT::Other, Legal);
476
477 setOperationAction(ISD::ATOMIC_FENCE , MVT::Other, Custom);
478
479 // Expand certain atomics
480 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
481 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Custom);
482 setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);
483 setOperationAction(ISD::ATOMIC_LOAD_ADD, VT, Custom);
484 setOperationAction(ISD::ATOMIC_LOAD_OR, VT, Custom);
485 setOperationAction(ISD::ATOMIC_LOAD_XOR, VT, Custom);
486 setOperationAction(ISD::ATOMIC_LOAD_AND, VT, Custom);
487 setOperationAction(ISD::ATOMIC_STORE, VT, Custom);
488 }
489
490 if (!Subtarget.is64Bit())
491 setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Custom);
492
493 if (Subtarget.hasCmpxchg16b()) {
494 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom);
495 }
496
497 // FIXME - use subtarget debug flags
498 if (!Subtarget.isTargetDarwin() && !Subtarget.isTargetELF() &&
499 !Subtarget.isTargetCygMing() && !Subtarget.isTargetWin64() &&
500 TM.Options.ExceptionModel != ExceptionHandling::SjLj) {
501 setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
502 }
503
504 setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom);
505 setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i64, Custom);
506
507 setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
508 setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
509
510 setOperationAction(ISD::TRAP, MVT::Other, Legal);
511 setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
512
513 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
514 setOperationAction(ISD::VASTART , MVT::Other, Custom);
515 setOperationAction(ISD::VAEND , MVT::Other, Expand);
516 bool Is64Bit = Subtarget.is64Bit();
517 setOperationAction(ISD::VAARG, MVT::Other, Is64Bit ? Custom : Expand);
518 setOperationAction(ISD::VACOPY, MVT::Other, Is64Bit ? Custom : Expand);
519
520 setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
521 setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
522
523 setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom);
524
525 // GC_TRANSITION_START and GC_TRANSITION_END need custom lowering.
526 setOperationAction(ISD::GC_TRANSITION_START, MVT::Other, Custom);
527 setOperationAction(ISD::GC_TRANSITION_END, MVT::Other, Custom);
528
529 if (!Subtarget.useSoftFloat() && X86ScalarSSEf64) {
530 // f32 and f64 use SSE.
531 // Set up the FP register classes.
532 addRegisterClass(MVT::f32, Subtarget.hasAVX512() ? &X86::FR32XRegClass
533 : &X86::FR32RegClass);
534 addRegisterClass(MVT::f64, Subtarget.hasAVX512() ? &X86::FR64XRegClass
535 : &X86::FR64RegClass);
536
537 for (auto VT : { MVT::f32, MVT::f64 }) {
538 // Use ANDPD to simulate FABS.
539 setOperationAction(ISD::FABS, VT, Custom);
540
541 // Use XORP to simulate FNEG.
542 setOperationAction(ISD::FNEG, VT, Custom);
543
544 // Use ANDPD and ORPD to simulate FCOPYSIGN.
545 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
546
547 // These might be better off as horizontal vector ops.
548 setOperationAction(ISD::FADD, VT, Custom);
549 setOperationAction(ISD::FSUB, VT, Custom);
550
551 // We don't support sin/cos/fmod
552 setOperationAction(ISD::FSIN , VT, Expand);
553 setOperationAction(ISD::FCOS , VT, Expand);
554 setOperationAction(ISD::FSINCOS, VT, Expand);
555 }
556
557 // Lower this to MOVMSK plus an AND.
558 setOperationAction(ISD::FGETSIGN, MVT::i64, Custom);
559 setOperationAction(ISD::FGETSIGN, MVT::i32, Custom);
560
561 } else if (!useSoftFloat() && X86ScalarSSEf32 && (UseX87 || Is64Bit)) {
562 // Use SSE for f32, x87 for f64.
563 // Set up the FP register classes.
564 addRegisterClass(MVT::f32, &X86::FR32RegClass);
565 if (UseX87)
566 addRegisterClass(MVT::f64, &X86::RFP64RegClass);
567
568 // Use ANDPS to simulate FABS.
569 setOperationAction(ISD::FABS , MVT::f32, Custom);
570
571 // Use XORP to simulate FNEG.
572 setOperationAction(ISD::FNEG , MVT::f32, Custom);
573
574 if (UseX87)
575 setOperationAction(ISD::UNDEF, MVT::f64, Expand);
576
577 // Use ANDPS and ORPS to simulate FCOPYSIGN.
578 if (UseX87)
579 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
580 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
581
582 // We don't support sin/cos/fmod
583 setOperationAction(ISD::FSIN , MVT::f32, Expand);
584 setOperationAction(ISD::FCOS , MVT::f32, Expand);
585 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
586
587 if (UseX87) {
588 // Always expand sin/cos functions even though x87 has an instruction.
589 setOperationAction(ISD::FSIN, MVT::f64, Expand);
590 setOperationAction(ISD::FCOS, MVT::f64, Expand);
591 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
592 }
593 } else if (UseX87) {
594 // f32 and f64 in x87.
595 // Set up the FP register classes.
596 addRegisterClass(MVT::f64, &X86::RFP64RegClass);
597 addRegisterClass(MVT::f32, &X86::RFP32RegClass);
598
599 for (auto VT : { MVT::f32, MVT::f64 }) {
600 setOperationAction(ISD::UNDEF, VT, Expand);
601 setOperationAction(ISD::FCOPYSIGN, VT, Expand);
602
603 // Always expand sin/cos functions even though x87 has an instruction.
604 setOperationAction(ISD::FSIN , VT, Expand);
605 setOperationAction(ISD::FCOS , VT, Expand);
606 setOperationAction(ISD::FSINCOS, VT, Expand);
607 }
608 }
609
610 // Expand FP32 immediates into loads from the stack, save special cases.
611 if (isTypeLegal(MVT::f32)) {
612 if (UseX87 && (getRegClassFor(MVT::f32) == &X86::RFP32RegClass)) {
613 addLegalFPImmediate(APFloat(+0.0f)); // FLD0
614 addLegalFPImmediate(APFloat(+1.0f)); // FLD1
615 addLegalFPImmediate(APFloat(-0.0f)); // FLD0/FCHS
616 addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS
617 } else // SSE immediates.
618 addLegalFPImmediate(APFloat(+0.0f)); // xorps
619 }
620 // Expand FP64 immediates into loads from the stack, save special cases.
621 if (isTypeLegal(MVT::f64)) {
622 if (UseX87 && getRegClassFor(MVT::f64) == &X86::RFP64RegClass) {
623 addLegalFPImmediate(APFloat(+0.0)); // FLD0
624 addLegalFPImmediate(APFloat(+1.0)); // FLD1
625 addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
626 addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
627 } else // SSE immediates.
628 addLegalFPImmediate(APFloat(+0.0)); // xorpd
629 }
630
631 // We don't support FMA.
632 setOperationAction(ISD::FMA, MVT::f64, Expand);
633 setOperationAction(ISD::FMA, MVT::f32, Expand);
634
635 // Long double always uses X87, except f128 in MMX.
636 if (UseX87) {
637 if (Subtarget.is64Bit() && Subtarget.hasMMX()) {
638 addRegisterClass(MVT::f128, Subtarget.hasVLX() ? &X86::VR128XRegClass
639 : &X86::VR128RegClass);
640 ValueTypeActions.setTypeAction(MVT::f128, TypeSoftenFloat);
641 setOperationAction(ISD::FABS , MVT::f128, Custom);
642 setOperationAction(ISD::FNEG , MVT::f128, Custom);
643 setOperationAction(ISD::FCOPYSIGN, MVT::f128, Custom);
644 }
645
646 addRegisterClass(MVT::f80, &X86::RFP80RegClass);
647 setOperationAction(ISD::UNDEF, MVT::f80, Expand);
648 setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand);
649 {
650 APFloat TmpFlt = APFloat::getZero(APFloat::x87DoubleExtended());
651 addLegalFPImmediate(TmpFlt); // FLD0
652 TmpFlt.changeSign();
653 addLegalFPImmediate(TmpFlt); // FLD0/FCHS
654
655 bool ignored;
656 APFloat TmpFlt2(+1.0);
657 TmpFlt2.convert(APFloat::x87DoubleExtended(), APFloat::rmNearestTiesToEven,
658 &ignored);
659 addLegalFPImmediate(TmpFlt2); // FLD1
660 TmpFlt2.changeSign();
661 addLegalFPImmediate(TmpFlt2); // FLD1/FCHS
662 }
663
664 // Always expand sin/cos functions even though x87 has an instruction.
665 setOperationAction(ISD::FSIN , MVT::f80, Expand);
666 setOperationAction(ISD::FCOS , MVT::f80, Expand);
667 setOperationAction(ISD::FSINCOS, MVT::f80, Expand);
668
669 setOperationAction(ISD::FFLOOR, MVT::f80, Expand);
670 setOperationAction(ISD::FCEIL, MVT::f80, Expand);
671 setOperationAction(ISD::FTRUNC, MVT::f80, Expand);
672 setOperationAction(ISD::FRINT, MVT::f80, Expand);
673 setOperationAction(ISD::FNEARBYINT, MVT::f80, Expand);
674 setOperationAction(ISD::FMA, MVT::f80, Expand);
675 setOperationAction(ISD::LROUND, MVT::f80, Expand);
676 setOperationAction(ISD::LLROUND, MVT::f80, Expand);
677 }
678
679 // Always use a library call for pow.
680 setOperationAction(ISD::FPOW , MVT::f32 , Expand);
681 setOperationAction(ISD::FPOW , MVT::f64 , Expand);
682 setOperationAction(ISD::FPOW , MVT::f80 , Expand);
683
684 setOperationAction(ISD::FLOG, MVT::f80, Expand);
685 setOperationAction(ISD::FLOG2, MVT::f80, Expand);
686 setOperationAction(ISD::FLOG10, MVT::f80, Expand);
687 setOperationAction(ISD::FEXP, MVT::f80, Expand);
688 setOperationAction(ISD::FEXP2, MVT::f80, Expand);
689 setOperationAction(ISD::FMINNUM, MVT::f80, Expand);
690 setOperationAction(ISD::FMAXNUM, MVT::f80, Expand);
691
692 // Some FP actions are always expanded for vector types.
693 for (auto VT : { MVT::v4f32, MVT::v8f32, MVT::v16f32,
694 MVT::v2f64, MVT::v4f64, MVT::v8f64 }) {
695 setOperationAction(ISD::FSIN, VT, Expand);
696 setOperationAction(ISD::FSINCOS, VT, Expand);
697 setOperationAction(ISD::FCOS, VT, Expand);
698 setOperationAction(ISD::FREM, VT, Expand);
699 setOperationAction(ISD::FCOPYSIGN, VT, Expand);
700 setOperationAction(ISD::FPOW, VT, Expand);
701 setOperationAction(ISD::FLOG, VT, Expand);
702 setOperationAction(ISD::FLOG2, VT, Expand);
703 setOperationAction(ISD::FLOG10, VT, Expand);
704 setOperationAction(ISD::FEXP, VT, Expand);
705 setOperationAction(ISD::FEXP2, VT, Expand);
706 }
707
708 // First set operation action for all vector types to either promote
709 // (for widening) or expand (for scalarization). Then we will selectively
710 // turn on ones that can be effectively codegen'd.
711 for (MVT VT : MVT::vector_valuetypes()) {
712 setOperationAction(ISD::SDIV, VT, Expand);
713 setOperationAction(ISD::UDIV, VT, Expand);
714 setOperationAction(ISD::SREM, VT, Expand);
715 setOperationAction(ISD::UREM, VT, Expand);
716 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT,Expand);
717 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
718 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT,Expand);
719 setOperationAction(ISD::INSERT_SUBVECTOR, VT,Expand);
720 setOperationAction(ISD::FMA, VT, Expand);
721 setOperationAction(ISD::FFLOOR, VT, Expand);
722 setOperationAction(ISD::FCEIL, VT, Expand);
723 setOperationAction(ISD::FTRUNC, VT, Expand);
724 setOperationAction(ISD::FRINT, VT, Expand);
725 setOperationAction(ISD::FNEARBYINT, VT, Expand);
726 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
727 setOperationAction(ISD::MULHS, VT, Expand);
728 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
729 setOperationAction(ISD::MULHU, VT, Expand);
730 setOperationAction(ISD::SDIVREM, VT, Expand);
731 setOperationAction(ISD::UDIVREM, VT, Expand);
732 setOperationAction(ISD::CTPOP, VT, Expand);
733 setOperationAction(ISD::CTTZ, VT, Expand);
734 setOperationAction(ISD::CTLZ, VT, Expand);
735 setOperationAction(ISD::ROTL, VT, Expand);
736 setOperationAction(ISD::ROTR, VT, Expand);
737 setOperationAction(ISD::BSWAP, VT, Expand);
738 setOperationAction(ISD::SETCC, VT, Expand);
739 setOperationAction(ISD::FP_TO_UINT, VT, Expand);
740 setOperationAction(ISD::FP_TO_SINT, VT, Expand);
741 setOperationAction(ISD::UINT_TO_FP, VT, Expand);
742 setOperationAction(ISD::SINT_TO_FP, VT, Expand);
743 setOperationAction(ISD::SIGN_EXTEND_INREG, VT,Expand);
744 setOperationAction(ISD::TRUNCATE, VT, Expand);
745 setOperationAction(ISD::SIGN_EXTEND, VT, Expand);
746 setOperationAction(ISD::ZERO_EXTEND, VT, Expand);
747 setOperationAction(ISD::ANY_EXTEND, VT, Expand);
748 setOperationAction(ISD::SELECT_CC, VT, Expand);
749 for (MVT InnerVT : MVT::vector_valuetypes()) {
750 setTruncStoreAction(InnerVT, VT, Expand);
751
752 setLoadExtAction(ISD::SEXTLOAD, InnerVT, VT, Expand);
753 setLoadExtAction(ISD::ZEXTLOAD, InnerVT, VT, Expand);
754
755 // N.b. ISD::EXTLOAD legality is basically ignored except for i1-like
756 // types, we have to deal with them whether we ask for Expansion or not.
757 // Setting Expand causes its own optimisation problems though, so leave
758 // them legal.
759 if (VT.getVectorElementType() == MVT::i1)
760 setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
761
762 // EXTLOAD for MVT::f16 vectors is not legal because f16 vectors are
763 // split/scalarized right now.
764 if (VT.getVectorElementType() == MVT::f16)
765 setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
766 }
767 }
768
769 // FIXME: In order to prevent SSE instructions being expanded to MMX ones
770 // with -msoft-float, disable use of MMX as well.
771 if (!Subtarget.useSoftFloat() && Subtarget.hasMMX()) {
772 addRegisterClass(MVT::x86mmx, &X86::VR64RegClass);
773 // No operations on x86mmx supported, everything uses intrinsics.
774 }
775
776 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE1()) {
777 addRegisterClass(MVT::v4f32, Subtarget.hasVLX() ? &X86::VR128XRegClass
778 : &X86::VR128RegClass);
779
780 setOperationAction(ISD::FNEG, MVT::v4f32, Custom);
781 setOperationAction(ISD::FABS, MVT::v4f32, Custom);
782 setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Custom);
783 setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
784 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
785 setOperationAction(ISD::VSELECT, MVT::v4f32, Custom);
786 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
787 setOperationAction(ISD::SELECT, MVT::v4f32, Custom);
788 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom);
789 }
790
791 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) {
792 addRegisterClass(MVT::v2f64, Subtarget.hasVLX() ? &X86::VR128XRegClass
793 : &X86::VR128RegClass);
794
795 // FIXME: Unfortunately, -soft-float and -no-implicit-float mean XMM
796 // registers cannot be used even for integer operations.
797 addRegisterClass(MVT::v16i8, Subtarget.hasVLX() ? &X86::VR128XRegClass
798 : &X86::VR128RegClass);
799 addRegisterClass(MVT::v8i16, Subtarget.hasVLX() ? &X86::VR128XRegClass
800 : &X86::VR128RegClass);
801 addRegisterClass(MVT::v4i32, Subtarget.hasVLX() ? &X86::VR128XRegClass
802 : &X86::VR128RegClass);
803 addRegisterClass(MVT::v2i64, Subtarget.hasVLX() ? &X86::VR128XRegClass
804 : &X86::VR128RegClass);
805
806 for (auto VT : { MVT::v2i8, MVT::v4i8, MVT::v8i8,
807 MVT::v2i16, MVT::v4i16, MVT::v2i32 }) {
808 setOperationAction(ISD::SDIV, VT, Custom);
809 setOperationAction(ISD::SREM, VT, Custom);
810 setOperationAction(ISD::UDIV, VT, Custom);
811 setOperationAction(ISD::UREM, VT, Custom);
812 }
813
814 setOperationAction(ISD::MUL, MVT::v2i8, Custom);
815 setOperationAction(ISD::MUL, MVT::v2i16, Custom);
816 setOperationAction(ISD::MUL, MVT::v2i32, Custom);
817 setOperationAction(ISD::MUL, MVT::v4i8, Custom);
818 setOperationAction(ISD::MUL, MVT::v4i16, Custom);
819 setOperationAction(ISD::MUL, MVT::v8i8, Custom);
820
821 setOperationAction(ISD::MUL, MVT::v16i8, Custom);
822 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
823 setOperationAction(ISD::MUL, MVT::v2i64, Custom);
824 setOperationAction(ISD::MULHU, MVT::v4i32, Custom);
825 setOperationAction(ISD::MULHS, MVT::v4i32, Custom);
826 setOperationAction(ISD::MULHU, MVT::v16i8, Custom);
827 setOperationAction(ISD::MULHS, MVT::v16i8, Custom);
828 setOperationAction(ISD::MULHU, MVT::v8i16, Legal);
829 setOperationAction(ISD::MULHS, MVT::v8i16, Legal);
830 setOperationAction(ISD::MUL, MVT::v8i16, Legal);
831 setOperationAction(ISD::FNEG, MVT::v2f64, Custom);
832 setOperationAction(ISD::FABS, MVT::v2f64, Custom);
833 setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Custom);
834
835 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
836 setOperationAction(ISD::SMAX, VT, VT == MVT::v8i16 ? Legal : Custom);
837 setOperationAction(ISD::SMIN, VT, VT == MVT::v8i16 ? Legal : Custom);
838 setOperationAction(ISD::UMAX, VT, VT == MVT::v16i8 ? Legal : Custom);
839 setOperationAction(ISD::UMIN, VT, VT == MVT::v16i8 ? Legal : Custom);
840 }
841
842 setOperationAction(ISD::UADDSAT, MVT::v16i8, Legal);
843 setOperationAction(ISD::SADDSAT, MVT::v16i8, Legal);
844 setOperationAction(ISD::USUBSAT, MVT::v16i8, Legal);
845 setOperationAction(ISD::SSUBSAT, MVT::v16i8, Legal);
846 setOperationAction(ISD::UADDSAT, MVT::v8i16, Legal);
847 setOperationAction(ISD::SADDSAT, MVT::v8i16, Legal);
848 setOperationAction(ISD::USUBSAT, MVT::v8i16, Legal);
849 setOperationAction(ISD::SSUBSAT, MVT::v8i16, Legal);
850 setOperationAction(ISD::UADDSAT, MVT::v4i32, Custom);
851 setOperationAction(ISD::USUBSAT, MVT::v4i32, Custom);
852 setOperationAction(ISD::UADDSAT, MVT::v2i64, Custom);
853 setOperationAction(ISD::USUBSAT, MVT::v2i64, Custom);
854
855 if (!ExperimentalVectorWideningLegalization) {
856 // Use widening instead of promotion.
857 for (auto VT : { MVT::v8i8, MVT::v4i8, MVT::v2i8,
858 MVT::v4i16, MVT::v2i16 }) {
859 setOperationAction(ISD::UADDSAT, VT, Custom);
860 setOperationAction(ISD::SADDSAT, VT, Custom);
861 setOperationAction(ISD::USUBSAT, VT, Custom);
862 setOperationAction(ISD::SSUBSAT, VT, Custom);
863 }
864 }
865
866 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
867 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
868 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
869
870 // Provide custom widening for v2f32 setcc. This is really for VLX when
871 // setcc result type returns v2i1/v4i1 vector for v2f32/v4f32 leading to
872 // type legalization changing the result type to v4i1 during widening.
873 // It works fine for SSE2 and is probably faster so no need to qualify with
874 // VLX support.
875 setOperationAction(ISD::SETCC, MVT::v2i32, Custom);
876
877 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
878 setOperationAction(ISD::SETCC, VT, Custom);
879 setOperationAction(ISD::CTPOP, VT, Custom);
880 setOperationAction(ISD::ABS, VT, Custom);
881
882 // The condition codes aren't legal in SSE/AVX and under AVX512 we use
883 // setcc all the way to isel and prefer SETGT in some isel patterns.
884 setCondCodeAction(ISD::SETLT, VT, Custom);
885 setCondCodeAction(ISD::SETLE, VT, Custom);
886 }
887
888 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
889 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
890 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
891 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
892 setOperationAction(ISD::VSELECT, VT, Custom);
893 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
894 }
895
896 // We support custom legalizing of sext and anyext loads for specific
897 // memory vector types which we can load as a scalar (or sequence of
898 // scalars) and extend in-register to a legal 128-bit vector type. For sext
899 // loads these must work with a single scalar load.
900 for (MVT VT : MVT::integer_vector_valuetypes()) {
901 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i8, Custom);
902 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i16, Custom);
903 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i32, Custom);
904 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i8, Custom);
905 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i16, Custom);
906 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v8i8, Custom);
907 }
908
909 for (auto VT : { MVT::v2f64, MVT::v2i64 }) {
910 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
911 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
912 setOperationAction(ISD::VSELECT, VT, Custom);
913
914 if (VT == MVT::v2i64 && !Subtarget.is64Bit())
915 continue;
916
917 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
918 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
919 }
920
921 // Custom lower selects for v2f64 and the 128-bit integer vector types.
922 setOperationAction(ISD::SELECT, MVT::v2f64, Custom);
923 setOperationAction(ISD::SELECT, MVT::v2i64, Custom);
924 setOperationAction(ISD::SELECT, MVT::v4i32, Custom);
925 setOperationAction(ISD::SELECT, MVT::v8i16, Custom);
926 setOperationAction(ISD::SELECT, MVT::v16i8, Custom);
927
928 setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
929 setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Custom);
930 setOperationAction(ISD::FP_TO_SINT, MVT::v2i16, Custom);
931
932 // Custom legalize these to avoid over promotion or custom promotion.
933 setOperationAction(ISD::FP_TO_SINT, MVT::v2i8, Custom);
934 setOperationAction(ISD::FP_TO_SINT, MVT::v4i8, Custom);
935 setOperationAction(ISD::FP_TO_SINT, MVT::v8i8, Custom);
936 setOperationAction(ISD::FP_TO_SINT, MVT::v2i16, Custom);
937 setOperationAction(ISD::FP_TO_SINT, MVT::v4i16, Custom);
938 setOperationAction(ISD::FP_TO_UINT, MVT::v2i8, Custom);
939 setOperationAction(ISD::FP_TO_UINT, MVT::v4i8, Custom);
940 setOperationAction(ISD::FP_TO_UINT, MVT::v8i8, Custom);
941 setOperationAction(ISD::FP_TO_UINT, MVT::v2i16, Custom);
942 setOperationAction(ISD::FP_TO_UINT, MVT::v4i16, Custom);
943
944 // Marking FP_TO_SINT v8i16 as Custom will trick type legalization into
945 // promoting v8i8 FP_TO_UINT into FP_TO_SINT. When the v8i16 FP_TO_SINT is
946 // split again based on the input type, this will cause an AssertSExt i16 to
947 // be emitted instead of an AssertZExt. This will allow packssdw followed by
948 // packuswb to be used to truncate to v8i8. This is necessary since packusdw
949 // isn't available until sse4.1.
950 setOperationAction(ISD::FP_TO_SINT, MVT::v8i16, Custom);
951
952 setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
953 setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
954
955 setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom);
956
957 // Fast v2f32 UINT_TO_FP( v2i32 ) custom conversion.
958 setOperationAction(ISD::UINT_TO_FP, MVT::v2f32, Custom);
959
960 setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
961 setOperationAction(ISD::FP_ROUND, MVT::v2f32, Custom);
962
963 for (MVT VT : MVT::fp_vector_valuetypes())
964 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2f32, Legal);
965
966 // We want to legalize this to an f64 load rather than an i64 load on
967 // 64-bit targets and two 32-bit loads on a 32-bit target. Similar for
968 // store.
969 setOperationAction(ISD::LOAD, MVT::v2f32, Custom);
970 setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
971 setOperationAction(ISD::LOAD, MVT::v4i16, Custom);
972 setOperationAction(ISD::LOAD, MVT::v8i8, Custom);
973 setOperationAction(ISD::STORE, MVT::v2f32, Custom);
974 setOperationAction(ISD::STORE, MVT::v2i32, Custom);
975 setOperationAction(ISD::STORE, MVT::v4i16, Custom);
976 setOperationAction(ISD::STORE, MVT::v8i8, Custom);
977
978 setOperationAction(ISD::BITCAST, MVT::v2i32, Custom);
979 setOperationAction(ISD::BITCAST, MVT::v4i16, Custom);
980 setOperationAction(ISD::BITCAST, MVT::v8i8, Custom);
981 if (!Subtarget.hasAVX512())
982 setOperationAction(ISD::BITCAST, MVT::v16i1, Custom);
983
984 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v2i64, Custom);
985 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i32, Custom);
986 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i16, Custom);
987
988 if (ExperimentalVectorWideningLegalization) {
989 setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom);
990
991 setOperationAction(ISD::TRUNCATE, MVT::v2i8, Custom);
992 setOperationAction(ISD::TRUNCATE, MVT::v2i16, Custom);
993 setOperationAction(ISD::TRUNCATE, MVT::v2i32, Custom);
994 setOperationAction(ISD::TRUNCATE, MVT::v4i8, Custom);
995 setOperationAction(ISD::TRUNCATE, MVT::v4i16, Custom);
996 setOperationAction(ISD::TRUNCATE, MVT::v8i8, Custom);
997 } else {
998 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i64, Custom);
999 }
1000
1001 // In the customized shift lowering, the legal v4i32/v2i64 cases
1002 // in AVX2 will be recognized.
1003 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1004 setOperationAction(ISD::SRL, VT, Custom);
1005 setOperationAction(ISD::SHL, VT, Custom);
1006 setOperationAction(ISD::SRA, VT, Custom);
1007 }
1008
1009 setOperationAction(ISD::ROTL, MVT::v4i32, Custom);
1010 setOperationAction(ISD::ROTL, MVT::v8i16, Custom);
1011
1012 // With AVX512, expanding (and promoting the shifts) is better.
1013 if (!Subtarget.hasAVX512())
1014 setOperationAction(ISD::ROTL, MVT::v16i8, Custom);
1015 }
1016
1017 if (!Subtarget.useSoftFloat() && Subtarget.hasSSSE3()) {
1018 setOperationAction(ISD::ABS, MVT::v16i8, Legal);
1019 setOperationAction(ISD::ABS, MVT::v8i16, Legal);
1020 setOperationAction(ISD::ABS, MVT::v4i32, Legal);
1021 setOperationAction(ISD::BITREVERSE, MVT::v16i8, Custom);
1022 setOperationAction(ISD::CTLZ, MVT::v16i8, Custom);
1023 setOperationAction(ISD::CTLZ, MVT::v8i16, Custom);
1024 setOperationAction(ISD::CTLZ, MVT::v4i32, Custom);
1025 setOperationAction(ISD::CTLZ, MVT::v2i64, Custom);
1026
1027 // These might be better off as horizontal vector ops.
1028 setOperationAction(ISD::ADD, MVT::i16, Custom);
1029 setOperationAction(ISD::ADD, MVT::i32, Custom);
1030 setOperationAction(ISD::SUB, MVT::i16, Custom);
1031 setOperationAction(ISD::SUB, MVT::i32, Custom);
1032 }
1033
1034 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE41()) {
1035 for (MVT RoundedTy : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64}) {
1036 setOperationAction(ISD::FFLOOR, RoundedTy, Legal);
1037 setOperationAction(ISD::FCEIL, RoundedTy, Legal);
1038 setOperationAction(ISD::FTRUNC, RoundedTy, Legal);
1039 setOperationAction(ISD::FRINT, RoundedTy, Legal);
1040 setOperationAction(ISD::FNEARBYINT, RoundedTy, Legal);
1041 }
1042
1043 setOperationAction(ISD::SMAX, MVT::v16i8, Legal);
1044 setOperationAction(ISD::SMAX, MVT::v4i32, Legal);
1045 setOperationAction(ISD::UMAX, MVT::v8i16, Legal);
1046 setOperationAction(ISD::UMAX, MVT::v4i32, Legal);
1047 setOperationAction(ISD::SMIN, MVT::v16i8, Legal);
1048 setOperationAction(ISD::SMIN, MVT::v4i32, Legal);
1049 setOperationAction(ISD::UMIN, MVT::v8i16, Legal);
1050 setOperationAction(ISD::UMIN, MVT::v4i32, Legal);
1051
1052 // FIXME: Do we need to handle scalar-to-vector here?
1053 setOperationAction(ISD::MUL, MVT::v4i32, Legal);
1054
1055 // We directly match byte blends in the backend as they match the VSELECT
1056 // condition form.
1057 setOperationAction(ISD::VSELECT, MVT::v16i8, Legal);
1058
1059 // SSE41 brings specific instructions for doing vector sign extend even in
1060 // cases where we don't have SRA.
1061 for (auto VT : { MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1062 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Legal);
1063 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Legal);
1064 }
1065
1066 if (!ExperimentalVectorWideningLegalization) {
1067 // Avoid narrow result types when widening. The legal types are listed
1068 // in the next loop.
1069 for (MVT VT : MVT::integer_vector_valuetypes()) {
1070 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i8, Custom);
1071 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i16, Custom);
1072 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i32, Custom);
1073 }
1074 }
1075
1076 // SSE41 also has vector sign/zero extending loads, PMOV[SZ]X
1077 for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) {
1078 setLoadExtAction(LoadExtOp, MVT::v8i16, MVT::v8i8, Legal);
1079 setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i8, Legal);
1080 if (!ExperimentalVectorWideningLegalization)
1081 setLoadExtAction(LoadExtOp, MVT::v2i32, MVT::v2i8, Legal);
1082 setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i8, Legal);
1083 setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i16, Legal);
1084 setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i16, Legal);
1085 setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i32, Legal);
1086 }
1087
1088 // i8 vectors are custom because the source register and the source
1089 // memory operand types are not the same width.
1090 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
1091 }
1092
1093 if (!Subtarget.useSoftFloat() && Subtarget.hasXOP()) {
1094 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
1095 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 })
1096 setOperationAction(ISD::ROTL, VT, Custom);
1097
1098 // XOP can efficiently perform BITREVERSE with VPPERM.
1099 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 })
1100 setOperationAction(ISD::BITREVERSE, VT, Custom);
1101
1102 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
1103 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 })
1104 setOperationAction(ISD::BITREVERSE, VT, Custom);
1105 }
1106
1107 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX()) {
1108 bool HasInt256 = Subtarget.hasInt256();
1109
1110 addRegisterClass(MVT::v32i8, Subtarget.hasVLX() ? &X86::VR256XRegClass
1111 : &X86::VR256RegClass);
1112 addRegisterClass(MVT::v16i16, Subtarget.hasVLX() ? &X86::VR256XRegClass
1113 : &X86::VR256RegClass);
1114 addRegisterClass(MVT::v8i32, Subtarget.hasVLX() ? &X86::VR256XRegClass
1115 : &X86::VR256RegClass);
1116 addRegisterClass(MVT::v8f32, Subtarget.hasVLX() ? &X86::VR256XRegClass
1117 : &X86::VR256RegClass);
1118 addRegisterClass(MVT::v4i64, Subtarget.hasVLX() ? &X86::VR256XRegClass
1119 : &X86::VR256RegClass);
1120 addRegisterClass(MVT::v4f64, Subtarget.hasVLX() ? &X86::VR256XRegClass
1121 : &X86::VR256RegClass);
1122
1123 for (auto VT : { MVT::v8f32, MVT::v4f64 }) {
1124 setOperationAction(ISD::FFLOOR, VT, Legal);
1125 setOperationAction(ISD::FCEIL, VT, Legal);
1126 setOperationAction(ISD::FTRUNC, VT, Legal);
1127 setOperationAction(ISD::FRINT, VT, Legal);
1128 setOperationAction(ISD::FNEARBYINT, VT, Legal);
1129 setOperationAction(ISD::FNEG, VT, Custom);
1130 setOperationAction(ISD::FABS, VT, Custom);
1131 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
1132 }
1133
1134 // (fp_to_int:v8i16 (v8f32 ..)) requires the result type to be promoted
1135 // even though v8i16 is a legal type.
1136 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i16, MVT::v8i32);
1137 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i16, MVT::v8i32);
1138 setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal);
1139
1140 setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
1141 setOperationAction(ISD::FP_ROUND, MVT::v4f32, Legal);
1142
1143 if (!Subtarget.hasAVX512())
1144 setOperationAction(ISD::BITCAST, MVT::v32i1, Custom);
1145
1146 for (MVT VT : MVT::fp_vector_valuetypes())
1147 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4f32, Legal);
1148
1149 // In the customized shift lowering, the legal v8i32/v4i64 cases
1150 // in AVX2 will be recognized.
1151 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1152 setOperationAction(ISD::SRL, VT, Custom);
1153 setOperationAction(ISD::SHL, VT, Custom);
1154 setOperationAction(ISD::SRA, VT, Custom);
1155 }
1156
1157 // These types need custom splitting if their input is a 128-bit vector.
1158 setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
1159 setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
1160 setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom);
1161 setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
1162
1163 setOperationAction(ISD::ROTL, MVT::v8i32, Custom);
1164 setOperationAction(ISD::ROTL, MVT::v16i16, Custom);
1165
1166 // With BWI, expanding (and promoting the shifts) is better.
1167 if (!Subtarget.hasBWI())
1168 setOperationAction(ISD::ROTL, MVT::v32i8, Custom);
1169
1170 setOperationAction(ISD::SELECT, MVT::v4f64, Custom);
1171 setOperationAction(ISD::SELECT, MVT::v4i64, Custom);
1172 setOperationAction(ISD::SELECT, MVT::v8i32, Custom);
1173 setOperationAction(ISD::SELECT, MVT::v16i16, Custom);
1174 setOperationAction(ISD::SELECT, MVT::v32i8, Custom);
1175 setOperationAction(ISD::SELECT, MVT::v8f32, Custom);
1176
1177 for (auto VT : { MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1178 setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
1179 setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
1180 setOperationAction(ISD::ANY_EXTEND, VT, Custom);
1181 }
1182
1183 setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom);
1184 setOperationAction(ISD::TRUNCATE, MVT::v8i16, Custom);
1185 setOperationAction(ISD::TRUNCATE, MVT::v4i32, Custom);
1186 setOperationAction(ISD::BITREVERSE, MVT::v32i8, Custom);
1187
1188 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1189 setOperationAction(ISD::SETCC, VT, Custom);
1190 setOperationAction(ISD::CTPOP, VT, Custom);
1191 setOperationAction(ISD::CTLZ, VT, Custom);
1192
1193 // The condition codes aren't legal in SSE/AVX and under AVX512 we use
1194 // setcc all the way to isel and prefer SETGT in some isel patterns.
1195 setCondCodeAction(ISD::SETLT, VT, Custom);
1196 setCondCodeAction(ISD::SETLE, VT, Custom);
1197 }
1198
1199 if (Subtarget.hasAnyFMA()) {
1200 for (auto VT : { MVT::f32, MVT::f64, MVT::v4f32, MVT::v8f32,
1201 MVT::v2f64, MVT::v4f64 })
1202 setOperationAction(ISD::FMA, VT, Legal);
1203 }
1204
1205 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1206 setOperationAction(ISD::ADD, VT, HasInt256 ? Legal : Custom);
1207 setOperationAction(ISD::SUB, VT, HasInt256 ? Legal : Custom);
1208 }
1209
1210 setOperationAction(ISD::MUL, MVT::v4i64, Custom);
1211 setOperationAction(ISD::MUL, MVT::v8i32, HasInt256 ? Legal : Custom);
1212 setOperationAction(ISD::MUL, MVT::v16i16, HasInt256 ? Legal : Custom);
1213 setOperationAction(ISD::MUL, MVT::v32i8, Custom);
1214
1215 setOperationAction(ISD::MULHU, MVT::v8i32, Custom);
1216 setOperationAction(ISD::MULHS, MVT::v8i32, Custom);
1217 setOperationAction(ISD::MULHU, MVT::v16i16, HasInt256 ? Legal : Custom);
1218 setOperationAction(ISD::MULHS, MVT::v16i16, HasInt256 ? Legal : Custom);
1219 setOperationAction(ISD::MULHU, MVT::v32i8, Custom);
1220 setOperationAction(ISD::MULHS, MVT::v32i8, Custom);
1221
1222 setOperationAction(ISD::ABS, MVT::v4i64, Custom);
1223 setOperationAction(ISD::SMAX, MVT::v4i64, Custom);
1224 setOperationAction(ISD::UMAX, MVT::v4i64, Custom);
1225 setOperationAction(ISD::SMIN, MVT::v4i64, Custom);
1226 setOperationAction(ISD::UMIN, MVT::v4i64, Custom);
1227
1228 setOperationAction(ISD::UADDSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
1229 setOperationAction(ISD::SADDSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
1230 setOperationAction(ISD::USUBSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
1231 setOperationAction(ISD::SSUBSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
1232 setOperationAction(ISD::UADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
1233 setOperationAction(ISD::SADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
1234 setOperationAction(ISD::USUBSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
1235 setOperationAction(ISD::SSUBSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
1236
1237 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32 }) {
1238 setOperationAction(ISD::ABS, VT, HasInt256 ? Legal : Custom);
1239 setOperationAction(ISD::SMAX, VT, HasInt256 ? Legal : Custom);
1240 setOperationAction(ISD::UMAX, VT, HasInt256 ? Legal : Custom);
1241 setOperationAction(ISD::SMIN, VT, HasInt256 ? Legal : Custom);
1242 setOperationAction(ISD::UMIN, VT, HasInt256 ? Legal : Custom);
1243 }
1244
1245 for (auto VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64}) {
1246 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom);
1247 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom);
1248 }
1249
1250 if (HasInt256) {
1251 // The custom lowering for UINT_TO_FP for v8i32 becomes interesting
1252 // when we have a 256bit-wide blend with immediate.
1253 setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Custom);
1254
1255 // AVX2 also has wider vector sign/zero extending loads, VPMOV[SZ]X
1256 for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) {
1257 setLoadExtAction(LoadExtOp, MVT::v16i16, MVT::v16i8, Legal);
1258 setLoadExtAction(LoadExtOp, MVT::v8i32, MVT::v8i8, Legal);
1259 setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i8, Legal);
1260 setLoadExtAction(LoadExtOp, MVT::v8i32, MVT::v8i16, Legal);
1261 setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i16, Legal);
1262 setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i32, Legal);
1263 }
1264 }
1265
1266 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1267 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 }) {
1268 setOperationAction(ISD::MLOAD, VT, Legal);
1269 setOperationAction(ISD::MSTORE, VT, Legal);
1270 }
1271
1272 // Extract subvector is special because the value type
1273 // (result) is 128-bit but the source is 256-bit wide.
1274 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
1275 MVT::v4f32, MVT::v2f64 }) {
1276 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
1277 }
1278
1279 // Custom lower several nodes for 256-bit types.
1280 for (MVT VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,
1281 MVT::v8f32, MVT::v4f64 }) {
1282 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1283 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1284 setOperationAction(ISD::VSELECT, VT, Custom);
1285 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1286 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1287 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
1288 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal);
1289 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1290 }
1291
1292 if (HasInt256)
1293 setOperationAction(ISD::VSELECT, MVT::v32i8, Legal);
1294
1295 if (HasInt256) {
1296 // Custom legalize 2x32 to get a little better code.
1297 setOperationAction(ISD::MGATHER, MVT::v2f32, Custom);
1298 setOperationAction(ISD::MGATHER, MVT::v2i32, Custom);
1299
1300 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1301 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 })
1302 setOperationAction(ISD::MGATHER, VT, Custom);
1303 }
1304 }
1305
1306 // This block controls legalization of the mask vector sizes that are
1307 // available with AVX512. 512-bit vectors are in a separate block controlled
1308 // by useAVX512Regs.
1309 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
1310 addRegisterClass(MVT::v1i1, &X86::VK1RegClass);
1311 addRegisterClass(MVT::v2i1, &X86::VK2RegClass);
1312 addRegisterClass(MVT::v4i1, &X86::VK4RegClass);
1313 addRegisterClass(MVT::v8i1, &X86::VK8RegClass);
1314 addRegisterClass(MVT::v16i1, &X86::VK16RegClass);
1315
1316 setOperationAction(ISD::SELECT, MVT::v1i1, Custom);
1317 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v1i1, Custom);
1318 setOperationAction(ISD::BUILD_VECTOR, MVT::v1i1, Custom);
1319
1320 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i1, MVT::v8i32);
1321 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i1, MVT::v8i32);
1322 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v4i1, MVT::v4i32);
1323 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v4i1, MVT::v4i32);
1324 setOperationAction(ISD::FP_TO_SINT, MVT::v2i1, Custom);
1325 setOperationAction(ISD::FP_TO_UINT, MVT::v2i1, Custom);
1326
1327 // There is no byte sized k-register load or store without AVX512DQ.
1328 if (!Subtarget.hasDQI()) {
1329 setOperationAction(ISD::LOAD, MVT::v1i1, Custom);
1330 setOperationAction(ISD::LOAD, MVT::v2i1, Custom);
1331 setOperationAction(ISD::LOAD, MVT::v4i1, Custom);
1332 setOperationAction(ISD::LOAD, MVT::v8i1, Custom);
1333
1334 setOperationAction(ISD::STORE, MVT::v1i1, Custom);
1335 setOperationAction(ISD::STORE, MVT::v2i1, Custom);
1336 setOperationAction(ISD::STORE, MVT::v4i1, Custom);
1337 setOperationAction(ISD::STORE, MVT::v8i1, Custom);
1338 }
1339
1340 // Extends of v16i1/v8i1/v4i1/v2i1 to 128-bit vectors.
1341 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1342 setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
1343 setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
1344 setOperationAction(ISD::ANY_EXTEND, VT, Custom);
1345 }
1346
1347 for (auto VT : { MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v16i1 }) {
1348 setOperationAction(ISD::ADD, VT, Custom);
1349 setOperationAction(ISD::SUB, VT, Custom);
1350 setOperationAction(ISD::MUL, VT, Custom);
1351 setOperationAction(ISD::SETCC, VT, Custom);
1352 setOperationAction(ISD::SELECT, VT, Custom);
1353 setOperationAction(ISD::TRUNCATE, VT, Custom);
1354 setOperationAction(ISD::UADDSAT, VT, Custom);
1355 setOperationAction(ISD::SADDSAT, VT, Custom);
1356 setOperationAction(ISD::USUBSAT, VT, Custom);
1357 setOperationAction(ISD::SSUBSAT, VT, Custom);
1358
1359 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1360 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1361 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1362 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1363 setOperationAction(ISD::VSELECT, VT, Expand);
1364 }
1365
1366 setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i1, Custom);
1367 setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i1, Custom);
1368 setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i1, Custom);
1369 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v2i1, Custom);
1370 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v4i1, Custom);
1371 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v8i1, Custom);
1372 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v16i1, Custom);
1373 for (auto VT : { MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1 })
1374 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
1375 }
1376
1377 // This block controls legalization for 512-bit operations with 32/64 bit
1378 // elements. 512-bits can be disabled based on prefer-vector-width and
1379 // required-vector-width function attributes.
1380 if (!Subtarget.useSoftFloat() && Subtarget.useAVX512Regs()) {
1381 addRegisterClass(MVT::v16i32, &X86::VR512RegClass);
1382 addRegisterClass(MVT::v16f32, &X86::VR512RegClass);
1383 addRegisterClass(MVT::v8i64, &X86::VR512RegClass);
1384 addRegisterClass(MVT::v8f64, &X86::VR512RegClass);
1385
1386 for (MVT VT : MVT::fp_vector_valuetypes())
1387 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v8f32, Legal);
1388
1389 for (auto ExtType : {ISD::ZEXTLOAD, ISD::SEXTLOAD}) {
1390 setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i8, Legal);
1391 setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i16, Legal);
1392 setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i8, Legal);
1393 setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i16, Legal);
1394 setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i32, Legal);
1395 }
1396
1397 for (MVT VT : { MVT::v16f32, MVT::v8f64 }) {
1398 setOperationAction(ISD::FNEG, VT, Custom);
1399 setOperationAction(ISD::FABS, VT, Custom);
1400 setOperationAction(ISD::FMA, VT, Legal);
1401 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
1402 }
1403
1404 setOperationAction(ISD::FP_TO_SINT, MVT::v16i32, Legal);
1405 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v16i16, MVT::v16i32);
1406 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v16i8, MVT::v16i32);
1407 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v16i1, MVT::v16i32);
1408 setOperationAction(ISD::FP_TO_UINT, MVT::v16i32, Legal);
1409 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v16i1, MVT::v16i32);
1410 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v16i8, MVT::v16i32);
1411 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v16i16, MVT::v16i32);
1412 setOperationAction(ISD::SINT_TO_FP, MVT::v16i32, Legal);
1413 setOperationAction(ISD::UINT_TO_FP, MVT::v16i32, Legal);
1414
1415 setTruncStoreAction(MVT::v8i64, MVT::v8i8, Legal);
1416 setTruncStoreAction(MVT::v8i64, MVT::v8i16, Legal);
1417 setTruncStoreAction(MVT::v8i64, MVT::v8i32, Legal);
1418 setTruncStoreAction(MVT::v16i32, MVT::v16i8, Legal);
1419 setTruncStoreAction(MVT::v16i32, MVT::v16i16, Legal);
1420
1421 if (!Subtarget.hasVLX()) {
1422 // With 512-bit vectors and no VLX, we prefer to widen MLOAD/MSTORE
1423 // to 512-bit rather than use the AVX2 instructions so that we can use
1424 // k-masks.
1425 for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1426 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64}) {
1427 setOperationAction(ISD::MLOAD, VT, Custom);
1428 setOperationAction(ISD::MSTORE, VT, Custom);
1429 }
1430 }
1431
1432 setOperationAction(ISD::TRUNCATE, MVT::v8i32, Custom);
1433 setOperationAction(ISD::TRUNCATE, MVT::v16i16, Custom);
1434 setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
1435 setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom);
1436 setOperationAction(ISD::ANY_EXTEND, MVT::v16i32, Custom);
1437 setOperationAction(ISD::ANY_EXTEND, MVT::v8i64, Custom);
1438 setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
1439 setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
1440
1441 if (ExperimentalVectorWideningLegalization) {
1442 // Need to custom widen this if we don't have AVX512BW.
1443 setOperationAction(ISD::ANY_EXTEND, MVT::v8i8, Custom);
1444 setOperationAction(ISD::ZERO_EXTEND, MVT::v8i8, Custom);
1445 setOperationAction(ISD::SIGN_EXTEND, MVT::v8i8, Custom);
1446 }
1447
1448 for (auto VT : { MVT::v16f32, MVT::v8f64 }) {
1449 setOperationAction(ISD::FFLOOR, VT, Legal);
1450 setOperationAction(ISD::FCEIL, VT, Legal);
1451 setOperationAction(ISD::FTRUNC, VT, Legal);
1452 setOperationAction(ISD::FRINT, VT, Legal);
1453 setOperationAction(ISD::FNEARBYINT, VT, Legal);
1454 }
1455
1456 // Without BWI we need to use custom lowering to handle MVT::v64i8 input.
1457 for (auto VT : {MVT::v16i32, MVT::v8i64, MVT::v64i8}) {
1458 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom);
1459 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom);
1460 }
1461
1462 setOperationAction(ISD::CONCAT_VECTORS, MVT::v8f64, Custom);
1463 setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i64, Custom);
1464 setOperationAction(ISD::CONCAT_VECTORS, MVT::v16f32, Custom);
1465 setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i32, Custom);
1466
1467 setOperationAction(ISD::MUL, MVT::v8i64, Custom);
1468 setOperationAction(ISD::MUL, MVT::v16i32, Legal);
1469
1470 setOperationAction(ISD::MULHU, MVT::v16i32, Custom);
1471 setOperationAction(ISD::MULHS, MVT::v16i32, Custom);
1472
1473 setOperationAction(ISD::SELECT, MVT::v8f64, Custom);
1474 setOperationAction(ISD::SELECT, MVT::v8i64, Custom);
1475 setOperationAction(ISD::SELECT, MVT::v16i32, Custom);
1476 setOperationAction(ISD::SELECT, MVT::v32i16, Custom);
1477 setOperationAction(ISD::SELECT, MVT::v64i8, Custom);
1478 setOperationAction(ISD::SELECT, MVT::v16f32, Custom);
1479
1480 for (auto VT : { MVT::v16i32, MVT::v8i64 }) {
1481 setOperationAction(ISD::SMAX, VT, Legal);
1482 setOperationAction(ISD::UMAX, VT, Legal);
1483 setOperationAction(ISD::SMIN, VT, Legal);
1484 setOperationAction(ISD::UMIN, VT, Legal);
1485 setOperationAction(ISD::ABS, VT, Legal);
1486 setOperationAction(ISD::SRL, VT, Custom);
1487 setOperationAction(ISD::SHL, VT, Custom);
1488 setOperationAction(ISD::SRA, VT, Custom);
1489 setOperationAction(ISD::CTPOP, VT, Custom);
1490 setOperationAction(ISD::ROTL, VT, Custom);
1491 setOperationAction(ISD::ROTR, VT, Custom);
1492 setOperationAction(ISD::SETCC, VT, Custom);
1493
1494 // The condition codes aren't legal in SSE/AVX and under AVX512 we use
1495 // setcc all the way to isel and prefer SETGT in some isel patterns.
1496 setCondCodeAction(ISD::SETLT, VT, Custom);
1497 setCondCodeAction(ISD::SETLE, VT, Custom);
1498 }
1499
1500 if (Subtarget.hasDQI()) {
1501 setOperationAction(ISD::SINT_TO_FP, MVT::v8i64, Legal);
1502 setOperationAction(ISD::UINT_TO_FP, MVT::v8i64, Legal);
1503 setOperationAction(ISD::FP_TO_SINT, MVT::v8i64, Legal);
1504 setOperationAction(ISD::FP_TO_UINT, MVT::v8i64, Legal);
1505
1506 setOperationAction(ISD::MUL, MVT::v8i64, Legal);
1507 }
1508
1509 if (Subtarget.hasCDI()) {
1510 // NonVLX sub-targets extend 128/256 vectors to use the 512 version.
1511 for (auto VT : { MVT::v16i32, MVT::v8i64} ) {
1512 setOperationAction(ISD::CTLZ, VT, Legal);
1513 }
1514 } // Subtarget.hasCDI()
1515
1516 if (Subtarget.hasVPOPCNTDQ()) {
1517 for (auto VT : { MVT::v16i32, MVT::v8i64 })
1518 setOperationAction(ISD::CTPOP, VT, Legal);
1519 }
1520
1521 // Extract subvector is special because the value type
1522 // (result) is 256-bit but the source is 512-bit wide.
1523 // 128-bit was made Legal under AVX1.
1524 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,
1525 MVT::v8f32, MVT::v4f64 })
1526 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
1527
1528 for (auto VT : { MVT::v16i32, MVT::v8i64, MVT::v16f32, MVT::v8f64 }) {
1529 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1530 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1531 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1532 setOperationAction(ISD::VSELECT, VT, Custom);
1533 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1534 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
1535 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal);
1536 setOperationAction(ISD::MLOAD, VT, Legal);
1537 setOperationAction(ISD::MSTORE, VT, Legal);
1538 setOperationAction(ISD::MGATHER, VT, Custom);
1539 setOperationAction(ISD::MSCATTER, VT, Custom);
1540 }
1541 // Need to custom split v32i16/v64i8 bitcasts.
1542 if (!Subtarget.hasBWI()) {
1543 setOperationAction(ISD::BITCAST, MVT::v32i16, Custom);
1544 setOperationAction(ISD::BITCAST, MVT::v64i8, Custom);
1545 }
1546
1547 if (Subtarget.hasVBMI2()) {
1548 for (auto VT : { MVT::v16i32, MVT::v8i64 }) {
1549 setOperationAction(ISD::FSHL, VT, Custom);
1550 setOperationAction(ISD::FSHR, VT, Custom);
1551 }
1552 }
1553 }// has AVX-512
1554
1555 // This block controls legalization for operations that don't have
1556 // pre-AVX512 equivalents. Without VLX we use 512-bit operations for
1557 // narrower widths.
1558 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
1559 // These operations are handled on non-VLX by artificially widening in
1560 // isel patterns.
1561 // TODO: Custom widen in lowering on non-VLX and drop the isel patterns?
1562
1563 setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Legal);
1564 setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
1565 setOperationAction(ISD::FP_TO_UINT, MVT::v2i32, Custom);
1566 setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Legal);
1567 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
1568
1569 for (auto VT : { MVT::v2i64, MVT::v4i64 }) {
1570 setOperationAction(ISD::SMAX, VT, Legal);
1571 setOperationAction(ISD::UMAX, VT, Legal);
1572 setOperationAction(ISD::SMIN, VT, Legal);
1573 setOperationAction(ISD::UMIN, VT, Legal);
1574 setOperationAction(ISD::ABS, VT, Legal);
1575 }
1576
1577 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) {
1578 setOperationAction(ISD::ROTL, VT, Custom);
1579 setOperationAction(ISD::ROTR, VT, Custom);
1580 }
1581
1582 // Custom legalize 2x32 to get a little better code.
1583 setOperationAction(ISD::MSCATTER, MVT::v2f32, Custom);
1584 setOperationAction(ISD::MSCATTER, MVT::v2i32, Custom);
1585
1586 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1587 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 })
1588 setOperationAction(ISD::MSCATTER, VT, Custom);
1589
1590 if (Subtarget.hasDQI()) {
1591 for (auto VT : { MVT::v2i64, MVT::v4i64 }) {
1592 setOperationAction(ISD::SINT_TO_FP, VT, Legal);
1593 setOperationAction(ISD::UINT_TO_FP, VT, Legal);
1594 setOperationAction(ISD::FP_TO_SINT, VT, Legal);
1595 setOperationAction(ISD::FP_TO_UINT, VT, Legal);
1596
1597 setOperationAction(ISD::MUL, VT, Legal);
1598 }
1599 }
1600
1601 if (Subtarget.hasCDI()) {
1602 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) {
1603 setOperationAction(ISD::CTLZ, VT, Legal);
1604 }
1605 } // Subtarget.hasCDI()
1606
1607 if (Subtarget.hasVPOPCNTDQ()) {
1608 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 })
1609 setOperationAction(ISD::CTPOP, VT, Legal);
1610 }
1611 }
1612
1613 // This block controls legalization of v32i1/v64i1 which are available with
1614 // AVX512BW. 512-bit v32i16 and v64i8 vector legalization is controlled with
1615 // useBWIRegs.
1616 if (!Subtarget.useSoftFloat() && Subtarget.hasBWI()) {
1617 addRegisterClass(MVT::v32i1, &X86::VK32RegClass);
1618 addRegisterClass(MVT::v64i1, &X86::VK64RegClass);
1619
1620 for (auto VT : { MVT::v32i1, MVT::v64i1 }) {
1621 setOperationAction(ISD::ADD, VT, Custom);
1622 setOperationAction(ISD::SUB, VT, Custom);
1623 setOperationAction(ISD::MUL, VT, Custom);
1624 setOperationAction(ISD::VSELECT, VT, Expand);
1625 setOperationAction(ISD::UADDSAT, VT, Custom);
1626 setOperationAction(ISD::SADDSAT, VT, Custom);
1627 setOperationAction(ISD::USUBSAT, VT, Custom);
1628 setOperationAction(ISD::SSUBSAT, VT, Custom);
1629
1630 setOperationAction(ISD::TRUNCATE, VT, Custom);
1631 setOperationAction(ISD::SETCC, VT, Custom);
1632 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1633 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1634 setOperationAction(ISD::SELECT, VT, Custom);
1635 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1636 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1637 }
1638
1639 setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i1, Custom);
1640 setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i1, Custom);
1641 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32i1, Custom);
1642 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64i1, Custom);
1643 for (auto VT : { MVT::v16i1, MVT::v32i1 })
1644 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
1645
1646 // Extends from v32i1 masks to 256-bit vectors.
1647 setOperationAction(ISD::SIGN_EXTEND, MVT::v32i8, Custom);
1648 setOperationAction(ISD::ZERO_EXTEND, MVT::v32i8, Custom);
1649 setOperationAction(ISD::ANY_EXTEND, MVT::v32i8, Custom);
1650 }
1651
1652 // This block controls legalization for v32i16 and v64i8. 512-bits can be
1653 // disabled based on prefer-vector-width and required-vector-width function
1654 // attributes.
1655 if (!Subtarget.useSoftFloat() && Subtarget.useBWIRegs()) {
1656 addRegisterClass(MVT::v32i16, &X86::VR512RegClass);
1657 addRegisterClass(MVT::v64i8, &X86::VR512RegClass);
1658
1659 // Extends from v64i1 masks to 512-bit vectors.
1660 setOperationAction(ISD::SIGN_EXTEND, MVT::v64i8, Custom);
1661 setOperationAction(ISD::ZERO_EXTEND, MVT::v64i8, Custom);
1662 setOperationAction(ISD::ANY_EXTEND, MVT::v64i8, Custom);
1663
1664 setOperationAction(ISD::MUL, MVT::v32i16, Legal);
1665 setOperationAction(ISD::MUL, MVT::v64i8, Custom);
1666 setOperationAction(ISD::MULHS, MVT::v32i16, Legal);
1667 setOperationAction(ISD::MULHU, MVT::v32i16, Legal);
1668 setOperationAction(ISD::MULHS, MVT::v64i8, Custom);
1669 setOperationAction(ISD::MULHU, MVT::v64i8, Custom);
1670 setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i16, Custom);
1671 setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i8, Custom);
1672 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32i16, Legal);
1673 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64i8, Legal);
1674 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v32i16, Custom);
1675 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v64i8, Custom);
1676 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v32i16, Custom);
1677 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v64i8, Custom);
1678 setOperationAction(ISD::SIGN_EXTEND, MVT::v32i16, Custom);
1679 setOperationAction(ISD::ZERO_EXTEND, MVT::v32i16, Custom);
1680 setOperationAction(ISD::ANY_EXTEND, MVT::v32i16, Custom);
1681 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v32i16, Custom);
1682 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v64i8, Custom);
1683 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v32i16, Custom);
1684 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v64i8, Custom);
1685 setOperationAction(ISD::TRUNCATE, MVT::v32i8, Custom);
1686 setOperationAction(ISD::BITREVERSE, MVT::v64i8, Custom);
1687
1688 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v32i16, Custom);
1689 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, MVT::v32i16, Custom);
1690
1691 setTruncStoreAction(MVT::v32i16, MVT::v32i8, Legal);
1692
1693 for (auto VT : { MVT::v64i8, MVT::v32i16 }) {
1694 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1695 setOperationAction(ISD::VSELECT, VT, Custom);
1696 setOperationAction(ISD::ABS, VT, Legal);
1697 setOperationAction(ISD::SRL, VT, Custom);
1698 setOperationAction(ISD::SHL, VT, Custom);
1699 setOperationAction(ISD::SRA, VT, Custom);
1700 setOperationAction(ISD::MLOAD, VT, Legal);
1701 setOperationAction(ISD::MSTORE, VT, Legal);
1702 setOperationAction(ISD::CTPOP, VT, Custom);
1703 setOperationAction(ISD::CTLZ, VT, Custom);
1704 setOperationAction(ISD::SMAX, VT, Legal);
1705 setOperationAction(ISD::UMAX, VT, Legal);
1706 setOperationAction(ISD::SMIN, VT, Legal);
1707 setOperationAction(ISD::UMIN, VT, Legal);
1708 setOperationAction(ISD::SETCC, VT, Custom);
1709 setOperationAction(ISD::UADDSAT, VT, Legal);
1710 setOperationAction(ISD::SADDSAT, VT, Legal);
1711 setOperationAction(ISD::USUBSAT, VT, Legal);
1712 setOperationAction(ISD::SSUBSAT, VT, Legal);
1713
1714 // The condition codes aren't legal in SSE/AVX and under AVX512 we use
1715 // setcc all the way to isel and prefer SETGT in some isel patterns.
1716 setCondCodeAction(ISD::SETLT, VT, Custom);
1717 setCondCodeAction(ISD::SETLE, VT, Custom);
1718 }
1719
1720 for (auto ExtType : {ISD::ZEXTLOAD, ISD::SEXTLOAD}) {
1721 setLoadExtAction(ExtType, MVT::v32i16, MVT::v32i8, Legal);
1722 }
1723
1724 if (Subtarget.hasBITALG()) {
1725 for (auto VT : { MVT::v64i8, MVT::v32i16 })
1726 setOperationAction(ISD::CTPOP, VT, Legal);
1727 }
1728
1729 if (Subtarget.hasVBMI2()) {
1730 setOperationAction(ISD::FSHL, MVT::v32i16, Custom);
1731 setOperationAction(ISD::FSHR, MVT::v32i16, Custom);
1732 }
1733 }
1734
1735 if (!Subtarget.useSoftFloat() && Subtarget.hasBWI()) {
1736 for (auto VT : { MVT::v32i8, MVT::v16i8, MVT::v16i16, MVT::v8i16 }) {
1737 setOperationAction(ISD::MLOAD, VT, Subtarget.hasVLX() ? Legal : Custom);
1738 setOperationAction(ISD::MSTORE, VT, Subtarget.hasVLX() ? Legal : Custom);
1739 }
1740
1741 // These operations are handled on non-VLX by artificially widening in
1742 // isel patterns.
1743 // TODO: Custom widen in lowering on non-VLX and drop the isel patterns?
1744
1745 if (Subtarget.hasBITALG()) {
1746 for (auto VT : { MVT::v16i8, MVT::v32i8, MVT::v8i16, MVT::v16i16 })
1747 setOperationAction(ISD::CTPOP, VT, Legal);
1748 }
1749 }
1750
1751 if (!Subtarget.useSoftFloat() && Subtarget.hasVLX()) {
1752 setTruncStoreAction(MVT::v4i64, MVT::v4i8, Legal);
1753 setTruncStoreAction(MVT::v4i64, MVT::v4i16, Legal);
1754 setTruncStoreAction(MVT::v4i64, MVT::v4i32, Legal);
1755 setTruncStoreAction(MVT::v8i32, MVT::v8i8, Legal);
1756 setTruncStoreAction(MVT::v8i32, MVT::v8i16, Legal);
1757
1758 setTruncStoreAction(MVT::v2i64, MVT::v2i8, Legal);
1759 setTruncStoreAction(MVT::v2i64, MVT::v2i16, Legal);
1760 setTruncStoreAction(MVT::v2i64, MVT::v2i32, Legal);
1761 setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal);
1762 setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
1763
1764 if (Subtarget.hasDQI()) {
1765 // Fast v2f32 SINT_TO_FP( v2i64 ) custom conversion.
1766 // v2f32 UINT_TO_FP is already custom under SSE2.
1767 setOperationAction(ISD::SINT_TO_FP, MVT::v2f32, Custom);
1768 assert(isOperationCustom(ISD::UINT_TO_FP, MVT::v2f32) &&((isOperationCustom(ISD::UINT_TO_FP, MVT::v2f32) && "Unexpected operation action!"
) ? static_cast<void> (0) : __assert_fail ("isOperationCustom(ISD::UINT_TO_FP, MVT::v2f32) && \"Unexpected operation action!\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 1769, __PRETTY_FUNCTION__))
1769 "Unexpected operation action!")((isOperationCustom(ISD::UINT_TO_FP, MVT::v2f32) && "Unexpected operation action!"
) ? static_cast<void> (0) : __assert_fail ("isOperationCustom(ISD::UINT_TO_FP, MVT::v2f32) && \"Unexpected operation action!\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 1769, __PRETTY_FUNCTION__))
;
1770 // v2i64 FP_TO_S/UINT(v2f32) custom conversion.
1771 setOperationAction(ISD::FP_TO_SINT, MVT::v2f32, Custom);
1772 setOperationAction(ISD::FP_TO_UINT, MVT::v2f32, Custom);
1773 }
1774
1775 if (Subtarget.hasBWI()) {
1776 setTruncStoreAction(MVT::v16i16, MVT::v16i8, Legal);
1777 setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
1778 }
1779
1780 if (Subtarget.hasVBMI2()) {
1781 // TODO: Make these legal even without VLX?
1782 for (auto VT : { MVT::v8i16, MVT::v4i32, MVT::v2i64,
1783 MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1784 setOperationAction(ISD::FSHL, VT, Custom);
1785 setOperationAction(ISD::FSHR, VT, Custom);
1786 }
1787 }
1788 }
1789
1790 // We want to custom lower some of our intrinsics.
1791 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
1792 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
1793 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
1794 if (!Subtarget.is64Bit()) {
1795 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
1796 }
1797
1798 // Only custom-lower 64-bit SADDO and friends on 64-bit because we don't
1799 // handle type legalization for these operations here.
1800 //
1801 // FIXME: We really should do custom legalization for addition and
1802 // subtraction on x86-32 once PR3203 is fixed. We really can't do much better
1803 // than generic legalization for 64-bit multiplication-with-overflow, though.
1804 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
1805 if (VT == MVT::i64 && !Subtarget.is64Bit())
1806 continue;
1807 // Add/Sub/Mul with overflow operations are custom lowered.
1808 setOperationAction(ISD::SADDO, VT, Custom);
1809 setOperationAction(ISD::UADDO, VT, Custom);
1810 setOperationAction(ISD::SSUBO, VT, Custom);
1811 setOperationAction(ISD::USUBO, VT, Custom);
1812 setOperationAction(ISD::SMULO, VT, Custom);
1813 setOperationAction(ISD::UMULO, VT, Custom);
1814
1815 // Support carry in as value rather than glue.
1816 setOperationAction(ISD::ADDCARRY, VT, Custom);
1817 setOperationAction(ISD::SUBCARRY, VT, Custom);
1818 setOperationAction(ISD::SETCCCARRY, VT, Custom);
1819 }
1820
1821 if (!Subtarget.is64Bit()) {
1822 // These libcalls are not available in 32-bit.
1823 setLibcallName(RTLIB::SHL_I128, nullptr);
1824 setLibcallName(RTLIB::SRL_I128, nullptr);
1825 setLibcallName(RTLIB::SRA_I128, nullptr);
1826 setLibcallName(RTLIB::MUL_I128, nullptr);
1827 }
1828
1829 // Combine sin / cos into _sincos_stret if it is available.
1830 if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
1831 getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) {
1832 setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
1833 setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
1834 }
1835
1836 if (Subtarget.isTargetWin64()) {
1837 setOperationAction(ISD::SDIV, MVT::i128, Custom);
1838 setOperationAction(ISD::UDIV, MVT::i128, Custom);
1839 setOperationAction(ISD::SREM, MVT::i128, Custom);
1840 setOperationAction(ISD::UREM, MVT::i128, Custom);
1841 setOperationAction(ISD::SDIVREM, MVT::i128, Custom);
1842 setOperationAction(ISD::UDIVREM, MVT::i128, Custom);
1843 }
1844
1845 // On 32 bit MSVC, `fmodf(f32)` is not defined - only `fmod(f64)`
1846 // is. We should promote the value to 64-bits to solve this.
1847 // This is what the CRT headers do - `fmodf` is an inline header
1848 // function casting to f64 and calling `fmod`.
1849 if (Subtarget.is32Bit() && (Subtarget.isTargetKnownWindowsMSVC() ||
1850 Subtarget.isTargetWindowsItanium()))
1851 for (ISD::NodeType Op :
1852 {ISD::FCEIL, ISD::FCOS, ISD::FEXP, ISD::FFLOOR, ISD::FREM, ISD::FLOG,
1853 ISD::FLOG10, ISD::FPOW, ISD::FSIN})
1854 if (isOperationExpand(Op, MVT::f32))
1855 setOperationAction(Op, MVT::f32, Promote);
1856
1857 // We have target-specific dag combine patterns for the following nodes:
1858 setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
1859 setTargetDAGCombine(ISD::SCALAR_TO_VECTOR);
1860 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
1861 setTargetDAGCombine(ISD::CONCAT_VECTORS);
1862 setTargetDAGCombine(ISD::INSERT_SUBVECTOR);
1863 setTargetDAGCombine(ISD::EXTRACT_SUBVECTOR);
1864 setTargetDAGCombine(ISD::BITCAST);
1865 setTargetDAGCombine(ISD::VSELECT);
1866 setTargetDAGCombine(ISD::SELECT);
1867 setTargetDAGCombine(ISD::SHL);
1868 setTargetDAGCombine(ISD::SRA);
1869 setTargetDAGCombine(ISD::SRL);
1870 setTargetDAGCombine(ISD::OR);
1871 setTargetDAGCombine(ISD::AND);
1872 setTargetDAGCombine(ISD::ADD);
1873 setTargetDAGCombine(ISD::FADD);
1874 setTargetDAGCombine(ISD::FSUB);
1875 setTargetDAGCombine(ISD::FNEG);
1876 setTargetDAGCombine(ISD::FMA);
1877 setTargetDAGCombine(ISD::FMINNUM);
1878 setTargetDAGCombine(ISD::FMAXNUM);
1879 setTargetDAGCombine(ISD::SUB);
1880 setTargetDAGCombine(ISD::LOAD);
1881 setTargetDAGCombine(ISD::MLOAD);
1882 setTargetDAGCombine(ISD::STORE);
1883 setTargetDAGCombine(ISD::MSTORE);
1884 setTargetDAGCombine(ISD::TRUNCATE);
1885 setTargetDAGCombine(ISD::ZERO_EXTEND);
1886 setTargetDAGCombine(ISD::ANY_EXTEND);
1887 setTargetDAGCombine(ISD::SIGN_EXTEND);
1888 setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
1889 setTargetDAGCombine(ISD::ANY_EXTEND_VECTOR_INREG);
1890 setTargetDAGCombine(ISD::ZERO_EXTEND_VECTOR_INREG);
1891 setTargetDAGCombine(ISD::SINT_TO_FP);
1892 setTargetDAGCombine(ISD::UINT_TO_FP);
1893 setTargetDAGCombine(ISD::SETCC);
1894 setTargetDAGCombine(ISD::MUL);
1895 setTargetDAGCombine(ISD::XOR);
1896 setTargetDAGCombine(ISD::MSCATTER);
1897 setTargetDAGCombine(ISD::MGATHER);
1898
1899 computeRegisterProperties(Subtarget.getRegisterInfo());
1900
1901 MaxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
1902 MaxStoresPerMemsetOptSize = 8;
1903 MaxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores
1904 MaxStoresPerMemcpyOptSize = 4;
1905 MaxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores
1906 MaxStoresPerMemmoveOptSize = 4;
1907
1908 // TODO: These control memcmp expansion in CGP and could be raised higher, but
1909 // that needs to be benchmarked and balanced with the potential use of vector
1910 // load/store types (PR33329, PR33914).
1911 MaxLoadsPerMemcmp = 2;
1912 MaxLoadsPerMemcmpOptSize = 2;
1913
1914 // Set loop alignment to 2^ExperimentalPrefLoopAlignment bytes (default: 2^4).
1915 setPrefLoopAlignment(ExperimentalPrefLoopAlignment);
1916
1917 // An out-of-order CPU can speculatively execute past a predictable branch,
1918 // but a conditional move could be stalled by an expensive earlier operation.
1919 PredictableSelectIsExpensive = Subtarget.getSchedModel().isOutOfOrder();
1920 EnableExtLdPromotion = true;
1921 setPrefFunctionAlignment(4); // 2^4 bytes.
1922
1923 verifyIntrinsicTables();
1924}
1925
1926// This has so far only been implemented for 64-bit MachO.
1927bool X86TargetLowering::useLoadStackGuardNode() const {
1928 return Subtarget.isTargetMachO() && Subtarget.is64Bit();
1929}
1930
1931bool X86TargetLowering::useStackGuardXorFP() const {
1932 // Currently only MSVC CRTs XOR the frame pointer into the stack guard value.
1933 return Subtarget.getTargetTriple().isOSMSVCRT();
1934}
1935
1936SDValue X86TargetLowering::emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
1937 const SDLoc &DL) const {
1938 EVT PtrTy = getPointerTy(DAG.getDataLayout());
1939 unsigned XorOp = Subtarget.is64Bit() ? X86::XOR64_FP : X86::XOR32_FP;
1940 MachineSDNode *Node = DAG.getMachineNode(XorOp, DL, PtrTy, Val);
1941 return SDValue(Node, 0);
1942}
1943
1944TargetLoweringBase::LegalizeTypeAction
1945X86TargetLowering::getPreferredVectorAction(MVT VT) const {
1946 if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI())
1947 return TypeSplitVector;
1948
1949 if (ExperimentalVectorWideningLegalization &&
1950 VT.getVectorNumElements() != 1 &&
1951 VT.getVectorElementType() != MVT::i1)
1952 return TypeWidenVector;
1953
1954 return TargetLoweringBase::getPreferredVectorAction(VT);
1955}
1956
1957MVT X86TargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
1958 CallingConv::ID CC,
1959 EVT VT) const {
1960 if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI())
1961 return MVT::v32i8;
1962 return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
1963}
1964
1965unsigned X86TargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
1966 CallingConv::ID CC,
1967 EVT VT) const {
1968 if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI())
1969 return 1;
1970 return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
1971}
1972
1973EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL,
1974 LLVMContext& Context,
1975 EVT VT) const {
1976 if (!VT.isVector())
1977 return MVT::i8;
1978
1979 if (Subtarget.hasAVX512()) {
1980 const unsigned NumElts = VT.getVectorNumElements();
1981
1982 // Figure out what this type will be legalized to.
1983 EVT LegalVT = VT;
1984 while (getTypeAction(Context, LegalVT) != TypeLegal)
1985 LegalVT = getTypeToTransformTo(Context, LegalVT);
1986
1987 // If we got a 512-bit vector then we'll definitely have a vXi1 compare.
1988 if (LegalVT.getSimpleVT().is512BitVector())
1989 return EVT::getVectorVT(Context, MVT::i1, NumElts);
1990
1991 if (LegalVT.getSimpleVT().isVector() && Subtarget.hasVLX()) {
1992 // If we legalized to less than a 512-bit vector, then we will use a vXi1
1993 // compare for vXi32/vXi64 for sure. If we have BWI we will also support
1994 // vXi16/vXi8.
1995 MVT EltVT = LegalVT.getSimpleVT().getVectorElementType();
1996 if (Subtarget.hasBWI() || EltVT.getSizeInBits() >= 32)
1997 return EVT::getVectorVT(Context, MVT::i1, NumElts);
1998 }
1999 }
2000
2001 return VT.changeVectorElementTypeToInteger();
2002}
2003
2004/// Helper for getByValTypeAlignment to determine
2005/// the desired ByVal argument alignment.
2006static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign) {
2007 if (MaxAlign == 16)
2008 return;
2009 if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
2010 if (VTy->getBitWidth() == 128)
2011 MaxAlign = 16;
2012 } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
2013 unsigned EltAlign = 0;
2014 getMaxByValAlign(ATy->getElementType(), EltAlign);
2015 if (EltAlign > MaxAlign)
2016 MaxAlign = EltAlign;
2017 } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
2018 for (auto *EltTy : STy->elements()) {
2019 unsigned EltAlign = 0;
2020 getMaxByValAlign(EltTy, EltAlign);
2021 if (EltAlign > MaxAlign)
2022 MaxAlign = EltAlign;
2023 if (MaxAlign == 16)
2024 break;
2025 }
2026 }
2027}
2028
2029/// Return the desired alignment for ByVal aggregate
2030/// function arguments in the caller parameter area. For X86, aggregates
2031/// that contain SSE vectors are placed at 16-byte boundaries while the rest
2032/// are at 4-byte boundaries.
2033unsigned X86TargetLowering::getByValTypeAlignment(Type *Ty,
2034 const DataLayout &DL) const {
2035 if (Subtarget.is64Bit()) {
2036 // Max of 8 and alignment of type.
2037 unsigned TyAlign = DL.getABITypeAlignment(Ty);
2038 if (TyAlign > 8)
2039 return TyAlign;
2040 return 8;
2041 }
2042
2043 unsigned Align = 4;
2044 if (Subtarget.hasSSE1())
2045 getMaxByValAlign(Ty, Align);
2046 return Align;
2047}
2048
2049/// Returns the target specific optimal type for load
2050/// and store operations as a result of memset, memcpy, and memmove
2051/// lowering. If DstAlign is zero that means it's safe to destination
2052/// alignment can satisfy any constraint. Similarly if SrcAlign is zero it
2053/// means there isn't a need to check it against alignment requirement,
2054/// probably because the source does not need to be loaded. If 'IsMemset' is
2055/// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
2056/// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
2057/// source is constant so it does not need to be loaded.
2058/// It returns EVT::Other if the type should be determined using generic
2059/// target-independent logic.
2060/// For vector ops we check that the overall size isn't larger than our
2061/// preferred vector width.
2062EVT X86TargetLowering::getOptimalMemOpType(
2063 uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool IsMemset,
2064 bool ZeroMemset, bool MemcpyStrSrc,
2065 const AttributeList &FuncAttributes) const {
2066 if (!FuncAttributes.hasFnAttribute(Attribute::NoImplicitFloat)) {
2067 if (Size >= 16 && (!Subtarget.isUnalignedMem16Slow() ||
2068 ((DstAlign == 0 || DstAlign >= 16) &&
2069 (SrcAlign == 0 || SrcAlign >= 16)))) {
2070 // FIXME: Check if unaligned 32-byte accesses are slow.
2071 if (Size >= 32 && Subtarget.hasAVX() &&
2072 (Subtarget.getPreferVectorWidth() >= 256)) {
2073 // Although this isn't a well-supported type for AVX1, we'll let
2074 // legalization and shuffle lowering produce the optimal codegen. If we
2075 // choose an optimal type with a vector element larger than a byte,
2076 // getMemsetStores() may create an intermediate splat (using an integer
2077 // multiply) before we splat as a vector.
2078 return MVT::v32i8;
2079 }
2080 if (Subtarget.hasSSE2() && (Subtarget.getPreferVectorWidth() >= 128))
2081 return MVT::v16i8;
2082 // TODO: Can SSE1 handle a byte vector?
2083 // If we have SSE1 registers we should be able to use them.
2084 if (Subtarget.hasSSE1() && (Subtarget.is64Bit() || Subtarget.hasX87()) &&
2085 (Subtarget.getPreferVectorWidth() >= 128))
2086 return MVT::v4f32;
2087 } else if ((!IsMemset || ZeroMemset) && !MemcpyStrSrc && Size >= 8 &&
2088 !Subtarget.is64Bit() && Subtarget.hasSSE2()) {
2089 // Do not use f64 to lower memcpy if source is string constant. It's
2090 // better to use i32 to avoid the loads.
2091 // Also, do not use f64 to lower memset unless this is a memset of zeros.
2092 // The gymnastics of splatting a byte value into an XMM register and then
2093 // only using 8-byte stores (because this is a CPU with slow unaligned
2094 // 16-byte accesses) makes that a loser.
2095 return MVT::f64;
2096 }
2097 }
2098 // This is a compromise. If we reach here, unaligned accesses may be slow on
2099 // this target. However, creating smaller, aligned accesses could be even
2100 // slower and would certainly be a lot more code.
2101 if (Subtarget.is64Bit() && Size >= 8)
2102 return MVT::i64;
2103 return MVT::i32;
2104}
2105
2106bool X86TargetLowering::isSafeMemOpType(MVT VT) const {
2107 if (VT == MVT::f32)
2108 return X86ScalarSSEf32;
2109 else if (VT == MVT::f64)
2110 return X86ScalarSSEf64;
2111 return true;
2112}
2113
2114bool
2115X86TargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
2116 unsigned,
2117 unsigned,
2118 bool *Fast) const {
2119 if (Fast) {
2120 switch (VT.getSizeInBits()) {
2121 default:
2122 // 8-byte and under are always assumed to be fast.
2123 *Fast = true;
2124 break;
2125 case 128:
2126 *Fast = !Subtarget.isUnalignedMem16Slow();
2127 break;
2128 case 256:
2129 *Fast = !Subtarget.isUnalignedMem32Slow();
2130 break;
2131 // TODO: What about AVX-512 (512-bit) accesses?
2132 }
2133 }
2134 // Misaligned accesses of any size are always allowed.
2135 return true;
2136}
2137
2138/// Return the entry encoding for a jump table in the
2139/// current function. The returned value is a member of the
2140/// MachineJumpTableInfo::JTEntryKind enum.
2141unsigned X86TargetLowering::getJumpTableEncoding() const {
2142 // In GOT pic mode, each entry in the jump table is emitted as a @GOTOFF
2143 // symbol.
2144 if (isPositionIndependent() && Subtarget.isPICStyleGOT())
2145 return MachineJumpTableInfo::EK_Custom32;
2146
2147 // Otherwise, use the normal jump table encoding heuristics.
2148 return TargetLowering::getJumpTableEncoding();
2149}
2150
2151bool X86TargetLowering::useSoftFloat() const {
2152 return Subtarget.useSoftFloat();
2153}
2154
2155void X86TargetLowering::markLibCallAttributes(MachineFunction *MF, unsigned CC,
2156 ArgListTy &Args) const {
2157
2158 // Only relabel X86-32 for C / Stdcall CCs.
2159 if (Subtarget.is64Bit())
2160 return;
2161 if (CC != CallingConv::C && CC != CallingConv::X86_StdCall)
2162 return;
2163 unsigned ParamRegs = 0;
2164 if (auto *M = MF->getFunction().getParent())
2165 ParamRegs = M->getNumberRegisterParameters();
2166
2167 // Mark the first N int arguments as having reg
2168 for (unsigned Idx = 0; Idx < Args.size(); Idx++) {
2169 Type *T = Args[Idx].Ty;
2170 if (T->isIntOrPtrTy())
2171 if (MF->getDataLayout().getTypeAllocSize(T) <= 8) {
2172 unsigned numRegs = 1;
2173 if (MF->getDataLayout().getTypeAllocSize(T) > 4)
2174 numRegs = 2;
2175 if (ParamRegs < numRegs)
2176 return;
2177 ParamRegs -= numRegs;
2178 Args[Idx].IsInReg = true;
2179 }
2180 }
2181}
2182
2183const MCExpr *
2184X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
2185 const MachineBasicBlock *MBB,
2186 unsigned uid,MCContext &Ctx) const{
2187 assert(isPositionIndependent() && Subtarget.isPICStyleGOT())((isPositionIndependent() && Subtarget.isPICStyleGOT(
)) ? static_cast<void> (0) : __assert_fail ("isPositionIndependent() && Subtarget.isPICStyleGOT()"
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 2187, __PRETTY_FUNCTION__))
;
2188 // In 32-bit ELF systems, our jump table entries are formed with @GOTOFF
2189 // entries.
2190 return MCSymbolRefExpr::create(MBB->getSymbol(),
2191 MCSymbolRefExpr::VK_GOTOFF, Ctx);
2192}
2193
2194/// Returns relocation base for the given PIC jumptable.
2195SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table,
2196 SelectionDAG &DAG) const {
2197 if (!Subtarget.is64Bit())
2198 // This doesn't have SDLoc associated with it, but is not really the
2199 // same as a Register.
2200 return DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
2201 getPointerTy(DAG.getDataLayout()));
2202 return Table;
2203}
2204
2205/// This returns the relocation base for the given PIC jumptable,
2206/// the same as getPICJumpTableRelocBase, but as an MCExpr.
2207const MCExpr *X86TargetLowering::
2208getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI,
2209 MCContext &Ctx) const {
2210 // X86-64 uses RIP relative addressing based on the jump table label.
2211 if (Subtarget.isPICStyleRIPRel())
2212 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
2213
2214 // Otherwise, the reference is relative to the PIC base.
2215 return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
2216}
2217
2218std::pair<const TargetRegisterClass *, uint8_t>
2219X86TargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
2220 MVT VT) const {
2221 const TargetRegisterClass *RRC = nullptr;
2222 uint8_t Cost = 1;
2223 switch (VT.SimpleTy) {
2224 default:
2225 return TargetLowering::findRepresentativeClass(TRI, VT);
2226 case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64:
2227 RRC = Subtarget.is64Bit() ? &X86::GR64RegClass : &X86::GR32RegClass;
2228 break;
2229 case MVT::x86mmx:
2230 RRC = &X86::VR64RegClass;
2231 break;
2232 case MVT::f32: case MVT::f64:
2233 case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
2234 case MVT::v4f32: case MVT::v2f64:
2235 case MVT::v32i8: case MVT::v16i16: case MVT::v8i32: case MVT::v4i64:
2236 case MVT::v8f32: case MVT::v4f64:
2237 case MVT::v64i8: case MVT::v32i16: case MVT::v16i32: case MVT::v8i64:
2238 case MVT::v16f32: case MVT::v8f64:
2239 RRC = &X86::VR128XRegClass;
2240 break;
2241 }
2242 return std::make_pair(RRC, Cost);
2243}
2244
2245unsigned X86TargetLowering::getAddressSpace() const {
2246 if (Subtarget.is64Bit())
2247 return (getTargetMachine().getCodeModel() == CodeModel::Kernel) ? 256 : 257;
2248 return 256;
2249}
2250
2251static bool hasStackGuardSlotTLS(const Triple &TargetTriple) {
2252 return TargetTriple.isOSGlibc() || TargetTriple.isOSFuchsia() ||
2253 (TargetTriple.isAndroid() && !TargetTriple.isAndroidVersionLT(17));
2254}
2255
2256static Constant* SegmentOffset(IRBuilder<> &IRB,
2257 unsigned Offset, unsigned AddressSpace) {
2258 return ConstantExpr::getIntToPtr(
2259 ConstantInt::get(Type::getInt32Ty(IRB.getContext()), Offset),
2260 Type::getInt8PtrTy(IRB.getContext())->getPointerTo(AddressSpace));
2261}
2262
2263Value *X86TargetLowering::getIRStackGuard(IRBuilder<> &IRB) const {
2264 // glibc, bionic, and Fuchsia have a special slot for the stack guard in
2265 // tcbhead_t; use it instead of the usual global variable (see
2266 // sysdeps/{i386,x86_64}/nptl/tls.h)
2267 if (hasStackGuardSlotTLS(Subtarget.getTargetTriple())) {
2268 if (Subtarget.isTargetFuchsia()) {
2269 // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
2270 return SegmentOffset(IRB, 0x10, getAddressSpace());
2271 } else {
2272 // %fs:0x28, unless we're using a Kernel code model, in which case
2273 // it's %gs:0x28. gs:0x14 on i386.
2274 unsigned Offset = (Subtarget.is64Bit()) ? 0x28 : 0x14;
2275 return SegmentOffset(IRB, Offset, getAddressSpace());
2276 }
2277 }
2278
2279 return TargetLowering::getIRStackGuard(IRB);
2280}
2281
2282void X86TargetLowering::insertSSPDeclarations(Module &M) const {
2283 // MSVC CRT provides functionalities for stack protection.
2284 if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
2285 Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
2286 // MSVC CRT has a global variable holding security cookie.
2287 M.getOrInsertGlobal("__security_cookie",
2288 Type::getInt8PtrTy(M.getContext()));
2289
2290 // MSVC CRT has a function to validate security cookie.
2291 FunctionCallee SecurityCheckCookie = M.getOrInsertFunction(
2292 "__security_check_cookie", Type::getVoidTy(M.getContext()),
2293 Type::getInt8PtrTy(M.getContext()));
2294 if (Function *F = dyn_cast<Function>(SecurityCheckCookie.getCallee())) {
2295 F->setCallingConv(CallingConv::X86_FastCall);
2296 F->addAttribute(1, Attribute::AttrKind::InReg);
2297 }
2298 return;
2299 }
2300 // glibc, bionic, and Fuchsia have a special slot for the stack guard.
2301 if (hasStackGuardSlotTLS(Subtarget.getTargetTriple()))
2302 return;
2303 TargetLowering::insertSSPDeclarations(M);
2304}
2305
2306Value *X86TargetLowering::getSDagStackGuard(const Module &M) const {
2307 // MSVC CRT has a global variable holding security cookie.
2308 if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
2309 Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
2310 return M.getGlobalVariable("__security_cookie");
2311 }
2312 return TargetLowering::getSDagStackGuard(M);
2313}
2314
2315Function *X86TargetLowering::getSSPStackGuardCheck(const Module &M) const {
2316 // MSVC CRT has a function to validate security cookie.
2317 if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
2318 Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
2319 return M.getFunction("__security_check_cookie");
2320 }
2321 return TargetLowering::getSSPStackGuardCheck(M);
2322}
2323
2324Value *X86TargetLowering::getSafeStackPointerLocation(IRBuilder<> &IRB) const {
2325 if (Subtarget.getTargetTriple().isOSContiki())
2326 return getDefaultSafeStackPointerLocation(IRB, false);
2327
2328 // Android provides a fixed TLS slot for the SafeStack pointer. See the
2329 // definition of TLS_SLOT_SAFESTACK in
2330 // https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
2331 if (Subtarget.isTargetAndroid()) {
2332 // %fs:0x48, unless we're using a Kernel code model, in which case it's %gs:
2333 // %gs:0x24 on i386
2334 unsigned Offset = (Subtarget.is64Bit()) ? 0x48 : 0x24;
2335 return SegmentOffset(IRB, Offset, getAddressSpace());
2336 }
2337
2338 // Fuchsia is similar.
2339 if (Subtarget.isTargetFuchsia()) {
2340 // <zircon/tls.h> defines ZX_TLS_UNSAFE_SP_OFFSET with this value.
2341 return SegmentOffset(IRB, 0x18, getAddressSpace());
2342 }
2343
2344 return TargetLowering::getSafeStackPointerLocation(IRB);
2345}
2346
2347bool X86TargetLowering::isNoopAddrSpaceCast(unsigned SrcAS,
2348 unsigned DestAS) const {
2349 assert(SrcAS != DestAS && "Expected different address spaces!")((SrcAS != DestAS && "Expected different address spaces!"
) ? static_cast<void> (0) : __assert_fail ("SrcAS != DestAS && \"Expected different address spaces!\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 2349, __PRETTY_FUNCTION__))
;
2350
2351 return SrcAS < 256 && DestAS < 256;
2352}
2353
2354//===----------------------------------------------------------------------===//
2355// Return Value Calling Convention Implementation
2356//===----------------------------------------------------------------------===//
2357
2358bool X86TargetLowering::CanLowerReturn(
2359 CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
2360 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
2361 SmallVector<CCValAssign, 16> RVLocs;
2362 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
2363 return CCInfo.CheckReturn(Outs, RetCC_X86);
2364}
2365
2366const MCPhysReg *X86TargetLowering::getScratchRegisters(CallingConv::ID) const {
2367 static const MCPhysReg ScratchRegs[] = { X86::R11, 0 };
2368 return ScratchRegs;
2369}
2370
2371/// Lowers masks values (v*i1) to the local register values
2372/// \returns DAG node after lowering to register type
2373static SDValue lowerMasksToReg(const SDValue &ValArg, const EVT &ValLoc,
2374 const SDLoc &Dl, SelectionDAG &DAG) {
2375 EVT ValVT = ValArg.getValueType();
2376
2377 if (ValVT == MVT::v1i1)
2378 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, Dl, ValLoc, ValArg,
2379 DAG.getIntPtrConstant(0, Dl));
2380
2381 if ((ValVT == MVT::v8i1 && (ValLoc == MVT::i8 || ValLoc == MVT::i32)) ||
2382 (ValVT == MVT::v16i1 && (ValLoc == MVT::i16 || ValLoc == MVT::i32))) {
2383 // Two stage lowering might be required
2384 // bitcast: v8i1 -> i8 / v16i1 -> i16
2385 // anyextend: i8 -> i32 / i16 -> i32
2386 EVT TempValLoc = ValVT == MVT::v8i1 ? MVT::i8 : MVT::i16;
2387 SDValue ValToCopy = DAG.getBitcast(TempValLoc, ValArg);
2388 if (ValLoc == MVT::i32)
2389 ValToCopy = DAG.getNode(ISD::ANY_EXTEND, Dl, ValLoc, ValToCopy);
2390 return ValToCopy;
2391 }
2392
2393 if ((ValVT == MVT::v32i1 && ValLoc == MVT::i32) ||
2394 (ValVT == MVT::v64i1 && ValLoc == MVT::i64)) {
2395 // One stage lowering is required
2396 // bitcast: v32i1 -> i32 / v64i1 -> i64
2397 return DAG.getBitcast(ValLoc, ValArg);
2398 }
2399
2400 return DAG.getNode(ISD::ANY_EXTEND, Dl, ValLoc, ValArg);
2401}
2402
2403/// Breaks v64i1 value into two registers and adds the new node to the DAG
2404static void Passv64i1ArgInRegs(
2405 const SDLoc &Dl, SelectionDAG &DAG, SDValue Chain, SDValue &Arg,
2406 SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, CCValAssign &VA,
2407 CCValAssign &NextVA, const X86Subtarget &Subtarget) {
2408 assert(Subtarget.hasBWI() && "Expected AVX512BW target!")((Subtarget.hasBWI() && "Expected AVX512BW target!") ?
static_cast<void> (0) : __assert_fail ("Subtarget.hasBWI() && \"Expected AVX512BW target!\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 2408, __PRETTY_FUNCTION__))
;
2409 assert(Subtarget.is32Bit() && "Expecting 32 bit target")((Subtarget.is32Bit() && "Expecting 32 bit target") ?
static_cast<void> (0) : __assert_fail ("Subtarget.is32Bit() && \"Expecting 32 bit target\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 2409, __PRETTY_FUNCTION__))
;
2410 assert(Arg.getValueType() == MVT::i64 && "Expecting 64 bit value")((Arg.getValueType() == MVT::i64 && "Expecting 64 bit value"
) ? static_cast<void> (0) : __assert_fail ("Arg.getValueType() == MVT::i64 && \"Expecting 64 bit value\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 2410, __PRETTY_FUNCTION__))
;
2411 assert(VA.isRegLoc() && NextVA.isRegLoc() &&((VA.isRegLoc() && NextVA.isRegLoc() && "The value should reside in two registers"
) ? static_cast<void> (0) : __assert_fail ("VA.isRegLoc() && NextVA.isRegLoc() && \"The value should reside in two registers\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 2412, __PRETTY_FUNCTION__))
2412 "The value should reside in two registers")((VA.isRegLoc() && NextVA.isRegLoc() && "The value should reside in two registers"
) ? static_cast<void> (0) : __assert_fail ("VA.isRegLoc() && NextVA.isRegLoc() && \"The value should reside in two registers\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 2412, __PRETTY_FUNCTION__))
;
2413
2414 // Before splitting the value we cast it to i64
2415 Arg = DAG.getBitcast(MVT::i64, Arg);
2416
2417 // Splitting the value into two i32 types
2418 SDValue Lo, Hi;
2419 Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32, Arg,
2420 DAG.getConstant(0, Dl, MVT::i32));
2421 Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32, Arg,
2422 DAG.getConstant(1, Dl, MVT::i32));
2423
2424 // Attach the two i32 types into corresponding registers
2425 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Lo));
2426 RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), Hi));
2427}
2428
/// Lowers a function return into an X86ISD::RET_FLAG node, or an X86ISD::IRET
/// node for the X86_INTR calling convention.
///
/// The operand list of the resulting node is built in this order: the chain,
/// the number of bytes to pop on return, one entry per returned value (the
/// value itself for FP0/FP1 returns, otherwise the destination register), the
/// sret return register if FuncInfo records one, any callee-saved registers
/// reported by getCalleeSavedRegsViaCopy, and finally the glue if present.
///
/// \param Chain     Incoming chain; updated as CopyToReg nodes are created.
/// \param CallConv  Calling convention of the function being returned from.
/// \param isVarArg  Passed through to CCState.
/// \param Outs      Descriptions of the returned values.
/// \param OutVals   The returned values, indexed in step with the loop below.
/// \param dl        Location used for every node created here.
/// \param DAG       The DAG that receives the new nodes.
/// \returns the return node. Calls report_fatal_error when an X86_INTR
///          function has any return value.
2429SDValue
2430X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
2431 bool isVarArg,
2432 const SmallVectorImpl<ISD::OutputArg> &Outs,
2433 const SmallVectorImpl<SDValue> &OutVals,
2434 const SDLoc &dl, SelectionDAG &DAG) const {
2435 MachineFunction &MF = DAG.getMachineFunction();
2436 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
2437
2438 // In some cases we need to disable registers from the default CSR list.
2439 // For example, when they are used for argument passing.
2440 bool ShouldDisableCalleeSavedRegister =
2441 CallConv == CallingConv::X86_RegCall ||
2442 MF.getFunction().hasFnAttribute("no_caller_saved_registers");
2443
2444 if (CallConv == CallingConv::X86_INTR && !Outs.empty())
2445 report_fatal_error("X86 interrupts may not return any value");
2446
2447 SmallVector<CCValAssign, 16> RVLocs;
2448 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.getContext());
2449 CCInfo.AnalyzeReturn(Outs, RetCC_X86);
2450
2451 SDValue Flag;
2452 SmallVector<SDValue, 6> RetOps;
2453 RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
2454 // Operand #1 = Bytes To Pop
2455 RetOps.push_back(DAG.getTargetConstant(FuncInfo->getBytesToPopOnReturn(), dl,
2456 MVT::i32));
2457
2458 // Copy the result values into the output registers.
 // I walks RVLocs and OutsIndex walks OutVals. They are advanced separately
 // because a custom (split v64i1) value consumes two RVLocs entries (see the
 // RVLocs[++I] below) but only one OutVals entry.
2459 for (unsigned I = 0, OutsIndex = 0, E = RVLocs.size(); I != E;
2460 ++I, ++OutsIndex) {
2461 CCValAssign &VA = RVLocs[I];
2462 assert(VA.isRegLoc() && "Can only return in registers!")((VA.isRegLoc() && "Can only return in registers!") ?
static_cast<void> (0) : __assert_fail ("VA.isRegLoc() && \"Can only return in registers!\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 2462, __PRETTY_FUNCTION__))
;
2463
2464 // Add the register to the CalleeSaveDisableRegs list.
2465 if (ShouldDisableCalleeSavedRegister)
2466 MF.getRegInfo().disableCalleeSavedRegister(VA.getLocReg());
2467
2468 SDValue ValToCopy = OutVals[OutsIndex];
2469 EVT ValVT = ValToCopy.getValueType();
2470
2471 // Promote values to the appropriate types.
2472 if (VA.getLocInfo() == CCValAssign::SExt)
2473 ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy);
2474 else if (VA.getLocInfo() == CCValAssign::ZExt)
2475 ValToCopy = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), ValToCopy);
2476 else if (VA.getLocInfo() == CCValAssign::AExt) {
2477 if (ValVT.isVector() && ValVT.getVectorElementType() == MVT::i1)
2478 ValToCopy = lowerMasksToReg(ValToCopy, VA.getLocVT(), dl, DAG);
2479 else
2480 ValToCopy = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ValToCopy);
2481 }
2482 else if (VA.getLocInfo() == CCValAssign::BCvt)
2483 ValToCopy = DAG.getBitcast(VA.getLocVT(), ValToCopy);
2484
2485 assert(VA.getLocInfo() != CCValAssign::FPExt &&((VA.getLocInfo() != CCValAssign::FPExt && "Unexpected FP-extend for return value."
) ? static_cast<void> (0) : __assert_fail ("VA.getLocInfo() != CCValAssign::FPExt && \"Unexpected FP-extend for return value.\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 2486, __PRETTY_FUNCTION__))
2486 "Unexpected FP-extend for return value.")((VA.getLocInfo() != CCValAssign::FPExt && "Unexpected FP-extend for return value."
) ? static_cast<void> (0) : __assert_fail ("VA.getLocInfo() != CCValAssign::FPExt && \"Unexpected FP-extend for return value.\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 2486, __PRETTY_FUNCTION__))
;
2487
2488 // If this is x86-64, and we disabled SSE, we can't return FP values,
2489 // or SSE or MMX vectors.
2490 if ((ValVT == MVT::f32 || ValVT == MVT::f64 ||
2491 VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) &&
2492 (Subtarget.is64Bit() && !Subtarget.hasSSE1())) {
2493 errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
2494 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
2495 } else if (ValVT == MVT::f64 &&
2496 (Subtarget.is64Bit() && !Subtarget.hasSSE2())) {
2497 // Likewise we can't return F64 values with SSE1 only. gcc does so, but
2498 // llvm-gcc has never done it right and no one has noticed, so this
2499 // should be OK for now.
2500 errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");
2501 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
2502 }
2503
2504 // Returns in ST0/ST1 are handled specially: these are pushed as operands to
2505 // the RET instruction and handled by the FP Stackifier.
2506 if (VA.getLocReg() == X86::FP0 ||
2507 VA.getLocReg() == X86::FP1) {
2508 // If this is a copy from an xmm register to ST(0), use an FPExtend to
2509 // change the value to the FP stack register class.
2510 if (isScalarFPTypeInSSEReg(VA.getValVT()))
2511 ValToCopy = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f80, ValToCopy);
2512 RetOps.push_back(ValToCopy);
2513 // Don't emit a copytoreg.
2514 continue;
2515 }
2516
2517 // 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64
2518 // which is returned in RAX / RDX.
2519 if (Subtarget.is64Bit()) {
2520 if (ValVT == MVT::x86mmx) {
2521 if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
2522 ValToCopy = DAG.getBitcast(MVT::i64, ValToCopy);
2523 ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
2524 ValToCopy);
2525 // If we don't have SSE2 available, convert to v4f32 so the generated
2526 // register is legal.
2527 if (!Subtarget.hasSSE2())
2528 ValToCopy = DAG.getBitcast(MVT::v4f32, ValToCopy);
2529 }
2530 }
2531 }
2532
 // (register, value) pairs to copy for this return value: one pair in the
 // common case, two when a v64i1 value is split across two registers.
2533 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
2534
2535 if (VA.needsCustom()) {
2536 assert(VA.getValVT() == MVT::v64i1 &&((VA.getValVT() == MVT::v64i1 && "Currently the only custom case is when we split v64i1 to 2 regs"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 2537, __PRETTY_FUNCTION__))
2537 "Currently the only custom case is when we split v64i1 to 2 regs")((VA.getValVT() == MVT::v64i1 && "Currently the only custom case is when we split v64i1 to 2 regs"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 2537, __PRETTY_FUNCTION__))
;
2538
 // Note: ++I consumes the following RVLocs entry as the second register.
2539 Passv64i1ArgInRegs(dl, DAG, Chain, ValToCopy, RegsToPass, VA, RVLocs[++I],
2540 Subtarget);
2541
2542 assert(2 == RegsToPass.size() &&((2 == RegsToPass.size() && "Expecting two registers after Pass64BitArgInRegs"
) ? static_cast<void> (0) : __assert_fail ("2 == RegsToPass.size() && \"Expecting two registers after Pass64BitArgInRegs\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 2543, __PRETTY_FUNCTION__))
2543 "Expecting two registers after Pass64BitArgInRegs")((2 == RegsToPass.size() && "Expecting two registers after Pass64BitArgInRegs"
) ? static_cast<void> (0) : __assert_fail ("2 == RegsToPass.size() && \"Expecting two registers after Pass64BitArgInRegs\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 2543, __PRETTY_FUNCTION__))
;
2544
2545 // Add the second register to the CalleeSaveDisableRegs list.
2546 if (ShouldDisableCalleeSavedRegister)
2547 MF.getRegInfo().disableCalleeSavedRegister(RVLocs[I].getLocReg());
2548 } else {
2549 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
2550 }
2551
2552 // Add nodes to the DAG and add the values into the RetOps list
 // Each CopyToReg is glued to the previous one through Flag.
2553 for (auto &Reg : RegsToPass) {
2554 Chain = DAG.getCopyToReg(Chain, dl, Reg.first, Reg.second, Flag);
2555 Flag = Chain.getValue(1);
2556 RetOps.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
2557 }
2558 }
2559
2560 // Swift calling convention does not require we copy the sret argument
2561 // into %rax/%eax for the return, and SRetReturnReg is not set for Swift.
2562
2563 // All x86 ABIs require that for returning structs by value we copy
2564 // the sret argument into %rax/%eax (depending on ABI) for the return.
2565 // We saved the argument into a virtual register in the entry block,
2566 // so now we copy the value out and into %rax/%eax.
2567 //
2568 // Checking Function.hasStructRetAttr() here is insufficient because the IR
2569 // may not have an explicit sret argument. If FuncInfo.CanLowerReturn is
2570 // false, then an sret argument may be implicitly inserted in the SelDAG. In
2571 // either case FuncInfo->setSRetReturnReg() will have been called.
2572 if (unsigned SRetReg = FuncInfo->getSRetReturnReg()) {
2573 // When we have both sret and another return value, we should use the
2574 // original Chain stored in RetOps[0], instead of the current Chain updated
2575 // in the above loop. If we only have sret, RetOps[0] equals to Chain.
2576
2577 // For the case of sret and another return value, we have
2578 // Chain_0 at the function entry
2579 // Chain_1 = getCopyToReg(Chain_0) in the above loop
2580 // If we use Chain_1 in getCopyFromReg, we will have
2581 // Val = getCopyFromReg(Chain_1)
2582 // Chain_2 = getCopyToReg(Chain_1, Val) from below
2583
2584 // getCopyToReg(Chain_0) will be glued together with
2585 // getCopyToReg(Chain_1, Val) into Unit A, getCopyFromReg(Chain_1) will be
2586 // in Unit B, and we will have cyclic dependency between Unit A and Unit B:
2587 // Data dependency from Unit B to Unit A due to usage of Val in
2588 // getCopyToReg(Chain_1, Val)
2589 // Chain dependency from Unit A to Unit B
2590
2591 // So here, we use RetOps[0] (i.e Chain_0) for getCopyFromReg.
2592 SDValue Val = DAG.getCopyFromReg(RetOps[0], dl, SRetReg,
2593 getPointerTy(MF.getDataLayout()));
2594
2595 unsigned RetValReg
2596 = (Subtarget.is64Bit() && !Subtarget.isTarget64BitILP32()) ?
2597 X86::RAX : X86::EAX;
2598 Chain = DAG.getCopyToReg(Chain, dl, RetValReg, Val, Flag);
2599 Flag = Chain.getValue(1);
2600
2601 // RAX/EAX now acts like a return value.
2602 RetOps.push_back(
2603 DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout())));
2604
2605 // Add the returned register to the CalleeSaveDisableRegs list.
2606 if (ShouldDisableCalleeSavedRegister)
2607 MF.getRegInfo().disableCalleeSavedRegister(RetValReg);
2608 }
2609
 // Append the registers returned by getCalleeSavedRegsViaCopy, if any. The
 // list is walked until a zero entry; only GR64 registers are expected.
2610 const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
2611 const MCPhysReg *I =
2612 TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
2613 if (I) {
2614 for (; *I; ++I) {
2615 if (X86::GR64RegClass.contains(*I))
2616 RetOps.push_back(DAG.getRegister(*I, MVT::i64));
2617 else
2618 llvm_unreachable("Unexpected register class in CSRsViaCopy!")::llvm::llvm_unreachable_internal("Unexpected register class in CSRsViaCopy!"
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 2618)
;
2619 }
2620 }
2621
2622 RetOps[0] = Chain; // Update chain.
2623
2624 // Add the flag if we have it.
2625 if (Flag.getNode())
2626 RetOps.push_back(Flag);
2627
2628 X86ISD::NodeType opcode = X86ISD::RET_FLAG;
2629 if (CallConv == CallingConv::X86_INTR)
2630 opcode = X86ISD::IRET;
2631 return DAG.getNode(opcode, dl, MVT::Other, RetOps);
2632}
2633
2634bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
2635 if (N->getNumValues() != 1 || !N->hasNUsesOfValue(1, 0))
2636 return false;
2637
2638 SDValue TCChain = Chain;
2639 SDNode *Copy = *N->use_begin();
2640 if (Copy->getOpcode() == ISD::CopyToReg) {
2641 // If the copy has a glue operand, we conservatively assume it isn't safe to
2642 // perform a tail call.
2643 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
2644 return false;
2645 TCChain = Copy->getOperand(0);
2646 } else if (Copy->getOpcode() != ISD::FP_EXTEND)
2647 return false;
2648
2649 bool HasRet = false;
2650 for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
2651 UI != UE; ++UI) {
2652 if (UI->getOpcode() != X86ISD::RET_FLAG)
2653 return false;
2654 // If we are returning more than one value, we can definitely
2655 // not make a tail call see PR19530
2656 if (UI->getNumOperands() > 4)
2657 return false;
2658 if (UI->getNumOperands() == 4 &&
2659 UI->getOperand(UI->getNumOperands()-1).getValueType() != MVT::Glue)
2660 return false;
2661 HasRet = true;
2662 }
2663
2664 if (!HasRet)
2665 return false;
2666
2667 Chain = TCChain;
2668 return true;
2669}
2670
2671EVT X86TargetLowering::getTypeForExtReturn(LLVMContext &Context, EVT VT,
2672 ISD::NodeType ExtendKind) const {
2673 MVT ReturnMVT = MVT::i32;
2674
2675 bool Darwin = Subtarget.getTargetTriple().isOSDarwin();
2676 if (VT == MVT::i1 || (!Darwin && (VT == MVT::i8 || VT == MVT::i16))) {
2677 // The ABI does not require i1, i8 or i16 to be extended.
2678 //
2679 // On Darwin, there is code in the wild relying on Clang's old behaviour of
2680 // always extending i8/i16 return values, so keep doing that for now.
2681 // (PR26665).
2682 ReturnMVT = MVT::i8;
2683 }
2684
2685 EVT MinVT = getRegisterType(Context, ReturnMVT);
2686 return VT.bitsLT(MinVT) ? MinVT : VT;
2687}
2688
2689/// Reads two 32 bit registers and creates a 64 bit mask value.
2690/// \param VA The current 32 bit value that need to be assigned.
2691/// \param NextVA The next 32 bit value that need to be assigned.
2692/// \param Root The parent DAG node.
2693/// \param [in,out] InFlag Represents SDvalue in the parent DAG node for
2694/// glue purposes. In the case the DAG is already using
2695/// physical register instead of virtual, we should glue
2696/// our new SDValue to InFlag SDvalue.
2697/// \return a new SDvalue of size 64bit.
2698static SDValue getv64i1Argument(CCValAssign &VA, CCValAssign &NextVA,
2699 SDValue &Root, SelectionDAG &DAG,
2700 const SDLoc &Dl, const X86Subtarget &Subtarget,
2701 SDValue *InFlag = nullptr) {
2702 assert((Subtarget.hasBWI()) && "Expected AVX512BW target!")(((Subtarget.hasBWI()) && "Expected AVX512BW target!"
) ? static_cast<void> (0) : __assert_fail ("(Subtarget.hasBWI()) && \"Expected AVX512BW target!\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 2702, __PRETTY_FUNCTION__))
;
2703 assert(Subtarget.is32Bit() && "Expecting 32 bit target")((Subtarget.is32Bit() && "Expecting 32 bit target") ?
static_cast<void> (0) : __assert_fail ("Subtarget.is32Bit() && \"Expecting 32 bit target\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 2703, __PRETTY_FUNCTION__))
;
2704 assert(VA.getValVT() == MVT::v64i1 &&((VA.getValVT() == MVT::v64i1 && "Expecting first location of 64 bit width type"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Expecting first location of 64 bit width type\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 2705, __PRETTY_FUNCTION__))
2705 "Expecting first location of 64 bit width type")((VA.getValVT() == MVT::v64i1 && "Expecting first location of 64 bit width type"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Expecting first location of 64 bit width type\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 2705, __PRETTY_FUNCTION__))
;
2706 assert(NextVA.getValVT() == VA.getValVT() &&((NextVA.getValVT() == VA.getValVT() && "The locations should have the same type"
) ? static_cast<void> (0) : __assert_fail ("NextVA.getValVT() == VA.getValVT() && \"The locations should have the same type\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 2707, __PRETTY_FUNCTION__))
2707 "The locations should have the same type")((NextVA.getValVT() == VA.getValVT() && "The locations should have the same type"
) ? static_cast<void> (0) : __assert_fail ("NextVA.getValVT() == VA.getValVT() && \"The locations should have the same type\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 2707, __PRETTY_FUNCTION__))
;
2708 assert(VA.isRegLoc() && NextVA.isRegLoc() &&((VA.isRegLoc() && NextVA.isRegLoc() && "The values should reside in two registers"
) ? static_cast<void> (0) : __assert_fail ("VA.isRegLoc() && NextVA.isRegLoc() && \"The values should reside in two registers\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 2709, __PRETTY_FUNCTION__))
2709 "The values should reside in two registers")((VA.isRegLoc() && NextVA.isRegLoc() && "The values should reside in two registers"
) ? static_cast<void> (0) : __assert_fail ("VA.isRegLoc() && NextVA.isRegLoc() && \"The values should reside in two registers\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 2709, __PRETTY_FUNCTION__))
;
2710
2711 SDValue Lo, Hi;
2712 SDValue ArgValueLo, ArgValueHi;
2713
2714 MachineFunction &MF = DAG.getMachineFunction();
2715 const TargetRegisterClass *RC = &X86::GR32RegClass;
2716
2717 // Read a 32 bit value from the registers.
2718 if (nullptr == InFlag) {
2719 // When no physical register is present,
2720 // create an intermediate virtual register.
2721 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
2722 ArgValueLo = DAG.getCopyFromReg(Root, Dl, Reg, MVT::i32);
2723 Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
2724 ArgValueHi = DAG.getCopyFromReg(Root, Dl, Reg, MVT::i32);
2725 } else {
2726 // When a physical register is available read the value from it and glue
2727 // the reads together.
2728 ArgValueLo =
2729 DAG.getCopyFromReg(Root, Dl, VA.getLocReg(), MVT::i32, *InFlag);
2730 *InFlag = ArgValueLo.getValue(2);
2731 ArgValueHi =
2732 DAG.getCopyFromReg(Root, Dl, NextVA.getLocReg(), MVT::i32, *InFlag);
2733 *InFlag = ArgValueHi.getValue(2);
2734 }
2735
2736 // Convert the i32 type into v32i1 type.
2737 Lo = DAG.getBitcast(MVT::v32i1, ArgValueLo);
2738
2739 // Convert the i32 type into v32i1 type.
2740 Hi = DAG.getBitcast(MVT::v32i1, ArgValueHi);
2741
2742 // Concatenate the two values together.
2743 return DAG.getNode(ISD::CONCAT_VECTORS, Dl, MVT::v64i1, Lo, Hi);
2744}
2745
2746/// The function will lower a register of various sizes (8/16/32/64)
2747/// to a mask value of the expected size (v8i1/v16i1/v32i1/v64i1)
2748/// \returns a DAG node contains the operand after lowering to mask type.
2749static SDValue lowerRegToMasks(const SDValue &ValArg, const EVT &ValVT,
2750 const EVT &ValLoc, const SDLoc &Dl,
2751 SelectionDAG &DAG) {
2752 SDValue ValReturned = ValArg;
2753
2754 if (ValVT == MVT::v1i1)
2755 return DAG.getNode(ISD::SCALAR_TO_VECTOR, Dl, MVT::v1i1, ValReturned);
2756
2757 if (ValVT == MVT::v64i1) {
2758 // In 32 bit machine, this case is handled by getv64i1Argument
2759 assert(ValLoc == MVT::i64 && "Expecting only i64 locations")((ValLoc == MVT::i64 && "Expecting only i64 locations"
) ? static_cast<void> (0) : __assert_fail ("ValLoc == MVT::i64 && \"Expecting only i64 locations\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 2759, __PRETTY_FUNCTION__))
;
2760 // In 64 bit machine, There is no need to truncate the value only bitcast
2761 } else {
2762 MVT maskLen;
2763 switch (ValVT.getSimpleVT().SimpleTy) {
2764 case MVT::v8i1:
2765 maskLen = MVT::i8;
2766 break;
2767 case MVT::v16i1:
2768 maskLen = MVT::i16;
2769 break;
2770 case MVT::v32i1:
2771 maskLen = MVT::i32;
2772 break;
2773 default:
2774 llvm_unreachable("Expecting a vector of i1 types")::llvm::llvm_unreachable_internal("Expecting a vector of i1 types"
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 2774)
;
2775 }
2776
2777 ValReturned = DAG.getNode(ISD::TRUNCATE, Dl, maskLen, ValReturned);
2778 }
2779 return DAG.getBitcast(ValVT, ValReturned);
2780}
2781
2782/// Lower the result values of a call into copies out of the physical
2783/// registers assigned to them by RetCC_X86, appending each lowered value
2784/// to \p InVals.
///
/// \param Chain   chain the register copies are attached to.
/// \param InFlag  glue value threaded through the register copies.
/// \param Ins     descriptions of the call's result values; the flags of
///                Ins[InsIndex] are consulted for the SSE-disabled check.
/// \param InVals  receives one lowered SDValue per loop iteration.
/// \param RegMask if non-null, the bits for every result register (and its
///                sub-registers) visited by the loop are cleared in it.
/// \returns the chain as it stands after the last register copy.
2785SDValue X86TargetLowering::LowerCallResult(
2786 SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
2787 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
2788 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
2789 uint32_t *RegMask) const {
2790
2791 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
2792 // Assign locations to each value returned by this call.
2793 SmallVector<CCValAssign, 16> RVLocs;
2794 bool Is64Bit = Subtarget.is64Bit();
2795 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
2796 *DAG.getContext());
2797 CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
2798
2799 // Copy all of the result registers out of their specified physreg.
 // I indexes RVLocs and InsIndex indexes Ins; they diverge when the custom
 // v64i1 path below consumes two locations for a single value.
2800 for (unsigned I = 0, InsIndex = 0, E = RVLocs.size(); I != E;
2801 ++I, ++InsIndex) {
2802 CCValAssign &VA = RVLocs[I];
2803 EVT CopyVT = VA.getLocVT();
2804
2805 // In some calling conventions we need to remove the used registers
2806 // from the register mask.
 // NOTE(review): only VA's register is cleared here. The second location
 // consumed by the custom path below is skipped by the extra ++I, so its
 // register is never cleared from RegMask - confirm this is intended.
2807 if (RegMask) {
2808 for (MCSubRegIterator SubRegs(VA.getLocReg(), TRI, /*IncludeSelf=*/true);
2809 SubRegs.isValid(); ++SubRegs)
2810 RegMask[*SubRegs / 32] &= ~(1u << (*SubRegs % 32));
2811 }
2812
2813 // If this is x86-64, and we disabled SSE, we can't return FP values
2814 if ((CopyVT == MVT::f32 || CopyVT == MVT::f64 || CopyVT == MVT::f128) &&
2815 ((Is64Bit || Ins[InsIndex].Flags.isInReg()) && !Subtarget.hasSSE1())) {
2816 errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
2817 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
2818 }
2819
2820 // If we prefer to use the value in xmm registers, copy it out as f80 and
2821 // use a truncate to move it from fp stack reg to xmm reg.
2822 bool RoundAfterCopy = false;
2823 if ((VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) &&
2824 isScalarFPTypeInSSEReg(VA.getValVT())) {
2825 if (!Subtarget.hasX87())
2826 report_fatal_error("X87 register return with X87 disabled");
2827 CopyVT = MVT::f80;
2828 RoundAfterCopy = (CopyVT != VA.getLocVT());
2829 }
2830
2831 SDValue Val;
2832 if (VA.needsCustom()) {
2833 assert(VA.getValVT() == MVT::v64i1 &&((VA.getValVT() == MVT::v64i1 && "Currently the only custom case is when we split v64i1 to 2 regs"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 2834, __PRETTY_FUNCTION__))
2834 "Currently the only custom case is when we split v64i1 to 2 regs")((VA.getValVT() == MVT::v64i1 && "Currently the only custom case is when we split v64i1 to 2 regs"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 2834, __PRETTY_FUNCTION__))
;
 // The value occupies two consecutive locations; ++I consumes the second
 // one here. InFlag is passed by address so the helper can update the glue.
 // NOTE(review): Chain is not reassigned in this branch, unlike the
 // non-custom branch below - verify against getv64i1Argument.
2835 Val =
2836 getv64i1Argument(VA, RVLocs[++I], Chain, DAG, dl, Subtarget, &InFlag);
2837 } else {
 // getValue(1) of the copy is the new chain; the copied value and the
 // outgoing glue are then read back from that same node as results 0 and 2.
2838 Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), CopyVT, InFlag)
2839 .getValue(1);
2840 Val = Chain.getValue(0);
2841 InFlag = Chain.getValue(2);
2842 }
2843
2844 if (RoundAfterCopy)
2845 Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val,
2846 // This truncation won't change the value.
2847 DAG.getIntPtrConstant(1, dl));
2848
 // Values whose scalar type is i1 and that were extended in their location
 // are converted back to the value type.
2849 if (VA.isExtInLoc() && (VA.getValVT().getScalarType() == MVT::i1)) {
2850 if (VA.getValVT().isVector() &&
2851 ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
2852 (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
2853 // promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
2854 Val = lowerRegToMasks(Val, VA.getValVT(), VA.getLocVT(), dl, DAG);
2855 } else
2856 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
2857 }
2858
2859 InVals.push_back(Val);
2860 }
2861
2862 return Chain;
2863}
2864
2865//===----------------------------------------------------------------------===//
2866// C & StdCall & Fast Calling Convention implementation
2867//===----------------------------------------------------------------------===//
2868// The StdCall calling convention is the standard for many Windows API
2869// routines. It differs from the C calling convention only slightly: the
2870// callee, not the caller, cleans up the stack. Symbols should also be
2871// decorated in a particular way. It doesn't support any vector arguments.
2872// For info on the fast calling convention, see the Fast Calling Convention
2873// (tail call) implementation, LowerX86_32FastCCCallTo.
2874
2875/// CallIsStructReturn - Determines whether a call uses struct return
2876/// semantics.
2877enum StructReturnType {
2878 NotStructReturn,
2879 RegStructReturn,
2880 StackStructReturn
2881};
2882static StructReturnType
2883callIsStructReturn(ArrayRef<ISD::OutputArg> Outs, bool IsMCU) {
2884 if (Outs.empty())
2885 return NotStructReturn;
2886
2887 const ISD::ArgFlagsTy &Flags = Outs[0].Flags;
2888 if (!Flags.isSRet())
2889 return NotStructReturn;
2890 if (Flags.isInReg() || IsMCU)
2891 return RegStructReturn;
2892 return StackStructReturn;
2893}
2894
2895/// Determines whether a function uses struct return semantics.
2896static StructReturnType
2897argsAreStructReturn(ArrayRef<ISD::InputArg> Ins, bool IsMCU) {
2898 if (Ins.empty())
2899 return NotStructReturn;
2900
2901 const ISD::ArgFlagsTy &Flags = Ins[0].Flags;
2902 if (!Flags.isSRet())
2903 return NotStructReturn;
2904 if (Flags.isInReg() || IsMCU)
2905 return RegStructReturn;
2906 return StackStructReturn;
2907}
2908
2909/// Make a copy of an aggregate at address specified by "Src" to address
2910/// "Dst" with size and alignment information specified by the specific
2911/// parameter attribute. The copy will be passed as a byval function parameter.
2912static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
2913 SDValue Chain, ISD::ArgFlagsTy Flags,
2914 SelectionDAG &DAG, const SDLoc &dl) {
2915 SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
2916
2917 return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
2918 /*isVolatile*/false, /*AlwaysInline=*/true,
2919 /*isTailCall*/false,
2920 MachinePointerInfo(), MachinePointerInfo());
2921}
2922
2923/// Return true if the calling convention is one that we can guarantee TCO for.
2924static bool canGuaranteeTCO(CallingConv::ID CC) {
2925 return (CC == CallingConv::Fast || CC == CallingConv::GHC ||
2926 CC == CallingConv::X86_RegCall || CC == CallingConv::HiPE ||
2927 CC == CallingConv::HHVM);
2928}
2929
2930/// Return true if we might ever do TCO for calls with this calling convention.
2931static bool mayTailCallThisCC(CallingConv::ID CC) {
2932 switch (CC) {
2933 // C calling conventions:
2934 case CallingConv::C:
2935 case CallingConv::Win64:
2936 case CallingConv::X86_64_SysV:
2937 // Callee pop conventions:
2938 case CallingConv::X86_ThisCall:
2939 case CallingConv::X86_StdCall:
2940 case CallingConv::X86_VectorCall:
2941 case CallingConv::X86_FastCall:
2942 // Swift:
2943 case CallingConv::Swift:
2944 return true;
2945 default:
2946 return canGuaranteeTCO(CC);
2947 }
2948}
2949
2950/// Return true if the function is being made into a tailcall target by
2951/// changing its ABI.
2952static bool shouldGuaranteeTCO(CallingConv::ID CC, bool GuaranteedTailCallOpt) {
2953 return GuaranteedTailCallOpt && canGuaranteeTCO(CC);
2954}
2955
2956bool X86TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
2957 auto Attr =
2958 CI->getParent()->getParent()->getFnAttribute("disable-tail-calls");
2959 if (!CI->isTailCall() || Attr.getValueAsString() == "true")
2960 return false;
2961
2962 ImmutableCallSite CS(CI);
2963 CallingConv::ID CalleeCC = CS.getCallingConv();
2964 if (!mayTailCallThisCC(CalleeCC))
2965 return false;
2966
2967 return true;
2968}
2969
/// Lower an incoming argument that was assigned a memory location.
///
/// Depending on the argument this returns:
///  - for byval arguments, the frame index of a fixed stack object covering
///    the byval bytes (no load is emitted);
///  - for copy elision candidates, a load from a fixed stack object created
///    for (or already containing) the argument;
///  - otherwise, a load from a new fixed stack object, converted back to the
///    value type when the value was extended in memory.
///
/// \param Ins the incoming argument descriptions; only Ins[i] is read.
/// \param VA  the memory location assigned to the argument.
/// \param MFI frame info in which the fixed stack objects are created.
/// \param i   index of the argument within \p Ins.
2970SDValue
2971X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
2972 const SmallVectorImpl<ISD::InputArg> &Ins,
2973 const SDLoc &dl, SelectionDAG &DAG,
2974 const CCValAssign &VA,
2975 MachineFrameInfo &MFI, unsigned i) const {
2976 // Create the nodes corresponding to a load from this parameter slot.
2977 ISD::ArgFlagsTy Flags = Ins[i].Flags;
2978 bool AlwaysUseMutable = shouldGuaranteeTCO(
2979 CallConv, DAG.getTarget().Options.GuaranteedTailCallOpt);
2980 bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
2981 EVT ValVT;
2982 MVT PtrVT = getPointerTy(DAG.getDataLayout());
2983
2984 // If value is passed by pointer we have address passed instead of the value
2985 // itself. No need to extend if the mask value and location share the same
2986 // absolute size.
2987 bool ExtendedInMem =
2988 VA.isExtInLoc() && VA.getValVT().getScalarType() == MVT::i1 &&
2989 VA.getValVT().getSizeInBits() != VA.getLocVT().getSizeInBits();
2990
 // ValVT is the type that is actually loaded from the stack slot.
2991 if (VA.getLocInfo() == CCValAssign::Indirect || ExtendedInMem)
2992 ValVT = VA.getLocVT();
2993 else
2994 ValVT = VA.getValVT();
2995
2996 // FIXME: For now, all byval parameter objects are marked mutable. This can be
2997 // changed with more analysis.
2998 // In case of tail call optimization mark all arguments mutable. Since they
2999 // could be overwritten by lowering of arguments in case of a tail call.
3000 if (Flags.isByVal()) {
3001 unsigned Bytes = Flags.getByValSize();
3002 if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects.
3003
3004 // FIXME: For now, all byval parameter objects are marked as aliasing. This
3005 // can be improved with deeper analysis.
3006 int FI = MFI.CreateFixedObject(Bytes, VA.getLocMemOffset(), isImmutable,
3007 /*isAliased=*/true);
3008 return DAG.getFrameIndex(FI, PtrVT);
3009 }
3010
3011 // This is an argument in memory. We might be able to perform copy elision.
3012 // If the argument is passed directly in memory without any extension, then we
3013 // can perform copy elision. Large vector types, for example, may be passed
3014 // indirectly by pointer.
3015 if (Flags.isCopyElisionCandidate() &&
3016 VA.getLocInfo() != CCValAssign::Indirect && !ExtendedInMem) {
3017 EVT ArgVT = Ins[i].ArgVT;
3018 SDValue PartAddr;
3019 if (Ins[i].PartOffset == 0) {
3020 // If this is a one-part value or the first part of a multi-part value,
3021 // create a stack object for the entire argument value type and return a
3022 // load from our portion of it. This assumes that if the first part of an
3023 // argument is in memory, the rest will also be in memory.
3024 int FI = MFI.CreateFixedObject(ArgVT.getStoreSize(), VA.getLocMemOffset(),
3025 /*Immutable=*/false);
3026 PartAddr = DAG.getFrameIndex(FI, PtrVT);
3027 return DAG.getLoad(
3028 ValVT, dl, Chain, PartAddr,
3029 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
3030 } else {
3031 // This is not the first piece of an argument in memory. See if there is
3032 // already a fixed stack object including this offset. If so, assume it
3033 // was created by the PartOffset == 0 branch above and create a load from
3034 // the appropriate offset into it.
3035 int64_t PartBegin = VA.getLocMemOffset();
3036 int64_t PartEnd = PartBegin + ValVT.getSizeInBits() / 8;
3037 int FI = MFI.getObjectIndexBegin();
 // Scan the fixed objects for one whose byte range fully contains
 // [PartBegin, PartEnd).
3038 for (; MFI.isFixedObjectIndex(FI); ++FI) {
3039 int64_t ObjBegin = MFI.getObjectOffset(FI);
3040 int64_t ObjEnd = ObjBegin + MFI.getObjectSize(FI);
3041 if (ObjBegin <= PartBegin && PartEnd <= ObjEnd)
3042 break;
3043 }
 // FI is still a fixed object index only if the scan above found a match;
 // otherwise control falls through to the generic path below.
3044 if (MFI.isFixedObjectIndex(FI)) {
3045 SDValue Addr =
3046 DAG.getNode(ISD::ADD, dl, PtrVT, DAG.getFrameIndex(FI, PtrVT),
3047 DAG.getIntPtrConstant(Ins[i].PartOffset, dl));
3048 return DAG.getLoad(
3049 ValVT, dl, Chain, Addr,
3050 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI,
3051 Ins[i].PartOffset));
3052 }
3053 }
3054 }
3055
 // Generic path: create a fixed object sized from ValVT and load from it.
3056 int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8,
3057 VA.getLocMemOffset(), isImmutable);
3058
3059 // Set SExt or ZExt flag.
3060 if (VA.getLocInfo() == CCValAssign::ZExt) {
3061 MFI.setObjectZExt(FI, true);
3062 } else if (VA.getLocInfo() == CCValAssign::SExt) {
3063 MFI.setObjectSExt(FI, true);
3064 }
3065
3066 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3067 SDValue Val = DAG.getLoad(
3068 ValVT, dl, Chain, FIN,
3069 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
 // When the value was extended in memory, convert the loaded location-typed
 // value back to the value type: SCALAR_TO_VECTOR for vector value types,
 // TRUNCATE otherwise.
3070 return ExtendedInMem
3071 ? (VA.getValVT().isVector()
3072 ? DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VA.getValVT(), Val)
3073 : DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val))
3074 : Val;
3075}
3076
3077// FIXME: Get this from tablegen.
3078static ArrayRef<MCPhysReg> get64BitArgumentGPRs(CallingConv::ID CallConv,
3079 const X86Subtarget &Subtarget) {
3080 assert(Subtarget.is64Bit())((Subtarget.is64Bit()) ? static_cast<void> (0) : __assert_fail
("Subtarget.is64Bit()", "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 3080, __PRETTY_FUNCTION__))
;
3081
3082 if (Subtarget.isCallingConvWin64(CallConv)) {
3083 static const MCPhysReg GPR64ArgRegsWin64[] = {
3084 X86::RCX, X86::RDX, X86::R8, X86::R9
3085 };
3086 return makeArrayRef(std::begin(GPR64ArgRegsWin64), std::end(GPR64ArgRegsWin64));
3087 }
3088
3089 static const MCPhysReg GPR64ArgRegs64Bit[] = {
3090 X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
3091 };
3092 return makeArrayRef(std::begin(GPR64ArgRegs64Bit), std::end(GPR64ArgRegs64Bit));
3093}
3094
3095// FIXME: Get this from tablegen.
3096static ArrayRef<MCPhysReg> get64BitArgumentXMMs(MachineFunction &MF,
3097 CallingConv::ID CallConv,
3098 const X86Subtarget &Subtarget) {
3099 assert(Subtarget.is64Bit())((Subtarget.is64Bit()) ? static_cast<void> (0) : __assert_fail
("Subtarget.is64Bit()", "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 3099, __PRETTY_FUNCTION__))
;
3100 if (Subtarget.isCallingConvWin64(CallConv)) {
3101 // The XMM registers which might contain var arg parameters are shadowed
3102 // in their paired GPR. So we only need to save the GPR to their home
3103 // slots.
3104 // TODO: __vectorcall will change this.
3105 return None;
3106 }
3107
3108 const Function &F = MF.getFunction();
3109 bool NoImplicitFloatOps = F.hasFnAttribute(Attribute::NoImplicitFloat);
3110 bool isSoftFloat = Subtarget.useSoftFloat();
3111 assert(!(isSoftFloat && NoImplicitFloatOps) &&((!(isSoftFloat && NoImplicitFloatOps) && "SSE register cannot be used when SSE is disabled!"
) ? static_cast<void> (0) : __assert_fail ("!(isSoftFloat && NoImplicitFloatOps) && \"SSE register cannot be used when SSE is disabled!\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 3112, __PRETTY_FUNCTION__))
3112 "SSE register cannot be used when SSE is disabled!")((!(isSoftFloat && NoImplicitFloatOps) && "SSE register cannot be used when SSE is disabled!"
) ? static_cast<void> (0) : __assert_fail ("!(isSoftFloat && NoImplicitFloatOps) && \"SSE register cannot be used when SSE is disabled!\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 3112, __PRETTY_FUNCTION__))
;
3113 if (isSoftFloat || NoImplicitFloatOps || !Subtarget.hasSSE1())
3114 // Kernel mode asks for SSE to be disabled, so there are no XMM argument
3115 // registers.
3116 return None;
3117
3118 static const MCPhysReg XMMArgRegs64Bit[] = {
3119 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
3120 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
3121 };
3122 return makeArrayRef(std::begin(XMMArgRegs64Bit), std::end(XMMArgRegs64Bit));
3123}
3124
3125#ifndef NDEBUG
3126static bool isSortedByValueNo(ArrayRef<CCValAssign> ArgLocs) {
3127 return std::is_sorted(ArgLocs.begin(), ArgLocs.end(),
3128 [](const CCValAssign &A, const CCValAssign &B) -> bool {
3129 return A.getValNo() < B.getValNo();
3130 });
3131}
3132#endif
3133
3134SDValue X86TargetLowering::LowerFormalArguments(
3135 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3136 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3137 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3138 MachineFunction &MF = DAG.getMachineFunction();
3139 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
3140 const TargetFrameLowering &TFI = *Subtarget.getFrameLowering();
3141
3142 const Function &F = MF.getFunction();
3143 if (F.hasExternalLinkage() && Subtarget.isTargetCygMing() &&
3144 F.getName() == "main")
3145 FuncInfo->setForceFramePointer(true);
3146
3147 MachineFrameInfo &MFI = MF.getFrameInfo();
3148 bool Is64Bit = Subtarget.is64Bit();
3149 bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
3150
3151 assert(((!(isVarArg && canGuaranteeTCO(CallConv)) &&
"Var args not supported with calling conv' regcall, fastcc, ghc or hipe"
) ? static_cast<void> (0) : __assert_fail ("!(isVarArg && canGuaranteeTCO(CallConv)) && \"Var args not supported with calling conv' regcall, fastcc, ghc or hipe\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 3153, __PRETTY_FUNCTION__))
3152 !(isVarArg && canGuaranteeTCO(CallConv)) &&((!(isVarArg && canGuaranteeTCO(CallConv)) &&
"Var args not supported with calling conv' regcall, fastcc, ghc or hipe"
) ? static_cast<void> (0) : __assert_fail ("!(isVarArg && canGuaranteeTCO(CallConv)) && \"Var args not supported with calling conv' regcall, fastcc, ghc or hipe\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 3153, __PRETTY_FUNCTION__))
3153 "Var args not supported with calling conv' regcall, fastcc, ghc or hipe")((!(isVarArg && canGuaranteeTCO(CallConv)) &&
"Var args not supported with calling conv' regcall, fastcc, ghc or hipe"
) ? static_cast<void> (0) : __assert_fail ("!(isVarArg && canGuaranteeTCO(CallConv)) && \"Var args not supported with calling conv' regcall, fastcc, ghc or hipe\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 3153, __PRETTY_FUNCTION__))
;
3154
3155 // Assign locations to all of the incoming arguments.
3156 SmallVector<CCValAssign, 16> ArgLocs;
3157 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
3158
3159 // Allocate shadow area for Win64.
3160 if (IsWin64)
3161 CCInfo.AllocateStack(32, 8);
3162
3163 CCInfo.AnalyzeArguments(Ins, CC_X86);
3164
3165 // In vectorcall calling convention a second pass is required for the HVA
3166 // types.
3167 if (CallingConv::X86_VectorCall == CallConv) {
3168 CCInfo.AnalyzeArgumentsSecondPass(Ins, CC_X86);
3169 }
3170
3171 // The next loop assumes that the locations are in the same order of the
3172 // input arguments.
3173 assert(isSortedByValueNo(ArgLocs) &&((isSortedByValueNo(ArgLocs) && "Argument Location list must be sorted before lowering"
) ? static_cast<void> (0) : __assert_fail ("isSortedByValueNo(ArgLocs) && \"Argument Location list must be sorted before lowering\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 3174, __PRETTY_FUNCTION__))
3174 "Argument Location list must be sorted before lowering")((isSortedByValueNo(ArgLocs) && "Argument Location list must be sorted before lowering"
) ? static_cast<void> (0) : __assert_fail ("isSortedByValueNo(ArgLocs) && \"Argument Location list must be sorted before lowering\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 3174, __PRETTY_FUNCTION__))
;
3175
3176 SDValue ArgValue;
3177 for (unsigned I = 0, InsIndex = 0, E = ArgLocs.size(); I != E;
3178 ++I, ++InsIndex) {
3179 assert(InsIndex < Ins.size() && "Invalid Ins index")((InsIndex < Ins.size() && "Invalid Ins index") ? static_cast
<void> (0) : __assert_fail ("InsIndex < Ins.size() && \"Invalid Ins index\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 3179, __PRETTY_FUNCTION__))
;
3180 CCValAssign &VA = ArgLocs[I];
3181
3182 if (VA.isRegLoc()) {
3183 EVT RegVT = VA.getLocVT();
3184 if (VA.needsCustom()) {
3185 assert(((VA.getValVT() == MVT::v64i1 && "Currently the only custom case is when we split v64i1 to 2 regs"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 3187, __PRETTY_FUNCTION__))
3186 VA.getValVT() == MVT::v64i1 &&((VA.getValVT() == MVT::v64i1 && "Currently the only custom case is when we split v64i1 to 2 regs"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 3187, __PRETTY_FUNCTION__))
3187 "Currently the only custom case is when we split v64i1 to 2 regs")((VA.getValVT() == MVT::v64i1 && "Currently the only custom case is when we split v64i1 to 2 regs"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 3187, __PRETTY_FUNCTION__))
;
3188
3189 // v64i1 values, in regcall calling convention, that are
3190 // compiled to 32 bit arch, are split up into two registers.
3191 ArgValue =
3192 getv64i1Argument(VA, ArgLocs[++I], Chain, DAG, dl, Subtarget);
3193 } else {
3194 const TargetRegisterClass *RC;
3195 if (RegVT == MVT::i8)
3196 RC = &X86::GR8RegClass;
3197 else if (RegVT == MVT::i16)
3198 RC = &X86::GR16RegClass;
3199 else if (RegVT == MVT::i32)
3200 RC = &X86::GR32RegClass;
3201 else if (Is64Bit && RegVT == MVT::i64)
3202 RC = &X86::GR64RegClass;
3203 else if (RegVT == MVT::f32)
3204 RC = Subtarget.hasAVX512() ? &X86::FR32XRegClass : &X86::FR32RegClass;
3205 else if (RegVT == MVT::f64)
3206 RC = Subtarget.hasAVX512() ? &X86::FR64XRegClass : &X86::FR64RegClass;
3207 else if (RegVT == MVT::f80)
3208 RC = &X86::RFP80RegClass;
3209 else if (RegVT == MVT::f128)
3210 RC = &X86::VR128RegClass;
3211 else if (RegVT.is512BitVector())
3212 RC = &X86::VR512RegClass;
3213 else if (RegVT.is256BitVector())
3214 RC = Subtarget.hasVLX() ? &X86::VR256XRegClass : &X86::VR256RegClass;
3215 else if (RegVT.is128BitVector())
3216 RC = Subtarget.hasVLX() ? &X86::VR128XRegClass : &X86::VR128RegClass;
3217 else if (RegVT == MVT::x86mmx)
3218 RC = &X86::VR64RegClass;
3219 else if (RegVT == MVT::v1i1)
3220 RC = &X86::VK1RegClass;
3221 else if (RegVT == MVT::v8i1)
3222 RC = &X86::VK8RegClass;
3223 else if (RegVT == MVT::v16i1)
3224 RC = &X86::VK16RegClass;
3225 else if (RegVT == MVT::v32i1)
3226 RC = &X86::VK32RegClass;
3227 else if (RegVT == MVT::v64i1)
3228 RC = &X86::VK64RegClass;
3229 else
3230 llvm_unreachable("Unknown argument type!")::llvm::llvm_unreachable_internal("Unknown argument type!", "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 3230)
;
3231
3232 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
3233 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
3234 }
3235
3236 // If this is an 8 or 16-bit value, it is really passed promoted to 32
3237 // bits. Insert an assert[sz]ext to capture this, then truncate to the
3238 // right size.
3239 if (VA.getLocInfo() == CCValAssign::SExt)
3240 ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
3241 DAG.getValueType(VA.getValVT()));
3242 else if (VA.getLocInfo() == CCValAssign::ZExt)
3243 ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
3244 DAG.getValueType(VA.getValVT()));
3245 else if (VA.getLocInfo() == CCValAssign::BCvt)
3246 ArgValue = DAG.getBitcast(VA.getValVT(), ArgValue);
3247
3248 if (VA.isExtInLoc()) {
3249 // Handle MMX values passed in XMM regs.
3250 if (RegVT.isVector() && VA.getValVT().getScalarType() != MVT::i1)
3251 ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(), ArgValue);
3252 else if (VA.getValVT().isVector() &&
3253 VA.getValVT().getScalarType() == MVT::i1 &&
3254 ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
3255 (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
3256 // Promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
3257 ArgValue = lowerRegToMasks(ArgValue, VA.getValVT(), RegVT, dl, DAG);
3258 } else
3259 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
3260 }
3261 } else {
3262 assert(VA.isMemLoc())((VA.isMemLoc()) ? static_cast<void> (0) : __assert_fail
("VA.isMemLoc()", "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 3262, __PRETTY_FUNCTION__))
;
3263 ArgValue =
3264 LowerMemArgument(Chain, CallConv, Ins, dl, DAG, VA, MFI, InsIndex);
3265 }
3266
3267 // If value is passed via pointer - do a load.
3268 if (VA.getLocInfo() == CCValAssign::Indirect && !Ins[I].Flags.isByVal())
3269 ArgValue =
3270 DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue, MachinePointerInfo());
3271
3272 InVals.push_back(ArgValue);
3273 }
3274
3275 for (unsigned I = 0, E = Ins.size(); I != E; ++I) {
3276 // Swift calling convention does not require we copy the sret argument
3277 // into %rax/%eax for the return. We don't set SRetReturnReg for Swift.
3278 if (CallConv == CallingConv::Swift)
3279 continue;
3280
3281 // All x86 ABIs require that for returning structs by value we copy the
3282 // sret argument into %rax/%eax (depending on ABI) for the return. Save
3283 // the argument into a virtual register so that we can access it from the
3284 // return points.
3285 if (Ins[I].Flags.isSRet()) {
3286 unsigned Reg = FuncInfo->getSRetReturnReg();
3287 if (!Reg) {
3288 MVT PtrTy = getPointerTy(DAG.getDataLayout());
3289 Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
3290 FuncInfo->setSRetReturnReg(Reg);
3291 }
3292 SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[I]);
3293 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
3294 break;
3295 }
3296 }
3297
3298 unsigned StackSize = CCInfo.getNextStackOffset();
3299 // Align stack specially for tail calls.
3300 if (shouldGuaranteeTCO(CallConv,
3301 MF.getTarget().Options.GuaranteedTailCallOpt))
3302 StackSize = GetAlignedArgumentStackSize(StackSize, DAG);
3303
3304 // If the function takes variable number of arguments, make a frame index for
3305 // the start of the first vararg value... for expansion of llvm.va_start. We
3306 // can skip this if there are no va_start calls.
3307 if (MFI.hasVAStart() &&
3308 (Is64Bit || (CallConv != CallingConv::X86_FastCall &&
3309 CallConv != CallingConv::X86_ThisCall))) {
3310 FuncInfo->setVarArgsFrameIndex(MFI.CreateFixedObject(1, StackSize, true));
3311 }
3312
3313 // Figure out if XMM registers are in use.
3314 assert(!(Subtarget.useSoftFloat() &&((!(Subtarget.useSoftFloat() && F.hasFnAttribute(Attribute
::NoImplicitFloat)) && "SSE register cannot be used when SSE is disabled!"
) ? static_cast<void> (0) : __assert_fail ("!(Subtarget.useSoftFloat() && F.hasFnAttribute(Attribute::NoImplicitFloat)) && \"SSE register cannot be used when SSE is disabled!\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 3316, __PRETTY_FUNCTION__))
3315 F.hasFnAttribute(Attribute::NoImplicitFloat)) &&((!(Subtarget.useSoftFloat() && F.hasFnAttribute(Attribute
::NoImplicitFloat)) && "SSE register cannot be used when SSE is disabled!"
) ? static_cast<void> (0) : __assert_fail ("!(Subtarget.useSoftFloat() && F.hasFnAttribute(Attribute::NoImplicitFloat)) && \"SSE register cannot be used when SSE is disabled!\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 3316, __PRETTY_FUNCTION__))
3316 "SSE register cannot be used when SSE is disabled!")((!(Subtarget.useSoftFloat() && F.hasFnAttribute(Attribute
::NoImplicitFloat)) && "SSE register cannot be used when SSE is disabled!"
) ? static_cast<void> (0) : __assert_fail ("!(Subtarget.useSoftFloat() && F.hasFnAttribute(Attribute::NoImplicitFloat)) && \"SSE register cannot be used when SSE is disabled!\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 3316, __PRETTY_FUNCTION__))
;
3317
3318 // 64-bit calling conventions support varargs and register parameters, so we
3319 // have to do extra work to spill them in the prologue.
3320 if (Is64Bit && isVarArg && MFI.hasVAStart()) {
3321 // Find the first unallocated argument registers.
3322 ArrayRef<MCPhysReg> ArgGPRs = get64BitArgumentGPRs(CallConv, Subtarget);
3323 ArrayRef<MCPhysReg> ArgXMMs = get64BitArgumentXMMs(MF, CallConv, Subtarget);
3324 unsigned NumIntRegs = CCInfo.getFirstUnallocated(ArgGPRs);
3325 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(ArgXMMs);
3326 assert(!(NumXMMRegs && !Subtarget.hasSSE1()) &&((!(NumXMMRegs && !Subtarget.hasSSE1()) && "SSE register cannot be used when SSE is disabled!"
) ? static_cast<void> (0) : __assert_fail ("!(NumXMMRegs && !Subtarget.hasSSE1()) && \"SSE register cannot be used when SSE is disabled!\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 3327, __PRETTY_FUNCTION__))
3327 "SSE register cannot be used when SSE is disabled!")((!(NumXMMRegs && !Subtarget.hasSSE1()) && "SSE register cannot be used when SSE is disabled!"
) ? static_cast<void> (0) : __assert_fail ("!(NumXMMRegs && !Subtarget.hasSSE1()) && \"SSE register cannot be used when SSE is disabled!\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 3327, __PRETTY_FUNCTION__))
;
3328
3329 // Gather all the live in physical registers.
3330 SmallVector<SDValue, 6> LiveGPRs;
3331 SmallVector<SDValue, 8> LiveXMMRegs;
3332 SDValue ALVal;
3333 for (MCPhysReg Reg : ArgGPRs.slice(NumIntRegs)) {
3334 unsigned GPR = MF.addLiveIn(Reg, &X86::GR64RegClass);
3335 LiveGPRs.push_back(
3336 DAG.getCopyFromReg(Chain, dl, GPR, MVT::i64));
3337 }
3338 if (!ArgXMMs.empty()) {
3339 unsigned AL = MF.addLiveIn(X86::AL, &X86::GR8RegClass);
3340 ALVal = DAG.getCopyFromReg(Chain, dl, AL, MVT::i8);
3341 for (MCPhysReg Reg : ArgXMMs.slice(NumXMMRegs)) {
3342 unsigned XMMReg = MF.addLiveIn(Reg, &X86::VR128RegClass);
3343 LiveXMMRegs.push_back(
3344 DAG.getCopyFromReg(Chain, dl, XMMReg, MVT::v4f32));
3345 }
3346 }
3347
3348 if (IsWin64) {
3349 // Get to the caller-allocated home save location. Add 8 to account
3350 // for the return address.
3351 int HomeOffset = TFI.getOffsetOfLocalArea() + 8;
3352 FuncInfo->setRegSaveFrameIndex(
3353 MFI.CreateFixedObject(1, NumIntRegs * 8 + HomeOffset, false));
3354 // Fixup to set vararg frame on shadow area (4 x i64).
3355 if (NumIntRegs < 4)
3356 FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex());
3357 } else {
3358 // For X86-64, if there are vararg parameters that are passed via
3359 // registers, then we must store them to their spots on the stack so
3360 // they may be loaded by dereferencing the result of va_next.
3361 FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);
3362 FuncInfo->setVarArgsFPOffset(ArgGPRs.size() * 8 + NumXMMRegs * 16);
3363 FuncInfo->setRegSaveFrameIndex(MFI.CreateStackObject(
3364 ArgGPRs.size() * 8 + ArgXMMs.size() * 16, 16, false));
3365 }
3366
3367 // Store the integer parameter registers.
3368 SmallVector<SDValue, 8> MemOps;
3369 SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
3370 getPointerTy(DAG.getDataLayout()));
3371 unsigned Offset = FuncInfo->getVarArgsGPOffset();
3372 for (SDValue Val : LiveGPRs) {
3373 SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
3374 RSFIN, DAG.getIntPtrConstant(Offset, dl));
3375 SDValue Store =
3376 DAG.getStore(Val.getValue(1), dl, Val, FIN,
3377 MachinePointerInfo::getFixedStack(
3378 DAG.getMachineFunction(),
3379 FuncInfo->getRegSaveFrameIndex(), Offset));
3380 MemOps.push_back(Store);
3381 Offset += 8;
3382 }
3383
3384 if (!ArgXMMs.empty() && NumXMMRegs != ArgXMMs.size()) {
3385 // Now store the XMM (fp + vector) parameter registers.
3386 SmallVector<SDValue, 12> SaveXMMOps;
3387 SaveXMMOps.push_back(Chain);
3388 SaveXMMOps.push_back(ALVal);
3389 SaveXMMOps.push_back(DAG.getIntPtrConstant(
3390 FuncInfo->getRegSaveFrameIndex(), dl));
3391 SaveXMMOps.push_back(DAG.getIntPtrConstant(
3392 FuncInfo->getVarArgsFPOffset(), dl));
3393 SaveXMMOps.insert(SaveXMMOps.end(), LiveXMMRegs.begin(),
3394 LiveXMMRegs.end());
3395 MemOps.push_back(DAG.getNode(X86ISD::VASTART_SAVE_XMM_REGS, dl,
3396 MVT::Other, SaveXMMOps));
3397 }
3398
3399 if (!MemOps.empty())
3400 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
3401 }
3402
3403 if (isVarArg && MFI.hasMustTailInVarArgFunc()) {
3404 // Find the largest legal vector type.
3405 MVT VecVT = MVT::Other;
3406 // FIXME: Only some x86_32 calling conventions support AVX512.
3407 if (Subtarget.hasAVX512() &&
3408 (Is64Bit || (CallConv == CallingConv::X86_VectorCall ||
3409 CallConv == CallingConv::Intel_OCL_BI)))
3410 VecVT = MVT::v16f32;
3411 else if (Subtarget.hasAVX())
3412 VecVT = MVT::v8f32;
3413 else if (Subtarget.hasSSE2())
3414 VecVT = MVT::v4f32;
3415
3416 // We forward some GPRs and some vector types.
3417 SmallVector<MVT, 2> RegParmTypes;
3418 MVT IntVT = Is64Bit ? MVT::i64 : MVT::i32;
3419 RegParmTypes.push_back(IntVT);
3420 if (VecVT != MVT::Other)
3421 RegParmTypes.push_back(VecVT);
3422
3423 // Compute the set of forwarded registers. The rest are scratch.
3424 SmallVectorImpl<ForwardedRegister> &Forwards =
3425 FuncInfo->getForwardedMustTailRegParms();
3426 CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, CC_X86);
3427
3428 // Conservatively forward AL on x86_64, since it might be used for varargs.
3429 if (Is64Bit && !CCInfo.isAllocated(X86::AL)) {
3430 unsigned ALVReg = MF.addLiveIn(X86::AL, &X86::GR8RegClass);
3431 Forwards.push_back(ForwardedRegister(ALVReg, X86::AL, MVT::i8));
3432 }
3433
3434 // Copy all forwards from physical to virtual registers.
3435 for (ForwardedRegister &FR : Forwards) {
3436 // FIXME: Can we use a less constrained schedule?
3437 SDValue RegVal = DAG.getCopyFromReg(Chain, dl, FR.VReg, FR.VT);
3438 FR.VReg = MF.getRegInfo().createVirtualRegister(getRegClassFor(FR.VT));
3439 Chain = DAG.getCopyToReg(Chain, dl, FR.VReg, RegVal);
3440 }
3441 }
3442
3443 // Some CCs need callee pop.
3444 if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
3445 MF.getTarget().Options.GuaranteedTailCallOpt)) {
3446 FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything.
3447 } else if (CallConv == CallingConv::X86_INTR && Ins.size() == 2) {
3448 // X86 interrupts must pop the error code (and the alignment padding) if
3449 // present.
3450 FuncInfo->setBytesToPopOnReturn(Is64Bit ? 16 : 4);
3451 } else {
3452 FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing.
3453 // If this is an sret function, the return should pop the hidden pointer.
3454 if (!Is64Bit && !canGuaranteeTCO(CallConv) &&
3455 !Subtarget.getTargetTriple().isOSMSVCRT() &&
3456 argsAreStructReturn(Ins, Subtarget.isTargetMCU()) == StackStructReturn)
3457 FuncInfo->setBytesToPopOnReturn(4);
3458 }
3459
3460 if (!Is64Bit) {
3461 // RegSaveFrameIndex is X86-64 only.
3462 FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
3463 if (CallConv == CallingConv::X86_FastCall ||
3464 CallConv == CallingConv::X86_ThisCall)
3465 // fastcc functions can't have varargs.
3466 FuncInfo->setVarArgsFrameIndex(0xAAAAAAA);
3467 }
3468
3469 FuncInfo->setArgumentStackSize(StackSize);
3470
3471 if (WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo()) {
3472 EHPersonality Personality = classifyEHPersonality(F.getPersonalityFn());
3473 if (Personality == EHPersonality::CoreCLR) {
3474 assert(Is64Bit)((Is64Bit) ? static_cast<void> (0) : __assert_fail ("Is64Bit"
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 3474, __PRETTY_FUNCTION__))
;
3475 // TODO: Add a mechanism to frame lowering that will allow us to indicate
3476 // that we'd prefer this slot be allocated towards the bottom of the frame
3477 // (i.e. near the stack pointer after allocating the frame). Every
3478 // funclet needs a copy of this slot in its (mostly empty) frame, and the
3479 // offset from the bottom of this and each funclet's frame must be the
3480 // same, so the size of funclets' (mostly empty) frames is dictated by
3481 // how far this slot is from the bottom (since they allocate just enough
3482 // space to accommodate holding this slot at the correct offset).
3483 int PSPSymFI = MFI.CreateStackObject(8, 8, /*isSS=*/false);
3484 EHInfo->PSPSymFrameIdx = PSPSymFI;
3485 }
3486 }
3487
3488 if (CallConv == CallingConv::X86_RegCall ||
3489 F.hasFnAttribute("no_caller_saved_registers")) {
3490 MachineRegisterInfo &MRI = MF.getRegInfo();
3491 for (std::pair<unsigned, unsigned> Pair : MRI.liveins())
3492 MRI.disableCalleeSavedRegister(Pair.first);
3493 }
3494
3495 return Chain;
3496}
3497
3498SDValue X86TargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr,
3499 SDValue Arg, const SDLoc &dl,
3500 SelectionDAG &DAG,
3501 const CCValAssign &VA,
3502 ISD::ArgFlagsTy Flags) const {
3503 unsigned LocMemOffset = VA.getLocMemOffset();
3504 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
3505 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
3506 StackPtr, PtrOff);
3507 if (Flags.isByVal())
3508 return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
3509
3510 return DAG.getStore(
3511 Chain, dl, Arg, PtrOff,
3512 MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset));
3513}
3514
3515/// Emit a load of return address if tail call
3516/// optimization is performed and it is required.
3517SDValue X86TargetLowering::EmitTailCallLoadRetAddr(
3518 SelectionDAG &DAG, SDValue &OutRetAddr, SDValue Chain, bool IsTailCall,
3519 bool Is64Bit, int FPDiff, const SDLoc &dl) const {
3520 // Adjust the Return address stack slot.
3521 EVT VT = getPointerTy(DAG.getDataLayout());
3522 OutRetAddr = getReturnAddressFrameIndex(DAG);
3523
3524 // Load the "old" Return address.
3525 OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, MachinePointerInfo());
3526 return SDValue(OutRetAddr.getNode(), 1);
3527}
3528
3529/// Emit a store of the return address if tail call
3530/// optimization is performed and it is required (FPDiff!=0).
3531static SDValue EmitTailCallStoreRetAddr(SelectionDAG &DAG, MachineFunction &MF,
3532 SDValue Chain, SDValue RetAddrFrIdx,
3533 EVT PtrVT, unsigned SlotSize,
3534 int FPDiff, const SDLoc &dl) {
3535 // Store the return address to the appropriate stack slot.
3536 if (!FPDiff) return Chain;
3537 // Calculate the new stack slot for the return address.
3538 int NewReturnAddrFI =
3539 MF.getFrameInfo().CreateFixedObject(SlotSize, (int64_t)FPDiff - SlotSize,
3540 false);
3541 SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, PtrVT);
3542 Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx,
3543 MachinePointerInfo::getFixedStack(
3544 DAG.getMachineFunction(), NewReturnAddrFI));
3545 return Chain;
3546}
3547
3548/// Returns a vector_shuffle mask for an movs{s|d}, movd
3549/// operation of specified width.
3550static SDValue getMOVL(SelectionDAG &DAG, const SDLoc &dl, MVT VT, SDValue V1,
3551 SDValue V2) {
3552 unsigned NumElems = VT.getVectorNumElements();
3553 SmallVector<int, 8> Mask;
3554 Mask.push_back(NumElems);
3555 for (unsigned i = 1; i != NumElems; ++i)
3556 Mask.push_back(i);
3557 return DAG.getVectorShuffle(VT, dl, V1, V2, Mask);
3558}
3559
3560SDValue
3561X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
3562 SmallVectorImpl<SDValue> &InVals) const {
3563 SelectionDAG &DAG = CLI.DAG;
3564 SDLoc &dl = CLI.DL;
3565 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
3566 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
3567 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
3568 SDValue Chain = CLI.Chain;
3569 SDValue Callee = CLI.Callee;
3570 CallingConv::ID CallConv = CLI.CallConv;
3571 bool &isTailCall = CLI.IsTailCall;
3572 bool isVarArg = CLI.IsVarArg;
3573
3574 MachineFunction &MF = DAG.getMachineFunction();
3575 bool Is64Bit = Subtarget.is64Bit();
3576 bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
3577 StructReturnType SR = callIsStructReturn(Outs, Subtarget.isTargetMCU());
3578 bool IsSibcall = false;
3579 X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>();
3580 auto Attr = MF.getFunction().getFnAttribute("disable-tail-calls");
3581 const auto *CI = dyn_cast_or_null<CallInst>(CLI.CS.getInstruction());
3582 const Function *Fn = CI ? CI->getCalledFunction() : nullptr;
3583 bool HasNCSR = (CI && CI->hasFnAttr("no_caller_saved_registers")) ||
3584 (Fn && Fn->hasFnAttribute("no_caller_saved_registers"));
3585 const auto *II = dyn_cast_or_null<InvokeInst>(CLI.CS.getInstruction());
3586 bool HasNoCfCheck =
3587 (CI && CI->doesNoCfCheck()) || (II && II->doesNoCfCheck());
3588 const Module *M = MF.getMMI().getModule();
3589 Metadata *IsCFProtectionSupported = M->getModuleFlag("cf-protection-branch");
3590
3591 if (CallConv == CallingConv::X86_INTR)
3592 report_fatal_error("X86 interrupts may not be called directly");
3593
3594 if (Attr.getValueAsString() == "true")
3595 isTailCall = false;
3596
3597 if (Subtarget.isPICStyleGOT() &&
3598 !MF.getTarget().Options.GuaranteedTailCallOpt) {
3599 // If we are using a GOT, disable tail calls to external symbols with
3600 // default visibility. Tail calling such a symbol requires using a GOT
3601 // relocation, which forces early binding of the symbol. This breaks code
3602 // that require lazy function symbol resolution. Using musttail or
3603 // GuaranteedTailCallOpt will override this.
3604 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
3605 if (!G || (!G->getGlobal()->hasLocalLinkage() &&
3606 G->getGlobal()->hasDefaultVisibility()))
3607 isTailCall = false;
3608 }
3609
3610 bool IsMustTail = CLI.CS && CLI.CS.isMustTailCall();
3611 if (IsMustTail) {
3612 // Force this to be a tail call. The verifier rules are enough to ensure
3613 // that we can lower this successfully without moving the return address
3614 // around.
3615 isTailCall = true;
3616 } else if (isTailCall) {
3617 // Check if it's really possible to do a tail call.
3618 isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
3619 isVarArg, SR != NotStructReturn,
3620 MF.getFunction().hasStructRetAttr(), CLI.RetTy,
3621 Outs, OutVals, Ins, DAG);
3622
3623 // Sibcalls are automatically detected tailcalls which do not require
3624 // ABI changes.
3625 if (!MF.getTarget().Options.GuaranteedTailCallOpt && isTailCall)
3626 IsSibcall = true;
3627
3628 if (isTailCall)
3629 ++NumTailCalls;
3630 }
3631
3632 assert(!(isVarArg && canGuaranteeTCO(CallConv)) &&((!(isVarArg && canGuaranteeTCO(CallConv)) &&
"Var args not supported with calling convention fastcc, ghc or hipe"
) ? static_cast<void> (0) : __assert_fail ("!(isVarArg && canGuaranteeTCO(CallConv)) && \"Var args not supported with calling convention fastcc, ghc or hipe\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 3633, __PRETTY_FUNCTION__))
3633 "Var args not supported with calling convention fastcc, ghc or hipe")((!(isVarArg && canGuaranteeTCO(CallConv)) &&
"Var args not supported with calling convention fastcc, ghc or hipe"
) ? static_cast<void> (0) : __assert_fail ("!(isVarArg && canGuaranteeTCO(CallConv)) && \"Var args not supported with calling convention fastcc, ghc or hipe\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 3633, __PRETTY_FUNCTION__))
;
3634
3635 // Analyze operands of the call, assigning locations to each operand.
3636 SmallVector<CCValAssign, 16> ArgLocs;
3637 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
3638
3639 // Allocate shadow area for Win64.
3640 if (IsWin64)
3641 CCInfo.AllocateStack(32, 8);
3642
3643 CCInfo.AnalyzeArguments(Outs, CC_X86);
3644
3645 // In vectorcall calling convention a second pass is required for the HVA
3646 // types.
3647 if (CallingConv::X86_VectorCall == CallConv) {
3648 CCInfo.AnalyzeArgumentsSecondPass(Outs, CC_X86);
3649 }
3650
3651 // Get a count of how many bytes are to be pushed on the stack.
3652 unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
3653 if (IsSibcall)
3654 // This is a sibcall. The memory operands are available in caller's
3655 // own caller's stack.
3656 NumBytes = 0;
3657 else if (MF.getTarget().Options.GuaranteedTailCallOpt &&
3658 canGuaranteeTCO(CallConv))
3659 NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
3660
3661 int FPDiff = 0;
3662 if (isTailCall && !IsSibcall && !IsMustTail) {
3663 // Lower arguments at fp - stackoffset + fpdiff.
3664 unsigned NumBytesCallerPushed = X86Info->getBytesToPopOnReturn();
3665
3666 FPDiff = NumBytesCallerPushed - NumBytes;
3667
3668 // Set the delta of movement of the returnaddr stackslot.
3669 // But only set if delta is greater than previous delta.
3670 if (FPDiff < X86Info->getTCReturnAddrDelta())
3671 X86Info->setTCReturnAddrDelta(FPDiff);
3672 }
3673
3674 unsigned NumBytesToPush = NumBytes;
3675 unsigned NumBytesToPop = NumBytes;
3676
3677 // If we have an inalloca argument, all stack space has already been allocated
3678 // for us and be right at the top of the stack. We don't support multiple
3679 // arguments passed in memory when using inalloca.
3680 if (!Outs.empty() && Outs.back().Flags.isInAlloca()) {
3681 NumBytesToPush = 0;
3682 if (!ArgLocs.back().isMemLoc())
3683 report_fatal_error("cannot use inalloca attribute on a register "
3684 "parameter");
3685 if (ArgLocs.back().getLocMemOffset() != 0)
3686 report_fatal_error("any parameter with the inalloca attribute must be "
3687 "the only memory argument");
3688 }
3689
3690 if (!IsSibcall)
3691 Chain = DAG.getCALLSEQ_START(Chain, NumBytesToPush,
3692 NumBytes - NumBytesToPush, dl);
3693
3694 SDValue RetAddrFrIdx;
3695 // Load return address for tail calls.
3696 if (isTailCall && FPDiff)
3697 Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall,
3698 Is64Bit, FPDiff, dl);
3699
3700 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
3701 SmallVector<SDValue, 8> MemOpChains;
3702 SDValue StackPtr;
3703
3704 // The next loop assumes that the locations are in the same order of the
3705 // input arguments.
3706 assert(isSortedByValueNo(ArgLocs) &&((isSortedByValueNo(ArgLocs) && "Argument Location list must be sorted before lowering"
) ? static_cast<void> (0) : __assert_fail ("isSortedByValueNo(ArgLocs) && \"Argument Location list must be sorted before lowering\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 3707, __PRETTY_FUNCTION__))
3707 "Argument Location list must be sorted before lowering")((isSortedByValueNo(ArgLocs) && "Argument Location list must be sorted before lowering"
) ? static_cast<void> (0) : __assert_fail ("isSortedByValueNo(ArgLocs) && \"Argument Location list must be sorted before lowering\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 3707, __PRETTY_FUNCTION__))
;
3708
3709 // Walk the register/memloc assignments, inserting copies/loads. In the case
3710 // of tail call optimization arguments are handle later.
3711 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
3712 for (unsigned I = 0, OutIndex = 0, E = ArgLocs.size(); I != E;
3713 ++I, ++OutIndex) {
3714 assert(OutIndex < Outs.size() && "Invalid Out index")((OutIndex < Outs.size() && "Invalid Out index") ?
static_cast<void> (0) : __assert_fail ("OutIndex < Outs.size() && \"Invalid Out index\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 3714, __PRETTY_FUNCTION__))
;
3715 // Skip inalloca arguments, they have already been written.
3716 ISD::ArgFlagsTy Flags = Outs[OutIndex].Flags;
3717 if (Flags.isInAlloca())
3718 continue;
3719
3720 CCValAssign &VA = ArgLocs[I];
3721 EVT RegVT = VA.getLocVT();
3722 SDValue Arg = OutVals[OutIndex];
3723 bool isByVal = Flags.isByVal();
3724
3725 // Promote the value if needed.
3726 switch (VA.getLocInfo()) {
3727 default: llvm_unreachable("Unknown loc info!")::llvm::llvm_unreachable_internal("Unknown loc info!", "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 3727)
;
3728 case CCValAssign::Full: break;
3729 case CCValAssign::SExt:
3730 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);
3731 break;
3732 case CCValAssign::ZExt:
3733 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg);
3734 break;
3735 case CCValAssign::AExt:
3736 if (Arg.getValueType().isVector() &&
3737 Arg.getValueType().getVectorElementType() == MVT::i1)
3738 Arg = lowerMasksToReg(Arg, RegVT, dl, DAG);
3739 else if (RegVT.is128BitVector()) {
3740 // Special case: passing MMX values in XMM registers.
3741 Arg = DAG.getBitcast(MVT::i64, Arg);
3742 Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg);
3743 Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg);
3744 } else
3745 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg);
3746 break;
3747 case CCValAssign::BCvt:
3748 Arg = DAG.getBitcast(RegVT, Arg);
3749 break;
3750 case CCValAssign::Indirect: {
3751 if (isByVal) {
3752 // Memcpy the argument to a temporary stack slot to prevent
3753 // the caller from seeing any modifications the callee may make
3754 // as guaranteed by the `byval` attribute.
3755 int FrameIdx = MF.getFrameInfo().CreateStackObject(
3756 Flags.getByValSize(), std::max(16, (int)Flags.getByValAlign()),
3757 false);
3758 SDValue StackSlot =
3759 DAG.getFrameIndex(FrameIdx, getPointerTy(DAG.getDataLayout()));
3760 Chain =
3761 CreateCopyOfByValArgument(Arg, StackSlot, Chain, Flags, DAG, dl);
3762 // From now on treat this as a regular pointer
3763 Arg = StackSlot;
3764 isByVal = false;
3765 } else {
3766 // Store the argument.
3767 SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
3768 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
3769 Chain = DAG.getStore(
3770 Chain, dl, Arg, SpillSlot,
3771 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
3772 Arg = SpillSlot;
3773 }
3774 break;
3775 }
3776 }
3777
3778 if (VA.needsCustom()) {
3779 assert(VA.getValVT() == MVT::v64i1 &&((VA.getValVT() == MVT::v64i1 && "Currently the only custom case is when we split v64i1 to 2 regs"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 3780, __PRETTY_FUNCTION__))
3780 "Currently the only custom case is when we split v64i1 to 2 regs")((VA.getValVT() == MVT::v64i1 && "Currently the only custom case is when we split v64i1 to 2 regs"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 3780, __PRETTY_FUNCTION__))
;
3781 // Split v64i1 value into two registers
3782 Passv64i1ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++I],
3783 Subtarget);
3784 } else if (VA.isRegLoc()) {
3785 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
3786 if (isVarArg && IsWin64) {
3787 // Win64 ABI requires argument XMM reg to be copied to the corresponding
3788 // shadow reg if callee is a varargs function.
3789 unsigned ShadowReg = 0;
3790 switch (VA.getLocReg()) {
3791 case X86::XMM0: ShadowReg = X86::RCX; break;
3792 case X86::XMM1: ShadowReg = X86::RDX; break;
3793 case X86::XMM2: ShadowReg = X86::R8; break;
3794 case X86::XMM3: ShadowReg = X86::R9; break;
3795 }
3796 if (ShadowReg)
3797 RegsToPass.push_back(std::make_pair(ShadowReg, Arg));
3798 }
3799 } else if (!IsSibcall && (!isTailCall || isByVal)) {
3800 assert(VA.isMemLoc())((VA.isMemLoc()) ? static_cast<void> (0) : __assert_fail
("VA.isMemLoc()", "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 3800, __PRETTY_FUNCTION__))
;
3801 if (!StackPtr.getNode())
3802 StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
3803 getPointerTy(DAG.getDataLayout()));
3804 MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
3805 dl, DAG, VA, Flags));
3806 }
3807 }
3808
3809 if (!MemOpChains.empty())
3810 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
3811
3812 if (Subtarget.isPICStyleGOT()) {
3813 // ELF / PIC requires GOT in the EBX register before function calls via PLT
3814 // GOT pointer.
3815 if (!isTailCall) {
3816 RegsToPass.push_back(std::make_pair(
3817 unsigned(X86::EBX), DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
3818 getPointerTy(DAG.getDataLayout()))));
3819 } else {
3820 // If we are tail calling and generating PIC/GOT style code load the
3821 // address of the callee into ECX. The value in ecx is used as target of
3822 // the tail jump. This is done to circumvent the ebx/callee-saved problem
3823 // for tail calls on PIC/GOT architectures. Normally we would just put the
3824 // address of GOT into ebx and then call target@PLT. But for tail calls
3825 // ebx would be restored (since ebx is callee saved) before jumping to the
3826 // target@PLT.
3827
3828 // Note: The actual moving to ECX is done further down.
3829 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
3830 if (G && !G->getGlobal()->hasLocalLinkage() &&
3831 G->getGlobal()->hasDefaultVisibility())
3832 Callee = LowerGlobalAddress(Callee, DAG);
3833 else if (isa<ExternalSymbolSDNode>(Callee))
3834 Callee = LowerExternalSymbol(Callee, DAG);
3835 }
3836 }
3837
3838 if (Is64Bit && isVarArg && !IsWin64 && !IsMustTail) {
3839 // From AMD64 ABI document:
3840 // For calls that may call functions that use varargs or stdargs
3841 // (prototype-less calls or calls to functions containing ellipsis (...) in
3842 // the declaration) %al is used as hidden argument to specify the number
3843 // of SSE registers used. The contents of %al do not need to match exactly
3844 // the number of registers, but must be an ubound on the number of SSE
3845 // registers used and is in the range 0 - 8 inclusive.
3846
3847 // Count the number of XMM registers allocated.
3848 static const MCPhysReg XMMArgRegs[] = {
3849 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
3850 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
3851 };
3852 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs);
3853 assert((Subtarget.hasSSE1() || !NumXMMRegs)(((Subtarget.hasSSE1() || !NumXMMRegs) && "SSE registers cannot be used when SSE is disabled"
) ? static_cast<void> (0) : __assert_fail ("(Subtarget.hasSSE1() || !NumXMMRegs) && \"SSE registers cannot be used when SSE is disabled\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 3854, __PRETTY_FUNCTION__))
3854 && "SSE registers cannot be used when SSE is disabled")(((Subtarget.hasSSE1() || !NumXMMRegs) && "SSE registers cannot be used when SSE is disabled"
) ? static_cast<void> (0) : __assert_fail ("(Subtarget.hasSSE1() || !NumXMMRegs) && \"SSE registers cannot be used when SSE is disabled\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 3854, __PRETTY_FUNCTION__))
;
3855
3856 RegsToPass.push_back(std::make_pair(unsigned(X86::AL),
3857 DAG.getConstant(NumXMMRegs, dl,
3858 MVT::i8)));
3859 }
3860
3861 if (isVarArg && IsMustTail) {
3862 const auto &Forwards = X86Info->getForwardedMustTailRegParms();
3863 for (const auto &F : Forwards) {
3864 SDValue Val = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT);
3865 RegsToPass.push_back(std::make_pair(unsigned(F.PReg), Val));
3866 }
3867 }
3868
3869 // For tail calls lower the arguments to the 'real' stack slots. Sibcalls
3870 // don't need this because the eligibility check rejects calls that require
3871 // shuffling arguments passed in memory.
3872 if (!IsSibcall && isTailCall) {
3873 // Force all the incoming stack arguments to be loaded from the stack
3874 // before any new outgoing arguments are stored to the stack, because the
3875 // outgoing stack slots may alias the incoming argument stack slots, and
3876 // the alias isn't otherwise explicit. This is slightly more conservative
3877 // than necessary, because it means that each store effectively depends
3878 // on every argument instead of just those arguments it would clobber.
3879 SDValue ArgChain = DAG.getStackArgumentTokenFactor(Chain);
3880
3881 SmallVector<SDValue, 8> MemOpChains2;
3882 SDValue FIN;
3883 int FI = 0;
3884 for (unsigned I = 0, OutsIndex = 0, E = ArgLocs.size(); I != E;
3885 ++I, ++OutsIndex) {
3886 CCValAssign &VA = ArgLocs[I];
3887
3888 if (VA.isRegLoc()) {
3889 if (VA.needsCustom()) {
3890 assert((CallConv == CallingConv::X86_RegCall) &&(((CallConv == CallingConv::X86_RegCall) && "Expecting custom case only in regcall calling convention"
) ? static_cast<void> (0) : __assert_fail ("(CallConv == CallingConv::X86_RegCall) && \"Expecting custom case only in regcall calling convention\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 3891, __PRETTY_FUNCTION__))
3891 "Expecting custom case only in regcall calling convention")(((CallConv == CallingConv::X86_RegCall) && "Expecting custom case only in regcall calling convention"
) ? static_cast<void> (0) : __assert_fail ("(CallConv == CallingConv::X86_RegCall) && \"Expecting custom case only in regcall calling convention\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 3891, __PRETTY_FUNCTION__))
;
3892 // This means that we are in special case where one argument was
3893 // passed through two register locations - Skip the next location
3894 ++I;
3895 }
3896
3897 continue;
3898 }
3899
3900 assert(VA.isMemLoc())((VA.isMemLoc()) ? static_cast<void> (0) : __assert_fail
("VA.isMemLoc()", "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 3900, __PRETTY_FUNCTION__))
;
3901 SDValue Arg = OutVals[OutsIndex];
3902 ISD::ArgFlagsTy Flags = Outs[OutsIndex].Flags;
3903 // Skip inalloca arguments. They don't require any work.
3904 if (Flags.isInAlloca())
3905 continue;
3906 // Create frame index.
3907 int32_t Offset = VA.getLocMemOffset()+FPDiff;
3908 uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8;
3909 FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
3910 FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
3911
3912 if (Flags.isByVal()) {
3913 // Copy relative to framepointer.
3914 SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset(), dl);
3915 if (!StackPtr.getNode())
3916 StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
3917 getPointerTy(DAG.getDataLayout()));
3918 Source = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
3919 StackPtr, Source);
3920
3921 MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN,
3922 ArgChain,
3923 Flags, DAG, dl));
3924 } else {
3925 // Store relative to framepointer.
3926 MemOpChains2.push_back(DAG.getStore(
3927 ArgChain, dl, Arg, FIN,
3928 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
3929 }
3930 }
3931
3932 if (!MemOpChains2.empty())
3933 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
3934
3935 // Store the return address to the appropriate stack slot.
3936 Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx,
3937 getPointerTy(DAG.getDataLayout()),
3938 RegInfo->getSlotSize(), FPDiff, dl);
3939 }
3940
3941 // Build a sequence of copy-to-reg nodes chained together with token chain
3942 // and flag operands which copy the outgoing args into registers.
3943 SDValue InFlag;
3944 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
3945 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
3946 RegsToPass[i].second, InFlag);
3947 InFlag = Chain.getValue(1);
3948 }
3949
3950 if (DAG.getTarget().getCodeModel() == CodeModel::Large) {
3951 assert(Is64Bit && "Large code model is only legal in 64-bit mode.")((Is64Bit && "Large code model is only legal in 64-bit mode."
) ? static_cast<void> (0) : __assert_fail ("Is64Bit && \"Large code model is only legal in 64-bit mode.\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 3951, __PRETTY_FUNCTION__))
;
3952 // In the 64-bit large code model, we have to make all calls
3953 // through a register, since the call instruction's 32-bit
3954 // pc-relative offset may not be large enough to hold the whole
3955 // address.
3956 } else if (Callee->getOpcode() == ISD::GlobalAddress ||
3957 Callee->getOpcode() == ISD::ExternalSymbol) {
3958 // Lower direct calls to global addresses and external symbols. Setting
3959 // ForCall to true here has the effect of removing WrapperRIP when possible
3960 // to allow direct calls to be selected without first materializing the
3961 // address into a register.
3962 Callee = LowerGlobalOrExternal(Callee, DAG, /*ForCall=*/true);
3963 } else if (Subtarget.isTarget64BitILP32() &&
3964 Callee->getValueType(0) == MVT::i32) {
3965 // Zero-extend the 32-bit Callee address into a 64-bit according to x32 ABI
3966 Callee = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Callee);
3967 }
3968
3969 // Returns a chain & a flag for retval copy to use.
3970 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
3971 SmallVector<SDValue, 8> Ops;
3972
3973 if (!IsSibcall && isTailCall) {
3974 Chain = DAG.getCALLSEQ_END(Chain,
3975 DAG.getIntPtrConstant(NumBytesToPop, dl, true),
3976 DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
3977 InFlag = Chain.getValue(1);
3978 }
3979
3980 Ops.push_back(Chain);
3981 Ops.push_back(Callee);
3982
3983 if (isTailCall)
3984 Ops.push_back(DAG.getConstant(FPDiff, dl, MVT::i32));
3985
3986 // Add argument registers to the end of the list so that they are known live
3987 // into the call.
3988 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
3989 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
3990 RegsToPass[i].second.getValueType()));
3991
3992 // Add a register mask operand representing the call-preserved registers.
3993 // If HasNCSR is asserted (attribute NoCallerSavedRegisters exists) then we
3994 // set X86_INTR calling convention because it has the same CSR mask
3995 // (same preserved registers).
3996 const uint32_t *Mask = RegInfo->getCallPreservedMask(
3997 MF, HasNCSR ? (CallingConv::ID)CallingConv::X86_INTR : CallConv);
3998 assert(Mask && "Missing call preserved mask for calling convention")((Mask && "Missing call preserved mask for calling convention"
) ? static_cast<void> (0) : __assert_fail ("Mask && \"Missing call preserved mask for calling convention\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 3998, __PRETTY_FUNCTION__))
;
3999
4000 // If this is an invoke in a 32-bit function using a funclet-based
4001 // personality, assume the function clobbers all registers. If an exception
4002 // is thrown, the runtime will not restore CSRs.
4003 // FIXME: Model this more precisely so that we can register allocate across
4004 // the normal edge and spill and fill across the exceptional edge.
4005 if (!Is64Bit && CLI.CS && CLI.CS.isInvoke()) {
4006 const Function &CallerFn = MF.getFunction();
4007 EHPersonality Pers =
4008 CallerFn.hasPersonalityFn()
4009 ? classifyEHPersonality(CallerFn.getPersonalityFn())
4010 : EHPersonality::Unknown;
4011 if (isFuncletEHPersonality(Pers))
4012 Mask = RegInfo->getNoPreservedMask();
4013 }
4014
4015 // Define a new register mask from the existing mask.
4016 uint32_t *RegMask = nullptr;
4017
4018 // In some calling conventions we need to remove the used physical registers
4019 // from the reg mask.
4020 if (CallConv == CallingConv::X86_RegCall || HasNCSR) {
4021 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
4022
4023 // Allocate a new Reg Mask and copy Mask.
4024 RegMask = MF.allocateRegMask();
4025 unsigned RegMaskSize = MachineOperand::getRegMaskSize(TRI->getNumRegs());
4026 memcpy(RegMask, Mask, sizeof(RegMask[0]) * RegMaskSize);
4027
4028 // Make sure all sub registers of the argument registers are reset
4029 // in the RegMask.
4030 for (auto const &RegPair : RegsToPass)
4031 for (MCSubRegIterator SubRegs(RegPair.first, TRI, /*IncludeSelf=*/true);
4032 SubRegs.isValid(); ++SubRegs)
4033 RegMask[*SubRegs / 32] &= ~(1u << (*SubRegs % 32));
4034
4035 // Create the RegMask Operand according to our updated mask.
4036 Ops.push_back(DAG.getRegisterMask(RegMask));
4037 } else {
4038 // Create the RegMask Operand according to the static mask.
4039 Ops.push_back(DAG.getRegisterMask(Mask));
4040 }
4041
4042 if (InFlag.getNode())
4043 Ops.push_back(InFlag);
4044
4045 if (isTailCall) {
4046 // We used to do:
4047 //// If this is the first return lowered for this function, add the regs
4048 //// to the liveout set for the function.
4049 // This isn't right, although it's probably harmless on x86; liveouts
4050 // should be computed from returns not tail calls. Consider a void
4051 // function making a tail call to a function returning int.
4052 MF.getFrameInfo().setHasTailCall();
4053 return DAG.getNode(X86ISD::TC_RETURN, dl, NodeTys, Ops);
4054 }
4055
4056 if (HasNoCfCheck && IsCFProtectionSupported) {
4057 Chain = DAG.getNode(X86ISD::NT_CALL, dl, NodeTys, Ops);
4058 } else {
4059 Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, Ops);
4060 }
4061 InFlag = Chain.getValue(1);
4062
4063 // Create the CALLSEQ_END node.
4064 unsigned NumBytesForCalleeToPop;
4065 if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
4066 DAG.getTarget().Options.GuaranteedTailCallOpt))
4067 NumBytesForCalleeToPop = NumBytes; // Callee pops everything
4068 else if (!Is64Bit && !canGuaranteeTCO(CallConv) &&
4069 !Subtarget.getTargetTriple().isOSMSVCRT() &&
4070 SR == StackStructReturn)
4071 // If this is a call to a struct-return function, the callee
4072 // pops the hidden struct pointer, so we have to push it back.
4073 // This is common for Darwin/X86, Linux & Mingw32 targets.
4074 // For MSVC Win32 targets, the caller pops the hidden struct pointer.
4075 NumBytesForCalleeToPop = 4;
4076 else
4077 NumBytesForCalleeToPop = 0; // Callee pops nothing.
4078
4079 if (CLI.DoesNotReturn && !getTargetMachine().Options.TrapUnreachable) {
4080 // No need to reset the stack after the call if the call doesn't return. To
4081 // make the MI verify, we'll pretend the callee does it for us.
4082 NumBytesForCalleeToPop = NumBytes;
4083 }
4084
4085 // Returns a flag for retval copy to use.
4086 if (!IsSibcall) {
4087 Chain = DAG.getCALLSEQ_END(Chain,
4088 DAG.getIntPtrConstant(NumBytesToPop, dl, true),
4089 DAG.getIntPtrConstant(NumBytesForCalleeToPop, dl,
4090 true),
4091 InFlag, dl);
4092 InFlag = Chain.getValue(1);
4093 }
4094
4095 // Handle result values, copying them out of physregs into vregs that we
4096 // return.
4097 return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
4098 InVals, RegMask);
4099}
4100
4101//===----------------------------------------------------------------------===//
4102// Fast Calling Convention (tail call) implementation
4103//===----------------------------------------------------------------------===//
4104
4105// Like std call, callee cleans arguments, convention except that ECX is
4106// reserved for storing the tail called function address. Only 2 registers are
4107// free for argument passing (inreg). Tail call optimization is performed
4108// provided:
4109// * tailcallopt is enabled
4110// * caller/callee are fastcc
4111// On X86_64 architecture with GOT-style position independent code only local
4112// (within module) calls are supported at the moment.
4113// To keep the stack aligned according to platform abi the function
4114// GetAlignedArgumentStackSize ensures that argument delta is always multiples
4115// of stack alignment. (Dynamic linkers need this - darwin's dyld for example)
4116// If a tail called function callee has more arguments than the caller the
4117// caller needs to make sure that there is room to move the RETADDR to. This is
4118// achieved by reserving an area the size of the argument delta right after the
4119// original RETADDR, but before the saved framepointer or the spilled registers
4120// e.g. caller(arg1, arg2) calls callee(arg1, arg2,arg3,arg4)
4121// stack layout:
4122// arg1
4123// arg2
4124// RETADDR
4125// [ new RETADDR
4126// move area ]
4127// (possible EBP)
4128// ESI
4129// EDI
4130// local1 ..
4131
4132/// Make the stack size align e.g 16n + 12 aligned for a 16-byte align
4133/// requirement.
4134unsigned
4135X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize,
4136 SelectionDAG& DAG) const {
4137 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
4138 const TargetFrameLowering &TFI = *Subtarget.getFrameLowering();
4139 unsigned StackAlignment = TFI.getStackAlignment();
4140 uint64_t AlignMask = StackAlignment - 1;
4141 int64_t Offset = StackSize;
4142 unsigned SlotSize = RegInfo->getSlotSize();
4143 if ( (Offset & AlignMask) <= (StackAlignment - SlotSize) ) {
4144 // Number smaller than 12 so just add the difference.
4145 Offset += ((StackAlignment - SlotSize) - (Offset & AlignMask));
4146 } else {
4147 // Mask out lower bits, add stackalignment once plus the 12 bytes.
4148 Offset = ((~AlignMask) & Offset) + StackAlignment +
4149 (StackAlignment-SlotSize);
4150 }
4151 return Offset;
4152}
4153
4154/// Return true if the given stack call argument is already available in the
4155/// same position (relatively) of the caller's incoming argument stack.
4156static
4157bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
4158 MachineFrameInfo &MFI, const MachineRegisterInfo *MRI,
4159 const X86InstrInfo *TII, const CCValAssign &VA) {
4160 unsigned Bytes = Arg.getValueSizeInBits() / 8;
4161
4162 for (;;) {
4163 // Look through nodes that don't alter the bits of the incoming value.
4164 unsigned Op = Arg.getOpcode();
4165 if (Op == ISD::ZERO_EXTEND || Op == ISD::ANY_EXTEND || Op == ISD::BITCAST) {
4166 Arg = Arg.getOperand(0);
4167 continue;
4168 }
4169 if (Op == ISD::TRUNCATE) {
4170 const SDValue &TruncInput = Arg.getOperand(0);
4171 if (TruncInput.getOpcode() == ISD::AssertZext &&
4172 cast<VTSDNode>(TruncInput.getOperand(1))->getVT() ==
4173 Arg.getValueType()) {
4174 Arg = TruncInput.getOperand(0);
4175 continue;
4176 }
4177 }
4178 break;
4179 }
4180
4181 int FI = INT_MAX2147483647;
4182 if (Arg.getOpcode() == ISD::CopyFromReg) {
4183 unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
4184 if (!TargetRegisterInfo::isVirtualRegister(VR))
4185 return false;
4186 MachineInstr *Def = MRI->getVRegDef(VR);
4187 if (!Def)
4188 return false;
4189 if (!Flags.isByVal()) {
4190 if (!TII->isLoadFromStackSlot(*Def, FI))
4191 return false;
4192 } else {
4193 unsigned Opcode = Def->getOpcode();
4194 if ((Opcode == X86::LEA32r || Opcode == X86::LEA64r ||
4195 Opcode == X86::LEA64_32r) &&
4196 Def->getOperand(1).isFI()) {
4197 FI = Def->getOperand(1).getIndex();
4198 Bytes = Flags.getByValSize();
4199 } else
4200 return false;
4201 }
4202 } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
4203 if (Flags.isByVal())
4204 // ByVal argument is passed in as a pointer but it's now being
4205 // dereferenced. e.g.
4206 // define @foo(%struct.X* %A) {
4207 // tail call @bar(%struct.X* byval %A)
4208 // }
4209 return false;
4210 SDValue Ptr = Ld->getBasePtr();
4211 FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
4212 if (!FINode)
4213 return false;
4214 FI = FINode->getIndex();
4215 } else if (Arg.getOpcode() == ISD::FrameIndex && Flags.isByVal()) {
4216 FrameIndexSDNode *FINode = cast<FrameIndexSDNode>(Arg);
4217 FI = FINode->getIndex();
4218 Bytes = Flags.getByValSize();
4219 } else
4220 return false;
4221
4222 assert(FI != INT_MAX)((FI != 2147483647) ? static_cast<void> (0) : __assert_fail
("FI != INT_MAX", "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 4222, __PRETTY_FUNCTION__))
;
4223 if (!MFI.isFixedObjectIndex(FI))
4224 return false;
4225
4226 if (Offset != MFI.getObjectOffset(FI))
4227 return false;
4228
4229 // If this is not byval, check that the argument stack object is immutable.
4230 // inalloca and argument copy elision can create mutable argument stack
4231 // objects. Byval objects can be mutated, but a byval call intends to pass the
4232 // mutated memory.
4233 if (!Flags.isByVal() && !MFI.isImmutableObjectIndex(FI))
4234 return false;
4235
4236 if (VA.getLocVT().getSizeInBits() > Arg.getValueSizeInBits()) {
4237 // If the argument location is wider than the argument type, check that any
4238 // extension flags match.
4239 if (Flags.isZExt() != MFI.isObjectZExt(FI) ||
4240 Flags.isSExt() != MFI.isObjectSExt(FI)) {
4241 return false;
4242 }
4243 }
4244
4245 return Bytes == MFI.getObjectSize(FI);
4246}
4247
4248/// Check whether the call is eligible for tail call optimization. Targets
4249/// that want to do tail call optimization should implement this function.
4250bool X86TargetLowering::IsEligibleForTailCallOptimization(
4251 SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
4252 bool isCalleeStructRet, bool isCallerStructRet, Type *RetTy,
4253 const SmallVectorImpl<ISD::OutputArg> &Outs,
4254 const SmallVectorImpl<SDValue> &OutVals,
4255 const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
4256 if (!mayTailCallThisCC(CalleeCC))
4257 return false;
4258
4259 // If -tailcallopt is specified, make fastcc functions tail-callable.
4260 MachineFunction &MF = DAG.getMachineFunction();
4261 const Function &CallerF = MF.getFunction();
4262
4263 // If the function return type is x86_fp80 and the callee return type is not,
4264 // then the FP_EXTEND of the call result is not a nop. It's not safe to
4265 // perform a tailcall optimization here.
4266 if (CallerF.getReturnType()->isX86_FP80Ty() && !RetTy->isX86_FP80Ty())
4267 return false;
4268
4269 CallingConv::ID CallerCC = CallerF.getCallingConv();
4270 bool CCMatch = CallerCC == CalleeCC;
4271 bool IsCalleeWin64 = Subtarget.isCallingConvWin64(CalleeCC);
4272 bool IsCallerWin64 = Subtarget.isCallingConvWin64(CallerCC);
4273
4274 // Win64 functions have extra shadow space for argument homing. Don't do the
4275 // sibcall if the caller and callee have mismatched expectations for this
4276 // space.
4277 if (IsCalleeWin64 != IsCallerWin64)
4278 return false;
4279
4280 if (DAG.getTarget().Options.GuaranteedTailCallOpt) {
4281 if (canGuaranteeTCO(CalleeCC) && CCMatch)
4282 return true;
4283 return false;
4284 }
4285
4286 // Look for obvious safe cases to perform tail call optimization that do not
4287 // require ABI changes. This is what gcc calls sibcall.
4288
4289 // Can't do sibcall if stack needs to be dynamically re-aligned. PEI needs to
4290 // emit a special epilogue.
4291 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
4292 if (RegInfo->needsStackRealignment(MF))
4293 return false;
4294
4295 // Also avoid sibcall optimization if either caller or callee uses struct
4296 // return semantics.
4297 if (isCalleeStructRet || isCallerStructRet)
4298 return false;
4299
4300 // Do not sibcall optimize vararg calls unless all arguments are passed via
4301 // registers.
4302 LLVMContext &C = *DAG.getContext();
4303 if (isVarArg && !Outs.empty()) {
4304 // Optimizing for varargs on Win64 is unlikely to be safe without
4305 // additional testing.
4306 if (IsCalleeWin64 || IsCallerWin64)
4307 return false;
4308
4309 SmallVector<CCValAssign, 16> ArgLocs;
4310 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
4311
4312 CCInfo.AnalyzeCallOperands(Outs, CC_X86);
4313 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i)
4314 if (!ArgLocs[i].isRegLoc())
4315 return false;
4316 }
4317
4318 // If the call result is in ST0 / ST1, it needs to be popped off the x87
4319 // stack. Therefore, if it's not used by the call it is not safe to optimize
4320 // this into a sibcall.
4321 bool Unused = false;
4322 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
4323 if (!Ins[i].Used) {
4324 Unused = true;
4325 break;
4326 }
4327 }
4328 if (Unused) {
4329 SmallVector<CCValAssign, 16> RVLocs;
4330 CCState CCInfo(CalleeCC, false, MF, RVLocs, C);
4331 CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
4332 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
4333 CCValAssign &VA = RVLocs[i];
4334 if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1)
4335 return false;
4336 }
4337 }
4338
4339 // Check that the call results are passed in the same way.
4340 if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
4341 RetCC_X86, RetCC_X86))
4342 return false;
4343 // The callee has to preserve all registers the caller needs to preserve.
4344 const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
4345 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
4346 if (!CCMatch) {
4347 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
4348 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
4349 return false;
4350 }
4351
4352 unsigned StackArgsSize = 0;
4353
4354 // If the callee takes no arguments then go on to check the results of the
4355 // call.
4356 if (!Outs.empty()) {
4357 // Check if stack adjustment is needed. For now, do not do this if any
4358 // argument is passed on the stack.
4359 SmallVector<CCValAssign, 16> ArgLocs;
4360 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
4361
4362 // Allocate shadow area for Win64
4363 if (IsCalleeWin64)
4364 CCInfo.AllocateStack(32, 8);
4365
4366 CCInfo.AnalyzeCallOperands(Outs, CC_X86);
4367 StackArgsSize = CCInfo.getNextStackOffset();
4368
4369 if (CCInfo.getNextStackOffset()) {
4370 // Check if the arguments are already laid out in the right way as
4371 // the caller's fixed stack objects.
4372 MachineFrameInfo &MFI = MF.getFrameInfo();
4373 const MachineRegisterInfo *MRI = &MF.getRegInfo();
4374 const X86InstrInfo *TII = Subtarget.getInstrInfo();
4375 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4376 CCValAssign &VA = ArgLocs[i];
4377 SDValue Arg = OutVals[i];
4378 ISD::ArgFlagsTy Flags = Outs[i].Flags;
4379 if (VA.getLocInfo() == CCValAssign::Indirect)
4380 return false;
4381 if (!VA.isRegLoc()) {
4382 if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
4383 MFI, MRI, TII, VA))
4384 return false;
4385 }
4386 }
4387 }
4388
4389 bool PositionIndependent = isPositionIndependent();
4390 // If the tailcall address may be in a register, then make sure it's
4391 // possible to register allocate for it. In 32-bit, the call address can
4392 // only target EAX, EDX, or ECX since the tail call must be scheduled after
4393 // callee-saved registers are restored. These happen to be the same
4394 // registers used to pass 'inreg' arguments so watch out for those.
4395 if (!Subtarget.is64Bit() && ((!isa<GlobalAddressSDNode>(Callee) &&
4396 !isa<ExternalSymbolSDNode>(Callee)) ||
4397 PositionIndependent)) {
4398 unsigned NumInRegs = 0;
4399 // In PIC we need an extra register to formulate the address computation
4400 // for the callee.
4401 unsigned MaxInRegs = PositionIndependent ? 2 : 3;
4402
4403 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4404 CCValAssign &VA = ArgLocs[i];
4405 if (!VA.isRegLoc())
4406 continue;
4407 unsigned Reg = VA.getLocReg();
4408 switch (Reg) {
4409 default: break;
4410 case X86::EAX: case X86::EDX: case X86::ECX:
4411 if (++NumInRegs == MaxInRegs)
4412 return false;
4413 break;
4414 }
4415 }
4416 }
4417
4418 const MachineRegisterInfo &MRI = MF.getRegInfo();
4419 if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
4420 return false;
4421 }
4422
4423 bool CalleeWillPop =
4424 X86::isCalleePop(CalleeCC, Subtarget.is64Bit(), isVarArg,
4425 MF.getTarget().Options.GuaranteedTailCallOpt);
4426
4427 if (unsigned BytesToPop =
4428 MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn()) {
4429 // If we have bytes to pop, the callee must pop them.
4430 bool CalleePopMatches = CalleeWillPop && BytesToPop == StackArgsSize;
4431 if (!CalleePopMatches)
4432 return false;
4433 } else if (CalleeWillPop && StackArgsSize > 0) {
4434 // If we don't have bytes to pop, make sure the callee doesn't pop any.
4435 return false;
4436 }
4437
4438 return true;
4439}
4440
4441FastISel *
4442X86TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
4443 const TargetLibraryInfo *libInfo) const {
4444 return X86::createFastISel(funcInfo, libInfo);
4445}
4446
4447//===----------------------------------------------------------------------===//
4448// Other Lowering Hooks
4449//===----------------------------------------------------------------------===//
4450
4451static bool MayFoldLoad(SDValue Op) {
4452 return Op.hasOneUse() && ISD::isNormalLoad(Op.getNode());
4453}
4454
4455static bool MayFoldIntoStore(SDValue Op) {
4456 return Op.hasOneUse() && ISD::isNormalStore(*Op.getNode()->use_begin());
4457}
4458
4459static bool MayFoldIntoZeroExtend(SDValue Op) {
4460 if (Op.hasOneUse()) {
4461 unsigned Opcode = Op.getNode()->use_begin()->getOpcode();
4462 return (ISD::ZERO_EXTEND == Opcode);
4463 }
4464 return false;
4465}
4466
4467static bool isTargetShuffle(unsigned Opcode) {
4468 switch(Opcode) {
4469 default: return false;
4470 case X86ISD::BLENDI:
4471 case X86ISD::PSHUFB:
4472 case X86ISD::PSHUFD:
4473 case X86ISD::PSHUFHW:
4474 case X86ISD::PSHUFLW:
4475 case X86ISD::SHUFP:
4476 case X86ISD::INSERTPS:
4477 case X86ISD::EXTRQI:
4478 case X86ISD::INSERTQI:
4479 case X86ISD::PALIGNR:
4480 case X86ISD::VSHLDQ:
4481 case X86ISD::VSRLDQ:
4482 case X86ISD::MOVLHPS:
4483 case X86ISD::MOVHLPS:
4484 case X86ISD::MOVSHDUP:
4485 case X86ISD::MOVSLDUP:
4486 case X86ISD::MOVDDUP:
4487 case X86ISD::MOVSS:
4488 case X86ISD::MOVSD:
4489 case X86ISD::UNPCKL:
4490 case X86ISD::UNPCKH:
4491 case X86ISD::VBROADCAST:
4492 case X86ISD::VPERMILPI:
4493 case X86ISD::VPERMILPV:
4494 case X86ISD::VPERM2X128:
4495 case X86ISD::SHUF128:
4496 case X86ISD::VPERMIL2:
4497 case X86ISD::VPERMI:
4498 case X86ISD::VPPERM:
4499 case X86ISD::VPERMV:
4500 case X86ISD::VPERMV3:
4501 case X86ISD::VZEXT_MOVL:
4502 return true;
4503 }
4504}
4505
4506static bool isTargetShuffleVariableMask(unsigned Opcode) {
4507 switch (Opcode) {
4508 default: return false;
4509 // Target Shuffles.
4510 case X86ISD::PSHUFB:
4511 case X86ISD::VPERMILPV:
4512 case X86ISD::VPERMIL2:
4513 case X86ISD::VPPERM:
4514 case X86ISD::VPERMV:
4515 case X86ISD::VPERMV3:
4516 return true;
4517 // 'Faux' Target Shuffles.
4518 case ISD::OR:
4519 case ISD::AND:
4520 case X86ISD::ANDNP:
4521 return true;
4522 }
4523}
4524
4525SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const {
4526 MachineFunction &MF = DAG.getMachineFunction();
4527 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
4528 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
4529 int ReturnAddrIndex = FuncInfo->getRAIndex();
4530
4531 if (ReturnAddrIndex == 0) {
4532 // Set up a frame object for the return address.
4533 unsigned SlotSize = RegInfo->getSlotSize();
4534 ReturnAddrIndex = MF.getFrameInfo().CreateFixedObject(SlotSize,
4535 -(int64_t)SlotSize,
4536 false);
4537 FuncInfo->setRAIndex(ReturnAddrIndex);
4538 }
4539
4540 return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy(DAG.getDataLayout()));
4541}
4542
4543bool X86::isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
4544 bool hasSymbolicDisplacement) {
4545 // Offset should fit into 32 bit immediate field.
4546 if (!isInt<32>(Offset))
4547 return false;
4548
4549 // If we don't have a symbolic displacement - we don't have any extra
4550 // restrictions.
4551 if (!hasSymbolicDisplacement)
4552 return true;
4553
4554 // FIXME: Some tweaks might be needed for medium code model.
4555 if (M != CodeModel::Small && M != CodeModel::Kernel)
4556 return false;
4557
4558 // For small code model we assume that latest object is 16MB before end of 31
4559 // bits boundary. We may also accept pretty large negative constants knowing
4560 // that all objects are in the positive half of address space.
4561 if (M == CodeModel::Small && Offset < 16*1024*1024)
4562 return true;
4563
4564 // For kernel code model we know that all object resist in the negative half
4565 // of 32bits address space. We may not accept negative offsets, since they may
4566 // be just off and we may accept pretty large positive ones.
4567 if (M == CodeModel::Kernel && Offset >= 0)
4568 return true;
4569
4570 return false;
4571}
4572
4573/// Determines whether the callee is required to pop its own arguments.
4574/// Callee pop is necessary to support tail calls.
4575bool X86::isCalleePop(CallingConv::ID CallingConv,
4576 bool is64Bit, bool IsVarArg, bool GuaranteeTCO) {
4577 // If GuaranteeTCO is true, we force some calls to be callee pop so that we
4578 // can guarantee TCO.
4579 if (!IsVarArg && shouldGuaranteeTCO(CallingConv, GuaranteeTCO))
4580 return true;
4581
4582 switch (CallingConv) {
4583 default:
4584 return false;
4585 case CallingConv::X86_StdCall:
4586 case CallingConv::X86_FastCall:
4587 case CallingConv::X86_ThisCall:
4588 case CallingConv::X86_VectorCall:
4589 return !is64Bit;
4590 }
4591}
4592
4593/// Return true if the condition is an unsigned comparison operation.
4594static bool isX86CCUnsigned(unsigned X86CC) {
4595 switch (X86CC) {
4596 default:
4597 llvm_unreachable("Invalid integer condition!")::llvm::llvm_unreachable_internal("Invalid integer condition!"
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 4597)
;
4598 case X86::COND_E:
4599 case X86::COND_NE:
4600 case X86::COND_B:
4601 case X86::COND_A:
4602 case X86::COND_BE:
4603 case X86::COND_AE:
4604 return true;
4605 case X86::COND_G:
4606 case X86::COND_GE:
4607 case X86::COND_L:
4608 case X86::COND_LE:
4609 return false;
4610 }
4611}
4612
4613static X86::CondCode TranslateIntegerX86CC(ISD::CondCode SetCCOpcode) {
4614 switch (SetCCOpcode) {
4615 default: llvm_unreachable("Invalid integer condition!")::llvm::llvm_unreachable_internal("Invalid integer condition!"
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 4615)
;
4616 case ISD::SETEQ: return X86::COND_E;
4617 case ISD::SETGT: return X86::COND_G;
4618 case ISD::SETGE: return X86::COND_GE;
4619 case ISD::SETLT: return X86::COND_L;
4620 case ISD::SETLE: return X86::COND_LE;
4621 case ISD::SETNE: return X86::COND_NE;
4622 case ISD::SETULT: return X86::COND_B;
4623 case ISD::SETUGT: return X86::COND_A;
4624 case ISD::SETULE: return X86::COND_BE;
4625 case ISD::SETUGE: return X86::COND_AE;
4626 }
4627}
4628
4629/// Do a one-to-one translation of a ISD::CondCode to the X86-specific
4630/// condition code, returning the condition code and the LHS/RHS of the
4631/// comparison to make.
4632static X86::CondCode TranslateX86CC(ISD::CondCode SetCCOpcode, const SDLoc &DL,
4633 bool isFP, SDValue &LHS, SDValue &RHS,
4634 SelectionDAG &DAG) {
4635 if (!isFP) {
4636 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
4637 if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnesValue()) {
4638 // X > -1 -> X == 0, jump !sign.
4639 RHS = DAG.getConstant(0, DL, RHS.getValueType());
4640 return X86::COND_NS;
4641 }
4642 if (SetCCOpcode == ISD::SETLT && RHSC->isNullValue()) {
4643 // X < 0 -> X == 0, jump on sign.
4644 return X86::COND_S;
4645 }
4646 if (SetCCOpcode == ISD::SETLT && RHSC->getZExtValue() == 1) {
4647 // X < 1 -> X <= 0
4648 RHS = DAG.getConstant(0, DL, RHS.getValueType());
4649 return X86::COND_LE;
4650 }
4651 }
4652
4653 return TranslateIntegerX86CC(SetCCOpcode);
4654 }
4655
4656 // First determine if it is required or is profitable to flip the operands.
4657
4658 // If LHS is a foldable load, but RHS is not, flip the condition.
4659 if (ISD::isNON_EXTLoad(LHS.getNode()) &&
4660 !ISD::isNON_EXTLoad(RHS.getNode())) {
4661 SetCCOpcode = getSetCCSwappedOperands(SetCCOpcode);
4662 std::swap(LHS, RHS);
4663 }
4664
4665 switch (SetCCOpcode) {
4666 default: break;
4667 case ISD::SETOLT:
4668 case ISD::SETOLE:
4669 case ISD::SETUGT:
4670 case ISD::SETUGE:
4671 std::swap(LHS, RHS);
4672 break;
4673 }
4674
4675 // On a floating point condition, the flags are set as follows:
4676 // ZF PF CF op
4677 // 0 | 0 | 0 | X > Y
4678 // 0 | 0 | 1 | X < Y
4679 // 1 | 0 | 0 | X == Y
4680 // 1 | 1 | 1 | unordered
4681 switch (SetCCOpcode) {
4682 default: llvm_unreachable("Condcode should be pre-legalized away")::llvm::llvm_unreachable_internal("Condcode should be pre-legalized away"
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 4682)
;
4683 case ISD::SETUEQ:
4684 case ISD::SETEQ: return X86::COND_E;
4685 case ISD::SETOLT: // flipped
4686 case ISD::SETOGT:
4687 case ISD::SETGT: return X86::COND_A;
4688 case ISD::SETOLE: // flipped
4689 case ISD::SETOGE:
4690 case ISD::SETGE: return X86::COND_AE;
4691 case ISD::SETUGT: // flipped
4692 case ISD::SETULT:
4693 case ISD::SETLT: return X86::COND_B;
4694 case ISD::SETUGE: // flipped
4695 case ISD::SETULE:
4696 case ISD::SETLE: return X86::COND_BE;
4697 case ISD::SETONE:
4698 case ISD::SETNE: return X86::COND_NE;
4699 case ISD::SETUO: return X86::COND_P;
4700 case ISD::SETO: return X86::COND_NP;
4701 case ISD::SETOEQ:
4702 case ISD::SETUNE: return X86::COND_INVALID;
4703 }
4704}
4705
4706/// Is there a floating point cmov for the specific X86 condition code?
4707/// Current x86 isa includes the following FP cmov instructions:
4708/// fcmovb, fcomvbe, fcomve, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu.
4709static bool hasFPCMov(unsigned X86CC) {
4710 switch (X86CC) {
4711 default:
4712 return false;
4713 case X86::COND_B:
4714 case X86::COND_BE:
4715 case X86::COND_E:
4716 case X86::COND_P:
4717 case X86::COND_A:
4718 case X86::COND_AE:
4719 case X86::COND_NE:
4720 case X86::COND_NP:
4721 return true;
4722 }
4723}
4724
4725
4726bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
4727 const CallInst &I,
4728 MachineFunction &MF,
4729 unsigned Intrinsic) const {
4730
4731 const IntrinsicData* IntrData = getIntrinsicWithChain(Intrinsic);
4732 if (!IntrData)
4733 return false;
4734
4735 Info.flags = MachineMemOperand::MONone;
4736 Info.offset = 0;
4737
4738 switch (IntrData->Type) {
4739 case TRUNCATE_TO_MEM_VI8:
4740 case TRUNCATE_TO_MEM_VI16:
4741 case TRUNCATE_TO_MEM_VI32: {
4742 Info.opc = ISD::INTRINSIC_VOID;
4743 Info.ptrVal = I.getArgOperand(0);
4744 MVT VT = MVT::getVT(I.getArgOperand(1)->getType());
4745 MVT ScalarVT = MVT::INVALID_SIMPLE_VALUE_TYPE;
4746 if (IntrData->Type == TRUNCATE_TO_MEM_VI8)
4747 ScalarVT = MVT::i8;
4748 else if (IntrData->Type == TRUNCATE_TO_MEM_VI16)
4749 ScalarVT = MVT::i16;
4750 else if (IntrData->Type == TRUNCATE_TO_MEM_VI32)
4751 ScalarVT = MVT::i32;
4752
4753 Info.memVT = MVT::getVectorVT(ScalarVT, VT.getVectorNumElements());
4754 Info.align = 1;
4755 Info.flags |= MachineMemOperand::MOStore;
4756 break;
4757 }
4758 case GATHER:
4759 case GATHER_AVX2: {
4760 Info.opc = ISD::INTRINSIC_W_CHAIN;
4761 Info.ptrVal = nullptr;
4762 MVT DataVT = MVT::getVT(I.getType());
4763 MVT IndexVT = MVT::getVT(I.getArgOperand(2)->getType());
4764 unsigned NumElts = std::min(DataVT.getVectorNumElements(),
4765 IndexVT.getVectorNumElements());
4766 Info.memVT = MVT::getVectorVT(DataVT.getVectorElementType(), NumElts);
4767 Info.align = 1;
4768 Info.flags |= MachineMemOperand::MOLoad;
4769 break;
4770 }
4771 case SCATTER: {
4772 Info.opc = ISD::INTRINSIC_VOID;
4773 Info.ptrVal = nullptr;
4774 MVT DataVT = MVT::getVT(I.getArgOperand(3)->getType());
4775 MVT IndexVT = MVT::getVT(I.getArgOperand(2)->getType());
4776 unsigned NumElts = std::min(DataVT.getVectorNumElements(),
4777 IndexVT.getVectorNumElements());
4778 Info.memVT = MVT::getVectorVT(DataVT.getVectorElementType(), NumElts);
4779 Info.align = 1;
4780 Info.flags |= MachineMemOperand::MOStore;
4781 break;
4782 }
4783 default:
4784 return false;
4785 }
4786
4787 return true;
4788}
4789
4790/// Returns true if the target can instruction select the
4791/// specified FP immediate natively. If false, the legalizer will
4792/// materialize the FP immediate as a load from a constant pool.
4793bool X86TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
4794 bool ForCodeSize) const {
4795 for (unsigned i = 0, e = LegalFPImmediates.size(); i != e; ++i) {
4796 if (Imm.bitwiseIsEqual(LegalFPImmediates[i]))
4797 return true;
4798 }
4799 return false;
4800}
4801
4802bool X86TargetLowering::shouldReduceLoadWidth(SDNode *Load,
4803 ISD::LoadExtType ExtTy,
4804 EVT NewVT) const {
4805 // "ELF Handling for Thread-Local Storage" specifies that R_X86_64_GOTTPOFF
4806 // relocation target a movq or addq instruction: don't let the load shrink.
4807 SDValue BasePtr = cast<LoadSDNode>(Load)->getBasePtr();
4808 if (BasePtr.getOpcode() == X86ISD::WrapperRIP)
4809 if (const auto *GA = dyn_cast<GlobalAddressSDNode>(BasePtr.getOperand(0)))
4810 return GA->getTargetFlags() != X86II::MO_GOTTPOFF;
4811 return true;
4812}
4813
4814/// Returns true if it is beneficial to convert a load of a constant
4815/// to just the constant itself.
///
/// Here that means Ty (asserted to be an integer type) has a primitive size
/// between 1 and 64 bits inclusive. The value of Imm is not inspected.
4816bool X86TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
4817 Type *Ty) const {
4818 assert(Ty->isIntegerTy())((Ty->isIntegerTy()) ? static_cast<void> (0) : __assert_fail
("Ty->isIntegerTy()", "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 4818, __PRETTY_FUNCTION__))
;
4819
4820 unsigned BitSize = Ty->getPrimitiveSizeInBits();
4821 if (BitSize == 0 || BitSize > 64)
4822 return false;
4823 return true;
4824}
4825
/// Returns false only when IsFPSetCC is true and the subtarget is a 64-bit
/// LP64 target with AVX; returns true in every other case.
4826bool X86TargetLowering::reduceSelectOfFPConstantLoads(bool IsFPSetCC) const {
4827 // If we are using XMM registers in the ABI and the condition of the select is
4828 // a floating-point compare and we have blendv or conditional move, then it is
4829 // cheaper to select instead of doing a cross-register move and creating a
4830 // load that depends on the compare result.
4831 return !IsFPSetCC || !Subtarget.isTarget64BitLP64() || !Subtarget.hasAVX();
4832}
4833
/// Returns false for vector types when the subtarget has AVX512, and true
/// otherwise.
4834bool X86TargetLowering::convertSelectOfConstantsToMath(EVT VT) const {
4835 // TODO: It might be a win to ease or lift this restriction, but the generic
4836 // folds in DAGCombiner conflict with vector folds for an AVX512 target.
4837 if (VT.isVector() && Subtarget.hasAVX512())
4838 return false;
4839
4840 return true;
4841}
4842
/// Returns true if a multiply of type VT by the constant C should be
/// decomposed into shift plus add/sub operations.
///
/// Returns false when C is not recognized as a constant splat vector or when
/// ISD::MUL is legal for VT. Otherwise the result is true exactly when one of
/// MulC + 1, MulC - 1, 1 - MulC or -(MulC + 1) is a power of two.
4843bool X86TargetLowering::decomposeMulByConstant(EVT VT, SDValue C) const {
4844 // TODO: We handle scalars using custom code, but generic combining could make
4845 // that unnecessary.
4846 APInt MulC;
4847 if (!ISD::isConstantSplatVector(C.getNode(), MulC))
4848 return false;
4849
4850 // If vector multiply is legal, assume that's faster than shl + add/sub.
4851 // TODO: Multiply is a complex op with higher latency and lower throughput in
4852 // most implementations, so this check could be loosened based on type
4853 // and/or a CPU attribute.
4854 if (isOperationLegal(ISD::MUL, VT))
4855 return false;
4856
4857 // shl+add, shl+sub, shl+add+neg
4858 return (MulC + 1).isPowerOf2() || (MulC - 1).isPowerOf2() ||
4859 (1 - MulC).isPowerOf2() || (-(MulC + 1)).isPowerOf2();
4860}
4861
/// Returns true only for an unsigned conversion (IsSigned is false) from
/// MVT::f80 on a subtarget with CMOV. IntVT is not consulted.
4862bool X86TargetLowering::shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT,
4863 bool IsSigned) const {
4864 // Unsigned f80 FP_TO_INT is more efficient using Strict code if FCMOV is available.
4865 return !IsSigned && FpVT == MVT::f80 && Subtarget.hasCMov();
4866}
4867
/// Returns true if extracting a ResVT subvector from SrcVT at element Index
/// is considered cheap.
///
/// Always false when ISD::EXTRACT_SUBVECTOR is neither legal nor custom for
/// ResVT. For non-i1 element types the extract is cheap when Index is a
/// multiple of the number of result elements.
4868bool X86TargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
4869 unsigned Index) const {
4870 if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
4871 return false;
4872
4873 // Mask vectors support all subregister combinations and operations that
4874 // extract half of vector.
// NOTE(review): the size test below requires ResVT to be twice the size of
// SrcVT. For a "half of vector" extract one would expect SrcVT to be twice
// the size of ResVT, so as written only Index == 0 can match - confirm intent.
4875 if (ResVT.getVectorElementType() == MVT::i1)
4876 return Index == 0 || ((ResVT.getSizeInBits() == SrcVT.getSizeInBits()*2) &&
4877 (Index == ResVT.getVectorNumElements()));
4878
4879 return (Index % ResVT.getVectorNumElements()) == 0;
4880}
4881
/// Returns true if the vector binop VecOp should be converted to a scalar op.
///
/// That is the case when VecOp's opcode is not legal/custom/promote for its
/// vector type, or when it is legal/custom/promote for the scalar element
/// type as well.
4882bool X86TargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
4883 // If the vector op is not supported, try to convert to scalar.
4884 EVT VecVT = VecOp.getValueType();
4885 if (!isOperationLegalOrCustomOrPromote(VecOp.getOpcode(), VecVT))
4886 return true;
4887
4888 // If the vector op is supported, but the scalar op is not, the transform may
4889 // not be worthwhile.
4890 EVT ScalarVT = VecVT.getScalarType();
4891 return isOperationLegalOrCustomOrPromote(VecOp.getOpcode(), ScalarVT);
4892}
4893
/// Returns false for vector types. For scalar types, returns true when VT is
/// a simple type or when Opcode is not marked Expand for VT.
4894bool X86TargetLowering::shouldFormOverflowOp(unsigned Opcode, EVT VT) const {
4895 // TODO: Allow vectors?
4896 if (VT.isVector())
4897 return false;
4898 return VT.isSimple() || !isOperationExpand(Opcode, VT);
4899}
4900
/// Returns true if cttz is cheap to speculate, which here is exactly when the
/// subtarget has BMI.
4901bool X86TargetLowering::isCheapToSpeculateCttz() const {
4902 // Speculate cttz only if we can directly use TZCNT.
4903 return Subtarget.hasBMI();
4904}
4905
/// Returns true if ctlz is cheap to speculate, which here is exactly when the
/// subtarget has LZCNT.
4906bool X86TargetLowering::isCheapToSpeculateCtlz() const {
4907 // Speculate ctlz only if we can directly use LZCNT.
4908 return Subtarget.hasLZCNT();
4909}
4910
/// Returns true if folding a bitcast of a LoadVT load into a BitcastVT load is
/// beneficial. Two X86-specific cases are rejected up front; everything else
/// is delegated to TargetLowering::isLoadBitCastBeneficial.
4911bool X86TargetLowering::isLoadBitCastBeneficial(EVT LoadVT,
4912 EVT BitcastVT) const {
// Without AVX512, reject turning a non-vector load into a vector of i1.
4913 if (!Subtarget.hasAVX512() && !LoadVT.isVector() && BitcastVT.isVector() &&
4914 BitcastVT.getVectorElementType() == MVT::i1)
4915 return false;
4916
// Without DQI, reject turning an i8 load into a v8i1 load.
4917 if (!Subtarget.hasDQI() && BitcastVT == MVT::v8i1 && LoadVT == MVT::i8)
4918 return false;
4919
4920 return TargetLowering::isLoadBitCastBeneficial(LoadVT, BitcastVT);
4921}
4922
/// Returns true if stores may be merged into a single store of type MemVT.
///
/// When the function has the NoImplicitFloat attribute the merged size is
/// capped at 64 bits (64-bit subtarget) or 32 bits (otherwise). Without that
/// attribute it is capped at the subtarget's preferred vector width.
/// AddressSpace is not consulted.
4923bool X86TargetLowering::canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
4924 const SelectionDAG &DAG) const {
4925 // Do not merge to float value size (128 bytes) if no implicit
4926 // float attribute is set.
// NOTE(review): "128 bytes" above presumably means 128 bits; every
// comparison below is done in bits - confirm.
4927 bool NoFloat = DAG.getMachineFunction().getFunction().hasFnAttribute(
4928 Attribute::NoImplicitFloat);
4929
4930 if (NoFloat) {
4931 unsigned MaxIntSize = Subtarget.is64Bit() ? 64 : 32;
4932 return (MemVT.getSizeInBits() <= MaxIntSize);
4933 }
4934 // Make sure we don't merge greater than our preferred vector
4935 // width.
4936 if (MemVT.getSizeInBits() > Subtarget.getPreferVectorWidth())
4937 return false;
4938 return true;
4939}
4940
/// Returns true if ctlz is fast, i.e. the subtarget reports a fast LZCNT.
4941bool X86TargetLowering::isCtlzFast() const {
4942 return Subtarget.hasFastLZCNT();
4943}
4944
/// Unconditionally returns true; AndI is not inspected.
4945bool X86TargetLowering::isMaskAndCmp0FoldingBeneficial(
4946 const Instruction &AndI) const {
4947 return true;
4948}
4949
/// Returns true if an and-not compare is available for Y.
///
/// Requires all of: Y is not a vector, the subtarget has BMI, Y's type is i32
/// or i64, and Y is not a ConstantSDNode.
4950bool X86TargetLowering::hasAndNotCompare(SDValue Y) const {
4951 EVT VT = Y.getValueType();
4952
4953 if (VT.isVector())
4954 return false;
4955
4956 if (!Subtarget.hasBMI())
4957 return false;
4958
4959 // There are only 32-bit and 64-bit forms for 'andn'.
4960 if (VT != MVT::i32 && VT != MVT::i64)
4961 return false;
4962
4963 return !isa<ConstantSDNode>(Y);
4964}
4965
/// Returns true if an and-not operation is available for Y.
///
/// Scalars defer to hasAndNotCompare(). Vectors need SSE1 and at least 128
/// bits; v4i32 is then accepted outright, and every other vector type
/// additionally needs SSE2.
4966bool X86TargetLowering::hasAndNot(SDValue Y) const {
4967 EVT VT = Y.getValueType();
4968
4969 if (!VT.isVector())
4970 return hasAndNotCompare(Y);
4971
4972 // Vector.
4973
4974 if (!Subtarget.hasSSE1() || VT.getSizeInBits() < 128)
4975 return false;
4976
4977 if (VT == MVT::v4i32)
4978 return true;
4979
4980 return Subtarget.hasSSE2();
4981}
4982
/// Returns true if the shift pair rooted at N should be folded to a mask.
///
/// N is asserted to be SHL-of-SRL or SRL-of-SHL. If the subtarget reports fast
/// shift masks for the kind of type involved (vector or scalar), the fold is
/// allowed only when both shifts use the same shift-amount operand. Otherwise
/// the decision is delegated to TargetLoweringBase.
4983bool X86TargetLowering::shouldFoldConstantShiftPairToMask(
4984 const SDNode *N, CombineLevel Level) const {
4985 assert(((N->getOpcode() == ISD::SHL &&((((N->getOpcode() == ISD::SHL && N->getOperand
(0).getOpcode() == ISD::SRL) || (N->getOpcode() == ISD::SRL
&& N->getOperand(0).getOpcode() == ISD::SHL)) &&
"Expected shift-shift mask") ? static_cast<void> (0) :
__assert_fail ("((N->getOpcode() == ISD::SHL && N->getOperand(0).getOpcode() == ISD::SRL) || (N->getOpcode() == ISD::SRL && N->getOperand(0).getOpcode() == ISD::SHL)) && \"Expected shift-shift mask\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 4989, __PRETTY_FUNCTION__))
4986 N->getOperand(0).getOpcode() == ISD::SRL) ||((((N->getOpcode() == ISD::SHL && N->getOperand
(0).getOpcode() == ISD::SRL) || (N->getOpcode() == ISD::SRL
&& N->getOperand(0).getOpcode() == ISD::SHL)) &&
"Expected shift-shift mask") ? static_cast<void> (0) :
__assert_fail ("((N->getOpcode() == ISD::SHL && N->getOperand(0).getOpcode() == ISD::SRL) || (N->getOpcode() == ISD::SRL && N->getOperand(0).getOpcode() == ISD::SHL)) && \"Expected shift-shift mask\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 4989, __PRETTY_FUNCTION__))
4987 (N->getOpcode() == ISD::SRL &&((((N->getOpcode() == ISD::SHL && N->getOperand
(0).getOpcode() == ISD::SRL) || (N->getOpcode() == ISD::SRL
&& N->getOperand(0).getOpcode() == ISD::SHL)) &&
"Expected shift-shift mask") ? static_cast<void> (0) :
__assert_fail ("((N->getOpcode() == ISD::SHL && N->getOperand(0).getOpcode() == ISD::SRL) || (N->getOpcode() == ISD::SRL && N->getOperand(0).getOpcode() == ISD::SHL)) && \"Expected shift-shift mask\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 4989, __PRETTY_FUNCTION__))
4988 N->getOperand(0).getOpcode() == ISD::SHL)) &&((((N->getOpcode() == ISD::SHL && N->getOperand
(0).getOpcode() == ISD::SRL) || (N->getOpcode() == ISD::SRL
&& N->getOperand(0).getOpcode() == ISD::SHL)) &&
"Expected shift-shift mask") ? static_cast<void> (0) :
__assert_fail ("((N->getOpcode() == ISD::SHL && N->getOperand(0).getOpcode() == ISD::SRL) || (N->getOpcode() == ISD::SRL && N->getOperand(0).getOpcode() == ISD::SHL)) && \"Expected shift-shift mask\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 4989, __PRETTY_FUNCTION__))
4989 "Expected shift-shift mask")((((N->getOpcode() == ISD::SHL && N->getOperand
(0).getOpcode() == ISD::SRL) || (N->getOpcode() == ISD::SRL
&& N->getOperand(0).getOpcode() == ISD::SHL)) &&
"Expected shift-shift mask") ? static_cast<void> (0) :
__assert_fail ("((N->getOpcode() == ISD::SHL && N->getOperand(0).getOpcode() == ISD::SRL) || (N->getOpcode() == ISD::SRL && N->getOperand(0).getOpcode() == ISD::SHL)) && \"Expected shift-shift mask\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 4989, __PRETTY_FUNCTION__))
;
4990 EVT VT = N->getValueType(0);
4991 if ((Subtarget.hasFastVectorShiftMasks() && VT.isVector()) ||
4992 (Subtarget.hasFastScalarShiftMasks() && !VT.isVector())) {
4993 // Only fold if the shift values are equal - so it folds to AND.
4994 // TODO - we should fold if either is a non-uniform vector but we don't do
4995 // the fold for non-splats yet.
4996 return N->getOperand(1) == N->getOperand(0).getOperand(1);
4997 }
4998 return TargetLoweringBase::shouldFoldConstantShiftPairToMask(N, Level);
4999}
5000
/// Returns true if a mask on Y should be folded into a pair of variable
/// shifts. Rejected for vector types and for i64 on non-64-bit subtargets.
5001bool X86TargetLowering::shouldFoldMaskToVariableShiftPair(SDValue Y) const {
5002 EVT VT = Y.getValueType();
5003
5004 // For vectors, we don't have a preference, but we probably want a mask.
5005 if (VT.isVector())
5006 return false;
5007
5008 // 64-bit shifts on 32-bit targets produce really bad bloated code.
5009 if (VT == MVT::i64 && !Subtarget.is64Bit())
5010 return false;
5011
5012 return true;
5013}
5014
/// Returns true exactly when VT is a legal type.
5015bool X86TargetLowering::shouldSplatInsEltVarIndex(EVT VT) const {
5016 // Any legal vector type can be splatted more efficiently than
5017 // loading/spilling from memory.
5018 return isTypeLegal(VT);
5019}
5020
/// Returns the type to use for a fast equality compare of NumBits bits.
///
/// Preference order: the integer type of exactly NumBits bits if legal, then
/// v16i8 for 128 bits or v32i8 for 256 bits when those vector types are
/// legal. Returns MVT::INVALID_SIMPLE_VALUE_TYPE when none applies.
5021MVT X86TargetLowering::hasFastEqualityCompare(unsigned NumBits) const {
5022 MVT VT = MVT::getIntegerVT(NumBits);
5023 if (isTypeLegal(VT))
5024 return VT;
5025
5026 // PMOVMSKB can handle this.
5027 if (NumBits == 128 && isTypeLegal(MVT::v16i8))
5028 return MVT::v16i8;
5029
5030 // VPMOVMSKB can handle this.
5031 if (NumBits == 256 && isTypeLegal(MVT::v32i8))
5032 return MVT::v32i8;
5033
5034 // TODO: Allow 64-bit type for 32-bit target.
5035 // TODO: 512-bit types should be allowed, but make sure that those
5036 // cases are handled in combineVectorSizedSetCCEquality().
5037
5038 return MVT::INVALID_SIMPLE_VALUE_TYPE;
5039}
5040
5041/// Return true if Val is the undef sentinel value (SM_SentinelUndef) or is
/// equal to CmpVal.
5042static bool isUndefOrEqual(int Val, int CmpVal) {
5043 return ((Val == SM_SentinelUndef) || (Val == CmpVal));
5044}
5045
5046/// Return true if Val is either the undef sentinel (SM_SentinelUndef) or the
/// zero sentinel (SM_SentinelZero).
5047static bool isUndefOrZero(int Val) {
5048 return ((Val == SM_SentinelUndef) || (Val == SM_SentinelZero));
5049}
5050
5051/// Return true if every element in Mask, beginning from position Pos and ending
5052/// just before Pos+Size (exclusive), is the undef sentinel value.
/// Trivially true when Size is 0.
5053static bool isUndefInRange(ArrayRef<int> Mask, unsigned Pos, unsigned Size) {
5054 for (unsigned i = Pos, e = Pos + Size; i != e; ++i)
5055 if (Mask[i] != SM_SentinelUndef)
5056 return false;
5057 return true;
5058}
5059
5060/// Return true if the mask creates a vector whose lower half is undefined,
/// i.e. elements [0, NumElts / 2) of Mask are all the undef sentinel.
5061static bool isUndefLowerHalf(ArrayRef<int> Mask) {
5062 unsigned NumElts = Mask.size();
5063 return isUndefInRange(Mask, 0, NumElts / 2);
5064}
5065
5066/// Return true if the mask creates a vector whose upper half is undefined,
/// i.e. the NumElts / 2 elements of Mask starting at index NumElts / 2 are all
/// the undef sentinel. For an odd NumElts the last element is not examined.
5067static bool isUndefUpperHalf(ArrayRef<int> Mask) {
5068 unsigned NumElts = Mask.size();
5069 return isUndefInRange(Mask, NumElts / 2, NumElts / 2);
5070}
5071
5072/// Return true if Val falls within the half-open range [Low, Hi).
5073static bool isInRange(int Val, int Low, int Hi) {
5074 return (Val >= Low && Val < Hi);
5075}
5076
5077/// Return true if the value of any element in Mask falls within the half-open
5078/// range [Low, Hi). Returns false for an empty Mask.
5079static bool isAnyInRange(ArrayRef<int> Mask, int Low, int Hi) {
5080 for (int M : Mask)
5081 if (isInRange(M, Low, Hi))
5082 return true;
5083 return false;
5084}
5085
5086/// Return true if Val is undef or if its value falls within the
5087/// half-open range [Low, Hi).
5088static bool isUndefOrInRange(int Val, int Low, int Hi) {
5089 return (Val == SM_SentinelUndef) || isInRange(Val, Low, Hi);
5090}
5091
5092/// Return true if every element in Mask is undef or if its value
5093/// falls within the half-open range [Low, Hi). Trivially true for an empty Mask.
5094static bool isUndefOrInRange(ArrayRef<int> Mask,
5095 int Low, int Hi) {
5096 for (int M : Mask)
5097 if (!isUndefOrInRange(M, Low, Hi))
5098 return false;
5099 return true;
5100}
5101
5102/// Return true if Val is undef, zero or if its value falls within the
5103/// half-open range [Low, Hi).
5104static bool isUndefOrZeroOrInRange(int Val, int Low, int Hi) {
5105 return isUndefOrZero(Val) || isInRange(Val, Low, Hi);
5106}
5107
5108/// Return true if every element in Mask is undef, zero or if its value
5109/// falls within the half-open range [Low, Hi). Trivially true for an empty Mask.
5110static bool isUndefOrZeroOrInRange(ArrayRef<int> Mask, int Low, int Hi) {
5111 for (int M : Mask)
5112 if (!isUndefOrZeroOrInRange(M, Low, Hi))
5113 return false;
5114 return true;
5115}
5116
5117/// Return true if every element in Mask, beginning
5118/// from position Pos and ending just before Pos + Size, matches the
5119/// sequence (Low, Low + Step, ..., Low + (Size - 1) * Step) or is undef.
/// The expected value advances by Step per position, including positions
/// that hold undef.
5120static bool isSequentialOrUndefInRange(ArrayRef<int> Mask, unsigned Pos,
5121 unsigned Size, int Low, int Step = 1) {
5122 for (unsigned i = Pos, e = Pos + Size; i != e; ++i, Low += Step)
5123 if (!isUndefOrEqual(Mask[i], Low))
5124 return false;
5125 return true;
5126}
5127
5128/// Return true if every element in Mask, beginning
5129/// from position Pos and ending just before Pos+Size, matches the
5130/// sequence (Low, Low + 1, ..., Low + Size - 1), or is undef or is zero.
/// The expected value advances by one per position, including positions
/// that hold an undef or zero sentinel.
5131static bool isSequentialOrUndefOrZeroInRange(ArrayRef<int> Mask, unsigned Pos,
5132 unsigned Size, int Low) {
5133 for (unsigned i = Pos, e = Pos + Size; i != e; ++i, ++Low)
5134 if (!isUndefOrZero(Mask[i]) && Mask[i] != Low)
5135 return false;
5136 return true;
5137}
5138
5139/// Return true if every element in Mask, beginning
5140/// from position Pos and ending just before Pos+Size, is undef or is zero.
/// Trivially true when Size is 0.
5141static bool isUndefOrZeroInRange(ArrayRef<int> Mask, unsigned Pos,
5142 unsigned Size) {
5143 for (unsigned i = Pos, e = Pos + Size; i != e; ++i)
5144 if (!isUndefOrZero(Mask[i]))
5145 return false;
5146 return true;
5147}
5148
5149/// Helper function to test whether a shuffle mask could be
5150/// simplified by widening the elements being shuffled.
5151///
5152/// Overwrites WidenedMask with the mask for wider elements (Mask.size() / 2
/// entries) if valid. Otherwise
5153/// leaves it in an unspecified state.
5154///
5155/// NOTE: This must handle normal vector shuffle masks and *target* vector
5156/// shuffle masks. The latter have the special property of a '-2' representing
5157/// a zero-ed lane of a vector.
///
/// NOTE(review): elements are read in pairs (Mask[i], Mask[i + 1]), so this
/// presumably expects Mask.size() to be even - confirm against callers.
5158static bool canWidenShuffleElements(ArrayRef<int> Mask,
5159 SmallVectorImpl<int> &WidenedMask) {
5160 WidenedMask.assign(Mask.size() / 2, 0);
5161 for (int i = 0, Size = Mask.size(); i < Size; i += 2) {
5162 int M0 = Mask[i];
5163 int M1 = Mask[i + 1];
5164
5165 // If both elements are undef, it's trivial.
5166 if (M0 == SM_SentinelUndef && M1 == SM_SentinelUndef) {
5167 WidenedMask[i / 2] = SM_SentinelUndef;
5168 continue;
5169 }
5170
5171 // Check for an undef mask and a mask value properly aligned to fit with
5172 // a pair of values. If we find such a case, use the non-undef mask's value.
5173 if (M0 == SM_SentinelUndef && M1 >= 0 && (M1 % 2) == 1) {
5174 WidenedMask[i / 2] = M1 / 2;
5175 continue;
5176 }
5177 if (M1 == SM_SentinelUndef && M0 >= 0 && (M0 % 2) == 0) {
5178 WidenedMask[i / 2] = M0 / 2;
5179 continue;
5180 }
5181
5182 // When zeroing, we need to spread the zeroing across both lanes to widen.
5183 if (M0 == SM_SentinelZero || M1 == SM_SentinelZero) {
5184 if ((M0 == SM_SentinelZero || M0 == SM_SentinelUndef) &&
5185 (M1 == SM_SentinelZero || M1 == SM_SentinelUndef)) {
5186 WidenedMask[i / 2] = SM_SentinelZero;
5187 continue;
5188 }
5189 return false;
5190 }
5191
5192 // Finally check if the two mask values are adjacent and aligned with
5193 // a pair.
5194 if (M0 != SM_SentinelUndef && (M0 % 2) == 0 && (M0 + 1) == M1) {
5195 WidenedMask[i / 2] = M0 / 2;
5196 continue;
5197 }
5198
5199 // Otherwise we can't safely widen the elements used in this shuffle.
5200 return false;
5201 }
5202 assert(WidenedMask.size() == Mask.size() / 2 &&((WidenedMask.size() == Mask.size() / 2 && "Incorrect size of mask after widening the elements!"
) ? static_cast<void> (0) : __assert_fail ("WidenedMask.size() == Mask.size() / 2 && \"Incorrect size of mask after widening the elements!\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 5203, __PRETTY_FUNCTION__))
5203 "Incorrect size of mask after widening the elements!")((WidenedMask.size() == Mask.size() / 2 && "Incorrect size of mask after widening the elements!"
) ? static_cast<void> (0) : __assert_fail ("WidenedMask.size() == Mask.size() / 2 && \"Incorrect size of mask after widening the elements!\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 5203, __PRETTY_FUNCTION__))
;
5204
5205 return true;
5206}
5207
/// Overload that takes zeroable-element information into account.
///
/// Works on a copy of Mask in which every element that is not undef and whose
/// bit is set in Zeroable is replaced by SM_SentinelZero, then forwards to the
/// two-argument canWidenShuffleElements. Undef elements are left as undef even
/// if they are flagged in Zeroable.
5208static bool canWidenShuffleElements(ArrayRef<int> Mask,
5209 const APInt &Zeroable,
5210 SmallVectorImpl<int> &WidenedMask) {
5211 SmallVector<int, 32> TargetMask(Mask.begin(), Mask.end());
5212 for (int i = 0, Size = TargetMask.size(); i < Size; ++i) {
5213 if (TargetMask[i] == SM_SentinelUndef)
5214 continue;
5215 if (Zeroable[i])
5216 TargetMask[i] = SM_SentinelZero;
5217 }
5218 return canWidenShuffleElements(TargetMask, WidenedMask);
5219}
5220
/// Convenience overload for callers that only need the yes/no answer; the
/// widened mask is computed into a local vector and discarded.
5221static bool canWidenShuffleElements(ArrayRef<int> Mask) {
5222 SmallVector<int, 32> WidenedMask;
5223 return canWidenShuffleElements(Mask, WidenedMask);
5224}
5225
5226/// Returns true if Elt is a constant zero or a floating point constant +0.0.
/// Implemented as isNullConstant(Elt) || isNullFPConstant(Elt).
5227bool X86::isZeroNode(SDValue Elt) {
5228 return isNullConstant(Elt) || isNullFPConstant(Elt);
5229}
5230
5231// Build a vector of constants of type VT from the first
// VT.getVectorNumElements() entries of Values.
5232// Use an UNDEF node for any negative value when IsMask is true.
5233// Split 64-bit constants in the 32-bit mode: if i64 is not a legal type and
// VT has i64 elements, the vector is built with twice as many i32 elements
// and bitcast back to VT.
5234static SDValue getConstVector(ArrayRef<int> Values, MVT VT, SelectionDAG &DAG,
5235 const SDLoc &dl, bool IsMask = false) {
5236
5237 SmallVector<SDValue, 32> Ops;
5238 bool Split = false;
5239
5240 MVT ConstVecVT = VT;
5241 unsigned NumElts = VT.getVectorNumElements();
5242 bool In64BitMode = DAG.getTargetLoweringInfo().isTypeLegal(MVT::i64);
5243 if (!In64BitMode && VT.getVectorElementType() == MVT::i64) {
5244 ConstVecVT = MVT::getVectorVT(MVT::i32, NumElts * 2);
5245 Split = true;
5246 }
5247
5248 MVT EltVT = ConstVecVT.getVectorElementType();
5249 for (unsigned i = 0; i < NumElts; ++i) {
5250 bool IsUndef = Values[i] < 0 && IsMask;
5251 SDValue OpNode = IsUndef ? DAG.getUNDEF(EltVT) :
5252 DAG.getConstant(Values[i], dl, EltVT);
5253 Ops.push_back(OpNode);
// When splitting, the second i32 of each pair is undef or constant 0.
// NOTE(review): for a negative value with IsMask == false this second half is
// 0 rather than a sign extension - confirm no caller relies on that case.
5254 if (Split)
5255 Ops.push_back(IsUndef ? DAG.getUNDEF(EltVT) :
5256 DAG.getConstant(0, dl, EltVT));
5257 }
5258 SDValue ConstsNode = DAG.getBuildVector(ConstVecVT, dl, Ops);
5259 if (Split)
5260 ConstsNode = DAG.getBitcast(VT, ConstsNode);
5261 return ConstsNode;
5262}
5263
/// Build a constant vector of type VT from per-element bit patterns.
///
/// Bits holds one APInt per element and Undefs has one bit per element; an
/// element whose Undefs bit is set becomes UNDEF. Each APInt is asserted to be
/// as wide as VT's scalar type. If i64 is not a legal type and VT has i64
/// elements, each element is emitted as two i32 values (truncated low 32 bits
/// first, then the bits shifted down by 32). f32/f64 elements are emitted as
/// FP constants built from the bit pattern. The result is bitcast to VT.
5264static SDValue getConstVector(ArrayRef<APInt> Bits, APInt &Undefs,
5265 MVT VT, SelectionDAG &DAG, const SDLoc &dl) {
5266 assert(Bits.size() == Undefs.getBitWidth() &&((Bits.size() == Undefs.getBitWidth() && "Unequal constant and undef arrays"
) ? static_cast<void> (0) : __assert_fail ("Bits.size() == Undefs.getBitWidth() && \"Unequal constant and undef arrays\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 5267, __PRETTY_FUNCTION__))
5267 "Unequal constant and undef arrays")((Bits.size() == Undefs.getBitWidth() && "Unequal constant and undef arrays"
) ? static_cast<void> (0) : __assert_fail ("Bits.size() == Undefs.getBitWidth() && \"Unequal constant and undef arrays\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 5267, __PRETTY_FUNCTION__))
;
5268 SmallVector<SDValue, 32> Ops;
5269 bool Split = false;
5270
5271 MVT ConstVecVT = VT;
5272 unsigned NumElts = VT.getVectorNumElements();
5273 bool In64BitMode = DAG.getTargetLoweringInfo().isTypeLegal(MVT::i64);
5274 if (!In64BitMode && VT.getVectorElementType() == MVT::i64) {
5275 ConstVecVT = MVT::getVectorVT(MVT::i32, NumElts * 2);
5276 Split = true;
5277 }
5278
5279 MVT EltVT = ConstVecVT.getVectorElementType();
5280 for (unsigned i = 0, e = Bits.size(); i != e; ++i) {
5281 if (Undefs[i]) {
// An undef element occupies two i32 slots when splitting.
5282 Ops.append(Split ? 2 : 1, DAG.getUNDEF(EltVT));
5283 continue;
5284 }
5285 const APInt &V = Bits[i];
5286 assert(V.getBitWidth() == VT.getScalarSizeInBits() && "Unexpected sizes")((V.getBitWidth() == VT.getScalarSizeInBits() && "Unexpected sizes"
) ? static_cast<void> (0) : __assert_fail ("V.getBitWidth() == VT.getScalarSizeInBits() && \"Unexpected sizes\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 5286, __PRETTY_FUNCTION__))
;
5287 if (Split) {
5288 Ops.push_back(DAG.getConstant(V.trunc(32), dl, EltVT));
5289 Ops.push_back(DAG.getConstant(V.lshr(32).trunc(32), dl, EltVT));
5290 } else if (EltVT == MVT::f32) {
5291 APFloat FV(APFloat::IEEEsingle(), V);
5292 Ops.push_back(DAG.getConstantFP(FV, dl, EltVT));
5293 } else if (EltVT == MVT::f64) {
5294 APFloat FV(APFloat::IEEEdouble(), V);
5295 Ops.push_back(DAG.getConstantFP(FV, dl, EltVT));
5296 } else {
5297 Ops.push_back(DAG.getConstant(V, dl, EltVT));
5298 }
5299 }
5300
5301 SDValue ConstsNode = DAG.getBuildVector(ConstVecVT, dl, Ops);
5302 return DAG.getBitcast(VT, ConstsNode);
5303}
5304
5305/// Returns a vector of specified type with all zero elements.
///
/// VT is asserted to be a 128-, 256- or 512-bit vector, or a vector of i1.
/// The zero is built in one of three forms and then bitcast to VT: a v4f32
/// +0.0 (128-bit VT without SSE2), a zero constant of VT itself (i1 element
/// type), or a zero vector of i32 elements covering VT's size.
5306static SDValue getZeroVector(MVT VT, const X86Subtarget &Subtarget,
5307 SelectionDAG &DAG, const SDLoc &dl) {
5308 assert((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector() ||(((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector
() || VT.getVectorElementType() == MVT::i1) && "Unexpected vector type"
) ? static_cast<void> (0) : __assert_fail ("(VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector() || VT.getVectorElementType() == MVT::i1) && \"Unexpected vector type\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 5310, __PRETTY_FUNCTION__))
5309 VT.getVectorElementType() == MVT::i1) &&(((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector
() || VT.getVectorElementType() == MVT::i1) && "Unexpected vector type"
) ? static_cast<void> (0) : __assert_fail ("(VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector() || VT.getVectorElementType() == MVT::i1) && \"Unexpected vector type\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 5310, __PRETTY_FUNCTION__))
5310 "Unexpected vector type")(((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector
() || VT.getVectorElementType() == MVT::i1) && "Unexpected vector type"
) ? static_cast<void> (0) : __assert_fail ("(VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector() || VT.getVectorElementType() == MVT::i1) && \"Unexpected vector type\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 5310, __PRETTY_FUNCTION__))
;
5311
5312 // Try to build SSE/AVX zero vectors as <N x i32> bitcasted to their dest
5313 // type. This ensures they get CSE'd. But if the integer type is not
5314 // available, use a floating-point +0.0 instead.
5315 SDValue Vec;
5316 if (!Subtarget.hasSSE2() && VT.is128BitVector()) {
5317 Vec = DAG.getConstantFP(+0.0, dl, MVT::v4f32);
5318 } else if (VT.getVectorElementType() == MVT::i1) {
5319 assert((Subtarget.hasBWI() || VT.getVectorNumElements() <= 16) &&(((Subtarget.hasBWI() || VT.getVectorNumElements() <= 16) &&
"Unexpected vector type") ? static_cast<void> (0) : __assert_fail
("(Subtarget.hasBWI() || VT.getVectorNumElements() <= 16) && \"Unexpected vector type\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 5320, __PRETTY_FUNCTION__))
5320 "Unexpected vector type")(((Subtarget.hasBWI() || VT.getVectorNumElements() <= 16) &&
"Unexpected vector type") ? static_cast<void> (0) : __assert_fail
("(Subtarget.hasBWI() || VT.getVectorNumElements() <= 16) && \"Unexpected vector type\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 5320, __PRETTY_FUNCTION__))
;
5321 Vec = DAG.getConstant(0, dl, VT);
5322 } else {
5323 unsigned Num32BitElts = VT.getSizeInBits() / 32;
5324 Vec = DAG.getConstant(0, dl, MVT::getVectorVT(MVT::i32, Num32BitElts));
5325 }
5326 return DAG.getBitcast(VT, Vec);
5327}
5328
/// Extract the vectorWidth-bit chunk of Vec that contains element IdxVal.
///
/// IdxVal is rounded down to the first element of its chunk, so it need not be
/// chunk-aligned. The result has Vec's element type. A BUILD_VECTOR input is
/// handled by building a smaller BUILD_VECTOR from the relevant operands; any
/// other input produces an ISD::EXTRACT_SUBVECTOR node.
///
/// NOTE(review): Factor is computed as size-of-Vec / vectorWidth and used as a
/// divisor, so Vec is presumably expected to be at least vectorWidth bits wide
/// - confirm against callers.
5329static SDValue extractSubVector(SDValue Vec, unsigned IdxVal, SelectionDAG &DAG,
5330 const SDLoc &dl, unsigned vectorWidth) {
5331 EVT VT = Vec.getValueType();
5332 EVT ElVT = VT.getVectorElementType();
5333 unsigned Factor = VT.getSizeInBits()/vectorWidth;
5334 EVT ResultVT = EVT::getVectorVT(*DAG.getContext(), ElVT,
5335 VT.getVectorNumElements()/Factor);
5336
5337 // Extract the relevant vectorWidth bits. Generate an EXTRACT_SUBVECTOR
5338 unsigned ElemsPerChunk = vectorWidth / ElVT.getSizeInBits();
5339 assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2")((isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2"
) ? static_cast<void> (0) : __assert_fail ("isPowerOf2_32(ElemsPerChunk) && \"Elements per chunk not power of 2\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 5339, __PRETTY_FUNCTION__))
;
5340
5341 // This is the index of the first element of the vectorWidth-bit chunk
5342 // we want. Since ElemsPerChunk is a power of 2 just need to clear bits.
5343 IdxVal &= ~(ElemsPerChunk - 1);
5344
5345 // If the input is a buildvector just emit a smaller one.
5346 if (Vec.getOpcode() == ISD::BUILD_VECTOR)
5347 return DAG.getBuildVector(ResultVT, dl,
5348 Vec->ops().slice(IdxVal, ElemsPerChunk));
5349
5350 SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, dl);
5351 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResultVT, Vec, VecIdx);
5352}
5353
5354/// Generate a DAG to grab 128-bits from a vector > 128 bits. This
5355/// sets things up to match to an AVX VEXTRACTF128 / VEXTRACTI128
5356/// or AVX-512 VEXTRACTF32x4 / VEXTRACTI32x4
5357/// instructions or a simple subregister reference. IdxVal is an element index
/// inside the
5358/// 128 bits we want. It need not be aligned to a 128-bit boundary. That makes
5359/// lowering EXTRACT_VECTOR_ELT operations easier.
/// Vec is asserted to be a 256-bit or 512-bit vector.
5360static SDValue extract128BitVector(SDValue Vec, unsigned IdxVal,
5361 SelectionDAG &DAG, const SDLoc &dl) {
5362 assert((Vec.getValueType().is256BitVector() ||(((Vec.getValueType().is256BitVector() || Vec.getValueType().
is512BitVector()) && "Unexpected vector size!") ? static_cast
<void> (0) : __assert_fail ("(Vec.getValueType().is256BitVector() || Vec.getValueType().is512BitVector()) && \"Unexpected vector size!\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 5363, __PRETTY_FUNCTION__))
5363 Vec.getValueType().is512BitVector()) && "Unexpected vector size!")(((Vec.getValueType().is256BitVector() || Vec.getValueType().
is512BitVector()) && "Unexpected vector size!") ? static_cast
<void> (0) : __assert_fail ("(Vec.getValueType().is256BitVector() || Vec.getValueType().is512BitVector()) && \"Unexpected vector size!\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 5363, __PRETTY_FUNCTION__))
;
5364 return extractSubVector(Vec, IdxVal, DAG, dl, 128);
5365}
5366
5367/// Generate a DAG to grab 256-bits from a 512-bit vector.
/// IdxVal is an element index inside the 256 bits wanted; extractSubVector
/// rounds it down to the start of that chunk. Vec is asserted to be 512 bits.
5368static SDValue extract256BitVector(SDValue Vec, unsigned IdxVal,
5369 SelectionDAG &DAG, const SDLoc &dl) {
5370 assert(Vec.getValueType().is512BitVector() && "Unexpected vector size!")((Vec.getValueType().is512BitVector() && "Unexpected vector size!"
) ? static_cast<void> (0) : __assert_fail ("Vec.getValueType().is512BitVector() && \"Unexpected vector size!\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 5370, __PRETTY_FUNCTION__))
;
5371 return extractSubVector(Vec, IdxVal, DAG, dl, 256);
5372}
5373
/// Insert Vec into Result as the vectorWidth-bit chunk containing element
/// IdxVal, producing an ISD::INSERT_SUBVECTOR node of Result's type.
///
/// vectorWidth is asserted to be 128 or 256. IdxVal is rounded down to the
/// first element of its chunk, so it need not be chunk-aligned. If Vec is
/// undef, Result is returned unchanged.
5374static SDValue insertSubVector(SDValue Result, SDValue Vec, unsigned IdxVal,
5375 SelectionDAG &DAG, const SDLoc &dl,
5376 unsigned vectorWidth) {
5377 assert((vectorWidth == 128 || vectorWidth == 256) &&(((vectorWidth == 128 || vectorWidth == 256) && "Unsupported vector width"
) ? static_cast<void> (0) : __assert_fail ("(vectorWidth == 128 || vectorWidth == 256) && \"Unsupported vector width\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 5378, __PRETTY_FUNCTION__))
5378 "Unsupported vector width")(((vectorWidth == 128 || vectorWidth == 256) && "Unsupported vector width"
) ? static_cast<void> (0) : __assert_fail ("(vectorWidth == 128 || vectorWidth == 256) && \"Unsupported vector width\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 5378, __PRETTY_FUNCTION__))
;
5379 // Inserting an UNDEF subvector leaves Result unchanged.
5380 if (Vec.isUndef())
5381 return Result;
5382 EVT VT = Vec.getValueType();
5383 EVT ElVT = VT.getVectorElementType();
5384 EVT ResultVT = Result.getValueType();
5385
5386 // Insert the relevant vectorWidth bits.
5387 unsigned ElemsPerChunk = vectorWidth/ElVT.getSizeInBits();
5388 assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2")((isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2"
) ? static_cast<void> (0) : __assert_fail ("isPowerOf2_32(ElemsPerChunk) && \"Elements per chunk not power of 2\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 5388, __PRETTY_FUNCTION__))
;
5389
5390 // This is the index of the first element of the vectorWidth-bit chunk
5391 // we want. Since ElemsPerChunk is a power of 2 just need to clear bits.
5392 IdxVal &= ~(ElemsPerChunk - 1);
5393
5394 SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, dl);
5395 return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Result, Vec, VecIdx);
5396}
5397
5398/// Generate a DAG to put 128-bits into a vector > 128 bits. This
5399/// sets things up to match to an AVX VINSERTF128/VINSERTI128 or
5400/// AVX-512 VINSERTF32x4/VINSERTI32x4 instructions or a
5401/// simple superregister reference. IdxVal is an element index inside the 128 bits
5402/// we want. It need not be aligned to a 128-bit boundary. That makes
5403/// lowering INSERT_VECTOR_ELT operations easier.
/// Vec (the subvector being inserted) is asserted to be a 128-bit vector.
5404static SDValue insert128BitVector(SDValue Result, SDValue Vec, unsigned IdxVal,
5405 SelectionDAG &DAG, const SDLoc &dl) {
5406 assert(Vec.getValueType().is128BitVector() && "Unexpected vector size!")((Vec.getValueType().is128BitVector() && "Unexpected vector size!"
) ? static_cast<void> (0) : __assert_fail ("Vec.getValueType().is128BitVector() && \"Unexpected vector size!\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 5406, __PRETTY_FUNCTION__))
;
5407 return insertSubVector(Result, Vec, IdxVal, DAG, dl, 128);
5408}
5409
5410/// Widen a vector to a larger size with the same scalar type, with the new
5411/// elements either zero or undef.
///
/// Vec is inserted at element index 0 of a VT-typed base vector, which is a
/// zero vector when ZeroNewElements is true and UNDEF otherwise. Vec is
/// asserted to be strictly smaller than VT and to share VT's scalar type.
5412static SDValue widenSubVector(MVT VT, SDValue Vec, bool ZeroNewElements,
5413 const X86Subtarget &Subtarget, SelectionDAG &DAG,
5414 const SDLoc &dl) {
5415 assert(Vec.getValueSizeInBits() < VT.getSizeInBits() &&((Vec.getValueSizeInBits() < VT.getSizeInBits() &&
Vec.getValueType().getScalarType() == VT.getScalarType() &&
"Unsupported vector widening type") ? static_cast<void>
(0) : __assert_fail ("Vec.getValueSizeInBits() < VT.getSizeInBits() && Vec.getValueType().getScalarType() == VT.getScalarType() && \"Unsupported vector widening type\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 5417, __PRETTY_FUNCTION__))
5416 Vec.getValueType().getScalarType() == VT.getScalarType() &&((Vec.getValueSizeInBits() < VT.getSizeInBits() &&
Vec.getValueType().getScalarType() == VT.getScalarType() &&
"Unsupported vector widening type") ? static_cast<void>
(0) : __assert_fail ("Vec.getValueSizeInBits() < VT.getSizeInBits() && Vec.getValueType().getScalarType() == VT.getScalarType() && \"Unsupported vector widening type\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 5417, __PRETTY_FUNCTION__))
5417 "Unsupported vector widening type")((Vec.getValueSizeInBits() < VT.getSizeInBits() &&
Vec.getValueType().getScalarType() == VT.getScalarType() &&
"Unsupported vector widening type") ? static_cast<void>
(0) : __assert_fail ("Vec.getValueSizeInBits() < VT.getSizeInBits() && Vec.getValueType().getScalarType() == VT.getScalarType() && \"Unsupported vector widening type\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 5417, __PRETTY_FUNCTION__))
;
5418 SDValue Res = ZeroNewElements ? getZeroVector(VT, Subtarget, DAG, dl)
5419 : DAG.getUNDEF(VT);
5420 return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VT, Res, Vec,
5421 DAG.getIntPtrConstant(0, dl));
5422}
5423
5424// Helper function to collect subvector ops that are concated together,
5425// either by ISD::CONCAT_VECTORS or a ISD::INSERT_SUBVECTOR series.
5426// The subvectors in Ops are guaranteed to be the same type.
5427static bool collectConcatOps(SDNode *N, SmallVectorImpl<SDValue> &Ops) {
5428 assert(Ops.empty() && "Expected an empty ops vector")((Ops.empty() && "Expected an empty ops vector") ? static_cast
<void> (0) : __assert_fail ("Ops.empty() && \"Expected an empty ops vector\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 5428, __PRETTY_FUNCTION__))
;
5429
5430 if (N->getOpcode() == ISD::CONCAT_VECTORS) {
5431 Ops.append(N->op_begin(), N->op_end());
5432 return true;
5433 }
5434
5435 if (N->getOpcode() == ISD::INSERT_SUBVECTOR &&
5436 isa<ConstantSDNode>(N->getOperand(2))) {
5437 SDValue Src = N->getOperand(0);
5438 SDValue Sub = N->getOperand(1);
5439 const APInt &Idx = N->getConstantOperandAPInt(2);
5440 EVT VT = Src.getValueType();
5441 EVT SubVT = Sub.getValueType();
5442
5443 // TODO - Handle more general insert_subvector chains.
5444 if (VT.getSizeInBits() == (SubVT.getSizeInBits() * 2) &&
5445 Idx == (VT.getVectorNumElements() / 2) &&
5446 Src.getOpcode() == ISD::INSERT_SUBVECTOR &&
5447 isNullConstant(Src.getOperand(2))) {
5448 Ops.push_back(Src.getOperand(1));
5449 Ops.push_back(Sub);
5450 return true;
5451 }
5452 }
5453
5454 return false;
5455}
5456
5457// Helper for splitting operands of an operation to legal target size and
5458// apply a function on each part.
5459// Useful for operations that are available on SSE2 in 128-bit, on AVX2 in
5460// 256-bit and on AVX512BW in 512-bit. The argument VT is the type used for
5461// deciding if/how to split Ops. Ops elements do *not* have to be of type VT.
5462// The argument Builder is a function that will be applied on each split part:
5463// SDValue Builder(SelectionDAG&G, SDLoc, ArrayRef<SDValue>)
5464template <typename F>
5465SDValue SplitOpsAndApply(SelectionDAG &DAG, const X86Subtarget &Subtarget,
5466 const SDLoc &DL, EVT VT, ArrayRef<SDValue> Ops,
5467 F Builder, bool CheckBWI = true) {
5468 assert(Subtarget.hasSSE2() && "Target assumed to support at least SSE2")((Subtarget.hasSSE2() && "Target assumed to support at least SSE2"
) ? static_cast<void> (0) : __assert_fail ("Subtarget.hasSSE2() && \"Target assumed to support at least SSE2\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 5468, __PRETTY_FUNCTION__))
;
5469 unsigned NumSubs = 1;
5470 if ((CheckBWI && Subtarget.useBWIRegs()) ||
5471 (!CheckBWI && Subtarget.useAVX512Regs())) {
5472 if (VT.getSizeInBits() > 512) {
5473 NumSubs = VT.getSizeInBits() / 512;
5474 assert((VT.getSizeInBits() % 512) == 0 && "Illegal vector size")(((VT.getSizeInBits() % 512) == 0 && "Illegal vector size"
) ? static_cast<void> (0) : __assert_fail ("(VT.getSizeInBits() % 512) == 0 && \"Illegal vector size\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 5474, __PRETTY_FUNCTION__))
;
5475 }
5476 } else if (Subtarget.hasAVX2()) {
5477 if (VT.getSizeInBits() > 256) {
5478 NumSubs = VT.getSizeInBits() / 256;
5479 assert((VT.getSizeInBits() % 256) == 0 && "Illegal vector size")(((VT.getSizeInBits() % 256) == 0 && "Illegal vector size"
) ? static_cast<void> (0) : __assert_fail ("(VT.getSizeInBits() % 256) == 0 && \"Illegal vector size\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 5479, __PRETTY_FUNCTION__))
;
5480 }
5481 } else {
5482 if (VT.getSizeInBits() > 128) {
5483 NumSubs = VT.getSizeInBits() / 128;
5484 assert((VT.getSizeInBits() % 128) == 0 && "Illegal vector size")(((VT.getSizeInBits() % 128) == 0 && "Illegal vector size"
) ? static_cast<void> (0) : __assert_fail ("(VT.getSizeInBits() % 128) == 0 && \"Illegal vector size\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 5484, __PRETTY_FUNCTION__))
;
5485 }
5486 }
5487
5488 if (NumSubs == 1)
5489 return Builder(DAG, DL, Ops);
5490
5491 SmallVector<SDValue, 4> Subs;
5492 for (unsigned i = 0; i != NumSubs; ++i) {
5493 SmallVector<SDValue, 2> SubOps;
5494 for (SDValue Op : Ops) {
5495 EVT OpVT = Op.getValueType();
5496 unsigned NumSubElts = OpVT.getVectorNumElements() / NumSubs;
5497 unsigned SizeSub = OpVT.getSizeInBits() / NumSubs;
5498 SubOps.push_back(extractSubVector(Op, i * NumSubElts, DAG, DL, SizeSub));
5499 }
5500 Subs.push_back(Builder(DAG, DL, SubOps));
5501 }
5502 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Subs);
5503}
5504
5505// Return true if the instruction zeroes the unused upper part of the
5506// destination and accepts mask.
5507static bool isMaskedZeroUpperBitsvXi1(unsigned int Opcode) {
5508 switch (Opcode) {
5509 default:
5510 return false;
5511 case X86ISD::CMPM:
5512 case X86ISD::CMPM_SAE:
5513 case ISD::SETCC:
5514 return true;
5515 }
5516}
5517
5518/// Insert i1-subvector to i1-vector.
5519static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG,
5520 const X86Subtarget &Subtarget) {
5521
5522 SDLoc dl(Op);
5523 SDValue Vec = Op.getOperand(0);
5524 SDValue SubVec = Op.getOperand(1);
5525 SDValue Idx = Op.getOperand(2);
5526
5527 if (!isa<ConstantSDNode>(Idx))
5528 return SDValue();
5529
5530 // Inserting undef is a nop. We can just return the original vector.
5531 if (SubVec.isUndef())
5532 return Vec;
5533
5534 unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
5535 if (IdxVal == 0 && Vec.isUndef()) // the operation is legal
5536 return Op;
5537
5538 MVT OpVT = Op.getSimpleValueType();
5539 unsigned NumElems = OpVT.getVectorNumElements();
5540
5541 SDValue ZeroIdx = DAG.getIntPtrConstant(0, dl);
5542
5543 // Extend to natively supported kshift.
5544 MVT WideOpVT = OpVT;
5545 if ((!Subtarget.hasDQI() && NumElems == 8) || NumElems < 8)
5546 WideOpVT = Subtarget.hasDQI() ? MVT::v8i1 : MVT::v16i1;
5547
5548 // Inserting into the lsbs of a zero vector is legal. ISel will insert shifts
5549 // if necessary.
5550 if (IdxVal == 0 && ISD::isBuildVectorAllZeros(Vec.getNode())) {
5551 // May need to promote to a legal type.
5552 Op = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
5553 getZeroVector(WideOpVT, Subtarget, DAG, dl),
5554 SubVec, Idx);
5555 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, Op, ZeroIdx);
5556 }
5557
5558 MVT SubVecVT = SubVec.getSimpleValueType();
5559 unsigned SubVecNumElems = SubVecVT.getVectorNumElements();
5560
5561 assert(IdxVal + SubVecNumElems <= NumElems &&((IdxVal + SubVecNumElems <= NumElems && IdxVal % SubVecVT
.getSizeInBits() == 0 && "Unexpected index value in INSERT_SUBVECTOR"
) ? static_cast<void> (0) : __assert_fail ("IdxVal + SubVecNumElems <= NumElems && IdxVal % SubVecVT.getSizeInBits() == 0 && \"Unexpected index value in INSERT_SUBVECTOR\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 5563, __PRETTY_FUNCTION__))
5562 IdxVal % SubVecVT.getSizeInBits() == 0 &&((IdxVal + SubVecNumElems <= NumElems && IdxVal % SubVecVT
.getSizeInBits() == 0 && "Unexpected index value in INSERT_SUBVECTOR"
) ? static_cast<void> (0) : __assert_fail ("IdxVal + SubVecNumElems <= NumElems && IdxVal % SubVecVT.getSizeInBits() == 0 && \"Unexpected index value in INSERT_SUBVECTOR\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 5563, __PRETTY_FUNCTION__))
5563 "Unexpected index value in INSERT_SUBVECTOR")((IdxVal + SubVecNumElems <= NumElems && IdxVal % SubVecVT
.getSizeInBits() == 0 && "Unexpected index value in INSERT_SUBVECTOR"
) ? static_cast<void> (0) : __assert_fail ("IdxVal + SubVecNumElems <= NumElems && IdxVal % SubVecVT.getSizeInBits() == 0 && \"Unexpected index value in INSERT_SUBVECTOR\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 5563, __PRETTY_FUNCTION__))
;
5564
5565 SDValue Undef = DAG.getUNDEF(WideOpVT);
5566
5567 if (IdxVal == 0) {
5568 // Zero lower bits of the Vec
5569 SDValue ShiftBits = DAG.getConstant(SubVecNumElems, dl, MVT::i8);
5570 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, Undef, Vec,
5571 ZeroIdx);
5572 Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec, ShiftBits);
5573 Vec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Vec, ShiftBits);
5574 // Merge them together, SubVec should be zero extended.
5575 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
5576 getZeroVector(WideOpVT, Subtarget, DAG, dl),
5577 SubVec, ZeroIdx);
5578 Op = DAG.getNode(ISD::OR, dl, WideOpVT, Vec, SubVec);
5579 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, Op, ZeroIdx);
5580 }
5581
5582 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
5583 Undef, SubVec, ZeroIdx);
5584
5585 if (Vec.isUndef()) {
5586 assert(IdxVal != 0 && "Unexpected index")((IdxVal != 0 && "Unexpected index") ? static_cast<
void> (0) : __assert_fail ("IdxVal != 0 && \"Unexpected index\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 5586, __PRETTY_FUNCTION__))
;
5587 SubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, SubVec,
5588 DAG.getConstant(IdxVal, dl, MVT::i8));
5589 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, SubVec, ZeroIdx);
5590 }
5591
5592 if (ISD::isBuildVectorAllZeros(Vec.getNode())) {
5593 assert(IdxVal != 0 && "Unexpected index")((IdxVal != 0 && "Unexpected index") ? static_cast<
void> (0) : __assert_fail ("IdxVal != 0 && \"Unexpected index\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 5593, __PRETTY_FUNCTION__))
;
5594 NumElems = WideOpVT.getVectorNumElements();
5595 unsigned ShiftLeft = NumElems - SubVecNumElems;
5596 unsigned ShiftRight = NumElems - SubVecNumElems - IdxVal;
5597 SubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, SubVec,
5598 DAG.getConstant(ShiftLeft, dl, MVT::i8));
5599 if (ShiftRight != 0)
5600 SubVec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, SubVec,
5601 DAG.getConstant(ShiftRight, dl, MVT::i8));
5602 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, SubVec, ZeroIdx);
5603 }
5604
5605 // Simple case when we put subvector in the upper part
5606 if (IdxVal + SubVecNumElems == NumElems) {
5607 SubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, SubVec,
5608 DAG.getConstant(IdxVal, dl, MVT::i8));
5609 if (SubVecNumElems * 2 == NumElems) {
5610 // Special case, use legal zero extending insert_subvector. This allows
5611 // isel to opimitize when bits are known zero.
5612 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVecVT, Vec, ZeroIdx);
5613 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
5614 getZeroVector(WideOpVT, Subtarget, DAG, dl),
5615 Vec, ZeroIdx);
5616 } else {
5617 // Otherwise use explicit shifts to zero the bits.
5618 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
5619 Undef, Vec, ZeroIdx);
5620 NumElems = WideOpVT.getVectorNumElements();
5621 SDValue ShiftBits = DAG.getConstant(NumElems - IdxVal, dl, MVT::i8);
5622 Vec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Vec, ShiftBits);
5623 Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec, ShiftBits);
5624 }
5625 Op = DAG.getNode(ISD::OR, dl, WideOpVT, Vec, SubVec);
5626 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, Op, ZeroIdx);
5627 }
5628
5629 // Inserting into the middle is more complicated.
5630
5631 NumElems = WideOpVT.getVectorNumElements();
5632
5633 // Widen the vector if needed.
5634 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, Undef, Vec, ZeroIdx);
5635 // Move the current value of the bit to be replace to the lsbs.
5636 Op = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec,
5637 DAG.getConstant(IdxVal, dl, MVT::i8));
5638 // Xor with the new bit.
5639 Op = DAG.getNode(ISD::XOR, dl, WideOpVT, Op, SubVec);
5640 // Shift to MSB, filling bottom bits with 0.
5641 unsigned ShiftLeft = NumElems - SubVecNumElems;
5642 Op = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Op,
5643 DAG.getConstant(ShiftLeft, dl, MVT::i8));
5644 // Shift to the final position, filling upper bits with 0.
5645 unsigned ShiftRight = NumElems - SubVecNumElems - IdxVal;
5646 Op = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Op,
5647 DAG.getConstant(ShiftRight, dl, MVT::i8));
5648 // Xor with original vector leaving the new value.
5649 Op = DAG.getNode(ISD::XOR, dl, WideOpVT, Vec, Op);
5650 // Reduce to original width if needed.
5651 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, Op, ZeroIdx);
5652}
5653
5654static SDValue concatSubVectors(SDValue V1, SDValue V2, EVT VT,
5655 unsigned NumElems, SelectionDAG &DAG,
5656 const SDLoc &dl, unsigned VectorWidth) {
5657 SDValue V = insertSubVector(DAG.getUNDEF(VT), V1, 0, DAG, dl, VectorWidth);
5658 return insertSubVector(V, V2, NumElems / 2, DAG, dl, VectorWidth);
5659}
5660
5661/// Returns a vector of specified type with all bits set.
5662/// Always build ones vectors as <4 x i32>, <8 x i32> or <16 x i32>.
5663/// Then bitcast to their original type, ensuring they get CSE'd.
5664static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, const SDLoc &dl) {
5665 assert((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) &&(((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector
()) && "Expected a 128/256/512-bit vector type") ? static_cast
<void> (0) : __assert_fail ("(VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) && \"Expected a 128/256/512-bit vector type\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 5666, __PRETTY_FUNCTION__))
5666 "Expected a 128/256/512-bit vector type")(((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector
()) && "Expected a 128/256/512-bit vector type") ? static_cast
<void> (0) : __assert_fail ("(VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) && \"Expected a 128/256/512-bit vector type\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 5666, __PRETTY_FUNCTION__))
;
5667
5668 APInt Ones = APInt::getAllOnesValue(32);
5669 unsigned NumElts = VT.getSizeInBits() / 32;
5670 SDValue Vec = DAG.getConstant(Ones, dl, MVT::getVectorVT(MVT::i32, NumElts));
5671 return DAG.getBitcast(VT, Vec);
5672}
5673
5674static SDValue getExtendInVec(bool Signed, const SDLoc &DL, EVT VT, SDValue In,
5675 SelectionDAG &DAG) {
5676 EVT InVT = In.getValueType();
5677 assert(VT.isVector() && InVT.isVector() && "Expected vector VTs.")((VT.isVector() && InVT.isVector() && "Expected vector VTs."
) ? static_cast<void> (0) : __assert_fail ("VT.isVector() && InVT.isVector() && \"Expected vector VTs.\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 5677, __PRETTY_FUNCTION__))
;
5678
5679 // For 256-bit vectors, we only need the lower (128-bit) input half.
5680 // For 512-bit vectors, we only need the lower input half or quarter.
5681 if (InVT.getSizeInBits() > 128) {
5682 assert(VT.getSizeInBits() == InVT.getSizeInBits() &&((VT.getSizeInBits() == InVT.getSizeInBits() && "Expected VTs to be the same size!"
) ? static_cast<void> (0) : __assert_fail ("VT.getSizeInBits() == InVT.getSizeInBits() && \"Expected VTs to be the same size!\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 5683, __PRETTY_FUNCTION__))
5683 "Expected VTs to be the same size!")((VT.getSizeInBits() == InVT.getSizeInBits() && "Expected VTs to be the same size!"
) ? static_cast<void> (0) : __assert_fail ("VT.getSizeInBits() == InVT.getSizeInBits() && \"Expected VTs to be the same size!\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 5683, __PRETTY_FUNCTION__))
;
5684 unsigned Scale = VT.getScalarSizeInBits() / InVT.getScalarSizeInBits();
5685 In = extractSubVector(In, 0, DAG, DL,
5686 std::max(128U, VT.getSizeInBits() / Scale));
5687 InVT = In.getValueType();
5688 }
5689
5690 if (VT.getVectorNumElements() == InVT.getVectorNumElements())
5691 return DAG.getNode(Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
5692 DL, VT, In);
5693
5694 return DAG.getNode(Signed ? ISD::SIGN_EXTEND_VECTOR_INREG
5695 : ISD::ZERO_EXTEND_VECTOR_INREG,
5696 DL, VT, In);
5697}
5698
5699/// Returns a vector_shuffle node for an unpackl operation.
5700static SDValue getUnpackl(SelectionDAG &DAG, const SDLoc &dl, MVT VT,
5701 SDValue V1, SDValue V2) {
5702 SmallVector<int, 8> Mask;
5703 createUnpackShuffleMask(VT, Mask, /* Lo = */ true, /* Unary = */ false);
5704 return DAG.getVectorShuffle(VT, dl, V1, V2, Mask);
5705}
5706
5707/// Returns a vector_shuffle node for an unpackh operation.
5708static SDValue getUnpackh(SelectionDAG &DAG, const SDLoc &dl, MVT VT,
5709 SDValue V1, SDValue V2) {
5710 SmallVector<int, 8> Mask;
5711 createUnpackShuffleMask(VT, Mask, /* Lo = */ false, /* Unary = */ false);
5712 return DAG.getVectorShuffle(VT, dl, V1, V2, Mask);
5713}
5714
5715/// Return a vector_shuffle of the specified vector of zero or undef vector.
5716/// This produces a shuffle where the low element of V2 is swizzled into the
5717/// zero/undef vector, landing at element Idx.
5718/// This produces a shuffle mask like 4,1,2,3 (idx=0) or 0,1,2,4 (idx=3).
5719static SDValue getShuffleVectorZeroOrUndef(SDValue V2, int Idx,
5720 bool IsZero,
5721 const X86Subtarget &Subtarget,
5722 SelectionDAG &DAG) {
5723 MVT VT = V2.getSimpleValueType();
5724 SDValue V1 = IsZero
5725 ? getZeroVector(VT, Subtarget, DAG, SDLoc(V2)) : DAG.getUNDEF(VT);
5726 int NumElems = VT.getVectorNumElements();
5727 SmallVector<int, 16> MaskVec(NumElems);
5728 for (int i = 0; i != NumElems; ++i)
5729 // If this is the insertion idx, put the low elt of V2 here.
5730 MaskVec[i] = (i == Idx) ? NumElems : i;
5731 return DAG.getVectorShuffle(VT, SDLoc(V2), V1, V2, MaskVec);
5732}
5733
5734static const Constant *getTargetConstantFromNode(SDValue Op) {
5735 Op = peekThroughBitcasts(Op);
5736
5737 auto *Load = dyn_cast<LoadSDNode>(Op);
5738 if (!Load)
5739 return nullptr;
5740
5741 SDValue Ptr = Load->getBasePtr();
5742 if (Ptr->getOpcode() == X86ISD::Wrapper ||
5743 Ptr->getOpcode() == X86ISD::WrapperRIP)
5744 Ptr = Ptr->getOperand(0);
5745
5746 auto *CNode = dyn_cast<ConstantPoolSDNode>(Ptr);
5747 if (!CNode || CNode->isMachineConstantPoolEntry() || CNode->getOffset() != 0)
5748 return nullptr;
5749
5750 return CNode->getConstVal();
5751}
5752
5753// Extract raw constant bits from constant pools.
5754static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
5755 APInt &UndefElts,
5756 SmallVectorImpl<APInt> &EltBits,
5757 bool AllowWholeUndefs = true,
5758 bool AllowPartialUndefs = true) {
5759 assert(EltBits.empty() && "Expected an empty EltBits vector")((EltBits.empty() && "Expected an empty EltBits vector"
) ? static_cast<void> (0) : __assert_fail ("EltBits.empty() && \"Expected an empty EltBits vector\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 5759, __PRETTY_FUNCTION__))
;
5760
5761 Op = peekThroughBitcasts(Op);
5762
5763 EVT VT = Op.getValueType();
5764 unsigned SizeInBits = VT.getSizeInBits();
5765 assert((SizeInBits % EltSizeInBits) == 0 && "Can't split constant!")(((SizeInBits % EltSizeInBits) == 0 && "Can't split constant!"
) ? static_cast<void> (0) : __assert_fail ("(SizeInBits % EltSizeInBits) == 0 && \"Can't split constant!\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 5765, __PRETTY_FUNCTION__))
;
5766 unsigned NumElts = SizeInBits / EltSizeInBits;
5767
5768 // Bitcast a source array of element bits to the target size.
5769 auto CastBitData = [&](APInt &UndefSrcElts, ArrayRef<APInt> SrcEltBits) {
5770 unsigned NumSrcElts = UndefSrcElts.getBitWidth();
5771 unsigned SrcEltSizeInBits = SrcEltBits[0].getBitWidth();
5772 assert((NumSrcElts * SrcEltSizeInBits) == SizeInBits &&(((NumSrcElts * SrcEltSizeInBits) == SizeInBits && "Constant bit sizes don't match"
) ? static_cast<void> (0) : __assert_fail ("(NumSrcElts * SrcEltSizeInBits) == SizeInBits && \"Constant bit sizes don't match\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 5773, __PRETTY_FUNCTION__))
5773 "Constant bit sizes don't match")(((NumSrcElts * SrcEltSizeInBits) == SizeInBits && "Constant bit sizes don't match"
) ? static_cast<void> (0) : __assert_fail ("(NumSrcElts * SrcEltSizeInBits) == SizeInBits && \"Constant bit sizes don't match\""
, "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86ISelLowering.cpp"
, 5773, __PRETTY_FUNCTION__))
;
5774
5775 // Don't split if we don't allow undef bits.
5776 bool AllowUndefs = AllowWholeUndefs || AllowPartialUndefs;
5777 if (UndefSrcElts.getBoolValue() && !AllowUndefs)
5778 return false;
5779
5780 // If we're already the right size, don't bother bitcasting.
5781 if (NumSrcElts == NumElts) {
5782 UndefElts = UndefSrcElts;
5783 EltBits.assign(SrcEltBits.begin(), SrcEltBits.end());
5784 return true;
5785 }
5786
5787 // Extract all the undef/constant element data and pack into single bitsets.
5788 APInt UndefBits(SizeInBits, 0);
5789 APInt MaskBits(SizeInBits, 0);
5790
5791 for (unsigned i = 0; i != NumSrcElts; ++i) {
5792 unsigned BitOffset = i * SrcEltSizeInBits;
5793 if (UndefSrcElts[i])
5794 UndefBits.setBits(BitOffset, BitOffset + SrcEltSizeInBits);
5795 MaskBits.insertBits(SrcEltBits[i], BitOffset);
5796 }
5797
5798 // Split the undef/constant single bitset data into the target elements.
5799 UndefElts = APInt(NumElts, 0);
5800 EltBits.resize(NumElts, APInt(EltSizeInBits, 0));
5801
5802 for (unsigned i = 0; i != NumElts; ++i) {
5803 unsigned BitOffset = i * EltSizeInBits;
5804 APInt UndefEltBits = UndefBits.extractBits(EltSizeInBits, BitOffset);
5805
5806 // Only treat an element as UNDEF if all bits are UNDEF.
5807 if (UndefEltBits.isAllOnesValue()) {
5808 if (!AllowWholeUndefs)
5809 return false;
5810 UndefElts.setBit(i);
5811 continue;
5812 }
5813
5814 // If only some bits are UNDEF then treat them as zero (or bail if not
5815 // supported).
5816 if (UndefEltBits.getBoolValue() && !AllowPartialUndefs)
5817 return false;
5818
5819 APInt Bits = MaskBits.extractBits(EltSizeInBits, BitOffset);
5820 EltBits[i] = Bits.getZExtValue();
5821 }
5822 return true;
5823 };
5824
5825 // Collect constant bits and insert into mask/undef bit masks.
5826 auto CollectConstantBits = [](const Constant *Cst, APInt &Mask, APInt &Undefs,
5827 unsigned UndefBitIndex) {
5828 if (!Cst)
5829 return false;
5830 if (isa<UndefValue>(Cst)) {
5831 Undefs.setBit(UndefBitIndex);
5832 return true;
5833 }
5834 if (auto *CInt = dyn_cast<ConstantInt>(Cst)) {
5835 Mask = CInt->getValue();
5836 return true;
5837 }
5838 if (auto *CFP = dyn_cast<ConstantFP>(Cst)) {
5839 Mask = CFP->getValueAPF().bitcastToAPInt();
5840 return true;
5841 }
5842 return false;
5843 };
5844
5845 // Handle UNDEFs.
5846 if (Op.isUndef()) {
5847 APInt UndefSrcElts = APInt::getAllOnesValue(NumElts);
5848 SmallVector<APInt, 64> SrcEltBits(NumElts, APInt(EltSizeInBits, 0));
5849 return CastBitData(UndefSrcElts, SrcEltBits);
5850 }
5851
5852 // Extract scalar constant bits.
5853 if (auto *Cst = dyn_cast<ConstantSDNode>(Op)) {
5854 APInt UndefSrcElts = APInt::getNullValue(1);
5855 SmallVector<APInt, 64> SrcEltBits(1, Cst->getAPIntValue());
5856 return CastBitData(UndefSrcElts, SrcEltBits);
5857 }
5858 if (auto *Cst = dyn_cast<ConstantFPSDNode>(Op)) {
5859 APInt UndefSrcElts = APInt::getNullValue(1);
5860 APInt RawBits = Cst->getValueAPF().bitcastToAPInt();
5861 SmallVector<APInt, 64> SrcEltBits(1, RawBits);
5862 return CastBitData(UndefSrcElts, SrcEltBits);
5863 }
5864
5865 // Extract constant bits from build vector.
5866 if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
5867 unsigned SrcEltSizeInBits = VT.getScalarSizeInBits();
5868 unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits;
5869
5870 APInt UndefSrcElts(NumSrcElts, 0);
5871 SmallVector<APInt, 64> SrcEltBits(NumSrcElts, APInt(SrcEltSizeInBits, 0));
5872 for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
5873 const SDValue &Src = Op.getOperand(i);
5874 if (Src.isUndef()) {
5875 UndefSrcElts.setBit(i);
5876 continue;
5877 }
5878 auto *Cst = cast<ConstantSDNode>(Src);
5879 SrcEltBits[i] = Cst->getAPIntValue().zextOrTrunc(SrcEltSizeInBits);
5880 }
5881 return CastBitData(UndefSrcElts, SrcEltBits);
5882 }
5883 if (ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode())) {
5884 unsigned SrcEltSizeInBits = VT.getScalarSizeInBits();
5885 unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits;
5886
5887 APInt UndefSrcElts(NumSrcElts, 0);
5888 SmallVector<APInt, 64> SrcEltBits(NumSrcElts, APInt(SrcEltSizeInBits, 0));
5889 for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
5890 const SDValue &Src = Op.getOperand(i);
5891 if (Src.isUndef()) {
5892 UndefSrcElts.setBit(i);
5893 continue;
5894 }
5895 auto *Cst = cast<ConstantFPSDNode>(Src);
5896 APInt RawBits = Cst->getValueAPF().bitcastToAPInt();
5897 SrcEltBits[i] = RawBits.zextOrTrunc(SrcEltSizeInBits);
5898 }
5899 return CastBitData(UndefSrcElts, SrcEltBits);
5900 }
5901